1<?php 2 3declare(strict_types=1); 4 5namespace Antlr\Antlr4\Runtime\Error; 6 7use Antlr\Antlr4\Runtime\Atn\States\ATNState; 8use Antlr\Antlr4\Runtime\Atn\Transitions\RuleTransition; 9use Antlr\Antlr4\Runtime\Error\Exceptions\FailedPredicateException; 10use Antlr\Antlr4\Runtime\Error\Exceptions\InputMismatchException; 11use Antlr\Antlr4\Runtime\Error\Exceptions\NoViableAltException; 12use Antlr\Antlr4\Runtime\Error\Exceptions\RecognitionException; 13use Antlr\Antlr4\Runtime\IntervalSet; 14use Antlr\Antlr4\Runtime\Parser; 15use Antlr\Antlr4\Runtime\ParserRuleContext; 16use Antlr\Antlr4\Runtime\Token; 17use Antlr\Antlr4\Runtime\Utils\Pair; 18use Antlr\Antlr4\Runtime\Utils\StringUtils; 19 20/** 21 * This is the default implementation of {@see ANTLRErrorStrategy} used for 22 * error reporting and recovery in ANTLR parsers. 23 */ 24class DefaultErrorStrategy implements ANTLRErrorStrategy 25{ 26 /** 27 * Indicates whether the error strategy is currently "recovering from an 28 * error". This is used to suppress reporting multiple error messages while 29 * attempting to recover from a detected syntax error. 30 * 31 * @see DefaultErrorStrategy::inErrorRecoveryMode() 32 * 33 * @var bool 34 */ 35 protected $errorRecoveryMode = false; 36 37 /** The index into the input stream where the last error occurred. 38 * This is used to prevent infinite loops where an error is found 39 * but no token is consumed during recovery...another error is found, 40 * ad nauseum. This is a failsafe mechanism to guarantee that at least 41 * one token/tree node is consumed for two errors. 42 * 43 * @var int 44 */ 45 protected $lastErrorIndex = -1; 46 47 /** @var IntervalSet|null */ 48 protected $lastErrorStates; 49 50 /** 51 * This field is used to propagate information about the lookahead following 52 * the previous match. Since prediction prefers completing the current rule 53 * to error recovery efforts, error reporting may occur later than the 54 * original point where it was discoverable. The original context is used to 55 * compute the true expected sets as though the reporting occurred as early 56 * as possible. 57 * 58 * @var ParserRuleContext|null 59 */ 60 protected $nextTokensContext; 61 62 /** 63 * @see DefaultErrorStrategy::$nextTokensContext 64 * 65 * @var int|null 66 */ 67 protected $nextTokensState; 68 69 /** 70 * {@inheritdoc} 71 * 72 * The default implementation simply calls 73 * {@see DefaultErrorStrategy::endErrorCondition()} to ensure that 74 * the handler is not in error recovery mode. 75 */ 76 public function reset(Parser $recognizer) : void 77 { 78 $this->endErrorCondition($recognizer); 79 } 80 81 /** 82 * This method is called to enter error recovery mode when a recognition 83 * exception is reported. 84 * 85 * @param Parser $recognizer The parser instance. 86 */ 87 protected function beginErrorCondition(Parser $recognizer) : void 88 { 89 $this->errorRecoveryMode = true; 90 } 91 92 public function inErrorRecoveryMode(Parser $recognizer) : bool 93 { 94 return $this->errorRecoveryMode; 95 } 96 97 /** 98 * This method is called to leave error recovery mode after recovering from 99 * a recognition exception. 100 */ 101 protected function endErrorCondition(Parser $recognizer) : void 102 { 103 $this->errorRecoveryMode = false; 104 $this->lastErrorStates = null; 105 $this->lastErrorIndex = -1; 106 } 107 108 /** 109 * {@inheritdoc} 110 * 111 * The default implementation simply calls 112 * {@see DefaultErrorStrategy::endErrorCondition()}. 113 */ 114 public function reportMatch(Parser $recognizer) : void 115 { 116 $this->endErrorCondition($recognizer); 117 } 118 119 /** 120 * {@inheritdoc} 121 * 122 * The default implementation returns immediately if the handler is already 123 * in error recovery mode. Otherwise, it calls 124 * {@see DefaultErrorStrategy::beginErrorCondition()} and dispatches 125 * the reporting task based on the runtime type of `e` according to 126 * the following table. 127 * 128 * - {@see NoViableAltException}: Dispatches the call to 129 * {@see reportNoViableAlternative} 130 * - {@see InputMismatchException}: Dispatches the call to 131 * {@see reportInputMismatch} 132 * - {@see FailedPredicateException}: Dispatches the call to 133 * {@see reportFailedPredicate} 134 * - All other types: calls {@see Parser#notifyErrorListeners} to report 135 * the exception 136 */ 137 public function reportError(Parser $recognizer, RecognitionException $e) : void 138 { 139 // if we've already reported an error and have not matched a token 140 // yet successfully, don't report any errors. 141 if ($this->inErrorRecoveryMode($recognizer)) { 142 // don't report spurious errors 143 return; 144 } 145 146 $this->beginErrorCondition($recognizer); 147 148 if ($e instanceof NoViableAltException) { 149 $this->reportNoViableAlternative($recognizer, $e); 150 } elseif ($e instanceof InputMismatchException) { 151 $this->reportInputMismatch($recognizer, $e); 152 } elseif ($e instanceof FailedPredicateException) { 153 $this->reportFailedPredicate($recognizer, $e); 154 } else { 155 $recognizer->notifyErrorListeners($e->getMessage(), $e->getOffendingToken(), $e); 156 } 157 } 158 159 /** 160 * {@inheritdoc} 161 * 162 * The default implementation resynchronizes the parser by consuming tokens 163 * until we find one in the resynchronization set--loosely the set of tokens 164 * that can follow the current rule. 165 */ 166 public function recover(Parser $recognizer, RecognitionException $e) : void 167 { 168 $inputStream = $recognizer->getInputStream(); 169 170 if ($inputStream === null) { 171 throw new \RuntimeException('Unexpected null input stream.'); 172 } 173 174 if ($this->lastErrorStates !== null 175 && $this->lastErrorIndex === $inputStream->getIndex() 176 && $this->lastErrorStates->contains($recognizer->getState()) 177 ) { 178 // uh oh, another error at same token index and previously-visited 179 // state in ATN; must be a case where LT(1) is in the recovery 180 // token set so nothing got consumed. Consume a single token 181 // at least to prevent an infinite loop; this is a failsafe. 182 $recognizer->consume(); 183 } 184 185 $this->lastErrorIndex = $inputStream->getIndex(); 186 187 if ($this->lastErrorStates === null) { 188 $this->lastErrorStates = new IntervalSet(); 189 } 190 191 $this->lastErrorStates->addOne($recognizer->getState()); 192 193 $followSet = $this->getErrorRecoverySet($recognizer); 194 195 $this->consumeUntil($recognizer, $followSet); 196 } 197 198 /** 199 * The default implementation of {@see ANTLRErrorStrategy::sync()} makes sure 200 * that the current lookahead symbol is consistent with what were expecting 201 * at this point in the ATN. You can call this anytime but ANTLR only 202 * generates code to check before subrules/loops and each iteration. 203 * 204 * Implements Jim Idle's magic sync mechanism in closures and optional 205 * subrules. E.g., 206 * 207 * a : sync ( stuff sync )* ; 208 * sync : {consume to what can follow sync} ; 209 * 210 * At the start of a sub rule upon error, {@see sync} performs single 211 * token deletion, if possible. If it can't do that, it bails on the current 212 * rule and uses the default error recovery, which consumes until the 213 * resynchronization set of the current rule. 214 * 215 * If the sub rule is optional (`(...)?`, `(...)*`, or block 216 * with an empty alternative), then the expected set includes what follows 217 * the subrule. 218 * 219 * During loop iteration, it consumes until it sees a token that can start a 220 * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to 221 * stay in the loop as long as possible. 222 * 223 * ORIGINS 224 * 225 * Previous versions of ANTLR did a poor job of their recovery within loops. 226 * A single mismatch token or missing token would force the parser to bail 227 * out of the entire rules surrounding the loop. So, for rule 228 * 229 * classDef : 'class' ID '{' member* '}' 230 * 231 * input with an extra token between members would force the parser to 232 * consume until it found the next class definition rather than the next 233 * member definition of the current class. 234 * 235 * This functionality cost a little bit of effort because the parser has to 236 * compare token set at the start of the loop and at each iteration. If for 237 * some reason speed is suffering for you, you can turn off this 238 * functionality by simply overriding this method as a blank { }. 239 * 240 * @throws RecognitionException 241 */ 242 public function sync(Parser $recognizer) : void 243 { 244 $interpreter = $recognizer->getInterpreter(); 245 246 if ($interpreter === null) { 247 throw new \RuntimeException('Unexpected null interpreter.'); 248 } 249 250 /** @var ATNState $s */ 251 $s = $interpreter->atn->states[$recognizer->getState()]; 252 253 // If already recovering, don't try to sync 254 if ($this->inErrorRecoveryMode($recognizer)) { 255 return; 256 } 257 258 $tokens = $recognizer->getInputStream(); 259 260 if ($tokens === null) { 261 throw new \RuntimeException('Unexpected null input stream.'); 262 } 263 264 $la = $tokens->LA(1); 265 266 // try cheaper subset first; might get lucky. seems to shave a wee bit off 267 $nextTokens = $recognizer->getATN()->nextTokens($s); 268 269 if ($nextTokens->contains($la)) { 270 // We are sure the token matches 271 $this->nextTokensContext = null; 272 $this->nextTokensState = ATNState::INVALID_STATE_NUMBER; 273 274 return; 275 } 276 277 if ($nextTokens->contains(Token::EPSILON)) { 278 if ($this->nextTokensContext === null) { 279 // It's possible the next token won't match; information tracked 280 // by sync is restricted for performance. 281 $this->nextTokensContext = $recognizer->getContext(); 282 $this->nextTokensState = $recognizer->getState(); 283 } 284 return; 285 } 286 287 switch ($s->getStateType()) { 288 case ATNState::BLOCK_START: 289 case ATNState::STAR_BLOCK_START: 290 case ATNState::PLUS_BLOCK_START: 291 case ATNState::STAR_LOOP_ENTRY: 292 // report error and recover if possible 293 if ($this->singleTokenDeletion($recognizer) !== null) { 294 return; 295 } 296 297 throw new InputMismatchException($recognizer); 298 299 case ATNState::PLUS_LOOP_BACK: 300 case ATNState::STAR_LOOP_BACK: 301 $this->reportUnwantedToken($recognizer); 302 $expecting = $recognizer->getExpectedTokens(); 303 $whatFollowsLoopIterationOrRule = $expecting->orSet($this->getErrorRecoverySet($recognizer)); 304 $this->consumeUntil($recognizer, $whatFollowsLoopIterationOrRule); 305 break; 306 307 default: 308 // do nothing if we can't identify the exact kind of ATN state 309 break; 310 } 311 } 312 313 /** 314 * This is called by {@see DefaultErrorStrategy::reportError()} when 315 * the exception is a {@see NoViableAltException}. 316 * 317 * @param Parser $recognizer The parser instance. 318 * @param NoViableAltException $e The recognition exception. 319 * 320 * @see DefaultErrorStrategy::reportError() 321 */ 322 protected function reportNoViableAlternative(Parser $recognizer, NoViableAltException $e) : void 323 { 324 $tokens = $recognizer->getTokenStream(); 325 326 $input = '<unknown input>'; 327 328 if ($tokens !== null) { 329 $startToken = $e->getStartToken(); 330 331 if ($startToken === null) { 332 throw new \RuntimeException('Unexpected null start token.'); 333 } 334 335 if ($startToken->getType() === Token::EOF) { 336 $input = '<EOF>'; 337 } else { 338 $input = $tokens->getTextByTokens($e->getStartToken(), $e->getOffendingToken()); 339 } 340 } 341 342 $msg = \sprintf('no viable alternative at input %s', $this->escapeWSAndQuote($input)); 343 344 $recognizer->notifyErrorListeners($msg, $e->getOffendingToken(), $e); 345 } 346 347 /** 348 * This is called by {@see DefaultErrorStrategy::reportError()} when 349 * the exception is an {@see InputMismatchException}. 350 * 351 * @param Parser $recognizer The parser instance. 352 * @param InputMismatchException $e The recognition exception. 353 * 354 * @see DefaultErrorStrategy::reportError() 355 */ 356 protected function reportInputMismatch(Parser $recognizer, InputMismatchException $e) : void 357 { 358 $expectedTokens = $e->getExpectedTokens(); 359 360 if ($expectedTokens === null) { 361 throw new \RuntimeException('Unexpected null expected tokens.'); 362 } 363 364 $msg = \sprintf( 365 'mismatched input %s expecting %s', 366 $this->getTokenErrorDisplay($e->getOffendingToken()), 367 $expectedTokens->toStringVocabulary($recognizer->getVocabulary()) 368 ); 369 370 $recognizer->notifyErrorListeners($msg, $e->getOffendingToken(), $e); 371 } 372 373 /** 374 * This is called by {@see DefaultErrorStrategy::reportError()} when 375 * the exception is a {@see FailedPredicateException}. 376 * 377 * @param Parser $recognizer The parser instance. 378 * @param FailedPredicateException $e The recognition exception. 379 * 380 * @see DefaultErrorStrategy::reportError() 381 */ 382 protected function reportFailedPredicate(Parser $recognizer, FailedPredicateException $e) : void 383 { 384 $msg = \sprintf('rule %s %s', $recognizer->getCurrentRuleName(), $e->getMessage()); 385 386 $recognizer->notifyErrorListeners($msg, $e->getOffendingToken(), $e); 387 } 388 389 /** 390 * This method is called to report a syntax error which requires the removal 391 * of a token from the input stream. At the time this method is called, the 392 * erroneous symbol is current `LT(1)` symbol and has not yet been 393 * removed from the input stream. When this method returns, 394 * `$recognizer` is in error recovery mode. 395 * 396 * This method is called when {@see DefaultErrorStrategy::singleTokenDeletion()} 397 * identifies single-token deletion as a viable recovery strategy for 398 * a mismatched input error. 399 * 400 * The default implementation simply returns if the handler is already in 401 * error recovery mode. Otherwise, it calls 402 * {@see DefaultErrorStrategy::beginErrorCondition()} to enter error 403 * recovery mode, followed by calling {@see Parser::notifyErrorListeners}. 404 * 405 * @param Parser $recognizer The parser instance. 406 */ 407 protected function reportUnwantedToken(Parser $recognizer) : void 408 { 409 if ($this->inErrorRecoveryMode($recognizer)) { 410 return; 411 } 412 413 $this->beginErrorCondition($recognizer); 414 415 $t = $recognizer->getCurrentToken(); 416 $tokenName = $this->getTokenErrorDisplay($t); 417 $expecting = $this->getExpectedTokens($recognizer); 418 419 $msg = \sprintf( 420 'extraneous input %s expecting %s', 421 $tokenName, 422 $expecting->toStringVocabulary($recognizer->getVocabulary()) 423 ); 424 425 $recognizer->notifyErrorListeners($msg, $t); 426 } 427 428 /** 429 * This method is called to report a syntax error which requires the 430 * insertion of a missing token into the input stream. At the time this 431 * method is called, the missing token has not yet been inserted. When this 432 * method returns, `$recognizer` is in error recovery mode. 433 * 434 * This method is called when {@see DefaultErrorStrategy::singleTokenInsertion()} 435 * identifies single-token insertion as a viable recovery strategy for 436 * a mismatched input error. 437 * 438 * The default implementation simply returns if the handler is already in 439 * error recovery mode. Otherwise, it calls 440 * {@see DefaultErrorStrategy::beginErrorCondition()} to enter error 441 * recovery mode, followed by calling {@see Parser::notifyErrorListeners()}. 442 * 443 * @param Parser $recognizer the parser instance 444 */ 445 protected function reportMissingToken(Parser $recognizer) : void 446 { 447 if ($this->inErrorRecoveryMode($recognizer)) { 448 return; 449 } 450 451 $this->beginErrorCondition($recognizer); 452 453 $t = $recognizer->getCurrentToken(); 454 $expecting = $this->getExpectedTokens($recognizer); 455 456 $msg = \sprintf( 457 'missing %s at %s', 458 $expecting->toStringVocabulary($recognizer->getVocabulary()), 459 $this->getTokenErrorDisplay($t) 460 ); 461 462 $recognizer->notifyErrorListeners($msg, $t); 463 } 464 465 /** 466 * {@inheritdoc} 467 * 468 * The default implementation attempts to recover from the mismatched input 469 * by using single token insertion and deletion as described below. If the 470 * recovery attempt fails, this method throws an 471 * {@see InputMismatchException}. 472 * 473 * EXTRA TOKEN (single token deletion) 474 * 475 * `LA(1)` is not what we are looking for. If `LA(2)` has the 476 * right token, however, then assume `LA(1)` is some extra spurious 477 * token and delete it. Then consume and return the next token (which was 478 * the `LA(2)` token) as the successful result of the match operation. 479 * 480 * This recovery strategy is implemented by 481 * {@see DefaultErrorStrategy::singleTokenDeletion()}. 482 * 483 * MISSING TOKEN (single token insertion) 484 * 485 * If current token (at `LA(1)`) is consistent with what could come 486 * after the expected `LA(1)` token, then assume the token is missing 487 * and use the parser's {@see TokenFactory} to create it on the fly. The 488 * "insertion" is performed by returning the created token as the successful 489 * result of the match operation. 490 * 491 * This recovery strategy is implemented by 492 * {@see DefaultErrorStrategy::singleTokenInsertion()}. 493 * 494 * EXAMPLE 495 * 496 * For example, Input `i=(3;` is clearly missing the `')'`. When 497 * the parser returns from the nested call to `expr`, it will have 498 * call chain: 499 * 500 * stat → expr → atom 501 * 502 * and it will be trying to match the `')'` at this point in the 503 * derivation: 504 * 505 * => ID '=' '(' INT ')' ('+' atom)* ';' 506 * ^ 507 * 508 * The attempt to match `')'` will fail when it sees `';'` and call 509 * {@see DefaultErrorStrategy::recoverInline()}. To recover, it sees that 510 * `LA(1)==';'` is in the set of tokens that can follow the `')'` token 511 * reference in rule `atom`. It can assume that you forgot the `')'`. 512 * 513 * @throws RecognitionException 514 */ 515 public function recoverInline(Parser $recognizer) : Token 516 { 517 // SINGLE TOKEN DELETION 518 $matchedSymbol = $this->singleTokenDeletion($recognizer); 519 520 if ($matchedSymbol !== null) { 521 // we have deleted the extra token. 522 // now, move past ttype token as if all were ok 523 $recognizer->consume(); 524 525 return $matchedSymbol; 526 } 527 528 // SINGLE TOKEN INSERTION 529 if ($this->singleTokenInsertion($recognizer)) { 530 return $this->getMissingSymbol($recognizer); 531 } 532 533 // even that didn't work; must throw the exception 534 if ($this->nextTokensContext === null) { 535 throw new InputMismatchException($recognizer); 536 } 537 538 throw new InputMismatchException($recognizer, $this->nextTokensState, $this->nextTokensContext); 539 } 540 541 /** 542 * This method implements the single-token insertion inline error recovery 543 * strategy. It is called by {@see DefaultErrorStrategy::recoverInline()} 544 * if the single-token deletion strategy fails to recover from the mismatched 545 * input. If this method returns `true`, `$recognizer` will be in error 546 * recovery mode. 547 * 548 * This method determines whether or not single-token insertion is viable by 549 * checking if the `LA(1)` input symbol could be successfully matched 550 * if it were instead the `LA(2)` symbol. If this method returns 551 * `true`, the caller is responsible for creating and inserting a 552 * token with the correct type to produce this behavior. 553 * 554 * @param Parser $recognizer The parser instance. 555 * 556 * @return bool `true` If single-token insertion is a viable recovery 557 * strategy for the current mismatched input, otherwise `false`. 558 */ 559 protected function singleTokenInsertion(Parser $recognizer) : bool 560 { 561 $stream = $recognizer->getInputStream(); 562 563 if ($stream === null) { 564 throw new \RuntimeException('Unexpected null input stream.'); 565 } 566 567 $interpreter = $recognizer->getInterpreter(); 568 569 if ($interpreter === null) { 570 throw new \RuntimeException('Unexpected null interpreter.'); 571 } 572 573 $currentSymbolType = $stream->LA(1); 574 575 // if current token is consistent with what could come after current 576 // ATN state, then we know we're missing a token; error recovery 577 // is free to conjure up and insert the missing token 578 579 $atn = $interpreter->atn; 580 /** @var ATNState $currentState */ 581 $currentState = $atn->states[$recognizer->getState()]; 582 $next = $currentState->getTransition(0)->target; 583 $expectingAtLL2 = $atn->nextTokensInContext($next, $recognizer->getContext()); 584 585 if ($expectingAtLL2->contains($currentSymbolType)) { 586 $this->reportMissingToken($recognizer); 587 588 return true; 589 } 590 591 return false; 592 } 593 594 /** 595 * This method implements the single-token deletion inline error recovery 596 * strategy. It is called by {@see DefaultErrorStrategy::recoverInline()} 597 * to attempt to recover from mismatched input. If this method returns null, 598 * the parser and error handler state will not have changed. If this method 599 * returns non-null, `$recognizer` will _not_ be in error recovery mode 600 * since the returned token was a successful match. 601 * 602 * If the single-token deletion is successful, this method calls 603 * {@see DefaultErrorStrategy::reportUnwantedToken()} to report the error, 604 * followed by {@see Parser::consume()} to actually "delete" the extraneous 605 * token. Then, before returning {@see DefaultErrorStrategy::reportMatch()} 606 * is called to signal a successful match. 607 * 608 * @param Parser $recognizer The parser instance. 609 * 610 * @return Token The successfully matched {@see Token} instance if 611 * single-token deletion successfully recovers from 612 * the mismatched input, otherwise `null`. 613 */ 614 protected function singleTokenDeletion(Parser $recognizer) : ?Token 615 { 616 $inputStream = $recognizer->getInputStream(); 617 618 if ($inputStream === null) { 619 throw new \RuntimeException('Unexpected null input stream.'); 620 } 621 622 $nextTokenType = $inputStream->LA(2); 623 $expecting = $this->getExpectedTokens($recognizer); 624 625 if ($expecting->contains($nextTokenType)) { 626 $this->reportUnwantedToken($recognizer); 627 $recognizer->consume(); // simply delete extra token 628 // we want to return the token we're actually matching 629 $matchedSymbol = $recognizer->getCurrentToken(); 630 $this->reportMatch($recognizer); // we know current token is correct 631 632 return $matchedSymbol; 633 } 634 635 return null; 636 } 637 638 /** Conjure up a missing token during error recovery. 639 * 640 * The recognizer attempts to recover from single missing 641 * symbols. But, actions might refer to that missing symbol. 642 * For example, x=ID {f($x);}. The action clearly assumes 643 * that there has been an identifier matched previously and that 644 * $x points at that token. If that token is missing, but 645 * the next token in the stream is what we want we assume that 646 * this token is missing and we keep going. Because we 647 * have to return some token to replace the missing token, 648 * we have to conjure one up. This method gives the user control 649 * over the tokens returned for missing tokens. Mostly, 650 * you will want to create something special for identifier 651 * tokens. For literals such as '{' and ',', the default 652 * action in the parser or tree parser works. It simply creates 653 * a CommonToken of the appropriate type. The text will be the token. 654 * If you change what tokens must be created by the lexer, 655 * override this method to create the appropriate tokens. 656 */ 657 protected function getMissingSymbol(Parser $recognizer) : Token 658 { 659 $currentSymbol = $recognizer->getCurrentToken(); 660 661 if ($currentSymbol === null) { 662 throw new \RuntimeException('Unexpected null current token.'); 663 } 664 665 $inputStream = $recognizer->getInputStream(); 666 667 if ($inputStream === null) { 668 throw new \RuntimeException('Unexpected null input stream.'); 669 } 670 671 $tokenSource = $currentSymbol->getTokenSource(); 672 673 if ($tokenSource === null) { 674 throw new \RuntimeException('Unexpected null token source.'); 675 } 676 677 $expecting = $this->getExpectedTokens($recognizer); 678 679 $expectedTokenType = Token::INVALID_TYPE; 680 681 if (!$expecting->isNull()) { 682 $expectedTokenType = $expecting->getMinElement(); // get any element 683 } 684 685 if ($expectedTokenType === Token::EOF) { 686 $tokenText = '<missing EOF>'; 687 } else { 688 $tokenText = \sprintf('<missing %s>', $recognizer->getVocabulary()->getDisplayName($expectedTokenType)); 689 } 690 691 $current = $currentSymbol; 692 $lookback = $inputStream->LT(-1); 693 694 if ($current->getType() === Token::EOF && $lookback !== null) { 695 $current = $lookback; 696 } 697 698 return $recognizer->getTokenFactory()->createEx( 699 new Pair( 700 $tokenSource, 701 $tokenSource->getInputStream() 702 ), 703 $expectedTokenType, 704 $tokenText, 705 Token::DEFAULT_CHANNEL, 706 -1, 707 -1, 708 $current->getLine(), 709 $current->getCharPositionInLine() 710 ); 711 } 712 713 protected function getExpectedTokens(Parser $recognizer) : IntervalSet 714 { 715 return $recognizer->getExpectedTokens(); 716 } 717 718 /** 719 * How should a token be displayed in an error message? The default 720 * is to display just the text, but during development you might 721 * want to have a lot of information spit out. Override in that case 722 * to use (string) (which, for CommonToken, dumps everything about 723 * the token). This is better than forcing you to override a method in 724 * your token objects because you don't have to go modify your lexer 725 * so that it creates a new Java type. 726 */ 727 protected function getTokenErrorDisplay(?Token $t) : string 728 { 729 if ($t === null) { 730 return '<no token>'; 731 } 732 733 $s = $this->getSymbolText($t); 734 735 if ($s === null) { 736 if ($this->getSymbolType($t) === Token::EOF) { 737 $s = '<EOF>'; 738 } else { 739 $s = '<' . $this->getSymbolType($t) . '>'; 740 } 741 } 742 743 return $this->escapeWSAndQuote($s); 744 } 745 746 protected function getSymbolText(Token $symbol) : ?string 747 { 748 return $symbol->getText(); 749 } 750 751 protected function getSymbolType(Token $symbol) : int 752 { 753 return $symbol->getType(); 754 } 755 756 protected function escapeWSAndQuote(string $s) : string 757 { 758 return "'" . StringUtils::escapeWhitespace($s) . "'"; 759 } 760 761 /** 762 * Compute the error recovery set for the current rule. During 763 * rule invocation, the parser pushes the set of tokens that can 764 * follow that rule reference on the stack; this amounts to 765 * computing FIRST of what follows the rule reference in the 766 * enclosing rule. See LinearApproximator::FIRST. 767 * This local follow set only includes tokens 768 * from within the rule; i.e., the FIRST computation done by 769 * ANTLR stops at the end of a rule. 770 * 771 * EXAMPLE 772 * 773 * When you find a "no viable alt exception", the input is not 774 * consistent with any of the alternatives for rule r. The best 775 * thing to do is to consume tokens until you see something that 776 * can legally follow a call to r *or* any rule that called r. 777 * You don't want the exact set of viable next tokens because the 778 * input might just be missing a token--you might consume the 779 * rest of the input looking for one of the missing tokens. 780 * 781 * Consider grammar: 782 * 783 * a : '[' b ']' 784 * | '(' b ')' 785 * ; 786 * b : c '^' INT ; 787 * c : ID 788 * | INT 789 * ; 790 * 791 * At each rule invocation, the set of tokens that could follow 792 * that rule is pushed on a stack. Here are the various 793 * context-sensitive follow sets: 794 * 795 * FOLLOW(b1_in_a) = FIRST(']') = ']' 796 * FOLLOW(b2_in_a) = FIRST(')') = ')' 797 * FOLLOW(c_in_b) = FIRST('^') = '^' 798 * 799 * Upon erroneous input "[]", the call chain is 800 * 801 * a -> b -> c 802 * 803 * and, hence, the follow context stack is: 804 * 805 * depth | follow set | start of rule execution 806 * ------|------------|------------------------- 807 * 0 | <EOF> | a (from main()) 808 * 1 | ']' | b 809 * 2 | '^' | c 810 * 811 * Notice that ')' is not included, because b would have to have 812 * been called from a different context in rule a for ')' to be 813 * included. 814 * 815 * For error recovery, we cannot consider FOLLOW(c) 816 * (context-sensitive or otherwise). We need the combined set of 817 * all context-sensitive FOLLOW sets--the set of all tokens that 818 * could follow any reference in the call chain. We need to 819 * resync to one of those tokens. Note that FOLLOW(c)='^' and if 820 * we resync'd to that token, we'd consume until EOF. We need to 821 * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. 822 * In this case, for input "[]", LA(1) is ']' and in the set, so we would 823 * not consume anything. After printing an error, rule c would 824 * return normally. Rule b would not find the required '^' though. 825 * At this point, it gets a mismatched token error and throws an 826 * exception (since LA(1) is not in the viable following token 827 * set). The rule exception handler tries to recover, but finds 828 * the same recovery set and doesn't consume anything. Rule b 829 * exits normally returning to rule a. Now it finds the ']' (and 830 * with the successful match exits errorRecovery mode). 831 * 832 * So, you can see that the parser walks up the call chain looking 833 * for the token that was a member of the recovery set. 834 * 835 * Errors are not generated in errorRecovery mode. 836 * 837 * ANTLR's error recovery mechanism is based upon original ideas: 838 * 839 * "Algorithms + Data Structures = Programs" by Niklaus Wirth 840 * 841 * and 842 * 843 * "A note on error recovery in recursive descent parsers": 844 * http://portal.acm.org/citation.cfm?id=947902.947905 845 * 846 * Later, Josef Grosch had some good ideas: 847 * 848 * "Efficient and Comfortable Error Recovery in Recursive Descent 849 * Parsers": 850 * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip 851 * 852 * Like Grosch I implement context-sensitive FOLLOW sets that are combined 853 * at run-time upon error to avoid overhead during parsing. 854 */ 855 protected function getErrorRecoverySet(Parser $recognizer) : IntervalSet 856 { 857 $interpreter = $recognizer->getInterpreter(); 858 859 if ($interpreter === null) { 860 throw new \RuntimeException('Unexpected null interpreter.'); 861 } 862 863 $atn = $interpreter->atn; 864 $ctx = $recognizer->getContext(); 865 $recoverSet = new IntervalSet(); 866 867 while ($ctx !== null && $ctx->invokingState >= 0) { 868 // compute what follows who invoked us 869 /** @var ATNState $invokingState */ 870 $invokingState = $atn->states[$ctx->invokingState]; 871 /** @var RuleTransition $rt */ 872 $rt = $invokingState->getTransition(0); 873 $follow = $atn->nextTokens($rt->followState); 874 $recoverSet->addSet($follow); 875 $ctx = $ctx->getParent(); 876 } 877 878 $recoverSet->removeOne(Token::EPSILON); 879 880 return $recoverSet; 881 } 882 883 /** 884 * Consume tokens until one matches the given token set. 885 */ 886 protected function consumeUntil(Parser $recognizer, IntervalSet $set) : void 887 { 888 $inputStream = $recognizer->getInputStream(); 889 890 if ($inputStream === null) { 891 throw new \RuntimeException('Unexpected null input stream.'); 892 } 893 894 $ttype = $inputStream->LA(1); 895 896 while ($ttype !== Token::EOF && !$set->contains($ttype)) { 897 $recognizer->consume(); 898 $ttype = $inputStream->LA(1); 899 } 900 } 901} 902