1*37748cd8SNickeau<?php 2*37748cd8SNickeau 3*37748cd8SNickeaudeclare(strict_types=1); 4*37748cd8SNickeau 5*37748cd8SNickeaunamespace Antlr\Antlr4\Runtime\Error; 6*37748cd8SNickeau 7*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Atn\States\ATNState; 8*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Atn\Transitions\RuleTransition; 9*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Error\Exceptions\FailedPredicateException; 10*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Error\Exceptions\InputMismatchException; 11*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Error\Exceptions\NoViableAltException; 12*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Error\Exceptions\RecognitionException; 13*37748cd8SNickeauuse Antlr\Antlr4\Runtime\IntervalSet; 14*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Parser; 15*37748cd8SNickeauuse Antlr\Antlr4\Runtime\ParserRuleContext; 16*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Token; 17*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Utils\Pair; 18*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Utils\StringUtils; 19*37748cd8SNickeau 20*37748cd8SNickeau/** 21*37748cd8SNickeau * This is the default implementation of {@see ANTLRErrorStrategy} used for 22*37748cd8SNickeau * error reporting and recovery in ANTLR parsers. 23*37748cd8SNickeau */ 24*37748cd8SNickeauclass DefaultErrorStrategy implements ANTLRErrorStrategy 25*37748cd8SNickeau{ 26*37748cd8SNickeau /** 27*37748cd8SNickeau * Indicates whether the error strategy is currently "recovering from an 28*37748cd8SNickeau * error". This is used to suppress reporting multiple error messages while 29*37748cd8SNickeau * attempting to recover from a detected syntax error. 30*37748cd8SNickeau * 31*37748cd8SNickeau * @see DefaultErrorStrategy::inErrorRecoveryMode() 32*37748cd8SNickeau * 33*37748cd8SNickeau * @var bool 34*37748cd8SNickeau */ 35*37748cd8SNickeau protected $errorRecoveryMode = false; 36*37748cd8SNickeau 37*37748cd8SNickeau /** The index into the input stream where the last error occurred. 
38*37748cd8SNickeau * This is used to prevent infinite loops where an error is found 39*37748cd8SNickeau * but no token is consumed during recovery...another error is found, 40*37748cd8SNickeau * ad nauseum. This is a failsafe mechanism to guarantee that at least 41*37748cd8SNickeau * one token/tree node is consumed for two errors. 42*37748cd8SNickeau * 43*37748cd8SNickeau * @var int 44*37748cd8SNickeau */ 45*37748cd8SNickeau protected $lastErrorIndex = -1; 46*37748cd8SNickeau 47*37748cd8SNickeau /** @var IntervalSet|null */ 48*37748cd8SNickeau protected $lastErrorStates; 49*37748cd8SNickeau 50*37748cd8SNickeau /** 51*37748cd8SNickeau * This field is used to propagate information about the lookahead following 52*37748cd8SNickeau * the previous match. Since prediction prefers completing the current rule 53*37748cd8SNickeau * to error recovery efforts, error reporting may occur later than the 54*37748cd8SNickeau * original point where it was discoverable. The original context is used to 55*37748cd8SNickeau * compute the true expected sets as though the reporting occurred as early 56*37748cd8SNickeau * as possible. 57*37748cd8SNickeau * 58*37748cd8SNickeau * @var ParserRuleContext|null 59*37748cd8SNickeau */ 60*37748cd8SNickeau protected $nextTokensContext; 61*37748cd8SNickeau 62*37748cd8SNickeau /** 63*37748cd8SNickeau * @see DefaultErrorStrategy::$nextTokensContext 64*37748cd8SNickeau * 65*37748cd8SNickeau * @var int|null 66*37748cd8SNickeau */ 67*37748cd8SNickeau protected $nextTokensState; 68*37748cd8SNickeau 69*37748cd8SNickeau /** 70*37748cd8SNickeau * {@inheritdoc} 71*37748cd8SNickeau * 72*37748cd8SNickeau * The default implementation simply calls 73*37748cd8SNickeau * {@see DefaultErrorStrategy::endErrorCondition()} to ensure that 74*37748cd8SNickeau * the handler is not in error recovery mode. 
*/
    public function reset(Parser $recognizer) : void
    {
        // Resetting the strategy amounts to dropping any pending error state.
        $this->endErrorCondition($recognizer);
    }

    /**
     * Switches the strategy into error recovery mode. Invoked when a
     * recognition exception (or an unwanted/missing token) is being reported.
     *
     * @param Parser $recognizer The parser instance (not read by this implementation).
     */
    protected function beginErrorCondition(Parser $recognizer) : void
    {
        $this->errorRecoveryMode = true;
    }

    /**
     * Tells whether the strategy is currently in error recovery mode.
     *
     * @param Parser $recognizer The parser instance (not read by this implementation).
     *
     * @return bool The current value of the recovery flag.
     */
    public function inErrorRecoveryMode(Parser $recognizer) : bool
    {
        return $this->errorRecoveryMode;
    }

    /**
     * Leaves error recovery mode: clears the recovery flag and forgets the
     * input index and the ATN states remembered for the last error.
     *
     * @param Parser $recognizer The parser instance (not read by this implementation).
     */
    protected function endErrorCondition(Parser $recognizer) : void
    {
        $this->lastErrorIndex = -1;
        $this->lastErrorStates = null;
        $this->errorRecoveryMode = false;
    }

    /**
     * {@inheritdoc}
     *
     * The default implementation simply calls
     * {@see DefaultErrorStrategy::endErrorCondition()}.
*/
    public function reportMatch(Parser $recognizer) : void
    {
        // A successful match ends whatever error condition was in progress.
        $this->endErrorCondition($recognizer);
    }

    /**
     * {@inheritdoc}
     *
     * Does nothing when the handler is already in error recovery mode, so a
     * single syntax error is not reported several times. Otherwise it enters
     * recovery mode through {@see DefaultErrorStrategy::beginErrorCondition()}
     * and selects the reporting routine from the runtime type of `$e`:
     *
     * - {@see NoViableAltException}: handled by
     *   {@see DefaultErrorStrategy::reportNoViableAlternative()}
     * - {@see InputMismatchException}: handled by
     *   {@see DefaultErrorStrategy::reportInputMismatch()}
     * - {@see FailedPredicateException}: handled by
     *   {@see DefaultErrorStrategy::reportFailedPredicate()}
     * - any other type: {@see Parser::notifyErrorListeners()} is called
     *   directly with the exception's own message and offending token.
     *
     * @param Parser               $recognizer The parser instance.
     * @param RecognitionException $e          The exception to report.
     */
    public function reportError(Parser $recognizer, RecognitionException $e) : void
    {
        // An error was already reported and no token has been matched since:
        // anything raised now is most likely spurious, so stay silent.
        if ($this->inErrorRecoveryMode($recognizer)) {
            return;
        }

        $this->beginErrorCondition($recognizer);

        if ($e instanceof NoViableAltException) {
            $this->reportNoViableAlternative($recognizer, $e);

            return;
        }

        if ($e instanceof InputMismatchException) {
            $this->reportInputMismatch($recognizer, $e);

            return;
        }

        if ($e instanceof FailedPredicateException) {
            $this->reportFailedPredicate($recognizer, $e);

            return;
        }

        // Unknown exception type: forward its message unchanged.
        $recognizer->notifyErrorListeners($e->getMessage(), $e->getOffendingToken(), $e);
    }

    /**
     * {@inheritdoc}
     *
     * The default implementation resynchronizes the parser by consuming tokens
     * until we find one in the resynchronization set--loosely the set of tokens
     * that can follow the current rule.
*/
    public function recover(Parser $recognizer, RecognitionException $e) : void
    {
        $input = $recognizer->getInputStream();

        if ($input === null) {
            throw new \RuntimeException('Unexpected null input stream.');
        }

        // Same token index and an ATN state we already failed in: LT(1) must
        // be in the recovery set, so the previous recovery consumed nothing.
        $stuckOnSameError = $this->lastErrorStates !== null
            && $this->lastErrorIndex === $input->getIndex()
            && $this->lastErrorStates->contains($recognizer->getState());

        if ($stuckOnSameError) {
            // Failsafe: force progress by one token to avoid an infinite loop.
            $recognizer->consume();
        }

        // Remember where (index) and in which state this error happened; the
        // index is read after the possible forced consume above.
        $this->lastErrorIndex = $input->getIndex();

        if ($this->lastErrorStates === null) {
            $this->lastErrorStates = new IntervalSet();
        }

        $this->lastErrorStates->addOne($recognizer->getState());

        // Skip ahead to a token contained in the error recovery set.
        $this->consumeUntil($recognizer, $this->getErrorRecoverySet($recognizer));
    }

    /**
     * The default implementation of {@see ANTLRErrorStrategy::sync()} makes sure
     * that the current lookahead symbol is consistent with what we're expecting
     * at this point in the ATN.
You can call this anytime but ANTLR only
     * generates code to check before subrules/loops and each iteration.
     *
     * Implements Jim Idle's magic sync mechanism in closures and optional
     * subrules. E.g.,
     *
     *     a    : sync ( stuff sync )* ;
     *     sync : {consume to what can follow sync} ;
     *
     * At the start of a sub rule upon error, {@see sync} performs single
     * token deletion, if possible. If it can't do that, it bails on the current
     * rule and uses the default error recovery, which consumes until the
     * resynchronization set of the current rule.
     *
     * If the sub rule is optional (`(...)?`, `(...)*`, or block
     * with an empty alternative), then the expected set includes what follows
     * the subrule.
     *
     * During loop iteration, it consumes until it sees a token that can start a
     * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
     * stay in the loop as long as possible.
     *
     * ORIGINS
     *
     * Previous versions of ANTLR did a poor job of their recovery within loops.
     * A single mismatch token or missing token would force the parser to bail
     * out of the entire rule surrounding the loop. So, for rule
     *
     *     classDef : 'class' ID '{' member* '}'
     *
     * input with an extra token between members would force the parser to
     * consume until it found the next class definition rather than the next
     * member definition of the current class.
     *
     * This functionality costs a little bit of effort because the parser has to
     * compare token set at the start of the loop and at each iteration. If for
     * some reason speed is suffering for you, you can turn off this
     * functionality by simply overriding this method as a blank { }.
     *
     * @param Parser $recognizer The parser instance.
     *
     * @throws RecognitionException
     */
    public function sync(Parser $recognizer) : void
    {
        $interpreter = $recognizer->getInterpreter();

        if ($interpreter === null) {
            throw new \RuntimeException('Unexpected null interpreter.');
        }

        /** @var ATNState $state */
        $state = $interpreter->atn->states[$recognizer->getState()];

        // A recovery is already in progress: do not attempt to sync on top of it.
        if ($this->inErrorRecoveryMode($recognizer)) {
            return;
        }

        $input = $recognizer->getInputStream();

        if ($input === null) {
            throw new \RuntimeException('Unexpected null input stream.');
        }

        $lookahead = $input->LA(1);

        // Try the cheaper, context-free token set first; it often suffices.
        $viable = $recognizer->getATN()->nextTokens($state);

        if ($viable->contains($lookahead)) {
            // The lookahead definitely matches: drop any deferred context.
            $this->nextTokensState = ATNState::INVALID_STATE_NUMBER;
            $this->nextTokensContext = null;

            return;
        }

        if ($viable->contains(Token::EPSILON)) {
            // The next token may still fail to match; sync tracks only limited
            // information for performance, so remember the first context and
            // state where this was noticed for later error reporting.
            if ($this->nextTokensContext === null) {
                $this->nextTokensContext = $recognizer->getContext();
                $this->nextTokensState = $recognizer->getState();
            }

            return;
        }

        switch ($state->getStateType()) {
            case ATNState::BLOCK_START:
            case ATNState::STAR_BLOCK_START:
            case ATNState::PLUS_BLOCK_START:
            case ATNState::STAR_LOOP_ENTRY:
                // Entering a (sub)rule block: dropping one token is the only
                // inline repair attempted; otherwise signal the mismatch.
                if ($this->singleTokenDeletion($recognizer) === null) {
                    throw new InputMismatchException($recognizer);
                }

                return;

            case ATNState::PLUS_LOOP_BACK:
            case ATNState::STAR_LOOP_BACK:
                // Inside a loop: report, then skip to anything that can start
                // another iteration or follow the loop/rule.
                $this->reportUnwantedToken($recognizer);
                $resyncSet = $recognizer->getExpectedTokens()->orSet($this->getErrorRecoverySet($recognizer));
                $this->consumeUntil($recognizer, $resyncSet);

                return;

            default:
                // Any other kind of ATN state: nothing to do.
                return;
        }
    }

    /**
     * Reports a {@see NoViableAltException} on behalf of
     * {@see DefaultErrorStrategy::reportError()}.
     *
     * The message quotes the input text between the exception's start token
     * and its offending token, `<EOF>` when the start token is of type EOF, or
     * `<unknown input>` when the parser has no token stream.
     *
     * @param Parser               $recognizer The parser instance.
     * @param NoViableAltException $e          The recognition exception.
     *
     * @see DefaultErrorStrategy::reportError()
     */
    protected function reportNoViableAlternative(Parser $recognizer, NoViableAltException $e) : void
    {
        $stream = $recognizer->getTokenStream();

        if ($stream === null) {
            $input = '<unknown input>';
        } else {
            $start = $e->getStartToken();

            if ($start === null) {
                throw new \RuntimeException('Unexpected null start token.');
            }

            $input = $start->getType() === Token::EOF
                ? '<EOF>'
                : $stream->getTextByTokens($start, $e->getOffendingToken());
        }

        $recognizer->notifyErrorListeners(
            \sprintf('no viable alternative at input %s', $this->escapeWSAndQuote($input)),
            $e->getOffendingToken(),
            $e
        );
    }

    /**
     * This is called by {@see DefaultErrorStrategy::reportError()} when
* the exception is an {@see InputMismatchException}.
     *
     * The message names the offending token and the set of expected tokens,
     * rendered with the parser's vocabulary.
     *
     * @param Parser                 $recognizer The parser instance.
     * @param InputMismatchException $e          The recognition exception.
     *
     * @see DefaultErrorStrategy::reportError()
     */
    protected function reportInputMismatch(Parser $recognizer, InputMismatchException $e) : void
    {
        $expected = $e->getExpectedTokens();

        if ($expected === null) {
            throw new \RuntimeException('Unexpected null expected tokens.');
        }

        $offendingDisplay = $this->getTokenErrorDisplay($e->getOffendingToken());
        $expectedDisplay = $expected->toStringVocabulary($recognizer->getVocabulary());

        $recognizer->notifyErrorListeners(
            \sprintf('mismatched input %s expecting %s', $offendingDisplay, $expectedDisplay),
            $e->getOffendingToken(),
            $e
        );
    }

    /**
     * This is called by {@see DefaultErrorStrategy::reportError()} when
     * the exception is a {@see FailedPredicateException}.
     *
     * @param Parser                   $recognizer The parser instance.
     * @param FailedPredicateException $e          The recognition exception.
*
     * @see DefaultErrorStrategy::reportError()
     */
    protected function reportFailedPredicate(Parser $recognizer, FailedPredicateException $e) : void
    {
        // The message is the current rule name followed by the exception text.
        $ruleName = $recognizer->getCurrentRuleName();
        $reason = $e->getMessage();

        $recognizer->notifyErrorListeners(
            \sprintf('rule %s %s', $ruleName, $reason),
            $e->getOffendingToken(),
            $e
        );
    }

    /**
     * Reports a syntax error that is repaired by removing a token from the
     * input stream. When this method is called the erroneous symbol is the
     * current `LT(1)` symbol and has not been removed yet. When this method
     * returns, `$recognizer` is in error recovery mode.
     *
     * It is called when {@see DefaultErrorStrategy::singleTokenDeletion()}
     * identifies single-token deletion as a viable recovery strategy for
     * a mismatched input error.
     *
     * The default implementation returns immediately if the handler is already
     * in error recovery mode. Otherwise it calls
     * {@see DefaultErrorStrategy::beginErrorCondition()} to enter error
     * recovery mode and then {@see Parser::notifyErrorListeners()}.
     *
     * @param Parser $recognizer The parser instance.
*/
    protected function reportUnwantedToken(Parser $recognizer) : void
    {
        if (!$this->inErrorRecoveryMode($recognizer)) {
            $this->beginErrorCondition($recognizer);

            // The unwanted token is still the current one at this point.
            $unwanted = $recognizer->getCurrentToken();
            $unwantedDisplay = $this->getTokenErrorDisplay($unwanted);
            $expected = $this->getExpectedTokens($recognizer);

            $recognizer->notifyErrorListeners(
                \sprintf(
                    'extraneous input %s expecting %s',
                    $unwantedDisplay,
                    $expected->toStringVocabulary($recognizer->getVocabulary())
                ),
                $unwanted
            );
        }
    }

    /**
     * Reports a syntax error that is repaired by inserting a missing token
     * into the input stream. When this method is called the missing token has
     * not been inserted yet. When this method returns, `$recognizer` is in
     * error recovery mode.
     *
     * It is called when {@see DefaultErrorStrategy::singleTokenInsertion()}
     * identifies single-token insertion as a viable recovery strategy for
     * a mismatched input error.
     *
     * The default implementation returns immediately if the handler is already
     * in error recovery mode. Otherwise it calls
     * {@see DefaultErrorStrategy::beginErrorCondition()} to enter error
     * recovery mode, followed by calling {@see Parser::notifyErrorListeners()}.
*
     * @param Parser $recognizer The parser instance.
     */
    protected function reportMissingToken(Parser $recognizer) : void
    {
        if (!$this->inErrorRecoveryMode($recognizer)) {
            $this->beginErrorCondition($recognizer);

            // The message is anchored at the current token, i.e. the position
            // where the missing token should have appeared.
            $current = $recognizer->getCurrentToken();
            $expected = $this->getExpectedTokens($recognizer);

            $recognizer->notifyErrorListeners(
                \sprintf(
                    'missing %s at %s',
                    $expected->toStringVocabulary($recognizer->getVocabulary()),
                    $this->getTokenErrorDisplay($current)
                ),
                $current
            );
        }
    }

    /**
     * {@inheritdoc}
     *
     * The default implementation attempts to recover from the mismatched input
     * by using single token insertion and deletion as described below. If the
     * recovery attempt fails, this method throws an
     * {@see InputMismatchException}.
     *
     * EXTRA TOKEN (single token deletion)
     *
     * `LA(1)` is not what we are looking for. If `LA(2)` has the
     * right token, however, then assume `LA(1)` is some extra spurious
     * token and delete it. Then consume and return the next token (which was
     * the `LA(2)` token) as the successful result of the match operation.
     *
     * This recovery strategy is implemented by
     * {@see DefaultErrorStrategy::singleTokenDeletion()}.
*
     * MISSING TOKEN (single token insertion)
     *
     * If current token (at `LA(1)`) is consistent with what could come
     * after the expected `LA(1)` token, then assume the token is missing
     * and use the parser's {@see TokenFactory} to create it on the fly. The
     * "insertion" is performed by returning the created token as the successful
     * result of the match operation.
     *
     * This recovery strategy is implemented by
     * {@see DefaultErrorStrategy::singleTokenInsertion()}.
     *
     * EXAMPLE
     *
     * For example, Input `i=(3;` is clearly missing the `')'`. When
     * the parser returns from the nested call to `expr`, it will have
     * call chain:
     *
     *     stat → expr → atom
     *
     * and it will be trying to match the `')'` at this point in the
     * derivation:
     *
     *     => ID '=' '(' INT ')' ('+' atom)* ';'
     *                        ^
     *
     * The attempt to match `')'` will fail when it sees `';'` and call
     * {@see DefaultErrorStrategy::recoverInline()}. To recover, it sees that
     * `LA(1)==';'` is in the set of tokens that can follow the `')'` token
     * reference in rule `atom`. It can assume that you forgot the `')'`.
     *
     * @param Parser $recognizer The parser instance.
     *
     * @return Token The token matched after a deletion, or the conjured
     *               token standing in for a missing one.
     *
     * @throws RecognitionException
     */
    public function recoverInline(Parser $recognizer) : Token
    {
        // First attempt: single token deletion.
        $afterDeletion = $this->singleTokenDeletion($recognizer);

        if ($afterDeletion !== null) {
            // The extra token is gone; step over the token that matched,
            // exactly as a regular successful match would.
            $recognizer->consume();

            return $afterDeletion;
        }

        // Second attempt: single token insertion.
        if ($this->singleTokenInsertion($recognizer)) {
            return $this->getMissingSymbol($recognizer);
        }

        // Neither repair applies. When sync() recorded an earlier context and
        // state, hand them to the exception; otherwise use the current ones.
        $mismatch = $this->nextTokensContext === null
            ? new InputMismatchException($recognizer)
            : new InputMismatchException($recognizer, $this->nextTokensState, $this->nextTokensContext);

        throw $mismatch;
    }

    /**
     * This method implements the single-token insertion inline error recovery
     * strategy. It is called by {@see DefaultErrorStrategy::recoverInline()}
     * if the single-token deletion strategy fails to recover from the mismatched
     * input. If this method returns `true`, `$recognizer` will be in error
     * recovery mode.
*
     * This method determines whether or not single-token insertion is viable by
     * checking if the `LA(1)` input symbol could be successfully matched
     * if it were instead the `LA(2)` symbol. If this method returns
     * `true`, the caller is responsible for creating and inserting a
     * token with the correct type to produce this behavior.
     *
     * @param Parser $recognizer The parser instance.
     *
     * @return bool `true` If single-token insertion is a viable recovery
     *              strategy for the current mismatched input, otherwise `false`.
     */
    protected function singleTokenInsertion(Parser $recognizer) : bool
    {
        $input = $recognizer->getInputStream();

        if ($input === null) {
            throw new \RuntimeException('Unexpected null input stream.');
        }

        $interpreter = $recognizer->getInterpreter();

        if ($interpreter === null) {
            throw new \RuntimeException('Unexpected null interpreter.');
        }

        $currentType = $input->LA(1);

        // Look one step past the current ATN state: if the current token could
        // legally appear there, exactly one token is missing in between and
        // error recovery is free to conjure it up.
        $atn = $interpreter->atn;
        /** @var ATNState $here */
        $here = $atn->states[$recognizer->getState()];
        $afterMissing = $here->getTransition(0)->target;
        $viableAfterMissing = $atn->nextTokensInContext($afterMissing, $recognizer->getContext());

        if (!$viableAfterMissing->contains($currentType)) {
            return false;
        }

        $this->reportMissingToken($recognizer);

        return true;
    }

    /**
     * This method implements the single-token deletion inline error recovery
     * strategy. It is called by {@see DefaultErrorStrategy::recoverInline()}
     * to attempt to recover from mismatched input. If this method returns null,
     * the parser and error handler state will not have changed. If this method
     * returns non-null, `$recognizer` will _not_ be in error recovery mode
     * since the returned token was a successful match.
     *
     * If the single-token deletion is successful, this method calls
     * {@see DefaultErrorStrategy::reportUnwantedToken()} to report the error,
     * followed by {@see Parser::consume()} to actually "delete" the extraneous
     * token. Then, before returning {@see DefaultErrorStrategy::reportMatch()}
     * is called to signal a successful match.
     *
     * @param Parser $recognizer The parser instance.
     *
     * @return Token|null The successfully matched {@see Token} instance if
     *                    single-token deletion successfully recovers from
     *                    the mismatched input, otherwise `null`.
613*37748cd8SNickeau */ 614*37748cd8SNickeau protected function singleTokenDeletion(Parser $recognizer) : ?Token 615*37748cd8SNickeau { 616*37748cd8SNickeau $inputStream = $recognizer->getInputStream(); 617*37748cd8SNickeau 618*37748cd8SNickeau if ($inputStream === null) { 619*37748cd8SNickeau throw new \RuntimeException('Unexpected null input stream.'); 620*37748cd8SNickeau } 621*37748cd8SNickeau 622*37748cd8SNickeau $nextTokenType = $inputStream->LA(2); 623*37748cd8SNickeau $expecting = $this->getExpectedTokens($recognizer); 624*37748cd8SNickeau 625*37748cd8SNickeau if ($expecting->contains($nextTokenType)) { 626*37748cd8SNickeau $this->reportUnwantedToken($recognizer); 627*37748cd8SNickeau $recognizer->consume(); // simply delete extra token 628*37748cd8SNickeau // we want to return the token we're actually matching 629*37748cd8SNickeau $matchedSymbol = $recognizer->getCurrentToken(); 630*37748cd8SNickeau $this->reportMatch($recognizer); // we know current token is correct 631*37748cd8SNickeau 632*37748cd8SNickeau return $matchedSymbol; 633*37748cd8SNickeau } 634*37748cd8SNickeau 635*37748cd8SNickeau return null; 636*37748cd8SNickeau } 637*37748cd8SNickeau 638*37748cd8SNickeau /** Conjure up a missing token during error recovery. 639*37748cd8SNickeau * 640*37748cd8SNickeau * The recognizer attempts to recover from single missing 641*37748cd8SNickeau * symbols. But, actions might refer to that missing symbol. 642*37748cd8SNickeau * For example, x=ID {f($x);}. The action clearly assumes 643*37748cd8SNickeau * that there has been an identifier matched previously and that 644*37748cd8SNickeau * $x points at that token. If that token is missing, but 645*37748cd8SNickeau * the next token in the stream is what we want we assume that 646*37748cd8SNickeau * this token is missing and we keep going. Because we 647*37748cd8SNickeau * have to return some token to replace the missing token, 648*37748cd8SNickeau * we have to conjure one up. 
This method gives the user control 649*37748cd8SNickeau * over the tokens returned for missing tokens. Mostly, 650*37748cd8SNickeau * you will want to create something special for identifier 651*37748cd8SNickeau * tokens. For literals such as '{' and ',', the default 652*37748cd8SNickeau * action in the parser or tree parser works. It simply creates 653*37748cd8SNickeau * a CommonToken of the appropriate type. The text will be the token. 654*37748cd8SNickeau * If you change what tokens must be created by the lexer, 655*37748cd8SNickeau * override this method to create the appropriate tokens. 656*37748cd8SNickeau */ 657*37748cd8SNickeau protected function getMissingSymbol(Parser $recognizer) : Token 658*37748cd8SNickeau { 659*37748cd8SNickeau $currentSymbol = $recognizer->getCurrentToken(); 660*37748cd8SNickeau 661*37748cd8SNickeau if ($currentSymbol === null) { 662*37748cd8SNickeau throw new \RuntimeException('Unexpected null current token.'); 663*37748cd8SNickeau } 664*37748cd8SNickeau 665*37748cd8SNickeau $inputStream = $recognizer->getInputStream(); 666*37748cd8SNickeau 667*37748cd8SNickeau if ($inputStream === null) { 668*37748cd8SNickeau throw new \RuntimeException('Unexpected null input stream.'); 669*37748cd8SNickeau } 670*37748cd8SNickeau 671*37748cd8SNickeau $tokenSource = $currentSymbol->getTokenSource(); 672*37748cd8SNickeau 673*37748cd8SNickeau if ($tokenSource === null) { 674*37748cd8SNickeau throw new \RuntimeException('Unexpected null token source.'); 675*37748cd8SNickeau } 676*37748cd8SNickeau 677*37748cd8SNickeau $expecting = $this->getExpectedTokens($recognizer); 678*37748cd8SNickeau 679*37748cd8SNickeau $expectedTokenType = Token::INVALID_TYPE; 680*37748cd8SNickeau 681*37748cd8SNickeau if (!$expecting->isNull()) { 682*37748cd8SNickeau $expectedTokenType = $expecting->getMinElement(); // get any element 683*37748cd8SNickeau } 684*37748cd8SNickeau 685*37748cd8SNickeau if ($expectedTokenType === Token::EOF) { 686*37748cd8SNickeau $tokenText = 
'<missing EOF>'; 687*37748cd8SNickeau } else { 688*37748cd8SNickeau $tokenText = \sprintf('<missing %s>', $recognizer->getVocabulary()->getDisplayName($expectedTokenType)); 689*37748cd8SNickeau } 690*37748cd8SNickeau 691*37748cd8SNickeau $current = $currentSymbol; 692*37748cd8SNickeau $lookback = $inputStream->LT(-1); 693*37748cd8SNickeau 694*37748cd8SNickeau if ($current->getType() === Token::EOF && $lookback !== null) { 695*37748cd8SNickeau $current = $lookback; 696*37748cd8SNickeau } 697*37748cd8SNickeau 698*37748cd8SNickeau return $recognizer->getTokenFactory()->createEx( 699*37748cd8SNickeau new Pair( 700*37748cd8SNickeau $tokenSource, 701*37748cd8SNickeau $tokenSource->getInputStream() 702*37748cd8SNickeau ), 703*37748cd8SNickeau $expectedTokenType, 704*37748cd8SNickeau $tokenText, 705*37748cd8SNickeau Token::DEFAULT_CHANNEL, 706*37748cd8SNickeau -1, 707*37748cd8SNickeau -1, 708*37748cd8SNickeau $current->getLine(), 709*37748cd8SNickeau $current->getCharPositionInLine() 710*37748cd8SNickeau ); 711*37748cd8SNickeau } 712*37748cd8SNickeau 713*37748cd8SNickeau protected function getExpectedTokens(Parser $recognizer) : IntervalSet 714*37748cd8SNickeau { 715*37748cd8SNickeau return $recognizer->getExpectedTokens(); 716*37748cd8SNickeau } 717*37748cd8SNickeau 718*37748cd8SNickeau /** 719*37748cd8SNickeau * How should a token be displayed in an error message? The default 720*37748cd8SNickeau * is to display just the text, but during development you might 721*37748cd8SNickeau * want to have a lot of information spit out. Override in that case 722*37748cd8SNickeau * to use (string) (which, for CommonToken, dumps everything about 723*37748cd8SNickeau * the token). This is better than forcing you to override a method in 724*37748cd8SNickeau * your token objects because you don't have to go modify your lexer 725*37748cd8SNickeau * so that it creates a new Java type. 
726*37748cd8SNickeau */ 727*37748cd8SNickeau protected function getTokenErrorDisplay(?Token $t) : string 728*37748cd8SNickeau { 729*37748cd8SNickeau if ($t === null) { 730*37748cd8SNickeau return '<no token>'; 731*37748cd8SNickeau } 732*37748cd8SNickeau 733*37748cd8SNickeau $s = $this->getSymbolText($t); 734*37748cd8SNickeau 735*37748cd8SNickeau if ($s === null) { 736*37748cd8SNickeau if ($this->getSymbolType($t) === Token::EOF) { 737*37748cd8SNickeau $s = '<EOF>'; 738*37748cd8SNickeau } else { 739*37748cd8SNickeau $s = '<' . $this->getSymbolType($t) . '>'; 740*37748cd8SNickeau } 741*37748cd8SNickeau } 742*37748cd8SNickeau 743*37748cd8SNickeau return $this->escapeWSAndQuote($s); 744*37748cd8SNickeau } 745*37748cd8SNickeau 746*37748cd8SNickeau protected function getSymbolText(Token $symbol) : ?string 747*37748cd8SNickeau { 748*37748cd8SNickeau return $symbol->getText(); 749*37748cd8SNickeau } 750*37748cd8SNickeau 751*37748cd8SNickeau protected function getSymbolType(Token $symbol) : int 752*37748cd8SNickeau { 753*37748cd8SNickeau return $symbol->getType(); 754*37748cd8SNickeau } 755*37748cd8SNickeau 756*37748cd8SNickeau protected function escapeWSAndQuote(string $s) : string 757*37748cd8SNickeau { 758*37748cd8SNickeau return "'" . StringUtils::escapeWhitespace($s) . "'"; 759*37748cd8SNickeau } 760*37748cd8SNickeau 761*37748cd8SNickeau /** 762*37748cd8SNickeau * Compute the error recovery set for the current rule. During 763*37748cd8SNickeau * rule invocation, the parser pushes the set of tokens that can 764*37748cd8SNickeau * follow that rule reference on the stack; this amounts to 765*37748cd8SNickeau * computing FIRST of what follows the rule reference in the 766*37748cd8SNickeau * enclosing rule. See LinearApproximator::FIRST. 767*37748cd8SNickeau * This local follow set only includes tokens 768*37748cd8SNickeau * from within the rule; i.e., the FIRST computation done by 769*37748cd8SNickeau * ANTLR stops at the end of a rule. 
770*37748cd8SNickeau * 771*37748cd8SNickeau * EXAMPLE 772*37748cd8SNickeau * 773*37748cd8SNickeau * When you find a "no viable alt exception", the input is not 774*37748cd8SNickeau * consistent with any of the alternatives for rule r. The best 775*37748cd8SNickeau * thing to do is to consume tokens until you see something that 776*37748cd8SNickeau * can legally follow a call to r *or* any rule that called r. 777*37748cd8SNickeau * You don't want the exact set of viable next tokens because the 778*37748cd8SNickeau * input might just be missing a token--you might consume the 779*37748cd8SNickeau * rest of the input looking for one of the missing tokens. 780*37748cd8SNickeau * 781*37748cd8SNickeau * Consider grammar: 782*37748cd8SNickeau * 783*37748cd8SNickeau * a : '[' b ']' 784*37748cd8SNickeau * | '(' b ')' 785*37748cd8SNickeau * ; 786*37748cd8SNickeau * b : c '^' INT ; 787*37748cd8SNickeau * c : ID 788*37748cd8SNickeau * | INT 789*37748cd8SNickeau * ; 790*37748cd8SNickeau * 791*37748cd8SNickeau * At each rule invocation, the set of tokens that could follow 792*37748cd8SNickeau * that rule is pushed on a stack. 
Here are the various 793*37748cd8SNickeau * context-sensitive follow sets: 794*37748cd8SNickeau * 795*37748cd8SNickeau * FOLLOW(b1_in_a) = FIRST(']') = ']' 796*37748cd8SNickeau * FOLLOW(b2_in_a) = FIRST(')') = ')' 797*37748cd8SNickeau * FOLLOW(c_in_b) = FIRST('^') = '^' 798*37748cd8SNickeau * 799*37748cd8SNickeau * Upon erroneous input "[]", the call chain is 800*37748cd8SNickeau * 801*37748cd8SNickeau * a -> b -> c 802*37748cd8SNickeau * 803*37748cd8SNickeau * and, hence, the follow context stack is: 804*37748cd8SNickeau * 805*37748cd8SNickeau * depth | follow set | start of rule execution 806*37748cd8SNickeau * ------|------------|------------------------- 807*37748cd8SNickeau * 0 | <EOF> | a (from main()) 808*37748cd8SNickeau * 1 | ']' | b 809*37748cd8SNickeau * 2 | '^' | c 810*37748cd8SNickeau * 811*37748cd8SNickeau * Notice that ')' is not included, because b would have to have 812*37748cd8SNickeau * been called from a different context in rule a for ')' to be 813*37748cd8SNickeau * included. 814*37748cd8SNickeau * 815*37748cd8SNickeau * For error recovery, we cannot consider FOLLOW(c) 816*37748cd8SNickeau * (context-sensitive or otherwise). We need the combined set of 817*37748cd8SNickeau * all context-sensitive FOLLOW sets--the set of all tokens that 818*37748cd8SNickeau * could follow any reference in the call chain. We need to 819*37748cd8SNickeau * resync to one of those tokens. Note that FOLLOW(c)='^' and if 820*37748cd8SNickeau * we resync'd to that token, we'd consume until EOF. We need to 821*37748cd8SNickeau * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. 822*37748cd8SNickeau * In this case, for input "[]", LA(1) is ']' and in the set, so we would 823*37748cd8SNickeau * not consume anything. After printing an error, rule c would 824*37748cd8SNickeau * return normally. Rule b would not find the required '^' though. 
825*37748cd8SNickeau * At this point, it gets a mismatched token error and throws an 826*37748cd8SNickeau * exception (since LA(1) is not in the viable following token 827*37748cd8SNickeau * set). The rule exception handler tries to recover, but finds 828*37748cd8SNickeau * the same recovery set and doesn't consume anything. Rule b 829*37748cd8SNickeau * exits normally returning to rule a. Now it finds the ']' (and 830*37748cd8SNickeau * with the successful match exits errorRecovery mode). 831*37748cd8SNickeau * 832*37748cd8SNickeau * So, you can see that the parser walks up the call chain looking 833*37748cd8SNickeau * for the token that was a member of the recovery set. 834*37748cd8SNickeau * 835*37748cd8SNickeau * Errors are not generated in errorRecovery mode. 836*37748cd8SNickeau * 837*37748cd8SNickeau * ANTLR's error recovery mechanism is based upon original ideas: 838*37748cd8SNickeau * 839*37748cd8SNickeau * "Algorithms + Data Structures = Programs" by Niklaus Wirth 840*37748cd8SNickeau * 841*37748cd8SNickeau * and 842*37748cd8SNickeau * 843*37748cd8SNickeau * "A note on error recovery in recursive descent parsers": 844*37748cd8SNickeau * http://portal.acm.org/citation.cfm?id=947902.947905 845*37748cd8SNickeau * 846*37748cd8SNickeau * Later, Josef Grosch had some good ideas: 847*37748cd8SNickeau * 848*37748cd8SNickeau * "Efficient and Comfortable Error Recovery in Recursive Descent 849*37748cd8SNickeau * Parsers": 850*37748cd8SNickeau * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip 851*37748cd8SNickeau * 852*37748cd8SNickeau * Like Grosch I implement context-sensitive FOLLOW sets that are combined 853*37748cd8SNickeau * at run-time upon error to avoid overhead during parsing. 
854*37748cd8SNickeau */ 855*37748cd8SNickeau protected function getErrorRecoverySet(Parser $recognizer) : IntervalSet 856*37748cd8SNickeau { 857*37748cd8SNickeau $interpreter = $recognizer->getInterpreter(); 858*37748cd8SNickeau 859*37748cd8SNickeau if ($interpreter === null) { 860*37748cd8SNickeau throw new \RuntimeException('Unexpected null interpreter.'); 861*37748cd8SNickeau } 862*37748cd8SNickeau 863*37748cd8SNickeau $atn = $interpreter->atn; 864*37748cd8SNickeau $ctx = $recognizer->getContext(); 865*37748cd8SNickeau $recoverSet = new IntervalSet(); 866*37748cd8SNickeau 867*37748cd8SNickeau while ($ctx !== null && $ctx->invokingState >= 0) { 868*37748cd8SNickeau // compute what follows who invoked us 869*37748cd8SNickeau /** @var ATNState $invokingState */ 870*37748cd8SNickeau $invokingState = $atn->states[$ctx->invokingState]; 871*37748cd8SNickeau /** @var RuleTransition $rt */ 872*37748cd8SNickeau $rt = $invokingState->getTransition(0); 873*37748cd8SNickeau $follow = $atn->nextTokens($rt->followState); 874*37748cd8SNickeau $recoverSet->addSet($follow); 875*37748cd8SNickeau $ctx = $ctx->getParent(); 876*37748cd8SNickeau } 877*37748cd8SNickeau 878*37748cd8SNickeau $recoverSet->removeOne(Token::EPSILON); 879*37748cd8SNickeau 880*37748cd8SNickeau return $recoverSet; 881*37748cd8SNickeau } 882*37748cd8SNickeau 883*37748cd8SNickeau /** 884*37748cd8SNickeau * Consume tokens until one matches the given token set. 
885*37748cd8SNickeau */ 886*37748cd8SNickeau protected function consumeUntil(Parser $recognizer, IntervalSet $set) : void 887*37748cd8SNickeau { 888*37748cd8SNickeau $inputStream = $recognizer->getInputStream(); 889*37748cd8SNickeau 890*37748cd8SNickeau if ($inputStream === null) { 891*37748cd8SNickeau throw new \RuntimeException('Unexpected null input stream.'); 892*37748cd8SNickeau } 893*37748cd8SNickeau 894*37748cd8SNickeau $ttype = $inputStream->LA(1); 895*37748cd8SNickeau 896*37748cd8SNickeau while ($ttype !== Token::EOF && !$set->contains($ttype)) { 897*37748cd8SNickeau $recognizer->consume(); 898*37748cd8SNickeau $ttype = $inputStream->LA(1); 899*37748cd8SNickeau } 900*37748cd8SNickeau } 901*37748cd8SNickeau} 902