endErrorCondition($recognizer); } /** * This method is called to enter error recovery mode when a recognition * exception is reported. * * @param Parser $recognizer The parser instance. */ protected function beginErrorCondition(Parser $recognizer) : void { $this->errorRecoveryMode = true; } public function inErrorRecoveryMode(Parser $recognizer) : bool { return $this->errorRecoveryMode; } /** * This method is called to leave error recovery mode after recovering from * a recognition exception. */ protected function endErrorCondition(Parser $recognizer) : void { $this->errorRecoveryMode = false; $this->lastErrorStates = null; $this->lastErrorIndex = -1; } /** * {@inheritdoc} * * The default implementation simply calls * {@see DefaultErrorStrategy::endErrorCondition()}. */ public function reportMatch(Parser $recognizer) : void { $this->endErrorCondition($recognizer); } /** * {@inheritdoc} * * The default implementation returns immediately if the handler is already * in error recovery mode. Otherwise, it calls * {@see DefaultErrorStrategy::beginErrorCondition()} and dispatches * the reporting task based on the runtime type of `e` according to * the following table. * * - {@see NoViableAltException}: Dispatches the call to * {@see reportNoViableAlternative} * - {@see InputMismatchException}: Dispatches the call to * {@see reportInputMismatch} * - {@see FailedPredicateException}: Dispatches the call to * {@see reportFailedPredicate} * - All other types: calls {@see Parser#notifyErrorListeners} to report * the exception */ public function reportError(Parser $recognizer, RecognitionException $e) : void { // if we've already reported an error and have not matched a token // yet successfully, don't report any errors. if ($this->inErrorRecoveryMode($recognizer)) { // don't report spurious errors return; } $this->beginErrorCondition($recognizer); if ($e instanceof NoViableAltException) { $this->reportNoViableAlternative($recognizer, $e); } elseif ($e instanceof InputMismatchException) { $this->reportInputMismatch($recognizer, $e); } elseif ($e instanceof FailedPredicateException) { $this->reportFailedPredicate($recognizer, $e); } else { $recognizer->notifyErrorListeners($e->getMessage(), $e->getOffendingToken(), $e); } } /** * {@inheritdoc} * * The default implementation resynchronizes the parser by consuming tokens * until we find one in the resynchronization set--loosely the set of tokens * that can follow the current rule. */ public function recover(Parser $recognizer, RecognitionException $e) : void { $inputStream = $recognizer->getInputStream(); if ($inputStream === null) { throw new \RuntimeException('Unexpected null input stream.'); } if ($this->lastErrorStates !== null && $this->lastErrorIndex === $inputStream->getIndex() && $this->lastErrorStates->contains($recognizer->getState()) ) { // uh oh, another error at same token index and previously-visited // state in ATN; must be a case where LT(1) is in the recovery // token set so nothing got consumed. Consume a single token // at least to prevent an infinite loop; this is a failsafe. $recognizer->consume(); } $this->lastErrorIndex = $inputStream->getIndex(); if ($this->lastErrorStates === null) { $this->lastErrorStates = new IntervalSet(); } $this->lastErrorStates->addOne($recognizer->getState()); $followSet = $this->getErrorRecoverySet($recognizer); $this->consumeUntil($recognizer, $followSet); } /** * The default implementation of {@see ANTLRErrorStrategy::sync()} makes sure * that the current lookahead symbol is consistent with what were expecting * at this point in the ATN. You can call this anytime but ANTLR only * generates code to check before subrules/loops and each iteration. * * Implements Jim Idle's magic sync mechanism in closures and optional * subrules. E.g., * * a : sync ( stuff sync )* ; * sync : {consume to what can follow sync} ; * * At the start of a sub rule upon error, {@see sync} performs single * token deletion, if possible. If it can't do that, it bails on the current * rule and uses the default error recovery, which consumes until the * resynchronization set of the current rule. * * If the sub rule is optional (`(...)?`, `(...)*`, or block * with an empty alternative), then the expected set includes what follows * the subrule. * * During loop iteration, it consumes until it sees a token that can start a * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to * stay in the loop as long as possible. * * ORIGINS * * Previous versions of ANTLR did a poor job of their recovery within loops. * A single mismatch token or missing token would force the parser to bail * out of the entire rules surrounding the loop. So, for rule * * classDef : 'class' ID '{' member* '}' * * input with an extra token between members would force the parser to * consume until it found the next class definition rather than the next * member definition of the current class. * * This functionality cost a little bit of effort because the parser has to * compare token set at the start of the loop and at each iteration. If for * some reason speed is suffering for you, you can turn off this * functionality by simply overriding this method as a blank { }. * * @throws RecognitionException */ public function sync(Parser $recognizer) : void { $interpreter = $recognizer->getInterpreter(); if ($interpreter === null) { throw new \RuntimeException('Unexpected null interpreter.'); } /** @var ATNState $s */ $s = $interpreter->atn->states[$recognizer->getState()]; // If already recovering, don't try to sync if ($this->inErrorRecoveryMode($recognizer)) { return; } $tokens = $recognizer->getInputStream(); if ($tokens === null) { throw new \RuntimeException('Unexpected null input stream.'); } $la = $tokens->LA(1); // try cheaper subset first; might get lucky. seems to shave a wee bit off $nextTokens = $recognizer->getATN()->nextTokens($s); if ($nextTokens->contains($la)) { // We are sure the token matches $this->nextTokensContext = null; $this->nextTokensState = ATNState::INVALID_STATE_NUMBER; return; } if ($nextTokens->contains(Token::EPSILON)) { if ($this->nextTokensContext === null) { // It's possible the next token won't match; information tracked // by sync is restricted for performance. $this->nextTokensContext = $recognizer->getContext(); $this->nextTokensState = $recognizer->getState(); } return; } switch ($s->getStateType()) { case ATNState::BLOCK_START: case ATNState::STAR_BLOCK_START: case ATNState::PLUS_BLOCK_START: case ATNState::STAR_LOOP_ENTRY: // report error and recover if possible if ($this->singleTokenDeletion($recognizer) !== null) { return; } throw new InputMismatchException($recognizer); case ATNState::PLUS_LOOP_BACK: case ATNState::STAR_LOOP_BACK: $this->reportUnwantedToken($recognizer); $expecting = $recognizer->getExpectedTokens(); $whatFollowsLoopIterationOrRule = $expecting->orSet($this->getErrorRecoverySet($recognizer)); $this->consumeUntil($recognizer, $whatFollowsLoopIterationOrRule); break; default: // do nothing if we can't identify the exact kind of ATN state break; } } /** * This is called by {@see DefaultErrorStrategy::reportError()} when * the exception is a {@see NoViableAltException}. * * @param Parser $recognizer The parser instance. * @param NoViableAltException $e The recognition exception. * * @see DefaultErrorStrategy::reportError() */ protected function reportNoViableAlternative(Parser $recognizer, NoViableAltException $e) : void { $tokens = $recognizer->getTokenStream(); $input = ''; if ($tokens !== null) { $startToken = $e->getStartToken(); if ($startToken === null) { throw new \RuntimeException('Unexpected null start token.'); } if ($startToken->getType() === Token::EOF) { $input = ''; } else { $input = $tokens->getTextByTokens($e->getStartToken(), $e->getOffendingToken()); } } $msg = \sprintf('no viable alternative at input %s', $this->escapeWSAndQuote($input)); $recognizer->notifyErrorListeners($msg, $e->getOffendingToken(), $e); } /** * This is called by {@see DefaultErrorStrategy::reportError()} when * the exception is an {@see InputMismatchException}. * * @param Parser $recognizer The parser instance. * @param InputMismatchException $e The recognition exception. * * @see DefaultErrorStrategy::reportError() */ protected function reportInputMismatch(Parser $recognizer, InputMismatchException $e) : void { $expectedTokens = $e->getExpectedTokens(); if ($expectedTokens === null) { throw new \RuntimeException('Unexpected null expected tokens.'); } $msg = \sprintf( 'mismatched input %s expecting %s', $this->getTokenErrorDisplay($e->getOffendingToken()), $expectedTokens->toStringVocabulary($recognizer->getVocabulary()) ); $recognizer->notifyErrorListeners($msg, $e->getOffendingToken(), $e); } /** * This is called by {@see DefaultErrorStrategy::reportError()} when * the exception is a {@see FailedPredicateException}. * * @param Parser $recognizer The parser instance. * @param FailedPredicateException $e The recognition exception. * * @see DefaultErrorStrategy::reportError() */ protected function reportFailedPredicate(Parser $recognizer, FailedPredicateException $e) : void { $msg = \sprintf('rule %s %s', $recognizer->getCurrentRuleName(), $e->getMessage()); $recognizer->notifyErrorListeners($msg, $e->getOffendingToken(), $e); } /** * This method is called to report a syntax error which requires the removal * of a token from the input stream. At the time this method is called, the * erroneous symbol is current `LT(1)` symbol and has not yet been * removed from the input stream. When this method returns, * `$recognizer` is in error recovery mode. * * This method is called when {@see DefaultErrorStrategy::singleTokenDeletion()} * identifies single-token deletion as a viable recovery strategy for * a mismatched input error. * * The default implementation simply returns if the handler is already in * error recovery mode. Otherwise, it calls * {@see DefaultErrorStrategy::beginErrorCondition()} to enter error * recovery mode, followed by calling {@see Parser::notifyErrorListeners}. * * @param Parser $recognizer The parser instance. */ protected function reportUnwantedToken(Parser $recognizer) : void { if ($this->inErrorRecoveryMode($recognizer)) { return; } $this->beginErrorCondition($recognizer); $t = $recognizer->getCurrentToken(); $tokenName = $this->getTokenErrorDisplay($t); $expecting = $this->getExpectedTokens($recognizer); $msg = \sprintf( 'extraneous input %s expecting %s', $tokenName, $expecting->toStringVocabulary($recognizer->getVocabulary()) ); $recognizer->notifyErrorListeners($msg, $t); } /** * This method is called to report a syntax error which requires the * insertion of a missing token into the input stream. At the time this * method is called, the missing token has not yet been inserted. When this * method returns, `$recognizer` is in error recovery mode. * * This method is called when {@see DefaultErrorStrategy::singleTokenInsertion()} * identifies single-token insertion as a viable recovery strategy for * a mismatched input error. * * The default implementation simply returns if the handler is already in * error recovery mode. Otherwise, it calls * {@see DefaultErrorStrategy::beginErrorCondition()} to enter error * recovery mode, followed by calling {@see Parser::notifyErrorListeners()}. * * @param Parser $recognizer the parser instance */ protected function reportMissingToken(Parser $recognizer) : void { if ($this->inErrorRecoveryMode($recognizer)) { return; } $this->beginErrorCondition($recognizer); $t = $recognizer->getCurrentToken(); $expecting = $this->getExpectedTokens($recognizer); $msg = \sprintf( 'missing %s at %s', $expecting->toStringVocabulary($recognizer->getVocabulary()), $this->getTokenErrorDisplay($t) ); $recognizer->notifyErrorListeners($msg, $t); } /** * {@inheritdoc} * * The default implementation attempts to recover from the mismatched input * by using single token insertion and deletion as described below. If the * recovery attempt fails, this method throws an * {@see InputMismatchException}. * * EXTRA TOKEN (single token deletion) * * `LA(1)` is not what we are looking for. If `LA(2)` has the * right token, however, then assume `LA(1)` is some extra spurious * token and delete it. Then consume and return the next token (which was * the `LA(2)` token) as the successful result of the match operation. * * This recovery strategy is implemented by * {@see DefaultErrorStrategy::singleTokenDeletion()}. * * MISSING TOKEN (single token insertion) * * If current token (at `LA(1)`) is consistent with what could come * after the expected `LA(1)` token, then assume the token is missing * and use the parser's {@see TokenFactory} to create it on the fly. The * "insertion" is performed by returning the created token as the successful * result of the match operation. * * This recovery strategy is implemented by * {@see DefaultErrorStrategy::singleTokenInsertion()}. * * EXAMPLE * * For example, Input `i=(3;` is clearly missing the `')'`. When * the parser returns from the nested call to `expr`, it will have * call chain: * * stat → expr → atom * * and it will be trying to match the `')'` at this point in the * derivation: * * => ID '=' '(' INT ')' ('+' atom)* ';' * ^ * * The attempt to match `')'` will fail when it sees `';'` and call * {@see DefaultErrorStrategy::recoverInline()}. To recover, it sees that * `LA(1)==';'` is in the set of tokens that can follow the `')'` token * reference in rule `atom`. It can assume that you forgot the `')'`. * * @throws RecognitionException */ public function recoverInline(Parser $recognizer) : Token { // SINGLE TOKEN DELETION $matchedSymbol = $this->singleTokenDeletion($recognizer); if ($matchedSymbol !== null) { // we have deleted the extra token. // now, move past ttype token as if all were ok $recognizer->consume(); return $matchedSymbol; } // SINGLE TOKEN INSERTION if ($this->singleTokenInsertion($recognizer)) { return $this->getMissingSymbol($recognizer); } // even that didn't work; must throw the exception if ($this->nextTokensContext === null) { throw new InputMismatchException($recognizer); } throw new InputMismatchException($recognizer, $this->nextTokensState, $this->nextTokensContext); } /** * This method implements the single-token insertion inline error recovery * strategy. It is called by {@see DefaultErrorStrategy::recoverInline()} * if the single-token deletion strategy fails to recover from the mismatched * input. If this method returns `true`, `$recognizer` will be in error * recovery mode. * * This method determines whether or not single-token insertion is viable by * checking if the `LA(1)` input symbol could be successfully matched * if it were instead the `LA(2)` symbol. If this method returns * `true`, the caller is responsible for creating and inserting a * token with the correct type to produce this behavior. * * @param Parser $recognizer The parser instance. * * @return bool `true` If single-token insertion is a viable recovery * strategy for the current mismatched input, otherwise `false`. */ protected function singleTokenInsertion(Parser $recognizer) : bool { $stream = $recognizer->getInputStream(); if ($stream === null) { throw new \RuntimeException('Unexpected null input stream.'); } $interpreter = $recognizer->getInterpreter(); if ($interpreter === null) { throw new \RuntimeException('Unexpected null interpreter.'); } $currentSymbolType = $stream->LA(1); // if current token is consistent with what could come after current // ATN state, then we know we're missing a token; error recovery // is free to conjure up and insert the missing token $atn = $interpreter->atn; /** @var ATNState $currentState */ $currentState = $atn->states[$recognizer->getState()]; $next = $currentState->getTransition(0)->target; $expectingAtLL2 = $atn->nextTokensInContext($next, $recognizer->getContext()); if ($expectingAtLL2->contains($currentSymbolType)) { $this->reportMissingToken($recognizer); return true; } return false; } /** * This method implements the single-token deletion inline error recovery * strategy. It is called by {@see DefaultErrorStrategy::recoverInline()} * to attempt to recover from mismatched input. If this method returns null, * the parser and error handler state will not have changed. If this method * returns non-null, `$recognizer` will _not_ be in error recovery mode * since the returned token was a successful match. * * If the single-token deletion is successful, this method calls * {@see DefaultErrorStrategy::reportUnwantedToken()} to report the error, * followed by {@see Parser::consume()} to actually "delete" the extraneous * token. Then, before returning {@see DefaultErrorStrategy::reportMatch()} * is called to signal a successful match. * * @param Parser $recognizer The parser instance. * * @return Token The successfully matched {@see Token} instance if * single-token deletion successfully recovers from * the mismatched input, otherwise `null`. */ protected function singleTokenDeletion(Parser $recognizer) : ?Token { $inputStream = $recognizer->getInputStream(); if ($inputStream === null) { throw new \RuntimeException('Unexpected null input stream.'); } $nextTokenType = $inputStream->LA(2); $expecting = $this->getExpectedTokens($recognizer); if ($expecting->contains($nextTokenType)) { $this->reportUnwantedToken($recognizer); $recognizer->consume(); // simply delete extra token // we want to return the token we're actually matching $matchedSymbol = $recognizer->getCurrentToken(); $this->reportMatch($recognizer); // we know current token is correct return $matchedSymbol; } return null; } /** Conjure up a missing token during error recovery. * * The recognizer attempts to recover from single missing * symbols. But, actions might refer to that missing symbol. * For example, x=ID {f($x);}. The action clearly assumes * that there has been an identifier matched previously and that * $x points at that token. If that token is missing, but * the next token in the stream is what we want we assume that * this token is missing and we keep going. Because we * have to return some token to replace the missing token, * we have to conjure one up. This method gives the user control * over the tokens returned for missing tokens. Mostly, * you will want to create something special for identifier * tokens. For literals such as '{' and ',', the default * action in the parser or tree parser works. It simply creates * a CommonToken of the appropriate type. The text will be the token. * If you change what tokens must be created by the lexer, * override this method to create the appropriate tokens. */ protected function getMissingSymbol(Parser $recognizer) : Token { $currentSymbol = $recognizer->getCurrentToken(); if ($currentSymbol === null) { throw new \RuntimeException('Unexpected null current token.'); } $inputStream = $recognizer->getInputStream(); if ($inputStream === null) { throw new \RuntimeException('Unexpected null input stream.'); } $tokenSource = $currentSymbol->getTokenSource(); if ($tokenSource === null) { throw new \RuntimeException('Unexpected null token source.'); } $expecting = $this->getExpectedTokens($recognizer); $expectedTokenType = Token::INVALID_TYPE; if (!$expecting->isNull()) { $expectedTokenType = $expecting->getMinElement(); // get any element } if ($expectedTokenType === Token::EOF) { $tokenText = ''; } else { $tokenText = \sprintf('', $recognizer->getVocabulary()->getDisplayName($expectedTokenType)); } $current = $currentSymbol; $lookback = $inputStream->LT(-1); if ($current->getType() === Token::EOF && $lookback !== null) { $current = $lookback; } return $recognizer->getTokenFactory()->createEx( new Pair( $tokenSource, $tokenSource->getInputStream() ), $expectedTokenType, $tokenText, Token::DEFAULT_CHANNEL, -1, -1, $current->getLine(), $current->getCharPositionInLine() ); } protected function getExpectedTokens(Parser $recognizer) : IntervalSet { return $recognizer->getExpectedTokens(); } /** * How should a token be displayed in an error message? The default * is to display just the text, but during development you might * want to have a lot of information spit out. Override in that case * to use (string) (which, for CommonToken, dumps everything about * the token). This is better than forcing you to override a method in * your token objects because you don't have to go modify your lexer * so that it creates a new Java type. */ protected function getTokenErrorDisplay(?Token $t) : string { if ($t === null) { return ''; } $s = $this->getSymbolText($t); if ($s === null) { if ($this->getSymbolType($t) === Token::EOF) { $s = ''; } else { $s = '<' . $this->getSymbolType($t) . '>'; } } return $this->escapeWSAndQuote($s); } protected function getSymbolText(Token $symbol) : ?string { return $symbol->getText(); } protected function getSymbolType(Token $symbol) : int { return $symbol->getType(); } protected function escapeWSAndQuote(string $s) : string { return "'" . StringUtils::escapeWhitespace($s) . "'"; } /** * Compute the error recovery set for the current rule. During * rule invocation, the parser pushes the set of tokens that can * follow that rule reference on the stack; this amounts to * computing FIRST of what follows the rule reference in the * enclosing rule. See LinearApproximator::FIRST. * This local follow set only includes tokens * from within the rule; i.e., the FIRST computation done by * ANTLR stops at the end of a rule. * * EXAMPLE * * When you find a "no viable alt exception", the input is not * consistent with any of the alternatives for rule r. The best * thing to do is to consume tokens until you see something that * can legally follow a call to r *or* any rule that called r. * You don't want the exact set of viable next tokens because the * input might just be missing a token--you might consume the * rest of the input looking for one of the missing tokens. * * Consider grammar: * * a : '[' b ']' * | '(' b ')' * ; * b : c '^' INT ; * c : ID * | INT * ; * * At each rule invocation, the set of tokens that could follow * that rule is pushed on a stack. Here are the various * context-sensitive follow sets: * * FOLLOW(b1_in_a) = FIRST(']') = ']' * FOLLOW(b2_in_a) = FIRST(')') = ')' * FOLLOW(c_in_b) = FIRST('^') = '^' * * Upon erroneous input "[]", the call chain is * * a -> b -> c * * and, hence, the follow context stack is: * * depth | follow set | start of rule execution * ------|------------|------------------------- * 0 | | a (from main()) * 1 | ']' | b * 2 | '^' | c * * Notice that ')' is not included, because b would have to have * been called from a different context in rule a for ')' to be * included. * * For error recovery, we cannot consider FOLLOW(c) * (context-sensitive or otherwise). We need the combined set of * all context-sensitive FOLLOW sets--the set of all tokens that * could follow any reference in the call chain. We need to * resync to one of those tokens. Note that FOLLOW(c)='^' and if * we resync'd to that token, we'd consume until EOF. We need to * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. * In this case, for input "[]", LA(1) is ']' and in the set, so we would * not consume anything. After printing an error, rule c would * return normally. Rule b would not find the required '^' though. * At this point, it gets a mismatched token error and throws an * exception (since LA(1) is not in the viable following token * set). The rule exception handler tries to recover, but finds * the same recovery set and doesn't consume anything. Rule b * exits normally returning to rule a. Now it finds the ']' (and * with the successful match exits errorRecovery mode). * * So, you can see that the parser walks up the call chain looking * for the token that was a member of the recovery set. * * Errors are not generated in errorRecovery mode. * * ANTLR's error recovery mechanism is based upon original ideas: * * "Algorithms + Data Structures = Programs" by Niklaus Wirth * * and * * "A note on error recovery in recursive descent parsers": * http://portal.acm.org/citation.cfm?id=947902.947905 * * Later, Josef Grosch had some good ideas: * * "Efficient and Comfortable Error Recovery in Recursive Descent * Parsers": * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip * * Like Grosch I implement context-sensitive FOLLOW sets that are combined * at run-time upon error to avoid overhead during parsing. */ protected function getErrorRecoverySet(Parser $recognizer) : IntervalSet { $interpreter = $recognizer->getInterpreter(); if ($interpreter === null) { throw new \RuntimeException('Unexpected null interpreter.'); } $atn = $interpreter->atn; $ctx = $recognizer->getContext(); $recoverSet = new IntervalSet(); while ($ctx !== null && $ctx->invokingState >= 0) { // compute what follows who invoked us /** @var ATNState $invokingState */ $invokingState = $atn->states[$ctx->invokingState]; /** @var RuleTransition $rt */ $rt = $invokingState->getTransition(0); $follow = $atn->nextTokens($rt->followState); $recoverSet->addSet($follow); $ctx = $ctx->getParent(); } $recoverSet->removeOne(Token::EPSILON); return $recoverSet; } /** * Consume tokens until one matches the given token set. */ protected function consumeUntil(Parser $recognizer, IntervalSet $set) : void { $inputStream = $recognizer->getInputStream(); if ($inputStream === null) { throw new \RuntimeException('Unexpected null input stream.'); } $ttype = $inputStream->LA(1); while ($ttype !== Token::EOF && !$set->contains($ttype)) { $recognizer->consume(); $ttype = $inputStream->LA(1); } } }