1<?php
2
3declare(strict_types=1);
4
5namespace Antlr\Antlr4\Runtime\Error;
6
7use Antlr\Antlr4\Runtime\Atn\States\ATNState;
8use Antlr\Antlr4\Runtime\Atn\Transitions\RuleTransition;
9use Antlr\Antlr4\Runtime\Error\Exceptions\FailedPredicateException;
10use Antlr\Antlr4\Runtime\Error\Exceptions\InputMismatchException;
11use Antlr\Antlr4\Runtime\Error\Exceptions\NoViableAltException;
12use Antlr\Antlr4\Runtime\Error\Exceptions\RecognitionException;
13use Antlr\Antlr4\Runtime\IntervalSet;
14use Antlr\Antlr4\Runtime\Parser;
15use Antlr\Antlr4\Runtime\ParserRuleContext;
16use Antlr\Antlr4\Runtime\Token;
17use Antlr\Antlr4\Runtime\Utils\Pair;
18use Antlr\Antlr4\Runtime\Utils\StringUtils;
19
20/**
21 * This is the default implementation of {@see ANTLRErrorStrategy} used for
22 * error reporting and recovery in ANTLR parsers.
23 */
24class DefaultErrorStrategy implements ANTLRErrorStrategy
25{
26    /**
27     * Indicates whether the error strategy is currently "recovering from an
28     * error". This is used to suppress reporting multiple error messages while
29     * attempting to recover from a detected syntax error.
30     *
31     * @see DefaultErrorStrategy::inErrorRecoveryMode()
32     *
33     * @var bool
34     */
35    protected $errorRecoveryMode = false;
36
37    /** The index into the input stream where the last error occurred.
38     *  This is used to prevent infinite loops where an error is found
39     *  but no token is consumed during recovery...another error is found,
40     *  ad nauseum. This is a failsafe mechanism to guarantee that at least
41     *  one token/tree node is consumed for two errors.
42     *
43     * @var int
44     */
45    protected $lastErrorIndex = -1;
46
47    /** @var IntervalSet|null */
48    protected $lastErrorStates;
49
50    /**
51     * This field is used to propagate information about the lookahead following
52     * the previous match. Since prediction prefers completing the current rule
53     * to error recovery efforts, error reporting may occur later than the
54     * original point where it was discoverable. The original context is used to
55     * compute the true expected sets as though the reporting occurred as early
56     * as possible.
57     *
58     * @var ParserRuleContext|null
59     */
60    protected $nextTokensContext;
61
62    /**
63     * @see DefaultErrorStrategy::$nextTokensContext
64     *
65     * @var int|null
66     */
67    protected $nextTokensState;
68
69    /**
70     * {@inheritdoc}
71     *
72     * The default implementation simply calls
73     * {@see DefaultErrorStrategy::endErrorCondition()} to ensure that
74     * the handler is not in error recovery mode.
75     */
76    public function reset(Parser $recognizer) : void
77    {
78        $this->endErrorCondition($recognizer);
79    }
80
81    /**
82     * This method is called to enter error recovery mode when a recognition
83     * exception is reported.
84     *
85     * @param Parser $recognizer The parser instance.
86     */
87    protected function beginErrorCondition(Parser $recognizer) : void
88    {
89        $this->errorRecoveryMode = true;
90    }
91
92    public function inErrorRecoveryMode(Parser $recognizer) : bool
93    {
94        return $this->errorRecoveryMode;
95    }
96
97    /**
98     * This method is called to leave error recovery mode after recovering from
99     * a recognition exception.
100     */
101    protected function endErrorCondition(Parser $recognizer) : void
102    {
103        $this->errorRecoveryMode = false;
104        $this->lastErrorStates = null;
105        $this->lastErrorIndex = -1;
106    }
107
108    /**
109     * {@inheritdoc}
110     *
111     * The default implementation simply calls
112     * {@see DefaultErrorStrategy::endErrorCondition()}.
113     */
114    public function reportMatch(Parser $recognizer) : void
115    {
116        $this->endErrorCondition($recognizer);
117    }
118
119    /**
120     * {@inheritdoc}
121     *
122     * The default implementation returns immediately if the handler is already
123     * in error recovery mode. Otherwise, it calls
124     * {@see DefaultErrorStrategy::beginErrorCondition()} and dispatches
125     * the reporting task based on the runtime type of `e` according to
126     * the following table.
127     *
128     * - {@see NoViableAltException}: Dispatches the call to
129     * {@see reportNoViableAlternative}
130     * - {@see InputMismatchException}: Dispatches the call to
131     * {@see reportInputMismatch}
132     * - {@see FailedPredicateException}: Dispatches the call to
133     * {@see reportFailedPredicate}
134     * - All other types: calls {@see Parser#notifyErrorListeners} to report
135     * the exception
136     */
137    public function reportError(Parser $recognizer, RecognitionException $e) : void
138    {
139        // if we've already reported an error and have not matched a token
140        // yet successfully, don't report any errors.
141        if ($this->inErrorRecoveryMode($recognizer)) {
142            // don't report spurious errors
143            return;
144        }
145
146        $this->beginErrorCondition($recognizer);
147
148        if ($e instanceof NoViableAltException) {
149            $this->reportNoViableAlternative($recognizer, $e);
150        } elseif ($e instanceof InputMismatchException) {
151            $this->reportInputMismatch($recognizer, $e);
152        } elseif ($e instanceof FailedPredicateException) {
153            $this->reportFailedPredicate($recognizer, $e);
154        } else {
155            $recognizer->notifyErrorListeners($e->getMessage(), $e->getOffendingToken(), $e);
156        }
157    }
158
159    /**
160     * {@inheritdoc}
161     *
162     * The default implementation resynchronizes the parser by consuming tokens
163     * until we find one in the resynchronization set--loosely the set of tokens
164     * that can follow the current rule.
165     */
166    public function recover(Parser $recognizer, RecognitionException $e) : void
167    {
168        $inputStream = $recognizer->getInputStream();
169
170        if ($inputStream === null) {
171            throw new \RuntimeException('Unexpected null input stream.');
172        }
173
174        if ($this->lastErrorStates !== null
175            && $this->lastErrorIndex === $inputStream->getIndex()
176            && $this->lastErrorStates->contains($recognizer->getState())
177        ) {
178            // uh oh, another error at same token index and previously-visited
179            // state in ATN; must be a case where LT(1) is in the recovery
180            // token set so nothing got consumed. Consume a single token
181            // at least to prevent an infinite loop; this is a failsafe.
182            $recognizer->consume();
183        }
184
185        $this->lastErrorIndex = $inputStream->getIndex();
186
187        if ($this->lastErrorStates === null) {
188            $this->lastErrorStates = new IntervalSet();
189        }
190
191        $this->lastErrorStates->addOne($recognizer->getState());
192
193        $followSet = $this->getErrorRecoverySet($recognizer);
194
195        $this->consumeUntil($recognizer, $followSet);
196    }
197
198    /**
199     * The default implementation of {@see ANTLRErrorStrategy::sync()} makes sure
200     * that the current lookahead symbol is consistent with what were expecting
201     * at this point in the ATN. You can call this anytime but ANTLR only
202     * generates code to check before subrules/loops and each iteration.
203     *
204     * Implements Jim Idle's magic sync mechanism in closures and optional
205     * subrules. E.g.,
206     *
207     *     a : sync ( stuff sync )* ;
208     *     sync : {consume to what can follow sync} ;
209     *
210     * At the start of a sub rule upon error, {@see sync} performs single
211     * token deletion, if possible. If it can't do that, it bails on the current
212     * rule and uses the default error recovery, which consumes until the
213     * resynchronization set of the current rule.
214     *
215     * If the sub rule is optional (`(...)?`, `(...)*`, or block
216     * with an empty alternative), then the expected set includes what follows
217     * the subrule.
218     *
219     * During loop iteration, it consumes until it sees a token that can start a
220     * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
221     * stay in the loop as long as possible.
222     *
223     * ORIGINS
224     *
225     * Previous versions of ANTLR did a poor job of their recovery within loops.
226     * A single mismatch token or missing token would force the parser to bail
227     * out of the entire rules surrounding the loop. So, for rule
228     *
229     *     classDef : 'class' ID '{' member* '}'
230     *
231     * input with an extra token between members would force the parser to
232     * consume until it found the next class definition rather than the next
233     * member definition of the current class.
234     *
235     * This functionality cost a little bit of effort because the parser has to
236     * compare token set at the start of the loop and at each iteration. If for
237     * some reason speed is suffering for you, you can turn off this
238     * functionality by simply overriding this method as a blank { }.
239     *
240     * @throws RecognitionException
241     */
242    public function sync(Parser $recognizer) : void
243    {
244        $interpreter = $recognizer->getInterpreter();
245
246        if ($interpreter === null) {
247            throw new \RuntimeException('Unexpected null interpreter.');
248        }
249
250        /** @var ATNState $s */
251        $s = $interpreter->atn->states[$recognizer->getState()];
252
253        // If already recovering, don't try to sync
254        if ($this->inErrorRecoveryMode($recognizer)) {
255            return;
256        }
257
258        $tokens = $recognizer->getInputStream();
259
260        if ($tokens === null) {
261            throw new \RuntimeException('Unexpected null input stream.');
262        }
263
264        $la = $tokens->LA(1);
265
266        // try cheaper subset first; might get lucky. seems to shave a wee bit off
267        $nextTokens = $recognizer->getATN()->nextTokens($s);
268
269        if ($nextTokens->contains($la)) {
270            // We are sure the token matches
271            $this->nextTokensContext = null;
272            $this->nextTokensState = ATNState::INVALID_STATE_NUMBER;
273
274            return;
275        }
276
277        if ($nextTokens->contains(Token::EPSILON)) {
278            if ($this->nextTokensContext === null) {
279                // It's possible the next token won't match; information tracked
280                // by sync is restricted for performance.
281                $this->nextTokensContext = $recognizer->getContext();
282                $this->nextTokensState = $recognizer->getState();
283            }
284            return;
285        }
286
287        switch ($s->getStateType()) {
288            case ATNState::BLOCK_START:
289            case ATNState::STAR_BLOCK_START:
290            case ATNState::PLUS_BLOCK_START:
291            case ATNState::STAR_LOOP_ENTRY:
292                // report error and recover if possible
293                if ($this->singleTokenDeletion($recognizer) !== null) {
294                    return;
295                }
296
297                throw new InputMismatchException($recognizer);
298
299            case ATNState::PLUS_LOOP_BACK:
300            case ATNState::STAR_LOOP_BACK:
301                $this->reportUnwantedToken($recognizer);
302                $expecting = $recognizer->getExpectedTokens();
303                $whatFollowsLoopIterationOrRule = $expecting->orSet($this->getErrorRecoverySet($recognizer));
304                $this->consumeUntil($recognizer, $whatFollowsLoopIterationOrRule);
305                break;
306
307            default:
308                // do nothing if we can't identify the exact kind of ATN state
309                break;
310        }
311    }
312
313    /**
314     * This is called by {@see DefaultErrorStrategy::reportError()} when
315     * the exception is a {@see NoViableAltException}.
316     *
317     * @param Parser               $recognizer The parser instance.
318     * @param NoViableAltException $e          The recognition exception.
319     *
320     * @see DefaultErrorStrategy::reportError()
321     */
322    protected function reportNoViableAlternative(Parser $recognizer, NoViableAltException $e) : void
323    {
324        $tokens = $recognizer->getTokenStream();
325
326        $input = '<unknown input>';
327
328        if ($tokens !== null) {
329            $startToken = $e->getStartToken();
330
331            if ($startToken === null) {
332                throw new \RuntimeException('Unexpected null start token.');
333            }
334
335            if ($startToken->getType() === Token::EOF) {
336                $input = '<EOF>';
337            } else {
338                $input = $tokens->getTextByTokens($e->getStartToken(), $e->getOffendingToken());
339            }
340        }
341
342        $msg = \sprintf('no viable alternative at input %s', $this->escapeWSAndQuote($input));
343
344        $recognizer->notifyErrorListeners($msg, $e->getOffendingToken(), $e);
345    }
346
347    /**
348     * This is called by {@see DefaultErrorStrategy::reportError()} when
349     * the exception is an {@see InputMismatchException}.
350     *
351     * @param Parser                 $recognizer The parser instance.
352     * @param InputMismatchException $e          The recognition exception.
353     *
354     * @see DefaultErrorStrategy::reportError()
355     */
356    protected function reportInputMismatch(Parser $recognizer, InputMismatchException $e) : void
357    {
358        $expectedTokens = $e->getExpectedTokens();
359
360        if ($expectedTokens === null) {
361            throw new \RuntimeException('Unexpected null expected tokens.');
362        }
363
364        $msg = \sprintf(
365            'mismatched input %s expecting %s',
366            $this->getTokenErrorDisplay($e->getOffendingToken()),
367            $expectedTokens->toStringVocabulary($recognizer->getVocabulary())
368        );
369
370        $recognizer->notifyErrorListeners($msg, $e->getOffendingToken(), $e);
371    }
372
373    /**
374     * This is called by {@see DefaultErrorStrategy::reportError()} when
375     * the exception is a {@see FailedPredicateException}.
376     *
377     * @param Parser                   $recognizer The parser instance.
378     * @param FailedPredicateException $e          The recognition exception.
379     *
380     * @see DefaultErrorStrategy::reportError()
381     */
382    protected function reportFailedPredicate(Parser $recognizer, FailedPredicateException $e) : void
383    {
384        $msg = \sprintf('rule %s %s', $recognizer->getCurrentRuleName(), $e->getMessage());
385
386        $recognizer->notifyErrorListeners($msg, $e->getOffendingToken(), $e);
387    }
388
389    /**
390     * This method is called to report a syntax error which requires the removal
391     * of a token from the input stream. At the time this method is called, the
392     * erroneous symbol is current `LT(1)` symbol and has not yet been
393     * removed from the input stream. When this method returns,
394     * `$recognizer` is in error recovery mode.
395     *
396     * This method is called when {@see DefaultErrorStrategy::singleTokenDeletion()}
397     * identifies single-token deletion as a viable recovery strategy for
398     * a mismatched input error.
399     *
400     * The default implementation simply returns if the handler is already in
401     * error recovery mode. Otherwise, it calls
402     * {@see DefaultErrorStrategy::beginErrorCondition()} to enter error
403     * recovery mode, followed by calling {@see Parser::notifyErrorListeners}.
404     *
405     * @param Parser $recognizer The parser instance.
406     */
407    protected function reportUnwantedToken(Parser $recognizer) : void
408    {
409        if ($this->inErrorRecoveryMode($recognizer)) {
410            return;
411        }
412
413        $this->beginErrorCondition($recognizer);
414
415        $t = $recognizer->getCurrentToken();
416        $tokenName = $this->getTokenErrorDisplay($t);
417        $expecting = $this->getExpectedTokens($recognizer);
418
419        $msg = \sprintf(
420            'extraneous input %s expecting %s',
421            $tokenName,
422            $expecting->toStringVocabulary($recognizer->getVocabulary())
423        );
424
425        $recognizer->notifyErrorListeners($msg, $t);
426    }
427
428    /**
429     * This method is called to report a syntax error which requires the
430     * insertion of a missing token into the input stream. At the time this
431     * method is called, the missing token has not yet been inserted. When this
432     * method returns, `$recognizer` is in error recovery mode.
433     *
434     * This method is called when {@see DefaultErrorStrategy::singleTokenInsertion()}
435     * identifies single-token insertion as a viable recovery strategy for
436     * a mismatched input error.
437     *
438     * The default implementation simply returns if the handler is already in
439     * error recovery mode. Otherwise, it calls
440     * {@see DefaultErrorStrategy::beginErrorCondition()} to enter error
441     * recovery mode, followed by calling {@see Parser::notifyErrorListeners()}.
442     *
443     * @param Parser $recognizer the parser instance
444     */
445    protected function reportMissingToken(Parser $recognizer) : void
446    {
447        if ($this->inErrorRecoveryMode($recognizer)) {
448            return;
449        }
450
451        $this->beginErrorCondition($recognizer);
452
453        $t = $recognizer->getCurrentToken();
454        $expecting = $this->getExpectedTokens($recognizer);
455
456        $msg = \sprintf(
457            'missing %s at %s',
458            $expecting->toStringVocabulary($recognizer->getVocabulary()),
459            $this->getTokenErrorDisplay($t)
460        );
461
462        $recognizer->notifyErrorListeners($msg, $t);
463    }
464
465    /**
466     * {@inheritdoc}
467     *
468     * The default implementation attempts to recover from the mismatched input
469     * by using single token insertion and deletion as described below. If the
470     * recovery attempt fails, this method throws an
471     * {@see InputMismatchException}.
472     *
473     * EXTRA TOKEN (single token deletion)
474     *
475     * `LA(1)` is not what we are looking for. If `LA(2)` has the
476     * right token, however, then assume `LA(1)` is some extra spurious
477     * token and delete it. Then consume and return the next token (which was
478     * the `LA(2)` token) as the successful result of the match operation.
479     *
480     * This recovery strategy is implemented by
481     * {@see DefaultErrorStrategy::singleTokenDeletion()}.
482     *
483     * MISSING TOKEN (single token insertion)
484     *
485     * If current token (at `LA(1)`) is consistent with what could come
486     * after the expected `LA(1)` token, then assume the token is missing
487     * and use the parser's {@see TokenFactory} to create it on the fly. The
488     * "insertion" is performed by returning the created token as the successful
489     * result of the match operation.
490     *
491     * This recovery strategy is implemented by
492     * {@see DefaultErrorStrategy::singleTokenInsertion()}.
493     *
494     * EXAMPLE
495     *
496     * For example, Input `i=(3;` is clearly missing the `')'`. When
497     * the parser returns from the nested call to `expr`, it will have
498     * call chain:
499     *
500     *     stat &rarr; expr &rarr; atom
501     *
502     * and it will be trying to match the `')'` at this point in the
503     * derivation:
504     *
505     *     =&gt; ID '=' '(' INT ')' ('+' atom)* ';'
506     *                        ^
507     *
508     * The attempt to match `')'` will fail when it sees `';'` and call
509     * {@see DefaultErrorStrategy::recoverInline()}. To recover, it sees that
510     * `LA(1)==';'` is in the set of tokens that can follow the `')'` token
511     * reference in rule `atom`. It can assume that you forgot the `')'`.
512     *
513     * @throws RecognitionException
514     */
515    public function recoverInline(Parser $recognizer) : Token
516    {
517        // SINGLE TOKEN DELETION
518        $matchedSymbol = $this->singleTokenDeletion($recognizer);
519
520        if ($matchedSymbol !== null) {
521            // we have deleted the extra token.
522            // now, move past ttype token as if all were ok
523            $recognizer->consume();
524
525            return $matchedSymbol;
526        }
527
528        // SINGLE TOKEN INSERTION
529        if ($this->singleTokenInsertion($recognizer)) {
530            return $this->getMissingSymbol($recognizer);
531        }
532
533        // even that didn't work; must throw the exception
534        if ($this->nextTokensContext === null) {
535            throw new InputMismatchException($recognizer);
536        }
537
538        throw new InputMismatchException($recognizer, $this->nextTokensState, $this->nextTokensContext);
539    }
540
541    /**
542     * This method implements the single-token insertion inline error recovery
543     * strategy. It is called by {@see DefaultErrorStrategy::recoverInline()}
544     * if the single-token deletion strategy fails to recover from the mismatched
545     * input. If this method returns `true`, `$recognizer` will be in error
546     * recovery mode.
547     *
548     * This method determines whether or not single-token insertion is viable by
549     * checking if the `LA(1)` input symbol could be successfully matched
550     * if it were instead the `LA(2)` symbol. If this method returns
551     * `true`, the caller is responsible for creating and inserting a
552     * token with the correct type to produce this behavior.
553     *
554     * @param Parser $recognizer The parser instance.
555     *
556     * @return bool `true` If single-token insertion is a viable recovery
557     *              strategy for the current mismatched input, otherwise `false`.
558     */
559    protected function singleTokenInsertion(Parser $recognizer) : bool
560    {
561        $stream = $recognizer->getInputStream();
562
563        if ($stream === null) {
564            throw new \RuntimeException('Unexpected null input stream.');
565        }
566
567        $interpreter = $recognizer->getInterpreter();
568
569        if ($interpreter === null) {
570            throw new \RuntimeException('Unexpected null interpreter.');
571        }
572
573        $currentSymbolType = $stream->LA(1);
574
575        // if current token is consistent with what could come after current
576        // ATN state, then we know we're missing a token; error recovery
577        // is free to conjure up and insert the missing token
578
579        $atn = $interpreter->atn;
580        /** @var ATNState $currentState */
581        $currentState = $atn->states[$recognizer->getState()];
582        $next = $currentState->getTransition(0)->target;
583        $expectingAtLL2 = $atn->nextTokensInContext($next, $recognizer->getContext());
584
585        if ($expectingAtLL2->contains($currentSymbolType)) {
586            $this->reportMissingToken($recognizer);
587
588            return true;
589        }
590
591        return false;
592    }
593
594    /**
595     * This method implements the single-token deletion inline error recovery
596     * strategy. It is called by {@see DefaultErrorStrategy::recoverInline()}
597     * to attempt to recover from mismatched input. If this method returns null,
598     * the parser and error handler state will not have changed. If this method
599     * returns non-null, `$recognizer` will _not_ be in error recovery mode
600     * since the returned token was a successful match.
601     *
602     * If the single-token deletion is successful, this method calls
603     * {@see DefaultErrorStrategy::reportUnwantedToken()} to report the error,
604     * followed by {@see Parser::consume()} to actually "delete" the extraneous
605     * token. Then, before returning {@see DefaultErrorStrategy::reportMatch()}
606     * is called to signal a successful match.
607     *
608     * @param Parser $recognizer The parser instance.
609     *
610     * @return Token The successfully matched {@see Token} instance if
611     *               single-token deletion successfully recovers from
612     *               the mismatched input, otherwise `null`.
613     */
614    protected function singleTokenDeletion(Parser $recognizer) : ?Token
615    {
616        $inputStream = $recognizer->getInputStream();
617
618        if ($inputStream === null) {
619            throw new \RuntimeException('Unexpected null input stream.');
620        }
621
622        $nextTokenType = $inputStream->LA(2);
623        $expecting = $this->getExpectedTokens($recognizer);
624
625        if ($expecting->contains($nextTokenType)) {
626            $this->reportUnwantedToken($recognizer);
627            $recognizer->consume(); // simply delete extra token
628            // we want to return the token we're actually matching
629            $matchedSymbol = $recognizer->getCurrentToken();
630            $this->reportMatch($recognizer);  // we know current token is correct
631
632            return $matchedSymbol;
633        }
634
635        return null;
636    }
637
638    /** Conjure up a missing token during error recovery.
639     *
640     *  The recognizer attempts to recover from single missing
641     *  symbols. But, actions might refer to that missing symbol.
642     *  For example, x=ID {f($x);}. The action clearly assumes
643     *  that there has been an identifier matched previously and that
644     *  $x points at that token. If that token is missing, but
645     *  the next token in the stream is what we want we assume that
646     *  this token is missing and we keep going. Because we
647     *  have to return some token to replace the missing token,
648     *  we have to conjure one up. This method gives the user control
649     *  over the tokens returned for missing tokens. Mostly,
650     *  you will want to create something special for identifier
651     *  tokens. For literals such as '{' and ',', the default
652     *  action in the parser or tree parser works. It simply creates
653     *  a CommonToken of the appropriate type. The text will be the token.
654     *  If you change what tokens must be created by the lexer,
655     *  override this method to create the appropriate tokens.
656     */
657    protected function getMissingSymbol(Parser $recognizer) : Token
658    {
659        $currentSymbol = $recognizer->getCurrentToken();
660
661        if ($currentSymbol === null) {
662            throw new \RuntimeException('Unexpected null current token.');
663        }
664
665        $inputStream = $recognizer->getInputStream();
666
667        if ($inputStream === null) {
668            throw new \RuntimeException('Unexpected null input stream.');
669        }
670
671        $tokenSource = $currentSymbol->getTokenSource();
672
673        if ($tokenSource === null) {
674            throw new \RuntimeException('Unexpected null token source.');
675        }
676
677        $expecting = $this->getExpectedTokens($recognizer);
678
679        $expectedTokenType = Token::INVALID_TYPE;
680
681        if (!$expecting->isNull()) {
682            $expectedTokenType = $expecting->getMinElement(); // get any element
683        }
684
685        if ($expectedTokenType === Token::EOF) {
686            $tokenText = '<missing EOF>';
687        } else {
688            $tokenText = \sprintf('<missing %s>', $recognizer->getVocabulary()->getDisplayName($expectedTokenType));
689        }
690
691        $current = $currentSymbol;
692        $lookback = $inputStream->LT(-1);
693
694        if ($current->getType() === Token::EOF && $lookback !== null) {
695            $current = $lookback;
696        }
697
698        return $recognizer->getTokenFactory()->createEx(
699            new Pair(
700                $tokenSource,
701                $tokenSource->getInputStream()
702            ),
703            $expectedTokenType,
704            $tokenText,
705            Token::DEFAULT_CHANNEL,
706            -1,
707            -1,
708            $current->getLine(),
709            $current->getCharPositionInLine()
710        );
711    }
712
713    protected function getExpectedTokens(Parser $recognizer) : IntervalSet
714    {
715        return $recognizer->getExpectedTokens();
716    }
717
718    /**
719     * How should a token be displayed in an error message? The default
720     * is to display just the text, but during development you might
721     * want to have a lot of information spit out.  Override in that case
722     * to use (string) (which, for CommonToken, dumps everything about
723     * the token). This is better than forcing you to override a method in
724     * your token objects because you don't have to go modify your lexer
725     * so that it creates a new Java type.
726     */
727    protected function getTokenErrorDisplay(?Token $t) : string
728    {
729        if ($t === null) {
730            return '<no token>';
731        }
732
733        $s = $this->getSymbolText($t);
734
735        if ($s === null) {
736            if ($this->getSymbolType($t) === Token::EOF) {
737                $s = '<EOF>';
738            } else {
739                $s = '<' . $this->getSymbolType($t) . '>';
740            }
741        }
742
743        return $this->escapeWSAndQuote($s);
744    }
745
746    protected function getSymbolText(Token $symbol) : ?string
747    {
748        return $symbol->getText();
749    }
750
751    protected function getSymbolType(Token $symbol) : int
752    {
753        return $symbol->getType();
754    }
755
756    protected function escapeWSAndQuote(string $s) : string
757    {
758        return "'" . StringUtils::escapeWhitespace($s) . "'";
759    }
760
761    /**
762     * Compute the error recovery set for the current rule.  During
763     * rule invocation, the parser pushes the set of tokens that can
764     * follow that rule reference on the stack; this amounts to
765     * computing FIRST of what follows the rule reference in the
766     * enclosing rule. See LinearApproximator::FIRST.
767     * This local follow set only includes tokens
768     * from within the rule; i.e., the FIRST computation done by
769     * ANTLR stops at the end of a rule.
770     *
771     * EXAMPLE
772     *
773     * When you find a "no viable alt exception", the input is not
774     * consistent with any of the alternatives for rule r.  The best
775     * thing to do is to consume tokens until you see something that
776     * can legally follow a call to r *or* any rule that called r.
777     * You don't want the exact set of viable next tokens because the
778     * input might just be missing a token--you might consume the
779     * rest of the input looking for one of the missing tokens.
780     *
781     * Consider grammar:
782     *
783     *     a : '[' b ']'
784     *       | '(' b ')'
785     *       ;
786     *     b : c '^' INT ;
787     *     c : ID
788     *       | INT
789     *       ;
790     *
791     * At each rule invocation, the set of tokens that could follow
792     * that rule is pushed on a stack.  Here are the various
793     * context-sensitive follow sets:
794     *
795     *     FOLLOW(b1_in_a) = FIRST(']') = ']'
796     *     FOLLOW(b2_in_a) = FIRST(')') = ')'
797     *     FOLLOW(c_in_b) = FIRST('^') = '^'
798     *
799     * Upon erroneous input "[]", the call chain is
800     *
801     *     a -> b -> c
802     *
803     * and, hence, the follow context stack is:
804     *
805     * depth | follow set | start of rule execution
806     * ------|------------|-------------------------
807     *   0   |   <EOF>    |    a (from main())
808     *   1   |   ']'      |          b
809     *   2   |   '^'      |          c
810     *
811     * Notice that ')' is not included, because b would have to have
812     * been called from a different context in rule a for ')' to be
813     * included.
814     *
815     * For error recovery, we cannot consider FOLLOW(c)
816     * (context-sensitive or otherwise).  We need the combined set of
817     * all context-sensitive FOLLOW sets--the set of all tokens that
818     * could follow any reference in the call chain.  We need to
819     * resync to one of those tokens.  Note that FOLLOW(c)='^' and if
820     * we resync'd to that token, we'd consume until EOF.  We need to
821     * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
822     * In this case, for input "[]", LA(1) is ']' and in the set, so we would
823     * not consume anything. After printing an error, rule c would
824     * return normally.  Rule b would not find the required '^' though.
825     * At this point, it gets a mismatched token error and throws an
826     * exception (since LA(1) is not in the viable following token
827     * set).  The rule exception handler tries to recover, but finds
828     * the same recovery set and doesn't consume anything.  Rule b
829     * exits normally returning to rule a.  Now it finds the ']' (and
830     * with the successful match exits errorRecovery mode).
831     *
832     * So, you can see that the parser walks up the call chain looking
833     * for the token that was a member of the recovery set.
834     *
835     * Errors are not generated in errorRecovery mode.
836     *
837     * ANTLR's error recovery mechanism is based upon original ideas:
838     *
839     * "Algorithms + Data Structures = Programs" by Niklaus Wirth
840     *
841     * and
842     *
843     * "A note on error recovery in recursive descent parsers":
844     * http://portal.acm.org/citation.cfm?id=947902.947905
845     *
846     * Later, Josef Grosch had some good ideas:
847     *
848     * "Efficient and Comfortable Error Recovery in Recursive Descent
849     * Parsers":
850     * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
851     *
852     * Like Grosch I implement context-sensitive FOLLOW sets that are combined
853     * at run-time upon error to avoid overhead during parsing.
854     */
855    protected function getErrorRecoverySet(Parser $recognizer) : IntervalSet
856    {
857        $interpreter = $recognizer->getInterpreter();
858
859        if ($interpreter === null) {
860            throw new \RuntimeException('Unexpected null interpreter.');
861        }
862
863        $atn = $interpreter->atn;
864        $ctx = $recognizer->getContext();
865        $recoverSet = new IntervalSet();
866
867        while ($ctx !== null && $ctx->invokingState >= 0) {
868            // compute what follows who invoked us
869            /** @var ATNState $invokingState */
870            $invokingState = $atn->states[$ctx->invokingState];
871            /** @var RuleTransition $rt */
872            $rt = $invokingState->getTransition(0);
873            $follow = $atn->nextTokens($rt->followState);
874            $recoverSet->addSet($follow);
875            $ctx = $ctx->getParent();
876        }
877
878        $recoverSet->removeOne(Token::EPSILON);
879
880        return $recoverSet;
881    }
882
883    /**
884     * Consume tokens until one matches the given token set.
885     */
886    protected function consumeUntil(Parser $recognizer, IntervalSet $set) : void
887    {
888        $inputStream = $recognizer->getInputStream();
889
890        if ($inputStream === null) {
891            throw new \RuntimeException('Unexpected null input stream.');
892        }
893
894        $ttype = $inputStream->LA(1);
895
896        while ($ttype !== Token::EOF && !$set->contains($ttype)) {
897            $recognizer->consume();
898            $ttype = $inputStream->LA(1);
899        }
900    }
901}
902