xref: /template/strap/vendor/antlr/antlr4-php-runtime/src/Error/DefaultErrorStrategy.php (revision 37748cd8654635afbeca80942126742f0f4cc346)
1*37748cd8SNickeau<?php
2*37748cd8SNickeau
3*37748cd8SNickeaudeclare(strict_types=1);
4*37748cd8SNickeau
5*37748cd8SNickeaunamespace Antlr\Antlr4\Runtime\Error;
6*37748cd8SNickeau
7*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Atn\States\ATNState;
8*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Atn\Transitions\RuleTransition;
9*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Error\Exceptions\FailedPredicateException;
10*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Error\Exceptions\InputMismatchException;
11*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Error\Exceptions\NoViableAltException;
12*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Error\Exceptions\RecognitionException;
13*37748cd8SNickeauuse Antlr\Antlr4\Runtime\IntervalSet;
14*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Parser;
15*37748cd8SNickeauuse Antlr\Antlr4\Runtime\ParserRuleContext;
16*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Token;
17*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Utils\Pair;
18*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Utils\StringUtils;
19*37748cd8SNickeau
20*37748cd8SNickeau/**
21*37748cd8SNickeau * This is the default implementation of {@see ANTLRErrorStrategy} used for
22*37748cd8SNickeau * error reporting and recovery in ANTLR parsers.
23*37748cd8SNickeau */
24*37748cd8SNickeauclass DefaultErrorStrategy implements ANTLRErrorStrategy
25*37748cd8SNickeau{
26*37748cd8SNickeau    /**
27*37748cd8SNickeau     * Indicates whether the error strategy is currently "recovering from an
28*37748cd8SNickeau     * error". This is used to suppress reporting multiple error messages while
29*37748cd8SNickeau     * attempting to recover from a detected syntax error.
30*37748cd8SNickeau     *
31*37748cd8SNickeau     * @see DefaultErrorStrategy::inErrorRecoveryMode()
32*37748cd8SNickeau     *
33*37748cd8SNickeau     * @var bool
34*37748cd8SNickeau     */
35*37748cd8SNickeau    protected $errorRecoveryMode = false;
36*37748cd8SNickeau
37*37748cd8SNickeau    /** The index into the input stream where the last error occurred.
38*37748cd8SNickeau     *  This is used to prevent infinite loops where an error is found
39*37748cd8SNickeau     *  but no token is consumed during recovery...another error is found,
40*37748cd8SNickeau     *  ad nauseam. This is a failsafe mechanism to guarantee that at least
41*37748cd8SNickeau     *  one token/tree node is consumed for two errors.
42*37748cd8SNickeau     *
43*37748cd8SNickeau     * @var int
44*37748cd8SNickeau     */
45*37748cd8SNickeau    protected $lastErrorIndex = -1;
46*37748cd8SNickeau
47*37748cd8SNickeau    /** @var IntervalSet|null */
48*37748cd8SNickeau    protected $lastErrorStates;
49*37748cd8SNickeau
50*37748cd8SNickeau    /**
51*37748cd8SNickeau     * This field is used to propagate information about the lookahead following
52*37748cd8SNickeau     * the previous match. Since prediction prefers completing the current rule
53*37748cd8SNickeau     * to error recovery efforts, error reporting may occur later than the
54*37748cd8SNickeau     * original point where it was discoverable. The original context is used to
55*37748cd8SNickeau     * compute the true expected sets as though the reporting occurred as early
56*37748cd8SNickeau     * as possible.
57*37748cd8SNickeau     *
58*37748cd8SNickeau     * @var ParserRuleContext|null
59*37748cd8SNickeau     */
60*37748cd8SNickeau    protected $nextTokensContext;
61*37748cd8SNickeau
62*37748cd8SNickeau    /**
63*37748cd8SNickeau     * @see DefaultErrorStrategy::$nextTokensContext
64*37748cd8SNickeau     *
65*37748cd8SNickeau     * @var int|null
66*37748cd8SNickeau     */
67*37748cd8SNickeau    protected $nextTokensState;
68*37748cd8SNickeau
69*37748cd8SNickeau    /**
70*37748cd8SNickeau     * {@inheritdoc}
71*37748cd8SNickeau     *
72*37748cd8SNickeau     * The default implementation simply calls
73*37748cd8SNickeau     * {@see DefaultErrorStrategy::endErrorCondition()} to ensure that
74*37748cd8SNickeau     * the handler is not in error recovery mode.
75*37748cd8SNickeau     */
76*37748cd8SNickeau    public function reset(Parser $recognizer) : void
77*37748cd8SNickeau    {
78*37748cd8SNickeau        $this->endErrorCondition($recognizer);
79*37748cd8SNickeau    }
80*37748cd8SNickeau
81*37748cd8SNickeau    /**
82*37748cd8SNickeau     * This method is called to enter error recovery mode when a recognition
83*37748cd8SNickeau     * exception is reported.
84*37748cd8SNickeau     *
85*37748cd8SNickeau     * @param Parser $recognizer The parser instance.
86*37748cd8SNickeau     */
87*37748cd8SNickeau    protected function beginErrorCondition(Parser $recognizer) : void
88*37748cd8SNickeau    {
89*37748cd8SNickeau        $this->errorRecoveryMode = true;
90*37748cd8SNickeau    }
91*37748cd8SNickeau
92*37748cd8SNickeau    public function inErrorRecoveryMode(Parser $recognizer) : bool
93*37748cd8SNickeau    {
94*37748cd8SNickeau        return $this->errorRecoveryMode;
95*37748cd8SNickeau    }
96*37748cd8SNickeau
97*37748cd8SNickeau    /**
98*37748cd8SNickeau     * This method is called to leave error recovery mode after recovering from
99*37748cd8SNickeau     * a recognition exception.
100*37748cd8SNickeau     */
101*37748cd8SNickeau    protected function endErrorCondition(Parser $recognizer) : void
102*37748cd8SNickeau    {
103*37748cd8SNickeau        $this->errorRecoveryMode = false;
104*37748cd8SNickeau        $this->lastErrorStates = null;
105*37748cd8SNickeau        $this->lastErrorIndex = -1;
106*37748cd8SNickeau    }
107*37748cd8SNickeau
108*37748cd8SNickeau    /**
109*37748cd8SNickeau     * {@inheritdoc}
110*37748cd8SNickeau     *
111*37748cd8SNickeau     * The default implementation simply calls
112*37748cd8SNickeau     * {@see DefaultErrorStrategy::endErrorCondition()}.
113*37748cd8SNickeau     */
114*37748cd8SNickeau    public function reportMatch(Parser $recognizer) : void
115*37748cd8SNickeau    {
116*37748cd8SNickeau        $this->endErrorCondition($recognizer);
117*37748cd8SNickeau    }
118*37748cd8SNickeau
119*37748cd8SNickeau    /**
120*37748cd8SNickeau     * {@inheritdoc}
121*37748cd8SNickeau     *
122*37748cd8SNickeau     * The default implementation returns immediately if the handler is already
123*37748cd8SNickeau     * in error recovery mode. Otherwise, it calls
124*37748cd8SNickeau     * {@see DefaultErrorStrategy::beginErrorCondition()} and dispatches
125*37748cd8SNickeau     * the reporting task based on the runtime type of `e` according to
126*37748cd8SNickeau     * the following table.
127*37748cd8SNickeau     *
128*37748cd8SNickeau     * - {@see NoViableAltException}: Dispatches the call to
129*37748cd8SNickeau     * {@see reportNoViableAlternative}
130*37748cd8SNickeau     * - {@see InputMismatchException}: Dispatches the call to
131*37748cd8SNickeau     * {@see reportInputMismatch}
132*37748cd8SNickeau     * - {@see FailedPredicateException}: Dispatches the call to
133*37748cd8SNickeau     * {@see reportFailedPredicate}
134*37748cd8SNickeau     * - All other types: calls {@see Parser::notifyErrorListeners()} to report
135*37748cd8SNickeau     * the exception
136*37748cd8SNickeau     */
137*37748cd8SNickeau    public function reportError(Parser $recognizer, RecognitionException $e) : void
138*37748cd8SNickeau    {
139*37748cd8SNickeau        // if we've already reported an error and have not matched a token
140*37748cd8SNickeau        // yet successfully, don't report any errors.
141*37748cd8SNickeau        if ($this->inErrorRecoveryMode($recognizer)) {
142*37748cd8SNickeau            // don't report spurious errors
143*37748cd8SNickeau            return;
144*37748cd8SNickeau        }
145*37748cd8SNickeau
146*37748cd8SNickeau        $this->beginErrorCondition($recognizer);
147*37748cd8SNickeau
148*37748cd8SNickeau        if ($e instanceof NoViableAltException) {
149*37748cd8SNickeau            $this->reportNoViableAlternative($recognizer, $e);
150*37748cd8SNickeau        } elseif ($e instanceof InputMismatchException) {
151*37748cd8SNickeau            $this->reportInputMismatch($recognizer, $e);
152*37748cd8SNickeau        } elseif ($e instanceof FailedPredicateException) {
153*37748cd8SNickeau            $this->reportFailedPredicate($recognizer, $e);
154*37748cd8SNickeau        } else {
155*37748cd8SNickeau            $recognizer->notifyErrorListeners($e->getMessage(), $e->getOffendingToken(), $e);
156*37748cd8SNickeau        }
157*37748cd8SNickeau    }
158*37748cd8SNickeau
159*37748cd8SNickeau    /**
160*37748cd8SNickeau     * {@inheritdoc}
161*37748cd8SNickeau     *
162*37748cd8SNickeau     * The default implementation resynchronizes the parser by consuming tokens
163*37748cd8SNickeau     * until we find one in the resynchronization set--loosely the set of tokens
164*37748cd8SNickeau     * that can follow the current rule.
165*37748cd8SNickeau     */
166*37748cd8SNickeau    public function recover(Parser $recognizer, RecognitionException $e) : void
167*37748cd8SNickeau    {
168*37748cd8SNickeau        $inputStream = $recognizer->getInputStream();
169*37748cd8SNickeau
170*37748cd8SNickeau        if ($inputStream === null) {
171*37748cd8SNickeau            throw new \RuntimeException('Unexpected null input stream.');
172*37748cd8SNickeau        }
173*37748cd8SNickeau
174*37748cd8SNickeau        if ($this->lastErrorStates !== null
175*37748cd8SNickeau            && $this->lastErrorIndex === $inputStream->getIndex()
176*37748cd8SNickeau            && $this->lastErrorStates->contains($recognizer->getState())
177*37748cd8SNickeau        ) {
178*37748cd8SNickeau            // uh oh, another error at same token index and previously-visited
179*37748cd8SNickeau            // state in ATN; must be a case where LT(1) is in the recovery
180*37748cd8SNickeau            // token set so nothing got consumed. Consume a single token
181*37748cd8SNickeau            // at least to prevent an infinite loop; this is a failsafe.
182*37748cd8SNickeau            $recognizer->consume();
183*37748cd8SNickeau        }
184*37748cd8SNickeau
185*37748cd8SNickeau        $this->lastErrorIndex = $inputStream->getIndex();
186*37748cd8SNickeau
187*37748cd8SNickeau        if ($this->lastErrorStates === null) {
188*37748cd8SNickeau            $this->lastErrorStates = new IntervalSet();
189*37748cd8SNickeau        }
190*37748cd8SNickeau
191*37748cd8SNickeau        $this->lastErrorStates->addOne($recognizer->getState());
192*37748cd8SNickeau
193*37748cd8SNickeau        $followSet = $this->getErrorRecoverySet($recognizer);
194*37748cd8SNickeau
195*37748cd8SNickeau        $this->consumeUntil($recognizer, $followSet);
196*37748cd8SNickeau    }
197*37748cd8SNickeau
198*37748cd8SNickeau    /**
199*37748cd8SNickeau     * The default implementation of {@see ANTLRErrorStrategy::sync()} makes sure
200*37748cd8SNickeau     * that the current lookahead symbol is consistent with what we're expecting
201*37748cd8SNickeau     * at this point in the ATN. You can call this anytime but ANTLR only
202*37748cd8SNickeau     * generates code to check before subrules/loops and each iteration.
203*37748cd8SNickeau     *
204*37748cd8SNickeau     * Implements Jim Idle's magic sync mechanism in closures and optional
205*37748cd8SNickeau     * subrules. E.g.,
206*37748cd8SNickeau     *
207*37748cd8SNickeau     *     a : sync ( stuff sync )* ;
208*37748cd8SNickeau     *     sync : {consume to what can follow sync} ;
209*37748cd8SNickeau     *
210*37748cd8SNickeau     * At the start of a sub rule upon error, {@see sync} performs single
211*37748cd8SNickeau     * token deletion, if possible. If it can't do that, it bails on the current
212*37748cd8SNickeau     * rule and uses the default error recovery, which consumes until the
213*37748cd8SNickeau     * resynchronization set of the current rule.
214*37748cd8SNickeau     *
215*37748cd8SNickeau     * If the sub rule is optional (`(...)?`, `(...)*`, or block
216*37748cd8SNickeau     * with an empty alternative), then the expected set includes what follows
217*37748cd8SNickeau     * the subrule.
218*37748cd8SNickeau     *
219*37748cd8SNickeau     * During loop iteration, it consumes until it sees a token that can start a
220*37748cd8SNickeau     * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
221*37748cd8SNickeau     * stay in the loop as long as possible.
222*37748cd8SNickeau     *
223*37748cd8SNickeau     * ORIGINS
224*37748cd8SNickeau     *
225*37748cd8SNickeau     * Previous versions of ANTLR did a poor job of their recovery within loops.
226*37748cd8SNickeau     * A single mismatch token or missing token would force the parser to bail
227*37748cd8SNickeau     * out of the entire rules surrounding the loop. So, for rule
228*37748cd8SNickeau     *
229*37748cd8SNickeau     *     classDef : 'class' ID '{' member* '}'
230*37748cd8SNickeau     *
231*37748cd8SNickeau     * input with an extra token between members would force the parser to
232*37748cd8SNickeau     * consume until it found the next class definition rather than the next
233*37748cd8SNickeau     * member definition of the current class.
234*37748cd8SNickeau     *
235*37748cd8SNickeau     * This functionality cost a little bit of effort because the parser has to
236*37748cd8SNickeau     * compare token set at the start of the loop and at each iteration. If for
237*37748cd8SNickeau     * some reason speed is suffering for you, you can turn off this
238*37748cd8SNickeau     * functionality by simply overriding this method as a blank { }.
239*37748cd8SNickeau     *
240*37748cd8SNickeau     * @throws RecognitionException
241*37748cd8SNickeau     */
242*37748cd8SNickeau    public function sync(Parser $recognizer) : void
243*37748cd8SNickeau    {
244*37748cd8SNickeau        $interpreter = $recognizer->getInterpreter();
245*37748cd8SNickeau
246*37748cd8SNickeau        if ($interpreter === null) {
247*37748cd8SNickeau            throw new \RuntimeException('Unexpected null interpreter.');
248*37748cd8SNickeau        }
249*37748cd8SNickeau
250*37748cd8SNickeau        /** @var ATNState $s */
251*37748cd8SNickeau        $s = $interpreter->atn->states[$recognizer->getState()];
252*37748cd8SNickeau
253*37748cd8SNickeau        // If already recovering, don't try to sync
254*37748cd8SNickeau        if ($this->inErrorRecoveryMode($recognizer)) {
255*37748cd8SNickeau            return;
256*37748cd8SNickeau        }
257*37748cd8SNickeau
258*37748cd8SNickeau        $tokens = $recognizer->getInputStream();
259*37748cd8SNickeau
260*37748cd8SNickeau        if ($tokens === null) {
261*37748cd8SNickeau            throw new \RuntimeException('Unexpected null input stream.');
262*37748cd8SNickeau        }
263*37748cd8SNickeau
264*37748cd8SNickeau        $la = $tokens->LA(1);
265*37748cd8SNickeau
266*37748cd8SNickeau        // try cheaper subset first; might get lucky. seems to shave a wee bit off
267*37748cd8SNickeau        $nextTokens = $recognizer->getATN()->nextTokens($s);
268*37748cd8SNickeau
269*37748cd8SNickeau        if ($nextTokens->contains($la)) {
270*37748cd8SNickeau            // We are sure the token matches
271*37748cd8SNickeau            $this->nextTokensContext = null;
272*37748cd8SNickeau            $this->nextTokensState = ATNState::INVALID_STATE_NUMBER;
273*37748cd8SNickeau
274*37748cd8SNickeau            return;
275*37748cd8SNickeau        }
276*37748cd8SNickeau
277*37748cd8SNickeau        if ($nextTokens->contains(Token::EPSILON)) {
278*37748cd8SNickeau            if ($this->nextTokensContext === null) {
279*37748cd8SNickeau                // It's possible the next token won't match; information tracked
280*37748cd8SNickeau                // by sync is restricted for performance.
281*37748cd8SNickeau                $this->nextTokensContext = $recognizer->getContext();
282*37748cd8SNickeau                $this->nextTokensState = $recognizer->getState();
283*37748cd8SNickeau            }
284*37748cd8SNickeau            return;
285*37748cd8SNickeau        }
286*37748cd8SNickeau
287*37748cd8SNickeau        switch ($s->getStateType()) {
288*37748cd8SNickeau            case ATNState::BLOCK_START:
289*37748cd8SNickeau            case ATNState::STAR_BLOCK_START:
290*37748cd8SNickeau            case ATNState::PLUS_BLOCK_START:
291*37748cd8SNickeau            case ATNState::STAR_LOOP_ENTRY:
292*37748cd8SNickeau                // report error and recover if possible
293*37748cd8SNickeau                if ($this->singleTokenDeletion($recognizer) !== null) {
294*37748cd8SNickeau                    return;
295*37748cd8SNickeau                }
296*37748cd8SNickeau
297*37748cd8SNickeau                throw new InputMismatchException($recognizer);
298*37748cd8SNickeau
299*37748cd8SNickeau            case ATNState::PLUS_LOOP_BACK:
300*37748cd8SNickeau            case ATNState::STAR_LOOP_BACK:
301*37748cd8SNickeau                $this->reportUnwantedToken($recognizer);
302*37748cd8SNickeau                $expecting = $recognizer->getExpectedTokens();
303*37748cd8SNickeau                $whatFollowsLoopIterationOrRule = $expecting->orSet($this->getErrorRecoverySet($recognizer));
304*37748cd8SNickeau                $this->consumeUntil($recognizer, $whatFollowsLoopIterationOrRule);
305*37748cd8SNickeau                break;
306*37748cd8SNickeau
307*37748cd8SNickeau            default:
308*37748cd8SNickeau                // do nothing if we can't identify the exact kind of ATN state
309*37748cd8SNickeau                break;
310*37748cd8SNickeau        }
311*37748cd8SNickeau    }
312*37748cd8SNickeau
313*37748cd8SNickeau    /**
314*37748cd8SNickeau     * This is called by {@see DefaultErrorStrategy::reportError()} when
315*37748cd8SNickeau     * the exception is a {@see NoViableAltException}.
316*37748cd8SNickeau     *
317*37748cd8SNickeau     * @param Parser               $recognizer The parser instance.
318*37748cd8SNickeau     * @param NoViableAltException $e          The recognition exception.
319*37748cd8SNickeau     *
320*37748cd8SNickeau     * @see DefaultErrorStrategy::reportError()
321*37748cd8SNickeau     */
322*37748cd8SNickeau    protected function reportNoViableAlternative(Parser $recognizer, NoViableAltException $e) : void
323*37748cd8SNickeau    {
324*37748cd8SNickeau        $tokens = $recognizer->getTokenStream();
325*37748cd8SNickeau
326*37748cd8SNickeau        $input = '<unknown input>';
327*37748cd8SNickeau
328*37748cd8SNickeau        if ($tokens !== null) {
329*37748cd8SNickeau            $startToken = $e->getStartToken();
330*37748cd8SNickeau
331*37748cd8SNickeau            if ($startToken === null) {
332*37748cd8SNickeau                throw new \RuntimeException('Unexpected null start token.');
333*37748cd8SNickeau            }
334*37748cd8SNickeau
335*37748cd8SNickeau            if ($startToken->getType() === Token::EOF) {
336*37748cd8SNickeau                $input = '<EOF>';
337*37748cd8SNickeau            } else {
338*37748cd8SNickeau                $input = $tokens->getTextByTokens($e->getStartToken(), $e->getOffendingToken());
339*37748cd8SNickeau            }
340*37748cd8SNickeau        }
341*37748cd8SNickeau
342*37748cd8SNickeau        $msg = \sprintf('no viable alternative at input %s', $this->escapeWSAndQuote($input));
343*37748cd8SNickeau
344*37748cd8SNickeau        $recognizer->notifyErrorListeners($msg, $e->getOffendingToken(), $e);
345*37748cd8SNickeau    }
346*37748cd8SNickeau
347*37748cd8SNickeau    /**
348*37748cd8SNickeau     * This is called by {@see DefaultErrorStrategy::reportError()} when
349*37748cd8SNickeau     * the exception is an {@see InputMismatchException}.
350*37748cd8SNickeau     *
351*37748cd8SNickeau     * @param Parser                 $recognizer The parser instance.
352*37748cd8SNickeau     * @param InputMismatchException $e          The recognition exception.
353*37748cd8SNickeau     *
354*37748cd8SNickeau     * @see DefaultErrorStrategy::reportError()
355*37748cd8SNickeau     */
356*37748cd8SNickeau    protected function reportInputMismatch(Parser $recognizer, InputMismatchException $e) : void
357*37748cd8SNickeau    {
358*37748cd8SNickeau        $expectedTokens = $e->getExpectedTokens();
359*37748cd8SNickeau
360*37748cd8SNickeau        if ($expectedTokens === null) {
361*37748cd8SNickeau            throw new \RuntimeException('Unexpected null expected tokens.');
362*37748cd8SNickeau        }
363*37748cd8SNickeau
364*37748cd8SNickeau        $msg = \sprintf(
365*37748cd8SNickeau            'mismatched input %s expecting %s',
366*37748cd8SNickeau            $this->getTokenErrorDisplay($e->getOffendingToken()),
367*37748cd8SNickeau            $expectedTokens->toStringVocabulary($recognizer->getVocabulary())
368*37748cd8SNickeau        );
369*37748cd8SNickeau
370*37748cd8SNickeau        $recognizer->notifyErrorListeners($msg, $e->getOffendingToken(), $e);
371*37748cd8SNickeau    }
372*37748cd8SNickeau
373*37748cd8SNickeau    /**
374*37748cd8SNickeau     * This is called by {@see DefaultErrorStrategy::reportError()} when
375*37748cd8SNickeau     * the exception is a {@see FailedPredicateException}.
376*37748cd8SNickeau     *
377*37748cd8SNickeau     * @param Parser                   $recognizer The parser instance.
378*37748cd8SNickeau     * @param FailedPredicateException $e          The recognition exception.
379*37748cd8SNickeau     *
380*37748cd8SNickeau     * @see DefaultErrorStrategy::reportError()
381*37748cd8SNickeau     */
382*37748cd8SNickeau    protected function reportFailedPredicate(Parser $recognizer, FailedPredicateException $e) : void
383*37748cd8SNickeau    {
384*37748cd8SNickeau        $msg = \sprintf('rule %s %s', $recognizer->getCurrentRuleName(), $e->getMessage());
385*37748cd8SNickeau
386*37748cd8SNickeau        $recognizer->notifyErrorListeners($msg, $e->getOffendingToken(), $e);
387*37748cd8SNickeau    }
388*37748cd8SNickeau
389*37748cd8SNickeau    /**
390*37748cd8SNickeau     * This method is called to report a syntax error which requires the removal
391*37748cd8SNickeau     * of a token from the input stream. At the time this method is called, the
392*37748cd8SNickeau     * erroneous symbol is current `LT(1)` symbol and has not yet been
393*37748cd8SNickeau     * removed from the input stream. When this method returns,
394*37748cd8SNickeau     * `$recognizer` is in error recovery mode.
395*37748cd8SNickeau     *
396*37748cd8SNickeau     * This method is called when {@see DefaultErrorStrategy::singleTokenDeletion()}
397*37748cd8SNickeau     * identifies single-token deletion as a viable recovery strategy for
398*37748cd8SNickeau     * a mismatched input error.
399*37748cd8SNickeau     *
400*37748cd8SNickeau     * The default implementation simply returns if the handler is already in
401*37748cd8SNickeau     * error recovery mode. Otherwise, it calls
402*37748cd8SNickeau     * {@see DefaultErrorStrategy::beginErrorCondition()} to enter error
403*37748cd8SNickeau     * recovery mode, followed by calling {@see Parser::notifyErrorListeners}.
404*37748cd8SNickeau     *
405*37748cd8SNickeau     * @param Parser $recognizer The parser instance.
406*37748cd8SNickeau     */
407*37748cd8SNickeau    protected function reportUnwantedToken(Parser $recognizer) : void
408*37748cd8SNickeau    {
409*37748cd8SNickeau        if ($this->inErrorRecoveryMode($recognizer)) {
410*37748cd8SNickeau            return;
411*37748cd8SNickeau        }
412*37748cd8SNickeau
413*37748cd8SNickeau        $this->beginErrorCondition($recognizer);
414*37748cd8SNickeau
415*37748cd8SNickeau        $t = $recognizer->getCurrentToken();
416*37748cd8SNickeau        $tokenName = $this->getTokenErrorDisplay($t);
417*37748cd8SNickeau        $expecting = $this->getExpectedTokens($recognizer);
418*37748cd8SNickeau
419*37748cd8SNickeau        $msg = \sprintf(
420*37748cd8SNickeau            'extraneous input %s expecting %s',
421*37748cd8SNickeau            $tokenName,
422*37748cd8SNickeau            $expecting->toStringVocabulary($recognizer->getVocabulary())
423*37748cd8SNickeau        );
424*37748cd8SNickeau
425*37748cd8SNickeau        $recognizer->notifyErrorListeners($msg, $t);
426*37748cd8SNickeau    }
427*37748cd8SNickeau
428*37748cd8SNickeau    /**
429*37748cd8SNickeau     * This method is called to report a syntax error which requires the
430*37748cd8SNickeau     * insertion of a missing token into the input stream. At the time this
431*37748cd8SNickeau     * method is called, the missing token has not yet been inserted. When this
432*37748cd8SNickeau     * method returns, `$recognizer` is in error recovery mode.
433*37748cd8SNickeau     *
434*37748cd8SNickeau     * This method is called when {@see DefaultErrorStrategy::singleTokenInsertion()}
435*37748cd8SNickeau     * identifies single-token insertion as a viable recovery strategy for
436*37748cd8SNickeau     * a mismatched input error.
437*37748cd8SNickeau     *
438*37748cd8SNickeau     * The default implementation simply returns if the handler is already in
439*37748cd8SNickeau     * error recovery mode. Otherwise, it calls
440*37748cd8SNickeau     * {@see DefaultErrorStrategy::beginErrorCondition()} to enter error
441*37748cd8SNickeau     * recovery mode, followed by calling {@see Parser::notifyErrorListeners()}.
442*37748cd8SNickeau     *
443*37748cd8SNickeau     * @param Parser $recognizer the parser instance
444*37748cd8SNickeau     */
445*37748cd8SNickeau    protected function reportMissingToken(Parser $recognizer) : void
446*37748cd8SNickeau    {
447*37748cd8SNickeau        if ($this->inErrorRecoveryMode($recognizer)) {
448*37748cd8SNickeau            return;
449*37748cd8SNickeau        }
450*37748cd8SNickeau
451*37748cd8SNickeau        $this->beginErrorCondition($recognizer);
452*37748cd8SNickeau
453*37748cd8SNickeau        $t = $recognizer->getCurrentToken();
454*37748cd8SNickeau        $expecting = $this->getExpectedTokens($recognizer);
455*37748cd8SNickeau
456*37748cd8SNickeau        $msg = \sprintf(
457*37748cd8SNickeau            'missing %s at %s',
458*37748cd8SNickeau            $expecting->toStringVocabulary($recognizer->getVocabulary()),
459*37748cd8SNickeau            $this->getTokenErrorDisplay($t)
460*37748cd8SNickeau        );
461*37748cd8SNickeau
462*37748cd8SNickeau        $recognizer->notifyErrorListeners($msg, $t);
463*37748cd8SNickeau    }
464*37748cd8SNickeau
465*37748cd8SNickeau    /**
466*37748cd8SNickeau     * {@inheritdoc}
467*37748cd8SNickeau     *
468*37748cd8SNickeau     * The default implementation attempts to recover from the mismatched input
469*37748cd8SNickeau     * by using single token insertion and deletion as described below. If the
470*37748cd8SNickeau     * recovery attempt fails, this method throws an
471*37748cd8SNickeau     * {@see InputMismatchException}.
472*37748cd8SNickeau     *
473*37748cd8SNickeau     * EXTRA TOKEN (single token deletion)
474*37748cd8SNickeau     *
475*37748cd8SNickeau     * `LA(1)` is not what we are looking for. If `LA(2)` has the
476*37748cd8SNickeau     * right token, however, then assume `LA(1)` is some extra spurious
477*37748cd8SNickeau     * token and delete it. Then consume and return the next token (which was
478*37748cd8SNickeau     * the `LA(2)` token) as the successful result of the match operation.
479*37748cd8SNickeau     *
480*37748cd8SNickeau     * This recovery strategy is implemented by
481*37748cd8SNickeau     * {@see DefaultErrorStrategy::singleTokenDeletion()}.
482*37748cd8SNickeau     *
483*37748cd8SNickeau     * MISSING TOKEN (single token insertion)
484*37748cd8SNickeau     *
485*37748cd8SNickeau     * If current token (at `LA(1)`) is consistent with what could come
486*37748cd8SNickeau     * after the expected `LA(1)` token, then assume the token is missing
487*37748cd8SNickeau     * and use the parser's {@see TokenFactory} to create it on the fly. The
488*37748cd8SNickeau     * "insertion" is performed by returning the created token as the successful
489*37748cd8SNickeau     * result of the match operation.
490*37748cd8SNickeau     *
491*37748cd8SNickeau     * This recovery strategy is implemented by
492*37748cd8SNickeau     * {@see DefaultErrorStrategy::singleTokenInsertion()}.
493*37748cd8SNickeau     *
494*37748cd8SNickeau     * EXAMPLE
495*37748cd8SNickeau     *
496*37748cd8SNickeau     * For example, Input `i=(3;` is clearly missing the `')'`. When
497*37748cd8SNickeau     * the parser returns from the nested call to `expr`, it will have
498*37748cd8SNickeau     * call chain:
499*37748cd8SNickeau     *
500*37748cd8SNickeau     *     stat &rarr; expr &rarr; atom
501*37748cd8SNickeau     *
502*37748cd8SNickeau     * and it will be trying to match the `')'` at this point in the
503*37748cd8SNickeau     * derivation:
504*37748cd8SNickeau     *
505*37748cd8SNickeau     *     =&gt; ID '=' '(' INT ')' ('+' atom)* ';'
506*37748cd8SNickeau     *                        ^
507*37748cd8SNickeau     *
508*37748cd8SNickeau     * The attempt to match `')'` will fail when it sees `';'` and call
509*37748cd8SNickeau     * {@see DefaultErrorStrategy::recoverInline()}. To recover, it sees that
510*37748cd8SNickeau     * `LA(1)==';'` is in the set of tokens that can follow the `')'` token
511*37748cd8SNickeau     * reference in rule `atom`. It can assume that you forgot the `')'`.
512*37748cd8SNickeau     *
513*37748cd8SNickeau     * @throws RecognitionException
514*37748cd8SNickeau     */
515*37748cd8SNickeau    public function recoverInline(Parser $recognizer) : Token
516*37748cd8SNickeau    {
517*37748cd8SNickeau        // SINGLE TOKEN DELETION
518*37748cd8SNickeau        $matchedSymbol = $this->singleTokenDeletion($recognizer);
519*37748cd8SNickeau
520*37748cd8SNickeau        if ($matchedSymbol !== null) {
521*37748cd8SNickeau            // we have deleted the extra token.
522*37748cd8SNickeau            // now, move past ttype token as if all were ok
523*37748cd8SNickeau            $recognizer->consume();
524*37748cd8SNickeau
525*37748cd8SNickeau            return $matchedSymbol;
526*37748cd8SNickeau        }
527*37748cd8SNickeau
528*37748cd8SNickeau        // SINGLE TOKEN INSERTION
529*37748cd8SNickeau        if ($this->singleTokenInsertion($recognizer)) {
530*37748cd8SNickeau            return $this->getMissingSymbol($recognizer);
531*37748cd8SNickeau        }
532*37748cd8SNickeau
533*37748cd8SNickeau        // even that didn't work; must throw the exception
534*37748cd8SNickeau        if ($this->nextTokensContext === null) {
535*37748cd8SNickeau            throw new InputMismatchException($recognizer);
536*37748cd8SNickeau        }
537*37748cd8SNickeau
538*37748cd8SNickeau        throw new InputMismatchException($recognizer, $this->nextTokensState, $this->nextTokensContext);
539*37748cd8SNickeau    }
540*37748cd8SNickeau
/**
 * Attempts inline recovery by assuming that exactly one token is missing
 * from the input ("single-token insertion").
 *
 * {@see DefaultErrorStrategy::recoverInline()} falls back to this method
 * once single-token deletion has failed. The test performed is: would the
 * current `LA(1)` symbol be acceptable if it were seen one position later,
 * i.e. after following the first transition out of the current ATN state?
 * When it would, the problem is reported through
 * {@see DefaultErrorStrategy::reportMissingToken()} (after which
 * `$recognizer` is in error recovery mode) and `true` is returned. Creating
 * and inserting a token of the right type remains the caller's job.
 *
 * @param Parser $recognizer The parser instance.
 *
 * @return bool `true` when single-token insertion is a viable recovery
 *              strategy for the current mismatched input, else `false`.
 *
 * @throws \RuntimeException When the parser has no input stream or no
 *                           interpreter.
 */
protected function singleTokenInsertion(Parser $recognizer) : bool
{
    $tokens = $recognizer->getInputStream();

    if ($tokens === null) {
        throw new \RuntimeException('Unexpected null input stream.');
    }

    $simulator = $recognizer->getInterpreter();

    if ($simulator === null) {
        throw new \RuntimeException('Unexpected null interpreter.');
    }

    $lookahead = $tokens->LA(1);
    $atn = $simulator->atn;

    /** @var ATNState $state */
    $state = $atn->states[$recognizer->getState()];

    // Token types that may appear once the first transition out of the
    // current state has been taken, evaluated in the current rule context.
    $viableAfterInsertion = $atn->nextTokensInContext(
        $state->getTransition(0)->target,
        $recognizer->getContext()
    );

    if (!$viableAfterInsertion->contains($lookahead)) {
        return false;
    }

    // The current token fits what comes next, so a token must be missing;
    // the caller is free to conjure one up and insert it.
    $this->reportMissingToken($recognizer);

    return true;
}
593*37748cd8SNickeau
/**
 * Attempts inline recovery by discarding exactly one extraneous token
 * ("single-token deletion").
 *
 * {@see DefaultErrorStrategy::recoverInline()} calls this first when the
 * input does not match. The `LA(2)` symbol is tested against the set of
 * currently expected tokens:
 *
 * - If it is not expected, `null` is returned and neither the parser nor
 *   the error handler state has been changed.
 * - If it is expected, the unwanted token is reported through
 *   {@see DefaultErrorStrategy::reportUnwantedToken()}, then "deleted" with
 *   {@see Parser::consume()}, and finally
 *   {@see DefaultErrorStrategy::reportMatch()} signals the successful
 *   match, so `$recognizer` is _not_ left in error recovery mode.
 *
 * @param Parser $recognizer The parser instance.
 *
 * @return Token|null The token that is current after the deletion when the
 *                    strategy recovers from the mismatched input,
 *                    otherwise `null`.
 *
 * @throws \RuntimeException When the parser has no input stream.
 */
protected function singleTokenDeletion(Parser $recognizer) : ?Token
{
    $tokens = $recognizer->getInputStream();

    if ($tokens === null) {
        throw new \RuntimeException('Unexpected null input stream.');
    }

    $typeAfterCurrent = $tokens->LA(2);

    if (!$this->getExpectedTokens($recognizer)->contains($typeAfterCurrent)) {
        // Dropping LA(1) would not help; leave everything untouched.
        return null;
    }

    $this->reportUnwantedToken($recognizer);

    // Simply delete the extra token; the token now current is the one
    // that is actually being matched, so that is what gets returned.
    $recognizer->consume();
    $matched = $recognizer->getCurrentToken();

    // The current token is known to be correct at this point.
    $this->reportMatch($recognizer);

    return $matched;
}
637*37748cd8SNickeau
/**
 * Conjures up a stand-in for a token that is missing from the input.
 *
 * The recognizer tries to recover from a single missing symbol, yet
 * embedded actions may still refer to it: in `x=ID {f($x);}` the action
 * assumes an identifier was matched and that `$x` points at it. Since some
 * token has to be handed back in place of the missing one, it is
 * fabricated here.
 *
 * The fabricated token has:
 *
 * - the smallest type in the set of expected tokens, or
 *   `Token::INVALID_TYPE` when that set is empty;
 * - the text `<missing NAME>`, where NAME is `EOF` or the vocabulary's
 *   display name for the type;
 * - the default channel, and both index arguments set to `-1`;
 * - the line and column of the current token, or of the previous token
 *   when the current one is EOF and a previous token exists.
 *
 * Override this method to control what is returned for missing tokens,
 * e.g. to create something special for identifiers or to match custom
 * token classes produced by the lexer.
 *
 * @param Parser $recognizer The parser instance.
 *
 * @return Token The fabricated replacement token.
 *
 * @throws \RuntimeException When the parser has no current token or input
 *                           stream, or the current token has no source.
 */
protected function getMissingSymbol(Parser $recognizer) : Token
{
    $offending = $recognizer->getCurrentToken();

    if ($offending === null) {
        throw new \RuntimeException('Unexpected null current token.');
    }

    $tokens = $recognizer->getInputStream();

    if ($tokens === null) {
        throw new \RuntimeException('Unexpected null input stream.');
    }

    $source = $offending->getTokenSource();

    if ($source === null) {
        throw new \RuntimeException('Unexpected null token source.');
    }

    $expected = $this->getExpectedTokens($recognizer);

    // Any expected element will do; the minimum is simply the one picked.
    $missingType = $expected->isNull()
        ? Token::INVALID_TYPE
        : $expected->getMinElement();

    $text = $missingType === Token::EOF
        ? '<missing EOF>'
        : \sprintf('<missing %s>', $recognizer->getVocabulary()->getDisplayName($missingType));

    // Take the position from the current token, unless that is EOF and
    // there is an earlier token to borrow it from.
    $previous = $tokens->LT(-1);
    $anchor = ($offending->getType() === Token::EOF && $previous !== null)
        ? $previous
        : $offending;

    $factory = $recognizer->getTokenFactory();
    $origin = new Pair($source, $source->getInputStream());

    return $factory->createEx(
        $origin,
        $missingType,
        $text,
        Token::DEFAULT_CHANNEL,
        -1,
        -1,
        $anchor->getLine(),
        $anchor->getCharPositionInLine()
    );
}
712*37748cd8SNickeau
/**
 * Returns the set of token types reported by
 * {@see Parser::getExpectedTokens()} for the given parser.
 *
 * @param Parser $recognizer The parser instance.
 *
 * @return IntervalSet The expected token types.
 */
protected function getExpectedTokens(Parser $recognizer) : IntervalSet
{
    $expected = $recognizer->getExpectedTokens();

    return $expected;
}
717*37748cd8SNickeau
/**
 * Renders a token for use inside an error message.
 *
 * By default only the token text is shown, whitespace-escaped and wrapped
 * in single quotes. A token without text is shown as `<EOF>` or as its
 * numeric type in angle brackets, and a missing token as `<no token>`
 * (unquoted). Override this method to print more detail during
 * development; doing so here avoids having to change the token classes
 * created by the lexer.
 *
 * @param Token|null $t The token to display, if any.
 *
 * @return string The display form of the token.
 */
protected function getTokenErrorDisplay(?Token $t) : string
{
    if ($t === null) {
        return '<no token>';
    }

    $text = $this->getSymbolText($t);

    if ($text !== null) {
        return $this->escapeWSAndQuote($text);
    }

    // No text available: fall back to a placeholder derived from the type.
    $placeholder = $this->getSymbolType($t) === Token::EOF
        ? '<EOF>'
        : '<' . $this->getSymbolType($t) . '>';

    return $this->escapeWSAndQuote($placeholder);
}
745*37748cd8SNickeau
/**
 * Returns the text of the given token as reported by the token itself.
 *
 * @param Token $symbol The token to inspect.
 *
 * @return string|null The token text, or `null` when the token has none.
 */
protected function getSymbolText(Token $symbol) : ?string
{
    $text = $symbol->getText();

    return $text;
}
750*37748cd8SNickeau
/**
 * Returns the type of the given token as reported by the token itself.
 *
 * @param Token $symbol The token to inspect.
 *
 * @return int The token type.
 */
protected function getSymbolType(Token $symbol) : int
{
    $type = $symbol->getType();

    return $type;
}
755*37748cd8SNickeau
/**
 * Passes the string through {@see StringUtils::escapeWhitespace()} and
 * wraps the result in single quotes.
 *
 * @param string $s The raw text.
 *
 * @return string The escaped, single-quoted text.
 */
protected function escapeWSAndQuote(string $s) : string
{
    $escaped = StringUtils::escapeWhitespace($s);

    return "'{$escaped}'";
}
760*37748cd8SNickeau
761*37748cd8SNickeau    /**
762*37748cd8SNickeau     * Compute the error recovery set for the current rule.  During
763*37748cd8SNickeau     * rule invocation, the parser pushes the set of tokens that can
764*37748cd8SNickeau     * follow that rule reference on the stack; this amounts to
765*37748cd8SNickeau     * computing FIRST of what follows the rule reference in the
766*37748cd8SNickeau     * enclosing rule. See LinearApproximator::FIRST.
767*37748cd8SNickeau     * This local follow set only includes tokens
768*37748cd8SNickeau     * from within the rule; i.e., the FIRST computation done by
769*37748cd8SNickeau     * ANTLR stops at the end of a rule.
770*37748cd8SNickeau     *
771*37748cd8SNickeau     * EXAMPLE
772*37748cd8SNickeau     *
773*37748cd8SNickeau     * When you find a "no viable alt exception", the input is not
774*37748cd8SNickeau     * consistent with any of the alternatives for rule r.  The best
775*37748cd8SNickeau     * thing to do is to consume tokens until you see something that
776*37748cd8SNickeau     * can legally follow a call to r *or* any rule that called r.
777*37748cd8SNickeau     * You don't want the exact set of viable next tokens because the
778*37748cd8SNickeau     * input might just be missing a token--you might consume the
779*37748cd8SNickeau     * rest of the input looking for one of the missing tokens.
780*37748cd8SNickeau     *
781*37748cd8SNickeau     * Consider grammar:
782*37748cd8SNickeau     *
783*37748cd8SNickeau     *     a : '[' b ']'
784*37748cd8SNickeau     *       | '(' b ')'
785*37748cd8SNickeau     *       ;
786*37748cd8SNickeau     *     b : c '^' INT ;
787*37748cd8SNickeau     *     c : ID
788*37748cd8SNickeau     *       | INT
789*37748cd8SNickeau     *       ;
790*37748cd8SNickeau     *
791*37748cd8SNickeau     * At each rule invocation, the set of tokens that could follow
792*37748cd8SNickeau     * that rule is pushed on a stack.  Here are the various
793*37748cd8SNickeau     * context-sensitive follow sets:
794*37748cd8SNickeau     *
795*37748cd8SNickeau     *     FOLLOW(b1_in_a) = FIRST(']') = ']'
796*37748cd8SNickeau     *     FOLLOW(b2_in_a) = FIRST(')') = ')'
797*37748cd8SNickeau     *     FOLLOW(c_in_b) = FIRST('^') = '^'
798*37748cd8SNickeau     *
799*37748cd8SNickeau     * Upon erroneous input "[]", the call chain is
800*37748cd8SNickeau     *
801*37748cd8SNickeau     *     a -> b -> c
802*37748cd8SNickeau     *
803*37748cd8SNickeau     * and, hence, the follow context stack is:
804*37748cd8SNickeau     *
805*37748cd8SNickeau     * depth | follow set | start of rule execution
806*37748cd8SNickeau     * ------|------------|-------------------------
807*37748cd8SNickeau     *   0   |   <EOF>    |    a (from main())
808*37748cd8SNickeau     *   1   |   ']'      |          b
809*37748cd8SNickeau     *   2   |   '^'      |          c
810*37748cd8SNickeau     *
811*37748cd8SNickeau     * Notice that ')' is not included, because b would have to have
812*37748cd8SNickeau     * been called from a different context in rule a for ')' to be
813*37748cd8SNickeau     * included.
814*37748cd8SNickeau     *
815*37748cd8SNickeau     * For error recovery, we cannot consider FOLLOW(c)
816*37748cd8SNickeau     * (context-sensitive or otherwise).  We need the combined set of
817*37748cd8SNickeau     * all context-sensitive FOLLOW sets--the set of all tokens that
818*37748cd8SNickeau     * could follow any reference in the call chain.  We need to
819*37748cd8SNickeau     * resync to one of those tokens.  Note that FOLLOW(c)='^' and if
820*37748cd8SNickeau     * we resync'd to that token, we'd consume until EOF.  We need to
821*37748cd8SNickeau     * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
822*37748cd8SNickeau     * In this case, for input "[]", LA(1) is ']' and in the set, so we would
823*37748cd8SNickeau     * not consume anything. After printing an error, rule c would
824*37748cd8SNickeau     * return normally.  Rule b would not find the required '^' though.
825*37748cd8SNickeau     * At this point, it gets a mismatched token error and throws an
826*37748cd8SNickeau     * exception (since LA(1) is not in the viable following token
827*37748cd8SNickeau     * set).  The rule exception handler tries to recover, but finds
828*37748cd8SNickeau     * the same recovery set and doesn't consume anything.  Rule b
829*37748cd8SNickeau     * exits normally returning to rule a.  Now it finds the ']' (and
830*37748cd8SNickeau     * with the successful match exits errorRecovery mode).
831*37748cd8SNickeau     *
832*37748cd8SNickeau     * So, you can see that the parser walks up the call chain looking
833*37748cd8SNickeau     * for the token that was a member of the recovery set.
834*37748cd8SNickeau     *
835*37748cd8SNickeau     * Errors are not generated in errorRecovery mode.
836*37748cd8SNickeau     *
837*37748cd8SNickeau     * ANTLR's error recovery mechanism is based upon original ideas:
838*37748cd8SNickeau     *
839*37748cd8SNickeau     * "Algorithms + Data Structures = Programs" by Niklaus Wirth
840*37748cd8SNickeau     *
841*37748cd8SNickeau     * and
842*37748cd8SNickeau     *
843*37748cd8SNickeau     * "A note on error recovery in recursive descent parsers":
844*37748cd8SNickeau     * http://portal.acm.org/citation.cfm?id=947902.947905
845*37748cd8SNickeau     *
846*37748cd8SNickeau     * Later, Josef Grosch had some good ideas:
847*37748cd8SNickeau     *
848*37748cd8SNickeau     * "Efficient and Comfortable Error Recovery in Recursive Descent
849*37748cd8SNickeau     * Parsers":
850*37748cd8SNickeau     * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
851*37748cd8SNickeau     *
852*37748cd8SNickeau     * Like Grosch I implement context-sensitive FOLLOW sets that are combined
853*37748cd8SNickeau     * at run-time upon error to avoid overhead during parsing.
854*37748cd8SNickeau     */
protected function getErrorRecoverySet(Parser $recognizer) : IntervalSet
{
    $simulator = $recognizer->getInterpreter();

    if ($simulator === null) {
        throw new \RuntimeException('Unexpected null interpreter.');
    }

    $atn = $simulator->atn;
    $resyncSet = new IntervalSet();

    // Walk the rule invocation chain from the current context outwards,
    // stopping at the first context that has no invoking state.
    for (
        $frame = $recognizer->getContext();
        $frame !== null && $frame->invokingState >= 0;
        $frame = $frame->getParent()
    ) {
        /** @var ATNState $caller */
        $caller = $atn->states[$frame->invokingState];

        /** @var RuleTransition $ruleCall */
        $ruleCall = $caller->getTransition(0);

        // Collect whatever may follow the rule reference that invoked us.
        $resyncSet->addSet($atn->nextTokens($ruleCall->followState));
    }

    // EPSILON is not a real token type, so it cannot be resynchronized on.
    $resyncSet->removeOne(Token::EPSILON);

    return $resyncSet;
}
882*37748cd8SNickeau
/**
 * Consumes tokens until the lookahead token is a member of the given set
 * or the end of the input is reached. The stopping token itself is not
 * consumed.
 *
 * @param Parser      $recognizer The parser instance.
 * @param IntervalSet $set        The token types to stop at.
 *
 * @throws \RuntimeException When the parser has no input stream.
 */
protected function consumeUntil(Parser $recognizer, IntervalSet $set) : void
{
    $tokens = $recognizer->getInputStream();

    if ($tokens === null) {
        throw new \RuntimeException('Unexpected null input stream.');
    }

    while (true) {
        $type = $tokens->LA(1);

        if ($type === Token::EOF || $set->contains($type)) {
            return;
        }

        $recognizer->consume();
    }
}
901*37748cd8SNickeau}
902