1*37748cd8SNickeau<?php 2*37748cd8SNickeau 3*37748cd8SNickeaudeclare(strict_types=1); 4*37748cd8SNickeau 5*37748cd8SNickeaunamespace Antlr\Antlr4\Runtime\Atn; 6*37748cd8SNickeau 7*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Atn\SemanticContexts\SemanticContext; 8*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Atn\States\ATNState; 9*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Atn\States\BlockEndState; 10*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Atn\States\BlockStartState; 11*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Atn\States\DecisionState; 12*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Atn\States\RuleStopState; 13*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Atn\States\StarLoopEntryState; 14*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Atn\Transitions\ActionTransition; 15*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Atn\Transitions\EpsilonTransition; 16*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Atn\Transitions\PrecedencePredicateTransition; 17*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Atn\Transitions\PredicateTransition; 18*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Atn\Transitions\RuleTransition; 19*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Atn\Transitions\Transition; 20*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Dfa\DFA; 21*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Dfa\DFAState; 22*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Dfa\PredPrediction; 23*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Error\Exceptions\NoViableAltException; 24*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Interval; 25*37748cd8SNickeauuse Antlr\Antlr4\Runtime\IntervalSet; 26*37748cd8SNickeauuse Antlr\Antlr4\Runtime\IntStream; 27*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Parser; 28*37748cd8SNickeauuse Antlr\Antlr4\Runtime\ParserRuleContext; 29*37748cd8SNickeauuse Antlr\Antlr4\Runtime\PredictionContexts\PredictionContext; 30*37748cd8SNickeauuse Antlr\Antlr4\Runtime\PredictionContexts\PredictionContextCache; 31*37748cd8SNickeauuse Antlr\Antlr4\Runtime\PredictionContexts\SingletonPredictionContext; 32*37748cd8SNickeauuse Antlr\Antlr4\Runtime\RuleContext; 33*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Token; 34*37748cd8SNickeauuse Antlr\Antlr4\Runtime\TokenStream; 35*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Utils\BitSet; 36*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Utils\DoubleKeyMap; 37*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Utils\Set; 38*37748cd8SNickeauuse Antlr\Antlr4\Runtime\VocabularyImpl; 39*37748cd8SNickeau 40*37748cd8SNickeau/** 41*37748cd8SNickeau * The embodiment of the adaptive LL(*), ALL(*), parsing strategy. 42*37748cd8SNickeau * 43*37748cd8SNickeau * The basic complexity of the adaptive strategy makes it harder to understand. 44*37748cd8SNickeau * We begin with ATN simulation to build paths in a DFA. Subsequent prediction 45*37748cd8SNickeau * requests go through the DFA first. If they reach a state without an edge for 46*37748cd8SNickeau * the current symbol, the algorithm fails over to the ATN simulation to 47*37748cd8SNickeau * complete the DFA path for the current input (until it finds a conflict state 48*37748cd8SNickeau * or uniquely predicting state). 49*37748cd8SNickeau * 50*37748cd8SNickeau * All of that is done without using the outer context because we want to create 51*37748cd8SNickeau * a DFA that is not dependent upon the rule invocation stack when we do a 52*37748cd8SNickeau * prediction. One DFA works in all contexts. We avoid using context not 53*37748cd8SNickeau * necessarily because it's slower, although it can be, but because of the DFA 54*37748cd8SNickeau * caching problem. The closure routine only considers the rule invocation stack 55*37748cd8SNickeau * created during prediction beginning in the decision rule. For example, if 56*37748cd8SNickeau * prediction occurs without invoking another rule's ATN, there are no context 57*37748cd8SNickeau * stacks in the configurations. When lack of context leads to a conflict, we 58*37748cd8SNickeau * don't know if it's an ambiguity or a weakness in the strong LL(*) parsing 59*37748cd8SNickeau * strategy (versus full LL(*)). 60*37748cd8SNickeau * 61*37748cd8SNickeau * When SLL yields a configuration set with conflict, we rewind the input and 62*37748cd8SNickeau * retry the ATN simulation, this time using full outer context without adding 63*37748cd8SNickeau * to the DFA. Configuration context stacks will be the full invocation stacks 64*37748cd8SNickeau * from the start rule. If we get a conflict using full context, then we can 65*37748cd8SNickeau * definitively say we have a true ambiguity for that input sequence. If we 66*37748cd8SNickeau * don't get a conflict, it implies that the decision is sensitive to the outer 67*37748cd8SNickeau * context. (It is not context-sensitive in the sense of context-sensitive 68*37748cd8SNickeau * grammars.) 69*37748cd8SNickeau * 70*37748cd8SNickeau * The next time we reach this DFA state with an SLL conflict, through DFA 71*37748cd8SNickeau * simulation, we will again retry the ATN simulation using full context mode. 72*37748cd8SNickeau * This is slow because we can't save the results and have to "interpret" the 73*37748cd8SNickeau * ATN each time we get that input. 74*37748cd8SNickeau * 75*37748cd8SNickeau * CACHING FULL CONTEXT PREDICTIONS 76*37748cd8SNickeau * 77*37748cd8SNickeau * We could cache results from full context to predicted alternative easily and 78*37748cd8SNickeau * that saves a lot of time but doesn't work in presence of predicates. The set 79*37748cd8SNickeau * of visible predicates from the ATN start state changes depending on the 80*37748cd8SNickeau * context, because closure can fall off the end of a rule. I tried to cache 81*37748cd8SNickeau * tuples (stack context, semantic context, predicted alt) but it was slower 82*37748cd8SNickeau * than interpreting and much more complicated. Also required a huge amount of 83*37748cd8SNickeau * memory. The goal is not to create the world's fastest parser anyway. I'd like 84*37748cd8SNickeau * to keep this algorithm simple. By launching multiple threads, we can improve 85*37748cd8SNickeau * the speed of parsing across a large number of files. 86*37748cd8SNickeau * 87*37748cd8SNickeau * There is no strict ordering between the amount of input used by SLL vs LL, 88*37748cd8SNickeau * which makes it really hard to build a cache for full context. Let's say that 89*37748cd8SNickeau * we have input A B C that leads to an SLL conflict with full context X. That 90*37748cd8SNickeau * implies that using X we might only use A B but we could also use A B C D to 91*37748cd8SNickeau * resolve conflict. Input A B C D could predict alternative 1 in one position 92*37748cd8SNickeau * in the input and A B C E could predict alternative 2 in another position in 93*37748cd8SNickeau * input. The conflicting SLL configurations could still be non-unique in the 94*37748cd8SNickeau * full context prediction, which would lead us to requiring more input than the 95*37748cd8SNickeau * original A B C. To make a prediction cache work, we have to track the exact 96*37748cd8SNickeau * input used during the previous prediction. That amounts to a cache that maps 97*37748cd8SNickeau * X to a specific DFA for that context. 98*37748cd8SNickeau * 99*37748cd8SNickeau * Something should be done for left-recursive expression predictions. They are 100*37748cd8SNickeau * likely LL(1) + pred eval. Easier to do the whole SLL unless error and retry 101*37748cd8SNickeau * with full LL thing Sam does. 102*37748cd8SNickeau * 103*37748cd8SNickeau * AVOIDING FULL CONTEXT PREDICTION 104*37748cd8SNickeau * 105*37748cd8SNickeau * We avoid doing full context retry when the outer context is empty, we did not 106*37748cd8SNickeau * dip into the outer context by falling off the end of the decision state rule, 107*37748cd8SNickeau * or when we force SLL mode. 108*37748cd8SNickeau * 109*37748cd8SNickeau * As an example of the not dip into outer context case, consider as super 110*37748cd8SNickeau * constructor calls versus function calls. One grammar might look like 111*37748cd8SNickeau * this: 112*37748cd8SNickeau * 113*37748cd8SNickeau * ctorBody 114*37748cd8SNickeau * : '{' superCall? stat* '}' 115*37748cd8SNickeau * ; 116*37748cd8SNickeau * 117*37748cd8SNickeau * 118*37748cd8SNickeau * Or, you might see something like 119*37748cd8SNickeau * 120*37748cd8SNickeau * stat 121*37748cd8SNickeau * : superCall ';' 122*37748cd8SNickeau * | expression ';' 123*37748cd8SNickeau * | ... 124*37748cd8SNickeau * ; 125*37748cd8SNickeau * 126*37748cd8SNickeau * 127*37748cd8SNickeau * In both cases I believe that no closure operations will dip into the outer 128*37748cd8SNickeau * context. In the first case ctorBody in the worst case will stop at the '}'. 129*37748cd8SNickeau * In the 2nd case it should stop at the ';'. Both cases should stay within the 130*37748cd8SNickeau * entry rule and not dip into the outer context. 131*37748cd8SNickeau * 132*37748cd8SNickeau * PREDICATES 133*37748cd8SNickeau * 134*37748cd8SNickeau * Predicates are always evaluated if present in either SLL or LL both. SLL and 135*37748cd8SNickeau * LL simulation deals with predicates differently. SLL collects predicates as 136*37748cd8SNickeau * it performs closure operations like ANTLR v3 did. It delays predicate 137*37748cd8SNickeau * evaluation until it reaches and accept state. This allows us to cache the SLL 138*37748cd8SNickeau * ATN simulation whereas, if we had evaluated predicates on-the-fly during 139*37748cd8SNickeau * closure, the DFA state configuration sets would be different and we couldn't 140*37748cd8SNickeau * build up a suitable DFA. 141*37748cd8SNickeau * 142*37748cd8SNickeau * When building a DFA accept state during ATN simulation, we evaluate any 143*37748cd8SNickeau * predicates and return the sole semantically valid alternative. If there is 144*37748cd8SNickeau * more than 1 alternative, we report an ambiguity. If there are 0 alternatives, 145*37748cd8SNickeau * we throw an exception. Alternatives without predicates act like they have 146*37748cd8SNickeau * true predicates. The simple way to think about it is to strip away all 147*37748cd8SNickeau * alternatives with false predicates and choose the minimum alternative that 148*37748cd8SNickeau * remains. 149*37748cd8SNickeau * 150*37748cd8SNickeau * When we start in the DFA and reach an accept state that's predicated, we test 151*37748cd8SNickeau * those and return the minimum semantically viable alternative. If no 152*37748cd8SNickeau * alternatives are viable, we throw an exception. 153*37748cd8SNickeau * 154*37748cd8SNickeau * During full LL ATN simulation, closure always evaluates predicates and 155*37748cd8SNickeau * on-the-fly. This is crucial to reducing the configuration set size during 156*37748cd8SNickeau * closure. It hits a landmine when parsing with the Java grammar, for example, 157*37748cd8SNickeau * without this on-the-fly evaluation. 158*37748cd8SNickeau * 159*37748cd8SNickeau * SHARING DFA 160*37748cd8SNickeau * 161*37748cd8SNickeau * All instances of the same parser share the same decision DFAs through a 162*37748cd8SNickeau * static field. Each instance gets its own ATN simulator but they share the 163*37748cd8SNickeau * same {@see ParserATNSimulator::$decisionToDFA} field. They also share a 164*37748cd8SNickeau * {@see PredictionContextCache} object that makes sure that all 165*37748cd8SNickeau * {@see PredictionContext} objects are shared among the DFA states. This makes 166*37748cd8SNickeau * a big size difference. 167*37748cd8SNickeau * 168*37748cd8SNickeau * THREAD SAFETY 169*37748cd8SNickeau * 170*37748cd8SNickeau * The {@see ParserATNSimulator} locks on the {@see ParserATNSimulator::$decisionToDFA} 171*37748cd8SNickeau * field when it adds a new DFA object to that array. 172*37748cd8SNickeau * {@see ParserATNSimulator::$addDFAEdge} locks on the DFA for the current 173*37748cd8SNickeau * decision when setting the {@see DFAState::$edges} field. 174*37748cd8SNickeau * {@see ParserATNSimulator::addDFAState()} locks on the DFA for the current 175*37748cd8SNickeau * decision when looking up a DFA state to see if it already exists. We must 176*37748cd8SNickeau * make sure that all requests to add DFA states that are equivalent result in 177*37748cd8SNickeau * the same shared DFA object. This is because lots of threads will be trying 178*37748cd8SNickeau * to update the DFA at once. {@see ParserATNSimulator::addDFAState()} also 179*37748cd8SNickeau * locks inside the DFA lock but this time on the shared context cache when it 180*37748cd8SNickeau * rebuilds the configurations' {@see PredictionContext} objects using cached 181*37748cd8SNickeau * subgraphs/nodes. No other locking occurs, even during DFA simulation. This is 182*37748cd8SNickeau * safe as long as we can guarantee that all threads referencing 183*37748cd8SNickeau * `s.edge[t]` get the same physical target {@see DFAState}, or `null`. Once 184*37748cd8SNickeau * into the DFA, the DFA simulation does not reference the {@see DFA::$states} map. 185*37748cd8SNickeau * It follows the {@see DFAState::$edges} field to new targets. The DFA simulator 186*37748cd8SNickeau * will either find {@see DFAState::$edges} to be `null`, to be non-`null` and 187*37748cd8SNickeau * `dfa.edges[t]` null, or `dfa.edges[t]` to be non-null. The 188*37748cd8SNickeau * {@see ParserATNSimulator::addDFAEdge()} method could be racing to set the field 189*37748cd8SNickeau * but in either case the DFA simulator works; if `null`, and requests ATN 190*37748cd8SNickeau * simulation. It could also race trying to get `dfa.edges[t]`, but either 191*37748cd8SNickeau * way it will work because it's not doing a test and set operation. 192*37748cd8SNickeau * 193*37748cd8SNickeau * tarting with SLL then failing to combined SLL/LL (Two-Stage Parsing) 194*37748cd8SNickeau * 195*37748cd8SNickeau * Sam pointed out that if SLL does not give a syntax error, then there is no 196*37748cd8SNickeau * point in doing full LL, which is slower. We only have to try LL if we get a 197*37748cd8SNickeau * syntax error. For maximum speed, Sam starts the parser set to pure SLL 198*37748cd8SNickeau * mode with the {@see BailErrorStrategy}: 199*37748cd8SNickeau * 200*37748cd8SNickeau * parser.{@see Parser::getInterpreter()}. 201*37748cd8SNickeau * {@see ParserATNSimulator::setPredictionMode()}`(}{@see PredictionMode::$SLL})`; 202*37748cd8SNickeau * parser->{@see Parser::setErrorHandler()}(new {@see BailErrorStrategy}()); 203*37748cd8SNickeau * 204*37748cd8SNickeau * If it does not get a syntax error, then we're done. If it does get a syntax 205*37748cd8SNickeau * error, we need to retry with the combined SLL/LL strategy. 206*37748cd8SNickeau * 207*37748cd8SNickeau * The reason this works is as follows. If there are no SLL conflicts, then the 208*37748cd8SNickeau * grammar is SLL (at least for that input set). If there is an SLL conflict, 209*37748cd8SNickeau * the full LL analysis must yield a set of viable alternatives which is a 210*37748cd8SNickeau * subset of the alternatives reported by SLL. If the LL set is a singleton, 211*37748cd8SNickeau * then the grammar is LL but not SLL. If the LL set is the same size as the SLL 212*37748cd8SNickeau * set, the decision is SLL. If the LL set has size > 1, then that decision 213*37748cd8SNickeau * is truly ambiguous on the current input. If the LL set is smaller, then the 214*37748cd8SNickeau * SLL conflict resolution might choose an alternative that the full LL would 215*37748cd8SNickeau * rule out as a possibility based upon better context information. If that's 216*37748cd8SNickeau * the case, then the SLL parse will definitely get an error because the full LL 217*37748cd8SNickeau * analysis says it's not viable. If SLL conflict resolution chooses an 218*37748cd8SNickeau * alternative within the LL set, them both SLL and LL would choose the same 219*37748cd8SNickeau * alternative because they both choose the minimum of multiple conflicting 220*37748cd8SNickeau * alternatives. 221*37748cd8SNickeau * 222*37748cd8SNickeau * Let's say we have a set of SLL conflicting alternatives `{1, 2, 3}` and 223*37748cd8SNickeau * a smaller LL set called s. If s is `{2, 3}`, then SLL parsing will 224*37748cd8SNickeau * get an error because SLL will pursue alternative 1. If s is `{1, 2}` or 225*37748cd8SNickeau * `{1, 3}` then both SLL and LL will choose the same alternative because 226*37748cd8SNickeau * alternative one is the minimum of either set. If s is `{2}` or `{3}` then 227*37748cd8SNickeau * SLL will get a syntax error. If s is `{1}` then SLL will succeed. 228*37748cd8SNickeau * 229*37748cd8SNickeau * Of course, if the input is invalid, then we will get an error for sure in 230*37748cd8SNickeau * both SLL and LL parsing. Erroneous input will therefore require 2 passes over 231*37748cd8SNickeau * the input. 232*37748cd8SNickeau */ 233*37748cd8SNickeaufinal class ParserATNSimulator extends ATNSimulator 234*37748cd8SNickeau{ 235*37748cd8SNickeau /** @var bool */ 236*37748cd8SNickeau public static $debug = false; 237*37748cd8SNickeau 238*37748cd8SNickeau /** @var bool */ 239*37748cd8SNickeau public static $debug_closure = false; 240*37748cd8SNickeau 241*37748cd8SNickeau /** @var bool */ 242*37748cd8SNickeau public static $debug_add = false; 243*37748cd8SNickeau 244*37748cd8SNickeau /** @var bool */ 245*37748cd8SNickeau public static $debug_list_atn_decisions = false; 246*37748cd8SNickeau 247*37748cd8SNickeau /** @var bool */ 248*37748cd8SNickeau public static $dfa_debug = false; 249*37748cd8SNickeau 250*37748cd8SNickeau /** @var bool */ 251*37748cd8SNickeau public static $retry_debug = false; 252*37748cd8SNickeau 253*37748cd8SNickeau /** @var array<string> */ 254*37748cd8SNickeau public $log = []; 255*37748cd8SNickeau 256*37748cd8SNickeau /** @var Parser */ 257*37748cd8SNickeau protected $parser; 258*37748cd8SNickeau 259*37748cd8SNickeau /** @var array<DFA> */ 260*37748cd8SNickeau public $decisionToDFA = []; 261*37748cd8SNickeau 262*37748cd8SNickeau /** @var int */ 263*37748cd8SNickeau private $mode = PredictionMode::LL; 264*37748cd8SNickeau 265*37748cd8SNickeau /** 266*37748cd8SNickeau * Each prediction operation uses a cache for merge of prediction contexts. 267*37748cd8SNickeau * Don't keep around as it wastes huge amounts of memory. DoubleKeyMap 268*37748cd8SNickeau * isn't synchronized but we're ok since two threads shouldn't reuse same 269*37748cd8SNickeau * parser/atnsim object because it can only handle one input at a time. 270*37748cd8SNickeau * This maps graphs a and b to merged result c. (a,b)→c. We can avoid 271*37748cd8SNickeau * the merge if we ever see a and b again. Note that (b,a)→c should 272*37748cd8SNickeau * also be examined during cache lookup. 273*37748cd8SNickeau * 274*37748cd8SNickeau * @var DoubleKeyMap|null 275*37748cd8SNickeau */ 276*37748cd8SNickeau protected $mergeCache; 277*37748cd8SNickeau 278*37748cd8SNickeau /** 279*37748cd8SNickeau * LAME globals to avoid parameters!!!!! I need these down deep in predTransition. 280*37748cd8SNickeau * 281*37748cd8SNickeau * @var TokenStream 282*37748cd8SNickeau */ 283*37748cd8SNickeau protected $input; 284*37748cd8SNickeau 285*37748cd8SNickeau /** @var int */ 286*37748cd8SNickeau protected $startIndex = 0; 287*37748cd8SNickeau 288*37748cd8SNickeau /** @var ParserRuleContext|null */ 289*37748cd8SNickeau protected $outerContext; 290*37748cd8SNickeau 291*37748cd8SNickeau /** @var DFA|null */ 292*37748cd8SNickeau protected $dfa; 293*37748cd8SNickeau 294*37748cd8SNickeau /** 295*37748cd8SNickeau * @param array<DFA> $decisionToDFA 296*37748cd8SNickeau */ 297*37748cd8SNickeau public function __construct( 298*37748cd8SNickeau Parser $parser, 299*37748cd8SNickeau ATN $atn, 300*37748cd8SNickeau array $decisionToDFA, 301*37748cd8SNickeau PredictionContextCache $sharedContextCache 302*37748cd8SNickeau ) { 303*37748cd8SNickeau parent::__construct($atn, $sharedContextCache); 304*37748cd8SNickeau 305*37748cd8SNickeau $this->parser = $parser; 306*37748cd8SNickeau $this->decisionToDFA = $decisionToDFA; 307*37748cd8SNickeau } 308*37748cd8SNickeau 309*37748cd8SNickeau public function reset() : void 310*37748cd8SNickeau { 311*37748cd8SNickeau } 312*37748cd8SNickeau 313*37748cd8SNickeau public function clearDFA() : void 314*37748cd8SNickeau { 315*37748cd8SNickeau for ($d = 0, $count = \count($this->decisionToDFA); $d < $count; $d++) { 316*37748cd8SNickeau $decisionState = $this->atn->getDecisionState($d); 317*37748cd8SNickeau 318*37748cd8SNickeau if ($decisionState !== null) { 319*37748cd8SNickeau $this->decisionToDFA[$d] = new DFA($decisionState, $d); 320*37748cd8SNickeau } 321*37748cd8SNickeau } 322*37748cd8SNickeau } 323*37748cd8SNickeau 324*37748cd8SNickeau public function adaptivePredict(TokenStream $input, int $decision, ParserRuleContext $outerContext) : int 325*37748cd8SNickeau { 326*37748cd8SNickeau if (self::$debug || self::$debug_list_atn_decisions) { 327*37748cd8SNickeau $token = $input->LT(1); 328*37748cd8SNickeau 329*37748cd8SNickeau $this->log[] = \sprintf( 330*37748cd8SNickeau 'adaptivePredict decision %d exec LA(1)==%s line %d:%d', 331*37748cd8SNickeau $decision, 332*37748cd8SNickeau $this->getLookaheadName($input), 333*37748cd8SNickeau $token === null ? '' : $token->getLine(), 334*37748cd8SNickeau $token === null ? '' : $token->getCharPositionInLine() 335*37748cd8SNickeau ); 336*37748cd8SNickeau } 337*37748cd8SNickeau 338*37748cd8SNickeau $this->input = $input; 339*37748cd8SNickeau $this->startIndex = $input->getIndex(); 340*37748cd8SNickeau $this->outerContext = $outerContext; 341*37748cd8SNickeau 342*37748cd8SNickeau $dfa = $this->decisionToDFA[$decision]; 343*37748cd8SNickeau $this->dfa = $dfa; 344*37748cd8SNickeau 345*37748cd8SNickeau $m = $input->mark(); 346*37748cd8SNickeau $index = $this->startIndex; 347*37748cd8SNickeau 348*37748cd8SNickeau // Now we are certain to have a specific decision's DFA, but do we still need an initial state? 349*37748cd8SNickeau try { 350*37748cd8SNickeau if ($dfa->isPrecedenceDfa()) { 351*37748cd8SNickeau // The start state for a precedence DFA depends on the current 352*37748cd8SNickeau // parser precedence, and is provided by a DFA method. 353*37748cd8SNickeau 354*37748cd8SNickeau $s0 = $dfa->getPrecedenceStartState($this->parser->getPrecedence()); 355*37748cd8SNickeau } else { 356*37748cd8SNickeau // The start state for a "regular" DFA is just s0. 357*37748cd8SNickeau $s0 = $dfa->s0; 358*37748cd8SNickeau } 359*37748cd8SNickeau 360*37748cd8SNickeau if ($s0 === null) { 361*37748cd8SNickeau if (self::$debug || self::$debug_list_atn_decisions) { 362*37748cd8SNickeau $this->log[] = \sprintf( 363*37748cd8SNickeau 'predictATN decision %d exec LA(1)==%s, outerContext=%s', 364*37748cd8SNickeau $dfa->decision, 365*37748cd8SNickeau $this->getLookaheadName($input), 366*37748cd8SNickeau $outerContext->toString($this->parser->getRuleNames()) 367*37748cd8SNickeau ); 368*37748cd8SNickeau } 369*37748cd8SNickeau 370*37748cd8SNickeau $fullCtx = false; 371*37748cd8SNickeau 372*37748cd8SNickeau if ($dfa->atnStartState === null) { 373*37748cd8SNickeau throw new \RuntimeException('ATN Start State cannot be null.'); 374*37748cd8SNickeau } 375*37748cd8SNickeau 376*37748cd8SNickeau $s0_closure = $this->computeStartState( 377*37748cd8SNickeau $dfa->atnStartState, 378*37748cd8SNickeau ParserRuleContext::emptyContext(), 379*37748cd8SNickeau $fullCtx 380*37748cd8SNickeau ); 381*37748cd8SNickeau 382*37748cd8SNickeau if ($dfa->isPrecedenceDfa()) { 383*37748cd8SNickeau /* 384*37748cd8SNickeau * If this is a precedence DFA, we use applyPrecedenceFilter 385*37748cd8SNickeau * to convert the computed start state to a precedence start 386*37748cd8SNickeau * state. We then use DFA.setPrecedenceStartState to set the 387*37748cd8SNickeau * appropriate start state for the precedence level rather 388*37748cd8SNickeau * than simply setting DFA.s0. 389*37748cd8SNickeau */ 390*37748cd8SNickeau 391*37748cd8SNickeau if ($dfa->s0 === null) { 392*37748cd8SNickeau throw new \RuntimeException('DFA.s0 cannot be null.'); 393*37748cd8SNickeau } 394*37748cd8SNickeau 395*37748cd8SNickeau $dfa->s0->configs = $s0_closure; // not used for prediction but useful to know start configs anyway 396*37748cd8SNickeau 397*37748cd8SNickeau $s0_closure = $this->applyPrecedenceFilter($s0_closure); 398*37748cd8SNickeau 399*37748cd8SNickeau $s0 = $this->addDFAState($dfa, new DFAState($s0_closure)); 400*37748cd8SNickeau 401*37748cd8SNickeau $dfa->setPrecedenceStartState($this->parser->getPrecedence(), $s0); 402*37748cd8SNickeau } else { 403*37748cd8SNickeau $s0 = $this->addDFAState($dfa, new DFAState($s0_closure)); 404*37748cd8SNickeau $dfa->s0 = $s0; 405*37748cd8SNickeau } 406*37748cd8SNickeau } 407*37748cd8SNickeau 408*37748cd8SNickeau $alt = $this->execATN($dfa, $s0, $input, $index, $outerContext); 409*37748cd8SNickeau 410*37748cd8SNickeau if (self::$debug) { 411*37748cd8SNickeau $this->log[] = \sprintf('DFA after predictATN: %s', $dfa->toString($this->parser->getVocabulary())); 412*37748cd8SNickeau } 413*37748cd8SNickeau 414*37748cd8SNickeau return $alt ?? 0; 415*37748cd8SNickeau } finally { 416*37748cd8SNickeau $this->mergeCache = null; // wack cache after each prediction 417*37748cd8SNickeau $this->dfa = null; 418*37748cd8SNickeau $input->seek($index); 419*37748cd8SNickeau $input->release($m); 420*37748cd8SNickeau } 421*37748cd8SNickeau } 422*37748cd8SNickeau 423*37748cd8SNickeau /** 424*37748cd8SNickeau * Performs ATN simulation to compute a predicted alternative based 425*37748cd8SNickeau * upon the remaining input, but also updates the DFA cache to avoid 426*37748cd8SNickeau * having to traverse the ATN again for the same input sequence. 427*37748cd8SNickeau * 428*37748cd8SNickeau * There are some key conditions we're looking for after computing a new 429*37748cd8SNickeau * set of ATN configs (proposed DFA state): 430*37748cd8SNickeau * if the set is empty, there is no viable alternative for current symbol 431*37748cd8SNickeau * does the state uniquely predict an alternative? 432*37748cd8SNickeau * does the state have a conflict that would prevent us from 433*37748cd8SNickeau * putting it on the work list? 434*37748cd8SNickeau * 435*37748cd8SNickeau * We also have some key operations to do: 436*37748cd8SNickeau * - add an edge from previous DFA state to potentially new DFA state, D, 437*37748cd8SNickeau * upon current symbol but only if adding to work list, which means in all 438*37748cd8SNickeau * cases except no viable alternative (and possibly non-greedy decisions?) 439*37748cd8SNickeau * - collecting predicates and adding semantic context to DFA accept states 440*37748cd8SNickeau * - adding rule context to context-sensitive DFA accept states 441*37748cd8SNickeau * - consuming an input symbol 442*37748cd8SNickeau * - reporting a conflict 443*37748cd8SNickeau * - reporting an ambiguity 444*37748cd8SNickeau * - reporting a context sensitivity 445*37748cd8SNickeau * - reporting insufficient predicates 446*37748cd8SNickeau * 447*37748cd8SNickeau * cover these cases: 448*37748cd8SNickeau * - dead end 449*37748cd8SNickeau * - single alt 450*37748cd8SNickeau * - single alt + preds 451*37748cd8SNickeau * - conflict 452*37748cd8SNickeau * - conflict + preds 453*37748cd8SNickeau */ 454*37748cd8SNickeau public function execATN( 455*37748cd8SNickeau DFA $dfa, 456*37748cd8SNickeau DFAState $s0, 457*37748cd8SNickeau TokenStream $input, 458*37748cd8SNickeau int $startIndex, 459*37748cd8SNickeau ParserRuleContext $outerContext 460*37748cd8SNickeau ) : ?int { 461*37748cd8SNickeau if (self::$debug || self::$debug_list_atn_decisions) { 462*37748cd8SNickeau $token = $input->LT(1); 463*37748cd8SNickeau 464*37748cd8SNickeau $this->log[] = \sprintf( 465*37748cd8SNickeau 'execATN decision %d exec LA(1)==%s line %d:%d', 466*37748cd8SNickeau $dfa->decision, 467*37748cd8SNickeau $this->getLookaheadName($input), 468*37748cd8SNickeau $token === null ? '' : $token->getLine(), 469*37748cd8SNickeau $token === null ? '' : $token->getCharPositionInLine() 470*37748cd8SNickeau ); 471*37748cd8SNickeau } 472*37748cd8SNickeau 473*37748cd8SNickeau $previousD = $s0; 474*37748cd8SNickeau 475*37748cd8SNickeau if (self::$debug) { 476*37748cd8SNickeau $this->log[] = 's0 = ' . $s0; 477*37748cd8SNickeau } 478*37748cd8SNickeau 479*37748cd8SNickeau $t = $input->LA(1); 480*37748cd8SNickeau 481*37748cd8SNickeau while (true) { 482*37748cd8SNickeau $D = $this->getExistingTargetState($previousD, $t); 483*37748cd8SNickeau 484*37748cd8SNickeau if ($D === null) { 485*37748cd8SNickeau $D = $this->computeTargetState($dfa, $previousD, $t); 486*37748cd8SNickeau } 487*37748cd8SNickeau 488*37748cd8SNickeau if ($D === null) { 489*37748cd8SNickeau throw new \RuntimeException('DFA State cannot be null.'); 490*37748cd8SNickeau } 491*37748cd8SNickeau 492*37748cd8SNickeau if ($D === self::error()) { 493*37748cd8SNickeau /* If any configs in previous dipped into outer context, that 494*37748cd8SNickeau * means that input up to t actually finished entry rule 495*37748cd8SNickeau * at least for SLL decision. Full LL doesn't dip into outer 496*37748cd8SNickeau * so don't need special case. 497*37748cd8SNickeau * We will get an error no matter what so delay until after 498*37748cd8SNickeau * decision; better error message. Also, no reachable target 499*37748cd8SNickeau * ATN states in SLL implies LL will also get nowhere. 500*37748cd8SNickeau * If conflict in states that dip out, choose min since we 501*37748cd8SNickeau * will get error no matter what. 502*37748cd8SNickeau */ 503*37748cd8SNickeau 504*37748cd8SNickeau $e = $this->noViableAlt($input, $outerContext, $previousD->configs, $startIndex); 505*37748cd8SNickeau 506*37748cd8SNickeau $input->seek($startIndex); 507*37748cd8SNickeau 508*37748cd8SNickeau $alt = $this->getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule( 509*37748cd8SNickeau $previousD->configs, 510*37748cd8SNickeau $outerContext 511*37748cd8SNickeau ); 512*37748cd8SNickeau 513*37748cd8SNickeau if ($alt !== ATN::INVALID_ALT_NUMBER) { 514*37748cd8SNickeau return $alt; 515*37748cd8SNickeau } 516*37748cd8SNickeau 517*37748cd8SNickeau throw $e; 518*37748cd8SNickeau } 519*37748cd8SNickeau 520*37748cd8SNickeau if ($D->requiresFullContext && $this->mode !== PredictionMode::SLL) { 521*37748cd8SNickeau // IF PREDS, MIGHT RESOLVE TO SINGLE ALT => SLL (or syntax error) 522*37748cd8SNickeau 523*37748cd8SNickeau $conflictingAlts = $D->configs->getConflictingAlts(); 524*37748cd8SNickeau 525*37748cd8SNickeau if ($D->predicates !== null) { 526*37748cd8SNickeau if (self::$debug) { 527*37748cd8SNickeau $this->log[] = 'DFA state has preds in DFA sim LL failover'; 528*37748cd8SNickeau } 529*37748cd8SNickeau 530*37748cd8SNickeau $conflictIndex = $input->getIndex(); 531*37748cd8SNickeau 532*37748cd8SNickeau if ($conflictIndex !== $startIndex) { 533*37748cd8SNickeau $input->seek($startIndex); 534*37748cd8SNickeau } 535*37748cd8SNickeau 536*37748cd8SNickeau $conflictingAlts = $this->evalSemanticContextMany($D->predicates, $outerContext, true); 537*37748cd8SNickeau 538*37748cd8SNickeau if ($conflictingAlts->length() === 1) { 539*37748cd8SNickeau if (self::$debug) { 540*37748cd8SNickeau $this->log[] = 'Full LL avoided'; 541*37748cd8SNickeau } 542*37748cd8SNickeau 543*37748cd8SNickeau return $conflictingAlts->minValue(); 544*37748cd8SNickeau } 545*37748cd8SNickeau 546*37748cd8SNickeau if ($conflictIndex !== $startIndex) { 547*37748cd8SNickeau // Restore the index so reporting the fallback to full 548*37748cd8SNickeau // context occurs with the index at the correct spot 549*37748cd8SNickeau 550*37748cd8SNickeau $input->seek($conflictIndex); 551*37748cd8SNickeau } 552*37748cd8SNickeau } 553*37748cd8SNickeau 554*37748cd8SNickeau if (self::$dfa_debug) { 555*37748cd8SNickeau $this->log[] = \sprintf( 556*37748cd8SNickeau 'Ctx sensitive state %s in %s', 557*37748cd8SNickeau (string) $outerContext, 558*37748cd8SNickeau (string) $D 559*37748cd8SNickeau ); 560*37748cd8SNickeau } 561*37748cd8SNickeau 562*37748cd8SNickeau if ($dfa->atnStartState === null) { 563*37748cd8SNickeau throw new \RuntimeException('ATN Start State cannot be null.'); 564*37748cd8SNickeau } 565*37748cd8SNickeau 566*37748cd8SNickeau $s0_closure = $this->computeStartState($dfa->atnStartState, $outerContext, true); 567*37748cd8SNickeau 568*37748cd8SNickeau $this->reportAttemptingFullContext( 569*37748cd8SNickeau $dfa, 570*37748cd8SNickeau $conflictingAlts, 571*37748cd8SNickeau $D->configs, 572*37748cd8SNickeau $startIndex, 573*37748cd8SNickeau $input->getIndex() 574*37748cd8SNickeau ); 575*37748cd8SNickeau 576*37748cd8SNickeau return $this->execATNWithFullContext($dfa, $D, $s0_closure, $input, $startIndex, $outerContext); 577*37748cd8SNickeau } 578*37748cd8SNickeau 579*37748cd8SNickeau if ($D->isAcceptState) { 580*37748cd8SNickeau if ($D->predicates === null) { 581*37748cd8SNickeau return $D->prediction; 582*37748cd8SNickeau } 583*37748cd8SNickeau 584*37748cd8SNickeau $stopIndex = $input->getIndex(); 585*37748cd8SNickeau $input->seek($startIndex); 586*37748cd8SNickeau $alts = $this->evalSemanticContextMany($D->predicates, $outerContext, true); 587*37748cd8SNickeau 588*37748cd8SNickeau switch ($alts->length()) { 589*37748cd8SNickeau case 0: 590*37748cd8SNickeau throw $this->noViableAlt($input, $outerContext, $D->configs, $startIndex); 591*37748cd8SNickeau 592*37748cd8SNickeau case 1: 593*37748cd8SNickeau return $alts->minValue(); 594*37748cd8SNickeau 595*37748cd8SNickeau default: 596*37748cd8SNickeau // Report ambiguity after predicate evaluation to make sure 597*37748cd8SNickeau // the correct set of ambig alts is reported. 598*37748cd8SNickeau $this->reportAmbiguity($dfa, $D, $startIndex, $stopIndex, false, $alts, $D->configs); 599*37748cd8SNickeau 600*37748cd8SNickeau return $alts->minValue(); 601*37748cd8SNickeau } 602*37748cd8SNickeau } 603*37748cd8SNickeau 604*37748cd8SNickeau $previousD = $D; 605*37748cd8SNickeau 606*37748cd8SNickeau if ($t !== IntStream::EOF) { 607*37748cd8SNickeau $input->consume(); 608*37748cd8SNickeau $t = $input->LA(1); 609*37748cd8SNickeau } 610*37748cd8SNickeau } 611*37748cd8SNickeau } 612*37748cd8SNickeau 613*37748cd8SNickeau /** 614*37748cd8SNickeau * Get an existing target state for an edge in the DFA. If the target state 615*37748cd8SNickeau * for the edge has not yet been computed or is otherwise not available, 616*37748cd8SNickeau * this method returns `null`. 617*37748cd8SNickeau * 618*37748cd8SNickeau * @param DFAState $previousD The current DFA state 619*37748cd8SNickeau * @param int $t The next input symbol 620*37748cd8SNickeau * 621*37748cd8SNickeau * @return DFAState|null The existing target DFA state for the given input 622*37748cd8SNickeau * symbol `t`, or `null` if the target state for 623*37748cd8SNickeau * this edge is not already cached. 624*37748cd8SNickeau */ 625*37748cd8SNickeau public function getExistingTargetState(DFAState $previousD, int $t) : ?DFAState 626*37748cd8SNickeau { 627*37748cd8SNickeau $edges = $previousD->edges; 628*37748cd8SNickeau 629*37748cd8SNickeau if ($edges === null || $t + 1 < 0 || $t + 1 >= $edges->count()) { 630*37748cd8SNickeau return null; 631*37748cd8SNickeau } 632*37748cd8SNickeau 633*37748cd8SNickeau return $edges[$t + 1]; 634*37748cd8SNickeau } 635*37748cd8SNickeau 636*37748cd8SNickeau /** 637*37748cd8SNickeau * Compute a target state for an edge in the DFA, and attempt to add 638*37748cd8SNickeau * the computed state and corresponding edge to the DFA. 639*37748cd8SNickeau * 640*37748cd8SNickeau * @param DFA $dfa The DFA 641*37748cd8SNickeau * @param DFAState $previousD The current DFA state 642*37748cd8SNickeau * @param int $t The next input symbol 643*37748cd8SNickeau * 644*37748cd8SNickeau * @return DFAState|null The computed target DFA state for the given input 645*37748cd8SNickeau * symbol `t`. If `t` does not lead to a valid DFA 646*37748cd8SNickeau * state, this method returns 647*37748cd8SNickeau * {@see ParserATNSimulator::error()}. 648*37748cd8SNickeau */ 649*37748cd8SNickeau public function computeTargetState(DFA $dfa, DFAState $previousD, int $t) : ?DFAState 650*37748cd8SNickeau { 651*37748cd8SNickeau $reach = $this->computeReachSet($previousD->configs, $t, false); 652*37748cd8SNickeau 653*37748cd8SNickeau if ($reach === null) { 654*37748cd8SNickeau $this->addDFAEdge($dfa, $previousD, $t, self::error()); 655*37748cd8SNickeau 656*37748cd8SNickeau return self::error(); 657*37748cd8SNickeau } 658*37748cd8SNickeau 659*37748cd8SNickeau // Create new target state; we'll add to DFA after it's complete 660*37748cd8SNickeau $D = new DFAState($reach); 661*37748cd8SNickeau 662*37748cd8SNickeau $predictedAlt = self::getUniqueAlt($reach); 663*37748cd8SNickeau 664*37748cd8SNickeau if (self::$debug) { 665*37748cd8SNickeau $altSubSets = PredictionMode::getConflictingAltSubsets($reach); 666*37748cd8SNickeau 667*37748cd8SNickeau $this->log[] = \sprintf( 668*37748cd8SNickeau 'SLL altSubSets=[%s], previous=%s, configs=%s, predict=%d, allSubsetsConflict=%s, conflictingAlts=%s', 669*37748cd8SNickeau \implode(', ', $altSubSets), 670*37748cd8SNickeau (string) $previousD->configs, 671*37748cd8SNickeau (string) $reach, 672*37748cd8SNickeau $predictedAlt, 673*37748cd8SNickeau PredictionMode::allSubsetsConflict($altSubSets), 674*37748cd8SNickeau $this->getConflictingAlts($reach) 675*37748cd8SNickeau ); 676*37748cd8SNickeau } 677*37748cd8SNickeau 678*37748cd8SNickeau if ($predictedAlt !== ATN::INVALID_ALT_NUMBER) { 679*37748cd8SNickeau // NO CONFLICT, UNIQUELY PREDICTED ALT 680*37748cd8SNickeau 681*37748cd8SNickeau $D->isAcceptState = true; 682*37748cd8SNickeau $D->configs->uniqueAlt = $predictedAlt; 683*37748cd8SNickeau $D->prediction = $predictedAlt; 684*37748cd8SNickeau } elseif (PredictionMode::hasSLLConflictTerminatingPrediction($this->mode, $reach)) { 685*37748cd8SNickeau // MORE THAN ONE VIABLE ALTERNATIVE 686*37748cd8SNickeau 687*37748cd8SNickeau $D->configs->setConflictingAlts($this->getConflictingAlts($reach)); 688*37748cd8SNickeau $D->requiresFullContext = true; 689*37748cd8SNickeau 690*37748cd8SNickeau // in SLL-only mode, we will stop at this state and return the minimum alt 691*37748cd8SNickeau $D->isAcceptState = true; 692*37748cd8SNickeau 693*37748cd8SNickeau $conflictingAlts = $D->configs->getConflictingAlts(); 694*37748cd8SNickeau 695*37748cd8SNickeau if ($conflictingAlts === null) { 696*37748cd8SNickeau throw new \RuntimeException('Unexpected null conflicting alternatives.'); 697*37748cd8SNickeau } 698*37748cd8SNickeau 699*37748cd8SNickeau $D->prediction = $conflictingAlts->minValue(); 700*37748cd8SNickeau } 701*37748cd8SNickeau 702*37748cd8SNickeau if ($D->isAcceptState && $D->configs->hasSemanticContext) { 703*37748cd8SNickeau $decisionState = $this->atn->getDecisionState($dfa->decision); 704*37748cd8SNickeau 705*37748cd8SNickeau if ($decisionState !== null) { 706*37748cd8SNickeau $this->predicateDFAState($D, $decisionState); 707*37748cd8SNickeau } 708*37748cd8SNickeau 709*37748cd8SNickeau if ($D->predicates !== null) { 710*37748cd8SNickeau $D->prediction = ATN::INVALID_ALT_NUMBER; 711*37748cd8SNickeau } 712*37748cd8SNickeau } 713*37748cd8SNickeau 714*37748cd8SNickeau // All adds to dfa are done after we've created full D state 715*37748cd8SNickeau $D = $this->addDFAEdge($dfa, $previousD, $t, $D); 716*37748cd8SNickeau 717*37748cd8SNickeau return $D; 718*37748cd8SNickeau } 719*37748cd8SNickeau 720*37748cd8SNickeau protected function predicateDFAState(DFAState $dfaState, DecisionState $decisionState) : void 721*37748cd8SNickeau { 722*37748cd8SNickeau // We need to test all predicates, even in DFA states that uniquely predict alternative. 723*37748cd8SNickeau $nalts = $decisionState->getNumberOfTransitions(); 724*37748cd8SNickeau 725*37748cd8SNickeau // Update DFA so reach becomes accept state with (predicate,alt) pairs 726*37748cd8SNickeau // if preds found for conflicting alts. 727*37748cd8SNickeau 728*37748cd8SNickeau $altsToCollectPredsFrom = $this->getConflictingAltsOrUniqueAlt($dfaState->configs); 729*37748cd8SNickeau $altToPred = $altsToCollectPredsFrom === null ? 730*37748cd8SNickeau null : 731*37748cd8SNickeau $this->getPredsForAmbigAlts($altsToCollectPredsFrom, $dfaState->configs, $nalts); 732*37748cd8SNickeau 733*37748cd8SNickeau if ($altToPred !== null) { 734*37748cd8SNickeau $dfaState->predicates = $this->getPredicatePredictions($altsToCollectPredsFrom, $altToPred); 735*37748cd8SNickeau $dfaState->prediction = ATN::INVALID_ALT_NUMBER; // make sure we use preds 736*37748cd8SNickeau } else { 737*37748cd8SNickeau // There are preds in configs but they might go away when 738*37748cd8SNickeau // OR'd together like {p}? || NONE == NONE. If neither alt has preds, 739*37748cd8SNickeau // resolve to min alt. 740*37748cd8SNickeau 741*37748cd8SNickeau if ($altsToCollectPredsFrom === null) { 742*37748cd8SNickeau throw new \RuntimeException('Unexpected null alternatives to collect predicates'); 743*37748cd8SNickeau } 744*37748cd8SNickeau 745*37748cd8SNickeau $dfaState->prediction = $altsToCollectPredsFrom->minValue(); 746*37748cd8SNickeau } 747*37748cd8SNickeau } 748*37748cd8SNickeau 749*37748cd8SNickeau /** 750*37748cd8SNickeau * Comes back with reach.uniqueAlt set to a valid alt. 751*37748cd8SNickeau */ 752*37748cd8SNickeau protected function execATNWithFullContext( 753*37748cd8SNickeau DFA $dfa, 754*37748cd8SNickeau DFAState $D, // how far we got before failing over 755*37748cd8SNickeau ATNConfigSet $s0, 756*37748cd8SNickeau TokenStream $input, 757*37748cd8SNickeau int $startIndex, 758*37748cd8SNickeau ParserRuleContext $outerContext 759*37748cd8SNickeau ) : int { 760*37748cd8SNickeau if (self::$debug || self::$debug_list_atn_decisions) { 761*37748cd8SNickeau $this->log[] = \sprintf('ExecATNWithFullContext %s', (string) $s0); 762*37748cd8SNickeau } 763*37748cd8SNickeau 764*37748cd8SNickeau $fullCtx = true; 765*37748cd8SNickeau $foundExactAmbig = false; 766*37748cd8SNickeau $reach = null; 767*37748cd8SNickeau $previous = $s0; 768*37748cd8SNickeau $input->seek($startIndex); 769*37748cd8SNickeau $t = $input->LA(1); 770*37748cd8SNickeau $predictedAlt = 0; 771*37748cd8SNickeau 772*37748cd8SNickeau while (true) { // while more work 773*37748cd8SNickeau $reach = $this->computeReachSet($previous, $t, $fullCtx); 774*37748cd8SNickeau 775*37748cd8SNickeau if ($reach === null) { 776*37748cd8SNickeau /* I any configs in previous dipped into outer context, that 777*37748cd8SNickeau * means that input up to t actually finished entry rule 778*37748cd8SNickeau * at least for LL decision. Full LL doesn't dip into outer 779*37748cd8SNickeau * so don't need special case. 780*37748cd8SNickeau * We will get an error no matter what so delay until after 781*37748cd8SNickeau * decision; better error message. Also, no reachable target 782*37748cd8SNickeau * ATN states in SLL implies LL will also get nowhere. 783*37748cd8SNickeau * If conflict in states that dip out, choose min since we 784*37748cd8SNickeau * will get error no matter what. 785*37748cd8SNickeau */ 786*37748cd8SNickeau 787*37748cd8SNickeau $e = $this->noViableAlt($input, $outerContext, $previous, $startIndex); 788*37748cd8SNickeau 789*37748cd8SNickeau $input->seek($startIndex); 790*37748cd8SNickeau 791*37748cd8SNickeau $alt = $this->getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule($previous, $outerContext); 792*37748cd8SNickeau 793*37748cd8SNickeau if ($alt !== ATN::INVALID_ALT_NUMBER) { 794*37748cd8SNickeau return $alt; 795*37748cd8SNickeau } 796*37748cd8SNickeau 797*37748cd8SNickeau throw $e; 798*37748cd8SNickeau } 799*37748cd8SNickeau 800*37748cd8SNickeau $altSubSets = PredictionMode::getConflictingAltSubsets($reach); 801*37748cd8SNickeau 802*37748cd8SNickeau if (self::$debug) { 803*37748cd8SNickeau $this->log[] = \sprintf( 804*37748cd8SNickeau 'LL altSubSets=%s, predict=%d, resolvesToJustOneViableAlt=%d', 805*37748cd8SNickeau \implode(',', $altSubSets), 806*37748cd8SNickeau PredictionMode::getUniqueAlt($altSubSets), 807*37748cd8SNickeau PredictionMode::resolvesToJustOneViableAlt($altSubSets) 808*37748cd8SNickeau ); 809*37748cd8SNickeau } 810*37748cd8SNickeau 811*37748cd8SNickeau $reach->uniqueAlt = self::getUniqueAlt($reach); 812*37748cd8SNickeau 813*37748cd8SNickeau // unique prediction? 814*37748cd8SNickeau if ($reach->uniqueAlt !== ATN::INVALID_ALT_NUMBER) { 815*37748cd8SNickeau $predictedAlt = $reach->uniqueAlt; 816*37748cd8SNickeau 817*37748cd8SNickeau break; 818*37748cd8SNickeau } 819*37748cd8SNickeau 820*37748cd8SNickeau if ($this->mode !== PredictionMode::LL_EXACT_AMBIG_DETECTION) { 821*37748cd8SNickeau $predictedAlt = PredictionMode::resolvesToJustOneViableAlt($altSubSets); 822*37748cd8SNickeau 823*37748cd8SNickeau if ($predictedAlt !== ATN::INVALID_ALT_NUMBER) { 824*37748cd8SNickeau break; 825*37748cd8SNickeau } 826*37748cd8SNickeau } else { 827*37748cd8SNickeau // In exact ambiguity mode, we never try to terminate early. 828*37748cd8SNickeau // Just keeps scarfing until we know what the conflict is 829*37748cd8SNickeau if (PredictionMode::allSubsetsConflict($altSubSets) && PredictionMode::allSubsetsEqual($altSubSets)) { 830*37748cd8SNickeau $foundExactAmbig = true; 831*37748cd8SNickeau $predictedAlt = PredictionMode::getSingleViableAlt($altSubSets); 832*37748cd8SNickeau 833*37748cd8SNickeau break; 834*37748cd8SNickeau } 835*37748cd8SNickeau 836*37748cd8SNickeau // Else there are multiple non-conflicting subsets or 837*37748cd8SNickeau // we're not sure what the ambiguity is yet. 838*37748cd8SNickeau // So, keep going. 839*37748cd8SNickeau } 840*37748cd8SNickeau 841*37748cd8SNickeau $previous = $reach; 842*37748cd8SNickeau 843*37748cd8SNickeau if ($t !== IntStream::EOF) { 844*37748cd8SNickeau $input->consume(); 845*37748cd8SNickeau $t = $input->LA(1); 846*37748cd8SNickeau } 847*37748cd8SNickeau } 848*37748cd8SNickeau 849*37748cd8SNickeau // If the configuration set uniquely predicts an alternative, 850*37748cd8SNickeau // without conflict, then we know that it's a full LL decision not SLL. 851*37748cd8SNickeau if ($reach->uniqueAlt !== ATN::INVALID_ALT_NUMBER) { 852*37748cd8SNickeau $this->reportContextSensitivity($dfa, $predictedAlt, $reach, $startIndex, $input->getIndex()); 853*37748cd8SNickeau 854*37748cd8SNickeau return $predictedAlt; 855*37748cd8SNickeau } 856*37748cd8SNickeau 857*37748cd8SNickeau /* We do not check predicates here because we have checked them on-the-fly 858*37748cd8SNickeau * when doing full context prediction. 859*37748cd8SNickeau * 860*37748cd8SNickeau * In non-exact ambiguity detection mode, we might actually be able to 861*37748cd8SNickeau * detect an exact ambiguity, but I'm not going to spend the cycles 862*37748cd8SNickeau * needed to check. We only emit ambiguity warnings in exact ambiguity 863*37748cd8SNickeau * mode. 864*37748cd8SNickeau * 865*37748cd8SNickeau * For example, we might know that we have conflicting configurations. 866*37748cd8SNickeau * But, that does not mean that there is no way forward without a 867*37748cd8SNickeau * conflict. It's possible to have nonconflicting alt subsets as in: 868*37748cd8SNickeau * 869*37748cd8SNickeau * altSubSets=[{1, 2}, {1, 2}, {1}, {1, 2}] 870*37748cd8SNickeau * 871*37748cd8SNickeau * from 872*37748cd8SNickeau * [ 873*37748cd8SNickeau * (17,1,[5 $]), (13,1,[5 10 $]), (21,1,[5 10 $]), (11,1,[$]), 874*37748cd8SNickeau * (13,2,[5 10 $]), (21,2,[5 10 $]), (11,2,[$]) 875*37748cd8SNickeau * ] 876*37748cd8SNickeau * 877*37748cd8SNickeau * In this case, (17,1,[5 $]) indicates there is some next sequence that 878*37748cd8SNickeau * would resolve this without conflict to alternative 1. Any other viable 879*37748cd8SNickeau * next sequence, however, is associated with a conflict. We stop 880*37748cd8SNickeau * looking for input because no amount of further lookahead will alter 881*37748cd8SNickeau * the fact that we should predict alternative 1. We just can't say for 882*37748cd8SNickeau * sure that there is an ambiguity without looking further. 883*37748cd8SNickeau */ 884*37748cd8SNickeau 885*37748cd8SNickeau $this->reportAmbiguity($dfa, $D, $startIndex, $input->getIndex(), $foundExactAmbig, null, $reach); 886*37748cd8SNickeau 887*37748cd8SNickeau return $predictedAlt; 888*37748cd8SNickeau } 889*37748cd8SNickeau 890*37748cd8SNickeau protected function computeReachSet(ATNConfigSet $closure, int $t, bool $fullCtx) : ?ATNConfigSet 891*37748cd8SNickeau { 892*37748cd8SNickeau if (self::$debug) { 893*37748cd8SNickeau $this->log[] = \sprintf('In computeReachSet, starting closure: %s', (string) $closure); 894*37748cd8SNickeau } 895*37748cd8SNickeau 896*37748cd8SNickeau if ($this->mergeCache === null) { 897*37748cd8SNickeau $this->mergeCache = new DoubleKeyMap(); 898*37748cd8SNickeau } 899*37748cd8SNickeau 900*37748cd8SNickeau $intermediate = new ATNConfigSet($fullCtx); 901*37748cd8SNickeau 902*37748cd8SNickeau /* 903*37748cd8SNickeau * Configurations already in a rule stop state indicate reaching the end 904*37748cd8SNickeau * of the decision rule (local context) or end of the start rule (full 905*37748cd8SNickeau * context). Once reached, these configurations are never updated by a 906*37748cd8SNickeau * closure operation, so they are handled separately for the performance 907*37748cd8SNickeau * advantage of having a smaller intermediate set when calling closure. 908*37748cd8SNickeau * 909*37748cd8SNickeau * For full-context reach operations, separate handling is required to 910*37748cd8SNickeau * ensure that the alternative matching the longest overall sequence is 911*37748cd8SNickeau * chosen when multiple such configurations can match the input. 912*37748cd8SNickeau */ 913*37748cd8SNickeau $skippedStopStates = null; 914*37748cd8SNickeau 915*37748cd8SNickeau // First figure out where we can reach on input t 916*37748cd8SNickeau foreach ($closure->elements() as $c) { 917*37748cd8SNickeau if (self::$debug_add) { 918*37748cd8SNickeau $this->log[] = \sprintf('Testing %s at %s', $this->getTokenName($t), (string) $c); 919*37748cd8SNickeau } 920*37748cd8SNickeau 921*37748cd8SNickeau if ($c->state instanceof RuleStopState) { 922*37748cd8SNickeau if ($c->context !== null && !$c->context->isEmpty()) { 923*37748cd8SNickeau throw new \RuntimeException('Context cannot be empty.'); 924*37748cd8SNickeau } 925*37748cd8SNickeau 926*37748cd8SNickeau if ($fullCtx || $t === IntStream::EOF) { 927*37748cd8SNickeau if ($skippedStopStates === null) { 928*37748cd8SNickeau $skippedStopStates = []; 929*37748cd8SNickeau } 930*37748cd8SNickeau 931*37748cd8SNickeau $skippedStopStates[] = $c; 932*37748cd8SNickeau 933*37748cd8SNickeau if (self::$debug_add) { 934*37748cd8SNickeau $this->log[] = \sprintf('Added %s to skippedStopStates', (string) $c); 935*37748cd8SNickeau } 936*37748cd8SNickeau } 937*37748cd8SNickeau 938*37748cd8SNickeau continue; 939*37748cd8SNickeau } 940*37748cd8SNickeau 941*37748cd8SNickeau foreach ($c->state->getTransitions() as $trans) { 942*37748cd8SNickeau $target = $this->getReachableTarget($trans, $t); 943*37748cd8SNickeau 944*37748cd8SNickeau if ($target !== null) { 945*37748cd8SNickeau $cfg = new ATNConfig($c, $target); 946*37748cd8SNickeau $intermediate->add($cfg, $this->mergeCache); 947*37748cd8SNickeau 948*37748cd8SNickeau if (self::$debug_add) { 949*37748cd8SNickeau $this->log[] = \sprintf('Added %s to intermediate', (string) $cfg); 950*37748cd8SNickeau } 951*37748cd8SNickeau } 952*37748cd8SNickeau } 953*37748cd8SNickeau } 954*37748cd8SNickeau 955*37748cd8SNickeau // Now figure out where the reach operation can take us... 956*37748cd8SNickeau 957*37748cd8SNickeau $reach = null; 958*37748cd8SNickeau 959*37748cd8SNickeau /* This block optimizes the reach operation for intermediate sets which 960*37748cd8SNickeau * trivially indicate a termination state for the overall 961*37748cd8SNickeau * adaptivePredict operation. 962*37748cd8SNickeau * 963*37748cd8SNickeau * The conditions assume that intermediate contains all configurations 964*37748cd8SNickeau * relevant to the reach set, but this condition is not true when one 965*37748cd8SNickeau * or more configurations have been withheld in skippedStopStates, or 966*37748cd8SNickeau * when the current symbol is EOF. 967*37748cd8SNickeau */ 968*37748cd8SNickeau if ($skippedStopStates === null && $t !== Token::EOF) { 969*37748cd8SNickeau if (\count($intermediate->elements()) === 1) { 970*37748cd8SNickeau // Don't pursue the closure if there is just one state. 971*37748cd8SNickeau // It can only have one alternative; just add to result 972*37748cd8SNickeau // Also don't pursue the closure if there is unique alternative 973*37748cd8SNickeau // among the configurations. 974*37748cd8SNickeau 975*37748cd8SNickeau $reach = $intermediate; 976*37748cd8SNickeau } elseif (self::getUniqueAlt($intermediate) !== ATN::INVALID_ALT_NUMBER) { 977*37748cd8SNickeau // Also don't pursue the closure if there is unique alternative among the configurations. 978*37748cd8SNickeau $reach = $intermediate; 979*37748cd8SNickeau } 980*37748cd8SNickeau } 981*37748cd8SNickeau 982*37748cd8SNickeau // If the reach set could not be trivially determined, perform a closure 983*37748cd8SNickeau // operation on the intermediate set to compute its initial value. 984*37748cd8SNickeau if ($reach === null) { 985*37748cd8SNickeau $reach = new ATNConfigSet($fullCtx); 986*37748cd8SNickeau $closureBusy = new Set(); 987*37748cd8SNickeau $treatEofAsEpsilon = $t === Token::EOF; 988*37748cd8SNickeau 989*37748cd8SNickeau foreach ($intermediate->elements() as $item) { 990*37748cd8SNickeau $this->closure($item, $reach, $closureBusy, false, $fullCtx, $treatEofAsEpsilon); 991*37748cd8SNickeau } 992*37748cd8SNickeau } 993*37748cd8SNickeau 994*37748cd8SNickeau if ($t === IntStream::EOF) { 995*37748cd8SNickeau /* After consuming EOF no additional input is possible, so we are 996*37748cd8SNickeau * only interested in configurations which reached the end of the 997*37748cd8SNickeau * decision rule (local context) or end of the start rule (full 998*37748cd8SNickeau * context). Update reach to contain only these configurations. This 999*37748cd8SNickeau * handles both explicit EOF transitions in the grammar and implicit 1000*37748cd8SNickeau * EOF transitions following the end of the decision or start rule. 1001*37748cd8SNickeau * 1002*37748cd8SNickeau * When reach==intermediate, no closure operation was performed. In 1003*37748cd8SNickeau * this case, removeAllConfigsNotInRuleStopState needs to check for 1004*37748cd8SNickeau * reachable rule stop states as well as configurations already in 1005*37748cd8SNickeau * a rule stop state. 1006*37748cd8SNickeau * 1007*37748cd8SNickeau * This is handled before the configurations in skippedStopStates, 1008*37748cd8SNickeau * because any configurations potentially added from that list are 1009*37748cd8SNickeau * already guaranteed to meet this condition whether or not it's 1010*37748cd8SNickeau * required. 1011*37748cd8SNickeau */ 1012*37748cd8SNickeau 1013*37748cd8SNickeau $reach = $this->removeAllConfigsNotInRuleStopState($reach, $reach->equals($intermediate)); 1014*37748cd8SNickeau } 1015*37748cd8SNickeau 1016*37748cd8SNickeau /* If `skippedStopStates !== null`, then it contains at least one 1017*37748cd8SNickeau * configuration. For full-context reach operations, these 1018*37748cd8SNickeau * configurations reached the end of the start rule, in which case we 1019*37748cd8SNickeau * only add them back to reach if no configuration during the current 1020*37748cd8SNickeau * closure operation reached such a state. This ensures adaptivePredict 1021*37748cd8SNickeau * chooses an alternative matching the longest overall sequence when 1022*37748cd8SNickeau * multiple alternatives are viable.*/ 1023*37748cd8SNickeau 1024*37748cd8SNickeau if ($skippedStopStates !== null && (!$fullCtx || !PredictionMode::hasConfigInRuleStopState($reach))) { 1025*37748cd8SNickeau if (\count($skippedStopStates) === 0) { 1026*37748cd8SNickeau throw new \RuntimeException('Skipped stop states cannot be empty.'); 1027*37748cd8SNickeau } 1028*37748cd8SNickeau 1029*37748cd8SNickeau foreach ($skippedStopStates as $lValue) { 1030*37748cd8SNickeau $reach->add($lValue, $this->mergeCache); 1031*37748cd8SNickeau } 1032*37748cd8SNickeau } 1033*37748cd8SNickeau 1034*37748cd8SNickeau if ($reach->isEmpty()) { 1035*37748cd8SNickeau return null; 1036*37748cd8SNickeau } 1037*37748cd8SNickeau 1038*37748cd8SNickeau return $reach; 1039*37748cd8SNickeau } 1040*37748cd8SNickeau 1041*37748cd8SNickeau /** 1042*37748cd8SNickeau * Return a configuration set containing only the configurations from 1043*37748cd8SNickeau * `configs` which are in a {@see RuleStopState}. If all configurations in 1044*37748cd8SNickeau * `configs` are already in a rule stop state, this method simply returns 1045*37748cd8SNickeau * `configs`. 1046*37748cd8SNickeau * 1047*37748cd8SNickeau * When `lookToEndOfRule` is true, this method uses {@see ATN::nextTokens()} 1048*37748cd8SNickeau * for each configuration in `configs` which is not already in a rule stop 1049*37748cd8SNickeau * state to see if a rule stop state is reachable from the configuration via 1050*37748cd8SNickeau * epsilon-only transitions. 1051*37748cd8SNickeau * 1052*37748cd8SNickeau * @param ATNConfigSet $configs The configuration set to update 1053*37748cd8SNickeau * @param bool $lookToEndOfRule When true, this method checks for 1054*37748cd8SNickeau * rule stop states reachable by 1055*37748cd8SNickeau * epsilon-only transitions from each 1056*37748cd8SNickeau * configuration in `configs`. 1057*37748cd8SNickeau * 1058*37748cd8SNickeau * @return ATNConfigSet `configs` if all configurations in `configs` are 1059*37748cd8SNickeau * in a rule stop state, otherwise return a new 1060*37748cd8SNickeau * configuration set containing only the configurations 1061*37748cd8SNickeau * from `configs` which are in a rule stop state. 1062*37748cd8SNickeau * 1063*37748cd8SNickeau * @throws \InvalidArgumentException 1064*37748cd8SNickeau */ 1065*37748cd8SNickeau protected function removeAllConfigsNotInRuleStopState(ATNConfigSet $configs, bool $lookToEndOfRule) : ATNConfigSet 1066*37748cd8SNickeau { 1067*37748cd8SNickeau if (PredictionMode::allConfigsInRuleStopStates($configs)) { 1068*37748cd8SNickeau return $configs; 1069*37748cd8SNickeau } 1070*37748cd8SNickeau 1071*37748cd8SNickeau $result = new ATNConfigSet($configs->fullCtx); 1072*37748cd8SNickeau 1073*37748cd8SNickeau foreach ($configs->elements() as $config) { 1074*37748cd8SNickeau if ($config->state instanceof RuleStopState) { 1075*37748cd8SNickeau $result->add($config, $this->mergeCache); 1076*37748cd8SNickeau 1077*37748cd8SNickeau continue; 1078*37748cd8SNickeau } 1079*37748cd8SNickeau 1080*37748cd8SNickeau if ($lookToEndOfRule && $config->state->onlyHasEpsilonTransitions()) { 1081*37748cd8SNickeau $nextTokens = $this->atn->nextTokens($config->state); 1082*37748cd8SNickeau 1083*37748cd8SNickeau if ($nextTokens->contains(Token::EPSILON)) { 1084*37748cd8SNickeau $endOfRuleState = $this->atn->ruleToStopState[$config->state->ruleIndex]; 1085*37748cd8SNickeau $result->add(new ATNConfig($config, $endOfRuleState), $this->mergeCache); 1086*37748cd8SNickeau } 1087*37748cd8SNickeau } 1088*37748cd8SNickeau } 1089*37748cd8SNickeau 1090*37748cd8SNickeau return $result; 1091*37748cd8SNickeau } 1092*37748cd8SNickeau 1093*37748cd8SNickeau protected function computeStartState(ATNState $p, RuleContext $ctx, bool $fullCtx) : ATNConfigSet 1094*37748cd8SNickeau { 1095*37748cd8SNickeau // Always at least the implicit call to start rule 1096*37748cd8SNickeau $initialContext = PredictionContext::fromRuleContext($this->atn, $ctx); 1097*37748cd8SNickeau $configs = new ATNConfigSet($fullCtx); 1098*37748cd8SNickeau 1099*37748cd8SNickeau foreach ($p->getTransitions() as $i => $t) { 1100*37748cd8SNickeau $c = new ATNConfig(null, $t->target, $initialContext, null, $i + 1); 1101*37748cd8SNickeau $closureBusy = new Set(); 1102*37748cd8SNickeau 1103*37748cd8SNickeau $this->closure($c, $configs, $closureBusy, true, $fullCtx, false); 1104*37748cd8SNickeau } 1105*37748cd8SNickeau 1106*37748cd8SNickeau return $configs; 1107*37748cd8SNickeau } 1108*37748cd8SNickeau 1109*37748cd8SNickeau /* 1110*37748cd8SNickeau * parrt internal source braindump that doesn't mess up external API spec. 1111*37748cd8SNickeau * 1112*37748cd8SNickeau * Context-sensitive in that they can only be properly evaluated in the context 1113*37748cd8SNickeau * of the proper prec argument. Without pruning, these predicates are normal 1114*37748cd8SNickeau * predicates evaluated when we reach conflict state (or unique prediction). 1115*37748cd8SNickeau * As we cannot evaluate these predicates out of context, the resulting 1116*37748cd8SNickeau * conflict leads to full LL evaluation and nonlinear prediction which 1117*37748cd8SNickeau * shows up very clearly with fairly large expressions. 1118*37748cd8SNickeau * 1119*37748cd8SNickeau * Example grammar: 1120*37748cd8SNickeau * e 1121*37748cd8SNickeau * : e '*' e 1122*37748cd8SNickeau * | e '+' e 1123*37748cd8SNickeau * | INT 1124*37748cd8SNickeau * ; 1125*37748cd8SNickeau * 1126*37748cd8SNickeau * We convert that to the following: 1127*37748cd8SNickeau * 1128*37748cd8SNickeau * e[int prec] 1129*37748cd8SNickeau * : INT ( {3>=prec}? '*' e[4] | {2>=prec}? '+' e[3] )* 1130*37748cd8SNickeau * ; 1131*37748cd8SNickeau * 1132*37748cd8SNickeau * The (..)* loop has a decision for the inner block as well as an enter 1133*37748cd8SNickeau * or exit decision, which is what concerns us here. At the 1st + of input 1134*37748cd8SNickeau * 1+2+3, the loop entry sees both predicates and the loop exit also sees 1135*37748cd8SNickeau * both predicates by falling off the edge of e. This is because we have 1136*37748cd8SNickeau * no stack information with SLL and find the follow of e, which will 1137*37748cd8SNickeau * hit the return states inside the loop after e[4] and e[3], which brings 1138*37748cd8SNickeau * it back to the enter or exit decision. In this case, we know that we 1139*37748cd8SNickeau * cannot evaluate those predicates because we have fallen off the edge 1140*37748cd8SNickeau * of the stack and will in general not know which prec parameter is 1141*37748cd8SNickeau * the right one to use in the predicate. 1142*37748cd8SNickeau * 1143*37748cd8SNickeau * Because we have special information, that these are precedence predicates, 1144*37748cd8SNickeau * we can resolve them without failing over to full LL despite their context 1145*37748cd8SNickeau * sensitive nature. We make an assumption that prec[-1] <= prec[0], meaning 1146*37748cd8SNickeau * that the current precedence level is greater than or equal to the precedence 1147*37748cd8SNickeau * level of recursive invocations above us in the stack. For example, if 1148*37748cd8SNickeau * predicate {3>=prec}? is true of the current prec, then one option is to 1149*37748cd8SNickeau * enter the loop to match it now. The other option is to exit the loop and 1150*37748cd8SNickeau * the left recursive rule to match the current operator in rule invocation 1151*37748cd8SNickeau * further up the stack. But, we know that all of those prec are lower or 1152*37748cd8SNickeau * the same value and so we can decide to enter the loop instead of matching 1153*37748cd8SNickeau * it later. That means we can strip out the other configuration for the exit branch. 1154*37748cd8SNickeau * 1155*37748cd8SNickeau * So imagine we have (14,1,$,{2>=prec}?) and then 1156*37748cd8SNickeau * (14,2,$-dipsIntoOuterContext,{2>=prec}?). The optimization allows us to 1157*37748cd8SNickeau * collapse these two configurations. We know that if {2>=prec}? is true 1158*37748cd8SNickeau * for the current prec parameter, it will also be true for any precfrom 1159*37748cd8SNickeau * an invoking e call, indicated by dipsIntoOuterContext. As the predicates 1160*37748cd8SNickeau * are both true, we have the option to evaluate them early in the decision 1161*37748cd8SNickeau * start state. We do this by stripping both predicates and choosing to 1162*37748cd8SNickeau * enter the loop as it is consistent with the notion of operator precedence. 1163*37748cd8SNickeau * It's also how the full LL conflict resolution would work. 1164*37748cd8SNickeau * 1165*37748cd8SNickeau * The solution requires a different DFA start state for each precedence level. 1166*37748cd8SNickeau * 1167*37748cd8SNickeau * The basic filter mechanism is to remove configurations of the form (p, 2, pi) 1168*37748cd8SNickeau * if (p, 1, pi) exists for the same p and pi. In other words, for the same 1169*37748cd8SNickeau * ATN state and predicate context, remove any configuration associated with 1170*37748cd8SNickeau * an exit branch if there is a configuration associated with the enter branch. 1171*37748cd8SNickeau * 1172*37748cd8SNickeau * It's also the case that the filter evaluates precedence predicates and 1173*37748cd8SNickeau * resolves conflicts according to precedence levels. For example, for input 1174*37748cd8SNickeau * 1+2+3 at the first +, we see prediction filtering. 1175*37748cd8SNickeau * 1176*37748cd8SNickeau * [(11,1,[$],{3>=prec}?), (14,1,[$],{2>=prec}?), (5,2,[$],up=1), 1177*37748cd8SNickeau * (11,2,[$],up=1), (14,2,[$],up=1)], hasSemanticContext=true,dipsIntoOuterContext 1178*37748cd8SNickeau * 1179*37748cd8SNickeau * to 1180*37748cd8SNickeau * 1181*37748cd8SNickeau * [(11,1,[$]), (14,1,[$]), (5,2,[$],up=1)],dipsIntoOuterContext 1182*37748cd8SNickeau * 1183*37748cd8SNickeau * This filters because {3>=prec}? evals to true and collapses 1184*37748cd8SNickeau * (11,1,[$],{3>=prec}?) and (11,2,[$],up=1) since early conflict 1185*37748cd8SNickeau * resolution based upon rules of operator precedence fits with our 1186*37748cd8SNickeau * usual match first alt upon conflict. 1187*37748cd8SNickeau * 1188*37748cd8SNickeau * We noticed a problem where a recursive call resets precedence to 0. 1189*37748cd8SNickeau * Sam's fix: each config has flag indicating if it has returned from 1190*37748cd8SNickeau * an expr[0] call. then just don't filter any config with that flag set. 1191*37748cd8SNickeau * flag is carried along in closure(). so to avoid adding field, set bit 1192*37748cd8SNickeau * just under sign bit of dipsIntoOuterContext (SUPPRESS_PRECEDENCE_FILTER). 1193*37748cd8SNickeau * With the change you filter "unless (p, 2, pi) was reached after leaving 1194*37748cd8SNickeau * the rule stop state of the LR rule containing state p, corresponding 1195*37748cd8SNickeau * to a rule invocation with precedence level 0". 1196*37748cd8SNickeau */ 1197*37748cd8SNickeau 1198*37748cd8SNickeau /** 1199*37748cd8SNickeau * This method transforms the start state computed by 1200*37748cd8SNickeau * {@see ParserATNSimulator::computeStartState()} to the special start state 1201*37748cd8SNickeau * used by a precedence DFA for a particular precedence value. The transformation 1202*37748cd8SNickeau * process applies the following changes to the start state's configuration 1203*37748cd8SNickeau * set. 1204*37748cd8SNickeau * 1205*37748cd8SNickeau * - Evaluate the precedence predicates for each configuration using 1206*37748cd8SNickeau * {@see SemanticContext//evalPrecedence}. 1207*37748cd8SNickeau * - Remove all configurations which predict an alternative greater than 1208*37748cd8SNickeau * 1, for which another configuration that predicts alternative 1 is in the 1209*37748cd8SNickeau * same ATN state with the same prediction context. This transformation is 1210*37748cd8SNickeau * valid for the following reasons: 1211*37748cd8SNickeau * - The closure block cannot contain any epsilon transitions which bypass 1212*37748cd8SNickeau * the body of the closure, so all states reachable via alternative 1 are 1213*37748cd8SNickeau * part of the precedence alternatives of the transformed left-recursive 1214*37748cd8SNickeau * rule. 1215*37748cd8SNickeau * - The "primary" portion of a left recursive rule cannot contain an 1216*37748cd8SNickeau * epsilon transition, so the only way an alternative other than 1 can exist 1217*37748cd8SNickeau * in a state that is also reachable via alternative 1 is by nesting calls 1218*37748cd8SNickeau * to the left-recursive rule, with the outer calls not being at the 1219*37748cd8SNickeau * preferred precedence level. 1220*37748cd8SNickeau * 1221*37748cd8SNickeau * The prediction context must be considered by this filter to address 1222*37748cd8SNickeau * situations like the following. 1223*37748cd8SNickeau * 1224*37748cd8SNickeau * grammar TA; 1225*37748cd8SNickeau * prog: statement* EOF; 1226*37748cd8SNickeau * statement: letterA | statement letterA 'b' ; 1227*37748cd8SNickeau * letterA: 'a'; 1228*37748cd8SNickeau * 1229*37748cd8SNickeau * If the above grammar, the ATN state immediately before the token 1230*37748cd8SNickeau * reference `'a'` in `letterA` is reachable from the left edge 1231*37748cd8SNickeau * of both the primary and closure blocks of the left-recursive rule 1232*37748cd8SNickeau * `statement`. The prediction context associated with each of these 1233*37748cd8SNickeau * configurations distinguishes between them, and prevents the alternative 1234*37748cd8SNickeau * which stepped out to `prog` (and then back in to `statement` from being 1235*37748cd8SNickeau * eliminated by the filter. 1236*37748cd8SNickeau * 1237*37748cd8SNickeau * @param ATNConfigSet $configs The configuration set computed by 1238*37748cd8SNickeau * {@see ParserATNSimulator::computeStartState()} 1239*37748cd8SNickeau * as the start state for the DFA. 1240*37748cd8SNickeau * 1241*37748cd8SNickeau * @return ATNConfigSet The transformed configuration set representing the start state 1242*37748cd8SNickeau * for a precedence DFA at a particular precedence level 1243*37748cd8SNickeau * (determined by calling {@see Parser::getPrecedence()}). 1244*37748cd8SNickeau * 1245*37748cd8SNickeau * @throws \InvalidArgumentException 1246*37748cd8SNickeau */ 1247*37748cd8SNickeau protected function applyPrecedenceFilter(ATNConfigSet $configs) : ATNConfigSet 1248*37748cd8SNickeau { 1249*37748cd8SNickeau /** @var array<PredictionContext> $statesFromAlt1 */ 1250*37748cd8SNickeau $statesFromAlt1 = []; 1251*37748cd8SNickeau $configSet = new ATNConfigSet($configs->fullCtx); 1252*37748cd8SNickeau 1253*37748cd8SNickeau foreach ($configs->elements() as $config) { 1254*37748cd8SNickeau // handle alt 1 first 1255*37748cd8SNickeau if ($config->alt !== 1) { 1256*37748cd8SNickeau continue; 1257*37748cd8SNickeau } 1258*37748cd8SNickeau 1259*37748cd8SNickeau $updatedContext = $this->outerContext !== null ? 1260*37748cd8SNickeau $config->semanticContext->evalPrecedence($this->parser, $this->outerContext) : 1261*37748cd8SNickeau null; 1262*37748cd8SNickeau 1263*37748cd8SNickeau if ($updatedContext === null) { 1264*37748cd8SNickeau continue; 1265*37748cd8SNickeau } 1266*37748cd8SNickeau 1267*37748cd8SNickeau $statesFromAlt1[$config->state->stateNumber] = $config->context; 1268*37748cd8SNickeau 1269*37748cd8SNickeau if (!$updatedContext->equals($config->semanticContext)) { 1270*37748cd8SNickeau $configSet->add( 1271*37748cd8SNickeau new ATNConfig($config, null, null, $updatedContext), 1272*37748cd8SNickeau $this->mergeCache 1273*37748cd8SNickeau ); 1274*37748cd8SNickeau } else { 1275*37748cd8SNickeau $configSet->add($config, $this->mergeCache); 1276*37748cd8SNickeau } 1277*37748cd8SNickeau } 1278*37748cd8SNickeau 1279*37748cd8SNickeau foreach ($configs->elements() as $config) { 1280*37748cd8SNickeau if ($config->alt === 1) { 1281*37748cd8SNickeau continue; // already handled 1282*37748cd8SNickeau } 1283*37748cd8SNickeau 1284*37748cd8SNickeau /* In the future, this elimination step could be updated to also 1285*37748cd8SNickeau * filter the prediction context for alternatives predicting alt>1 1286*37748cd8SNickeau * (basically a graph subtraction algorithm). 1287*37748cd8SNickeau */ 1288*37748cd8SNickeau if (!$config->isPrecedenceFilterSuppressed()) { 1289*37748cd8SNickeau $context = $statesFromAlt1[$config->state->stateNumber] ?? null; 1290*37748cd8SNickeau 1291*37748cd8SNickeau if ($context !== null && $config->context !== null && $context->equals($config->context)) { 1292*37748cd8SNickeau continue; // eliminated 1293*37748cd8SNickeau } 1294*37748cd8SNickeau } 1295*37748cd8SNickeau 1296*37748cd8SNickeau $configSet->add($config, $this->mergeCache); 1297*37748cd8SNickeau } 1298*37748cd8SNickeau 1299*37748cd8SNickeau return $configSet; 1300*37748cd8SNickeau } 1301*37748cd8SNickeau 1302*37748cd8SNickeau protected function getReachableTarget(Transition $trans, int $ttype) : ?ATNState 1303*37748cd8SNickeau { 1304*37748cd8SNickeau return $trans->matches($ttype, 0, $this->atn->maxTokenType) ? $trans->target : null; 1305*37748cd8SNickeau } 1306*37748cd8SNickeau 1307*37748cd8SNickeau /** 1308*37748cd8SNickeau * @return array<SemanticContext>|null 1309*37748cd8SNickeau */ 1310*37748cd8SNickeau protected function getPredsForAmbigAlts(BitSet $ambigAlts, ATNConfigSet $configs, int $nalts) : ?array 1311*37748cd8SNickeau { 1312*37748cd8SNickeau /* REACH=[1|1|[]|0:0, 1|2|[]|0:1] 1313*37748cd8SNickeau * altToPred starts as an array of all null contexts. The entry at index i 1314*37748cd8SNickeau * corresponds to alternative i. altToPred[i] may have one of three values: 1315*37748cd8SNickeau * 1. null: no ATNConfig c is found such that c.alt==i 1316*37748cd8SNickeau * 2. SemanticContext.NONE: At least one ATNConfig c exists such that 1317*37748cd8SNickeau * c.alt==i and c.semanticContext==SemanticContext.NONE. In other words, 1318*37748cd8SNickeau * alt i has at least one unpredicated config. 1319*37748cd8SNickeau * 3. Non-NONE Semantic Context: There exists at least one, and for all 1320*37748cd8SNickeau * ATNConfig c such that c.alt==i, c.semanticContext!=SemanticContext.NONE. 1321*37748cd8SNickeau * 1322*37748cd8SNickeau * From this, it is clear that NONE||anything==NONE. 1323*37748cd8SNickeau */ 1324*37748cd8SNickeau 1325*37748cd8SNickeau $altToPred = new \SplFixedArray($nalts + 1); 1326*37748cd8SNickeau 1327*37748cd8SNickeau foreach ($configs->elements() as $c) { 1328*37748cd8SNickeau if ($ambigAlts->contains($c->alt)) { 1329*37748cd8SNickeau $altToPred[$c->alt] = SemanticContext::orContext($altToPred[$c->alt] ?? null, $c->semanticContext); 1330*37748cd8SNickeau } 1331*37748cd8SNickeau } 1332*37748cd8SNickeau 1333*37748cd8SNickeau $nPredAlts = 0; 1334*37748cd8SNickeau 1335*37748cd8SNickeau for ($i = 1; $i <= $nalts; $i++) { 1336*37748cd8SNickeau $pred = $altToPred[$i]; 1337*37748cd8SNickeau 1338*37748cd8SNickeau if ($pred === null) { 1339*37748cd8SNickeau $altToPred[$i] = SemanticContext::none(); 1340*37748cd8SNickeau } elseif ($pred !== SemanticContext::none()) { 1341*37748cd8SNickeau $nPredAlts++; 1342*37748cd8SNickeau } 1343*37748cd8SNickeau } 1344*37748cd8SNickeau 1345*37748cd8SNickeau // nonambig alts are null in altToPred 1346*37748cd8SNickeau if ($nPredAlts === 0) { 1347*37748cd8SNickeau $altToPred = null; 1348*37748cd8SNickeau } else { 1349*37748cd8SNickeau $altToPred = $altToPred->toArray(); 1350*37748cd8SNickeau } 1351*37748cd8SNickeau 1352*37748cd8SNickeau if (self::$debug) { 1353*37748cd8SNickeau $this->log[] = \sprintf( 1354*37748cd8SNickeau 'getPredsForAmbigAlts result [%s]', 1355*37748cd8SNickeau $altToPred === null ? '' : \implode(', ', $altToPred) 1356*37748cd8SNickeau ); 1357*37748cd8SNickeau } 1358*37748cd8SNickeau 1359*37748cd8SNickeau return $altToPred; 1360*37748cd8SNickeau } 1361*37748cd8SNickeau 1362*37748cd8SNickeau /** 1363*37748cd8SNickeau * @param array<SemanticContext> $altToPred 1364*37748cd8SNickeau * 1365*37748cd8SNickeau * @return array<PredPrediction>|null 1366*37748cd8SNickeau */ 1367*37748cd8SNickeau protected function getPredicatePredictions(?BitSet $ambigAlts, array $altToPred) : ?array 1368*37748cd8SNickeau { 1369*37748cd8SNickeau $pairs = []; 1370*37748cd8SNickeau $containsPredicate = false; 1371*37748cd8SNickeau $count = \count($altToPred); 1372*37748cd8SNickeau 1373*37748cd8SNickeau for ($i = 1; $i < $count; $i++) { 1374*37748cd8SNickeau $pred = $altToPred[$i]; 1375*37748cd8SNickeau 1376*37748cd8SNickeau // unpredicated is indicated by SemanticContext.NONE 1377*37748cd8SNickeau if ($ambigAlts !== null && $ambigAlts->contains($i)) { 1378*37748cd8SNickeau $pairs[] = new PredPrediction($pred, $i); 1379*37748cd8SNickeau } 1380*37748cd8SNickeau 1381*37748cd8SNickeau if ($pred !== SemanticContext::none()) { 1382*37748cd8SNickeau $containsPredicate = true; 1383*37748cd8SNickeau } 1384*37748cd8SNickeau } 1385*37748cd8SNickeau 1386*37748cd8SNickeau if (!$containsPredicate) { 1387*37748cd8SNickeau return null; 1388*37748cd8SNickeau } 1389*37748cd8SNickeau 1390*37748cd8SNickeau return $pairs; 1391*37748cd8SNickeau } 1392*37748cd8SNickeau 1393*37748cd8SNickeau /** 1394*37748cd8SNickeau * This method is used to improve the localization of error messages by 1395*37748cd8SNickeau * choosing an alternative rather than throwing a 1396*37748cd8SNickeau * {@see NoViableAltException} in particular prediction scenarios where the 1397*37748cd8SNickeau * {@see ParserATNSimulator::error()} state was reached during ATN simulation. 1398*37748cd8SNickeau * 1399*37748cd8SNickeau * The default implementation of this method uses the following 1400*37748cd8SNickeau * algorithm to identify an ATN configuration which successfully parsed the 1401*37748cd8SNickeau * decision entry rule. Choosing such an alternative ensures that the 1402*37748cd8SNickeau * {@see ParserRuleContext} returned by the calling rule will be complete 1403*37748cd8SNickeau * and valid, and the syntax error will be reported later at a more 1404*37748cd8SNickeau * localized location. 1405*37748cd8SNickeau * 1406*37748cd8SNickeau * - If a syntactically valid path or paths reach the end of the decision rule and 1407*37748cd8SNickeau * they are semantically valid if predicated, return the min associated alt. 1408*37748cd8SNickeau * - Else, if a semantically invalid but syntactically valid path exist 1409*37748cd8SNickeau * or paths exist, return the minimum associated alt. 1410*37748cd8SNickeau * - Otherwise, return {@see ATN::INVALID_ALT_NUMBER}. 1411*37748cd8SNickeau * 1412*37748cd8SNickeau * In some scenarios, the algorithm described above could predict an 1413*37748cd8SNickeau * alternative which will result in a {@see FailedPredicateException} in 1414*37748cd8SNickeau * the parser. Specifically, this could occur if the only configuration 1415*37748cd8SNickeau * capable of successfully parsing to the end of the decision rule is 1416*37748cd8SNickeau * blocked by a semantic predicate. By choosing this alternative within 1417*37748cd8SNickeau * {@see ParserATNSimulator::adaptivePredict()} instead of throwing a 1418*37748cd8SNickeau * {@see NoViableAltException}, the resulting {@see FailedPredicateException} 1419*37748cd8SNickeau * in the parser will identify the specific predicate which is preventing 1420*37748cd8SNickeau * the parser from successfully parsing the decision rule, which helps 1421*37748cd8SNickeau * developers identify and correct logic errors in semantic predicates. 1422*37748cd8SNickeau * 1423*37748cd8SNickeau * @param ATNConfigSet $configs The ATN configurations which were valid 1424*37748cd8SNickeau * immediately before the 1425*37748cd8SNickeau * {@see ParserATNSimulator::error()} 1426*37748cd8SNickeau * state was reached. 1427*37748cd8SNickeau * @param ParserRuleContext $outerContext The \gamma_0 initial parser context 1428*37748cd8SNickeau * from the paper or the parser stack 1429*37748cd8SNickeau * at the instant before prediction commences. 1430*37748cd8SNickeau * 1431*37748cd8SNickeau * @return int The value to return from {@see ParserATNSimulator::adaptivePredict()}, 1432*37748cd8SNickeau * or {@see ATN::INVALID_ALT_NUMBER} if a suitable alternative 1433*37748cd8SNickeau * was not identified and {@see ParserATNSimulator::::adaptivePredict()} 1434*37748cd8SNickeau * should report an error instead. 1435*37748cd8SNickeau * 1436*37748cd8SNickeau * @throws \Exception 1437*37748cd8SNickeau */ 1438*37748cd8SNickeau protected function getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule( 1439*37748cd8SNickeau ATNConfigSet $configs, 1440*37748cd8SNickeau ParserRuleContext $outerContext 1441*37748cd8SNickeau ) : int { 1442*37748cd8SNickeau /** @var ATNConfigSet $semValidConfigs */ 1443*37748cd8SNickeau /** @var ATNConfigSet $semInvalidConfigs */ 1444*37748cd8SNickeau [$semValidConfigs, $semInvalidConfigs] = $this->splitAccordingToSemanticValidity($configs, $outerContext); 1445*37748cd8SNickeau 1446*37748cd8SNickeau $alt = $this->getAltThatFinishedDecisionEntryRule($semValidConfigs); 1447*37748cd8SNickeau 1448*37748cd8SNickeau if ($alt !== ATN::INVALID_ALT_NUMBER) { 1449*37748cd8SNickeau // semantically/syntactically viable path exists 1450*37748cd8SNickeau 1451*37748cd8SNickeau return $alt; 1452*37748cd8SNickeau } 1453*37748cd8SNickeau 1454*37748cd8SNickeau // Is there a syntactically valid path with a failed pred? 1455*37748cd8SNickeau if ($semInvalidConfigs->getLength() > 0) { 1456*37748cd8SNickeau $alt = $this->getAltThatFinishedDecisionEntryRule($semInvalidConfigs); 1457*37748cd8SNickeau 1458*37748cd8SNickeau if ($alt !== ATN::INVALID_ALT_NUMBER) { 1459*37748cd8SNickeau // syntactically viable path exists 1460*37748cd8SNickeau 1461*37748cd8SNickeau return $alt; 1462*37748cd8SNickeau } 1463*37748cd8SNickeau } 1464*37748cd8SNickeau 1465*37748cd8SNickeau return ATN::INVALID_ALT_NUMBER; 1466*37748cd8SNickeau } 1467*37748cd8SNickeau 1468*37748cd8SNickeau protected function getAltThatFinishedDecisionEntryRule(ATNConfigSet $configs) : int 1469*37748cd8SNickeau { 1470*37748cd8SNickeau $alts = new IntervalSet(); 1471*37748cd8SNickeau /** @var ATNConfig $c */ 1472*37748cd8SNickeau foreach ($configs->elements() as $c) { 1473*37748cd8SNickeau if ($c->getOuterContextDepth() > 0 1474*37748cd8SNickeau || ($c->state instanceof RuleStopState && $c->context !== null && $c->context->hasEmptyPath())) { 1475*37748cd8SNickeau $alts->addOne($c->alt); 1476*37748cd8SNickeau } 1477*37748cd8SNickeau } 1478*37748cd8SNickeau 1479*37748cd8SNickeau return $alts->length() === 0 ? ATN::INVALID_ALT_NUMBER : $alts->getMinElement(); 1480*37748cd8SNickeau } 1481*37748cd8SNickeau 1482*37748cd8SNickeau /** 1483*37748cd8SNickeau * Walk the list of configurations and split them according to 1484*37748cd8SNickeau * those that have preds evaluating to true/false. If no pred, assume 1485*37748cd8SNickeau * true pred and include in succeeded set. Returns Pair of sets. 1486*37748cd8SNickeau * 1487*37748cd8SNickeau * Create a new set so as not to alter the incoming parameter. 1488*37748cd8SNickeau * 1489*37748cd8SNickeau * Assumption: the input stream has been restored to the starting point 1490*37748cd8SNickeau * prediction, which is where predicates need to evaluate. 1491*37748cd8SNickeau * 1492*37748cd8SNickeau * @return array<ATNConfigSet> 1493*37748cd8SNickeau 1494*37748cd8SNickeau * @throws \Exception 1495*37748cd8SNickeau */ 1496*37748cd8SNickeau protected function splitAccordingToSemanticValidity(ATNConfigSet $configs, ParserRuleContext $outerContext) : array 1497*37748cd8SNickeau { 1498*37748cd8SNickeau $succeeded = new ATNConfigSet($configs->fullCtx); 1499*37748cd8SNickeau $failed = new ATNConfigSet($configs->fullCtx); 1500*37748cd8SNickeau 1501*37748cd8SNickeau foreach ($configs->elements() as $c) { 1502*37748cd8SNickeau if ($c->semanticContext !== SemanticContext::none()) { 1503*37748cd8SNickeau $predicateEvaluationResult = $c->semanticContext->eval($this->parser, $outerContext); 1504*37748cd8SNickeau 1505*37748cd8SNickeau if ($predicateEvaluationResult) { 1506*37748cd8SNickeau $succeeded->add($c); 1507*37748cd8SNickeau } else { 1508*37748cd8SNickeau $failed->add($c); 1509*37748cd8SNickeau } 1510*37748cd8SNickeau } else { 1511*37748cd8SNickeau $succeeded->add($c); 1512*37748cd8SNickeau } 1513*37748cd8SNickeau } 1514*37748cd8SNickeau 1515*37748cd8SNickeau return [$succeeded, $failed]; 1516*37748cd8SNickeau } 1517*37748cd8SNickeau 1518*37748cd8SNickeau /** 1519*37748cd8SNickeau * Look through a list of predicate/alt pairs, returning alts for the 1520*37748cd8SNickeau * pairs that win. A `NONE` predicate indicates an alt containing an 1521*37748cd8SNickeau * unpredicated config which behaves as "always true." If !complete 1522*37748cd8SNickeau * then we stop at the first predicate that evaluates to true. This 1523*37748cd8SNickeau * includes pairs with null predicates. 1524*37748cd8SNickeau * 1525*37748cd8SNickeau * @param array<PredPrediction> $predPredictions 1526*37748cd8SNickeau */ 1527*37748cd8SNickeau protected function evalSemanticContextMany( 1528*37748cd8SNickeau array $predPredictions, 1529*37748cd8SNickeau ParserRuleContext $outerContext, 1530*37748cd8SNickeau bool $complete 1531*37748cd8SNickeau ) : BitSet { 1532*37748cd8SNickeau $predictions = new BitSet(); 1533*37748cd8SNickeau 1534*37748cd8SNickeau foreach ($predPredictions as $pair) { 1535*37748cd8SNickeau if ($pair->pred === SemanticContext::none()) { 1536*37748cd8SNickeau $predictions->add($pair->alt); 1537*37748cd8SNickeau 1538*37748cd8SNickeau if (!$complete) { 1539*37748cd8SNickeau break; 1540*37748cd8SNickeau } 1541*37748cd8SNickeau 1542*37748cd8SNickeau continue; 1543*37748cd8SNickeau } 1544*37748cd8SNickeau 1545*37748cd8SNickeau $fullCtx = false; // in dfa 1546*37748cd8SNickeau 1547*37748cd8SNickeau $predicateEvaluationResult = $this->evalSemanticContextOne( 1548*37748cd8SNickeau $pair->pred, 1549*37748cd8SNickeau $outerContext, 1550*37748cd8SNickeau $pair->alt, 1551*37748cd8SNickeau $fullCtx 1552*37748cd8SNickeau ); 1553*37748cd8SNickeau 1554*37748cd8SNickeau if (self::$debug || self::$dfa_debug) { 1555*37748cd8SNickeau $this->log[] = \sprintf('eval pred $pair = %s"', $predicateEvaluationResult); 1556*37748cd8SNickeau } 1557*37748cd8SNickeau 1558*37748cd8SNickeau if ($predicateEvaluationResult) { 1559*37748cd8SNickeau if (self::$debug || self::$dfa_debug) { 1560*37748cd8SNickeau $this->log[] = \sprintf('PREDICT %d', $pair->alt); 1561*37748cd8SNickeau } 1562*37748cd8SNickeau 1563*37748cd8SNickeau $predictions->add($pair->alt); 1564*37748cd8SNickeau 1565*37748cd8SNickeau if (!$complete) { 1566*37748cd8SNickeau break; 1567*37748cd8SNickeau } 1568*37748cd8SNickeau } 1569*37748cd8SNickeau } 1570*37748cd8SNickeau 1571*37748cd8SNickeau return $predictions; 1572*37748cd8SNickeau } 1573*37748cd8SNickeau 1574*37748cd8SNickeau protected function evalSemanticContextOne( 1575*37748cd8SNickeau SemanticContext $pred, 1576*37748cd8SNickeau ParserRuleContext $parserCallStack, 1577*37748cd8SNickeau int $alt, 1578*37748cd8SNickeau bool $fullCtx 1579*37748cd8SNickeau ) : bool { 1580*37748cd8SNickeau return $pred->eval($this->parser, $parserCallStack); 1581*37748cd8SNickeau } 1582*37748cd8SNickeau 1583*37748cd8SNickeau /** 1584*37748cd8SNickeau * TODO: If we are doing predicates, there is no point in pursuing 1585*37748cd8SNickeau * closure operations if we reach a DFA state that uniquely predicts 1586*37748cd8SNickeau * alternative. We will not be caching that DFA state and it is a 1587*37748cd8SNickeau * waste to pursue the closure. Might have to advance when we do 1588*37748cd8SNickeau * ambig detection thought :( 1589*37748cd8SNickeau */ 1590*37748cd8SNickeau protected function closure( 1591*37748cd8SNickeau ATNConfig $config, 1592*37748cd8SNickeau ATNConfigSet $configs, 1593*37748cd8SNickeau Set $closureBusy, 1594*37748cd8SNickeau bool $collectPredicates, 1595*37748cd8SNickeau bool $fullCtx, 1596*37748cd8SNickeau bool $treatEofAsEpsilon 1597*37748cd8SNickeau ) : void { 1598*37748cd8SNickeau $initialDepth = 0; 1599*37748cd8SNickeau 1600*37748cd8SNickeau $this->closureCheckingStopState( 1601*37748cd8SNickeau $config, 1602*37748cd8SNickeau $configs, 1603*37748cd8SNickeau $closureBusy, 1604*37748cd8SNickeau $collectPredicates, 1605*37748cd8SNickeau $fullCtx, 1606*37748cd8SNickeau $initialDepth, 1607*37748cd8SNickeau $treatEofAsEpsilon 1608*37748cd8SNickeau ); 1609*37748cd8SNickeau 1610*37748cd8SNickeau if ($fullCtx && $configs->dipsIntoOuterContext) { 1611*37748cd8SNickeau throw new \RuntimeException('Error.'); 1612*37748cd8SNickeau } 1613*37748cd8SNickeau } 1614*37748cd8SNickeau 1615*37748cd8SNickeau protected function closureCheckingStopState( 1616*37748cd8SNickeau ATNConfig $config, 1617*37748cd8SNickeau ATNConfigSet $configs, 1618*37748cd8SNickeau Set $closureBusy, 1619*37748cd8SNickeau bool $collectPredicates, 1620*37748cd8SNickeau bool $fullCtx, 1621*37748cd8SNickeau int $depth, 1622*37748cd8SNickeau bool $treatEofAsEpsilon 1623*37748cd8SNickeau ) : void { 1624*37748cd8SNickeau if (self::$debug || self::$debug_closure) { 1625*37748cd8SNickeau $this->log[] = \sprintf('closure(%s)', $config->toString(true)); 1626*37748cd8SNickeau 1627*37748cd8SNickeau if ($config->reachesIntoOuterContext > 50) { 1628*37748cd8SNickeau throw new \RuntimeException('problem'); 1629*37748cd8SNickeau } 1630*37748cd8SNickeau } 1631*37748cd8SNickeau 1632*37748cd8SNickeau if ($config->state instanceof RuleStopState) { 1633*37748cd8SNickeau // We hit rule end. If we have context info, use it run thru all possible stack tops in ctx 1634*37748cd8SNickeau 1635*37748cd8SNickeau if ($config->context !== null && !$config->context->isEmpty()) { 1636*37748cd8SNickeau for ($i =0; $i < $config->context->getLength(); $i++) { 1637*37748cd8SNickeau if ($config->context->getReturnState($i) === PredictionContext::EMPTY_RETURN_STATE) { 1638*37748cd8SNickeau if ($fullCtx) { 1639*37748cd8SNickeau $configs->add( 1640*37748cd8SNickeau new ATNConfig($config, $config->state, PredictionContext::empty(), null, null), 1641*37748cd8SNickeau $this->mergeCache 1642*37748cd8SNickeau ); 1643*37748cd8SNickeau } else { 1644*37748cd8SNickeau // we have no context info, just chase follow links (if greedy) 1645*37748cd8SNickeau if (self::$debug) { 1646*37748cd8SNickeau $this->log[] = \sprintf( 1647*37748cd8SNickeau 'FALLING off rule %s', 1648*37748cd8SNickeau $this->getRuleName($config->state->ruleIndex) 1649*37748cd8SNickeau ); 1650*37748cd8SNickeau } 1651*37748cd8SNickeau 1652*37748cd8SNickeau $this->closure_( 1653*37748cd8SNickeau $config, 1654*37748cd8SNickeau $configs, 1655*37748cd8SNickeau $closureBusy, 1656*37748cd8SNickeau $collectPredicates, 1657*37748cd8SNickeau $fullCtx, 1658*37748cd8SNickeau $depth, 1659*37748cd8SNickeau $treatEofAsEpsilon 1660*37748cd8SNickeau ); 1661*37748cd8SNickeau } 1662*37748cd8SNickeau 1663*37748cd8SNickeau continue; 1664*37748cd8SNickeau } 1665*37748cd8SNickeau 1666*37748cd8SNickeau $returnState = $this->atn->states[$config->context->getReturnState($i)]; 1667*37748cd8SNickeau $newContext = $config->context->getParent($i);// "pop" return state 1668*37748cd8SNickeau 1669*37748cd8SNickeau $c = new ATNConfig(null, $returnState, $newContext, $config->semanticContext, $config->alt); 1670*37748cd8SNickeau 1671*37748cd8SNickeau // While we have context to pop back from, we may have 1672*37748cd8SNickeau // gotten that context AFTER having falling off a rule. 1673*37748cd8SNickeau // Make sure we track that we are now out of context. 1674*37748cd8SNickeau // 1675*37748cd8SNickeau // This assignment also propagates the 1676*37748cd8SNickeau // isPrecedenceFilterSuppressed() value to the new 1677*37748cd8SNickeau // configuration. 1678*37748cd8SNickeau $c->reachesIntoOuterContext = $config->reachesIntoOuterContext; 1679*37748cd8SNickeau 1680*37748cd8SNickeau $this->closureCheckingStopState( 1681*37748cd8SNickeau $c, 1682*37748cd8SNickeau $configs, 1683*37748cd8SNickeau $closureBusy, 1684*37748cd8SNickeau $collectPredicates, 1685*37748cd8SNickeau $fullCtx, 1686*37748cd8SNickeau $depth - 1, 1687*37748cd8SNickeau $treatEofAsEpsilon 1688*37748cd8SNickeau ); 1689*37748cd8SNickeau } 1690*37748cd8SNickeau 1691*37748cd8SNickeau return; 1692*37748cd8SNickeau } elseif ($fullCtx) { 1693*37748cd8SNickeau // Reached end of start rule 1694*37748cd8SNickeau $configs->add($config, $this->mergeCache); 1695*37748cd8SNickeau 1696*37748cd8SNickeau return; 1697*37748cd8SNickeau } else { 1698*37748cd8SNickeau // Else if we have no context info, just chase follow links (if greedy) 1699*37748cd8SNickeau if (self::$debug) { 1700*37748cd8SNickeau $this->log[] = \sprintf('FALLING off rule %s.', $this->getRuleName($config->state->ruleIndex)); 1701*37748cd8SNickeau } 1702*37748cd8SNickeau } 1703*37748cd8SNickeau } 1704*37748cd8SNickeau 1705*37748cd8SNickeau $this->closure_($config, $configs, $closureBusy, $collectPredicates, $fullCtx, $depth, $treatEofAsEpsilon); 1706*37748cd8SNickeau } 1707*37748cd8SNickeau 1708*37748cd8SNickeau /** 1709*37748cd8SNickeau * Do the actual work of walking epsilon edges. 1710*37748cd8SNickeau */ 1711*37748cd8SNickeau protected function closure_( 1712*37748cd8SNickeau ATNConfig $config, 1713*37748cd8SNickeau ATNConfigSet $configs, 1714*37748cd8SNickeau Set $closureBusy, 1715*37748cd8SNickeau bool $collectPredicates, 1716*37748cd8SNickeau bool $fullCtx, 1717*37748cd8SNickeau int $depth, 1718*37748cd8SNickeau bool $treatEofAsEpsilon 1719*37748cd8SNickeau ) : void { 1720*37748cd8SNickeau $p = $config->state; 1721*37748cd8SNickeau 1722*37748cd8SNickeau // optimization 1723*37748cd8SNickeau if (!$p->onlyHasEpsilonTransitions()) { 1724*37748cd8SNickeau // make sure to not return here, because EOF transitions can act as 1725*37748cd8SNickeau // both epsilon transitions and non-epsilon transitions. 1726*37748cd8SNickeau 1727*37748cd8SNickeau $configs->add($config, $this->mergeCache); 1728*37748cd8SNickeau } 1729*37748cd8SNickeau 1730*37748cd8SNickeau foreach ($p->getTransitions() as $i => $t) { 1731*37748cd8SNickeau if ($i === 0 && $this->canDropLoopEntryEdgeInLeftRecursiveRule($config)) { 1732*37748cd8SNickeau continue; 1733*37748cd8SNickeau } 1734*37748cd8SNickeau 1735*37748cd8SNickeau $continueCollecting = $collectPredicates && !$t instanceof ActionTransition; 1736*37748cd8SNickeau $c = $this->getEpsilonTarget($config, $t, $continueCollecting, $depth === 0, $fullCtx, $treatEofAsEpsilon); 1737*37748cd8SNickeau 1738*37748cd8SNickeau if ($c !== null) { 1739*37748cd8SNickeau $newDepth = $depth; 1740*37748cd8SNickeau 1741*37748cd8SNickeau if ($config->state instanceof RuleStopState) { 1742*37748cd8SNickeau if ($fullCtx) { 1743*37748cd8SNickeau throw new \RuntimeException('Error.'); 1744*37748cd8SNickeau } 1745*37748cd8SNickeau 1746*37748cd8SNickeau // Target fell off end of rule; mark resulting c as having dipped into outer context 1747*37748cd8SNickeau // We can't get here if incoming config was rule stop and we had context 1748*37748cd8SNickeau // track how far we dip into outer context. Might 1749*37748cd8SNickeau // come in handy and we avoid evaluating context dependent 1750*37748cd8SNickeau // preds if this is > 0. 1751*37748cd8SNickeau 1752*37748cd8SNickeau if ($this->dfa && $this->dfa->isPrecedenceDfa()) { 1753*37748cd8SNickeau if ($t instanceof EpsilonTransition 1754*37748cd8SNickeau && $this->dfa->atnStartState !== null 1755*37748cd8SNickeau && $t->getOutermostPrecedenceReturn() === $this->dfa->atnStartState->ruleIndex) { 1756*37748cd8SNickeau $c->setPrecedenceFilterSuppressed(true); 1757*37748cd8SNickeau } 1758*37748cd8SNickeau } 1759*37748cd8SNickeau 1760*37748cd8SNickeau $c->reachesIntoOuterContext++; 1761*37748cd8SNickeau 1762*37748cd8SNickeau if (!$closureBusy->add($c)) { 1763*37748cd8SNickeau // avoid infinite recursion for right-recursive rules 1764*37748cd8SNickeau 1765*37748cd8SNickeau continue; 1766*37748cd8SNickeau } 1767*37748cd8SNickeau 1768*37748cd8SNickeau // TODO: can remove? only care when we add to set per middle of this method 1769*37748cd8SNickeau $configs->dipsIntoOuterContext = true; 1770*37748cd8SNickeau $newDepth--; 1771*37748cd8SNickeau 1772*37748cd8SNickeau if (self::$debug) { 1773*37748cd8SNickeau $this->log[] = \sprintf('dips into outer ctx: %s', $c); 1774*37748cd8SNickeau } 1775*37748cd8SNickeau } else { 1776*37748cd8SNickeau if (!$t->isEpsilon() && !$closureBusy->add($c)) { 1777*37748cd8SNickeau // avoid infinite recursion for EOF* and EOF+ 1778*37748cd8SNickeau 1779*37748cd8SNickeau continue; 1780*37748cd8SNickeau } 1781*37748cd8SNickeau 1782*37748cd8SNickeau if ($t instanceof RuleTransition) { 1783*37748cd8SNickeau // latch when newDepth goes negative - once we step out of the entry context we can't return 1784*37748cd8SNickeau 1785*37748cd8SNickeau if ($newDepth >= 0) { 1786*37748cd8SNickeau $newDepth++; 1787*37748cd8SNickeau } 1788*37748cd8SNickeau } 1789*37748cd8SNickeau } 1790*37748cd8SNickeau 1791*37748cd8SNickeau $this->closureCheckingStopState( 1792*37748cd8SNickeau $c, 1793*37748cd8SNickeau $configs, 1794*37748cd8SNickeau $closureBusy, 1795*37748cd8SNickeau $continueCollecting, 1796*37748cd8SNickeau $fullCtx, 1797*37748cd8SNickeau $newDepth, 1798*37748cd8SNickeau $treatEofAsEpsilon 1799*37748cd8SNickeau ); 1800*37748cd8SNickeau } 1801*37748cd8SNickeau } 1802*37748cd8SNickeau } 1803*37748cd8SNickeau 1804*37748cd8SNickeau /** 1805*37748cd8SNickeau * Implements first-edge (loop entry) elimination as an optimization 1806*37748cd8SNickeau * during closure operations. See antlr/antlr4#1398. 1807*37748cd8SNickeau * 1808*37748cd8SNickeau * The optimization is to avoid adding the loop entry config when 1809*37748cd8SNickeau * the exit path can only lead back to the same 1810*37748cd8SNickeau * StarLoopEntryState after popping context at the rule end state 1811*37748cd8SNickeau * (traversing only epsilon edges, so we're still in closure, in 1812*37748cd8SNickeau * this same rule). 1813*37748cd8SNickeau * 1814*37748cd8SNickeau * We need to detect any state that can reach loop entry on 1815*37748cd8SNickeau * epsilon w/o exiting rule. We don't have to look at FOLLOW 1816*37748cd8SNickeau * links, just ensure that all stack tops for config refer to key 1817*37748cd8SNickeau * states in LR rule. 1818*37748cd8SNickeau * 1819*37748cd8SNickeau * To verify we are in the right situation we must first check 1820*37748cd8SNickeau * closure is at a StarLoopEntryState generated during LR removal. 1821*37748cd8SNickeau * Then we check that each stack top of context is a return state 1822*37748cd8SNickeau * from one of these cases: 1823*37748cd8SNickeau * 1824*37748cd8SNickeau * 1. 'not' expr, '(' type ')' expr. The return state points at loop entry state 1825*37748cd8SNickeau * 2. expr op expr. The return state is the block end of internal block of (...)* 1826*37748cd8SNickeau * 3. 'between' expr 'and' expr. The return state of 2nd expr reference. 1827*37748cd8SNickeau * That state points at block end of internal block of (...)*. 1828*37748cd8SNickeau * 4. expr '?' expr ':' expr. The return state points at block end, 1829*37748cd8SNickeau * which points at loop entry state. 1830*37748cd8SNickeau * 1831*37748cd8SNickeau * If any is true for each stack top, then closure does not add a 1832*37748cd8SNickeau * config to the current config set for edge[0], the loop entry branch. 1833*37748cd8SNickeau * 1834*37748cd8SNickeau * Conditions fail if any context for the current config is: 1835*37748cd8SNickeau * 1836*37748cd8SNickeau * a. empty (we'd fall out of expr to do a global FOLLOW which could 1837*37748cd8SNickeau * even be to some weird spot in expr) or, 1838*37748cd8SNickeau * b. lies outside of expr or, 1839*37748cd8SNickeau * c. lies within expr but at a state not the BlockEndState 1840*37748cd8SNickeau * generated during LR removal 1841*37748cd8SNickeau * 1842*37748cd8SNickeau * Do we need to evaluate predicates ever in closure for this case? 1843*37748cd8SNickeau * 1844*37748cd8SNickeau * No. Predicates, including precedence predicates, are only 1845*37748cd8SNickeau * evaluated when computing a DFA start state. I.e., only before 1846*37748cd8SNickeau * the lookahead (but not parser) consumes a token. 1847*37748cd8SNickeau * 1848*37748cd8SNickeau * There are no epsilon edges allowed in LR rule alt blocks or in 1849*37748cd8SNickeau * the "primary" part (ID here). If closure is in 1850*37748cd8SNickeau * StarLoopEntryState any lookahead operation will have consumed a 1851*37748cd8SNickeau * token as there are no epsilon-paths that lead to 1852*37748cd8SNickeau * StarLoopEntryState. We do not have to evaluate predicates 1853*37748cd8SNickeau * therefore if we are in the generated StarLoopEntryState of a LR 1854*37748cd8SNickeau * rule. Note that when making a prediction starting at that 1855*37748cd8SNickeau * decision point, decision d=2, compute-start-state performs 1856*37748cd8SNickeau * closure starting at edges[0], edges[1] emanating from 1857*37748cd8SNickeau * StarLoopEntryState. That means it is not performing closure on 1858*37748cd8SNickeau * StarLoopEntryState during compute-start-state. 1859*37748cd8SNickeau * 1860*37748cd8SNickeau * How do we know this always gives same prediction answer? 1861*37748cd8SNickeau * 1862*37748cd8SNickeau * Without predicates, loop entry and exit paths are ambiguous 1863*37748cd8SNickeau * upon remaining input +b (in, say, a+b). Either paths lead to 1864*37748cd8SNickeau * valid parses. Closure can lead to consuming + immediately or by 1865*37748cd8SNickeau * falling out of this call to expr back into expr and loop back 1866*37748cd8SNickeau * again to StarLoopEntryState to match +b. In this special case, 1867*37748cd8SNickeau * we choose the more efficient path, which is to take the bypass 1868*37748cd8SNickeau * path. 1869*37748cd8SNickeau * 1870*37748cd8SNickeau * The lookahead language has not changed because closure chooses 1871*37748cd8SNickeau * one path over the other. Both paths lead to consuming the same 1872*37748cd8SNickeau * remaining input during a lookahead operation. If the next token 1873*37748cd8SNickeau * is an operator, lookahead will enter the choice block with 1874*37748cd8SNickeau * operators. If it is not, lookahead will exit expr. Same as if 1875*37748cd8SNickeau * closure had chosen to enter the choice block immediately. 1876*37748cd8SNickeau * 1877*37748cd8SNickeau * Closure is examining one config (some loopentrystate, some alt, 1878*37748cd8SNickeau * context) which means it is considering exactly one alt. Closure 1879*37748cd8SNickeau * always copies the same alt to any derived configs. 1880*37748cd8SNickeau * 1881*37748cd8SNickeau * How do we know this optimization doesn't mess up precedence in 1882*37748cd8SNickeau * our parse trees? 1883*37748cd8SNickeau * 1884*37748cd8SNickeau * Looking through expr from left edge of stat only has to confirm 1885*37748cd8SNickeau * that an input, say, a+b+c; begins with any valid interpretation 1886*37748cd8SNickeau * of an expression. The precedence actually doesn't matter when 1887*37748cd8SNickeau * making a decision in stat seeing through expr. It is only when 1888*37748cd8SNickeau * parsing rule expr that we must use the precedence to get the 1889*37748cd8SNickeau * right interpretation and, hence, parse tree. 1890*37748cd8SNickeau * 1891*37748cd8SNickeau * @since 4.6 1892*37748cd8SNickeau */ 1893*37748cd8SNickeau protected function canDropLoopEntryEdgeInLeftRecursiveRule(ATNConfig $config) : bool 1894*37748cd8SNickeau { 1895*37748cd8SNickeau $p = $config->state; 1896*37748cd8SNickeau 1897*37748cd8SNickeau /* First check to see if we are in StarLoopEntryState generated during 1898*37748cd8SNickeau * left-recursion elimination. For efficiency, also check if 1899*37748cd8SNickeau * the context has an empty stack case. If so, it would mean 1900*37748cd8SNickeau * global FOLLOW so we can't perform optimization 1901*37748cd8SNickeau * Are we the special loop entry/exit state? or SLL wildcard 1902*37748cd8SNickeau */ 1903*37748cd8SNickeau 1904*37748cd8SNickeau if ($config->context === null) { 1905*37748cd8SNickeau throw new \RuntimeException('Prediction context cannot be null.'); 1906*37748cd8SNickeau } 1907*37748cd8SNickeau 1908*37748cd8SNickeau if ($p->getStateType() !== ATNState::STAR_LOOP_ENTRY 1909*37748cd8SNickeau || ($p instanceof StarLoopEntryState && !$p->isPrecedenceDecision) 1910*37748cd8SNickeau || $config->context->isEmpty() 1911*37748cd8SNickeau || $config->context->hasEmptyPath()) { 1912*37748cd8SNickeau return false; 1913*37748cd8SNickeau } 1914*37748cd8SNickeau 1915*37748cd8SNickeau // Require all return states to return back to the same rule that p is in. 1916*37748cd8SNickeau $numCtxs = $config->context->getLength(); 1917*37748cd8SNickeau 1918*37748cd8SNickeau for ($i = 0; $i < $numCtxs; $i++) { 1919*37748cd8SNickeau // For each stack context 1920*37748cd8SNickeau $returnState = $this->atn->states[$config->context->getReturnState($i)]; 1921*37748cd8SNickeau 1922*37748cd8SNickeau if ($returnState->ruleIndex !== $p->ruleIndex) { 1923*37748cd8SNickeau return false; 1924*37748cd8SNickeau } 1925*37748cd8SNickeau } 1926*37748cd8SNickeau 1927*37748cd8SNickeau $decisionStartState = $p->getTransition(0)->target; 1928*37748cd8SNickeau 1929*37748cd8SNickeau if (!$decisionStartState instanceof BlockStartState || $decisionStartState->endState === null) { 1930*37748cd8SNickeau throw new \RuntimeException('Unexpected transition type.'); 1931*37748cd8SNickeau } 1932*37748cd8SNickeau 1933*37748cd8SNickeau $blockEndStateNum = $decisionStartState->endState->stateNumber; 1934*37748cd8SNickeau $blockEndState = $this->atn->states[$blockEndStateNum]; 1935*37748cd8SNickeau 1936*37748cd8SNickeau if (!$blockEndState instanceof BlockEndState) { 1937*37748cd8SNickeau throw new \RuntimeException('Unexpected transition type.'); 1938*37748cd8SNickeau } 1939*37748cd8SNickeau 1940*37748cd8SNickeau // Verify that the top of each stack context leads to loop entry/exit 1941*37748cd8SNickeau // state through epsilon edges and w/o leaving rule. 1942*37748cd8SNickeau for ($i = 0; $i < $numCtxs; $i++) { 1943*37748cd8SNickeau // For each stack context 1944*37748cd8SNickeau 1945*37748cd8SNickeau $returnStateNumber = $config->context->getReturnState($i); 1946*37748cd8SNickeau $returnState = $this->atn->states[$returnStateNumber]; 1947*37748cd8SNickeau 1948*37748cd8SNickeau // All states must have single outgoing epsilon edge 1949*37748cd8SNickeau if ($returnState->getNumberOfTransitions() !== 1 || !$returnState->getTransition(0)->isEpsilon()) { 1950*37748cd8SNickeau return false; 1951*37748cd8SNickeau } 1952*37748cd8SNickeau 1953*37748cd8SNickeau // Look for prefix op case like 'not expr', (' type ')' expr 1954*37748cd8SNickeau $returnStateTarget = $returnState->getTransition(0)->target; 1955*37748cd8SNickeau 1956*37748cd8SNickeau if ($returnState->getStateType() === ATNState::BLOCK_END && $returnStateTarget->equals($p)) { 1957*37748cd8SNickeau continue; 1958*37748cd8SNickeau } 1959*37748cd8SNickeau 1960*37748cd8SNickeau // Look for 'expr op expr' or case where expr's return state is block end 1961*37748cd8SNickeau // of (...)* internal block; the block end points to loop back 1962*37748cd8SNickeau // which points to p but we don't need to check that 1963*37748cd8SNickeau if ($returnState->equals($blockEndState)) { 1964*37748cd8SNickeau continue; 1965*37748cd8SNickeau } 1966*37748cd8SNickeau 1967*37748cd8SNickeau // Look for ternary expr ? expr : expr. The return state points at block end, 1968*37748cd8SNickeau // which points at loop entry state 1969*37748cd8SNickeau if ($returnStateTarget->equals($blockEndState)) { 1970*37748cd8SNickeau continue; 1971*37748cd8SNickeau } 1972*37748cd8SNickeau 1973*37748cd8SNickeau // Look for complex prefix 'between expr and expr' case where 2nd expr's 1974*37748cd8SNickeau // return state points at block end state of (...)* internal block 1975*37748cd8SNickeau if ($returnStateTarget->getStateType() === ATNState::BLOCK_END 1976*37748cd8SNickeau && $returnStateTarget->getNumberOfTransitions() === 1 1977*37748cd8SNickeau && $returnStateTarget->getTransition(0)->isEpsilon() 1978*37748cd8SNickeau && $returnStateTarget->getTransition(0)->target->equals($p)) { 1979*37748cd8SNickeau continue; 1980*37748cd8SNickeau } 1981*37748cd8SNickeau 1982*37748cd8SNickeau // anything else ain't conforming 1983*37748cd8SNickeau return false; 1984*37748cd8SNickeau } 1985*37748cd8SNickeau 1986*37748cd8SNickeau return true; 1987*37748cd8SNickeau } 1988*37748cd8SNickeau 1989*37748cd8SNickeau public function getRuleName(int $index) : string 1990*37748cd8SNickeau { 1991*37748cd8SNickeau if ($this->parser !== null && $index >= 0) { 1992*37748cd8SNickeau return $this->parser->getRuleNames()[$index]; 1993*37748cd8SNickeau } 1994*37748cd8SNickeau 1995*37748cd8SNickeau return '<rule $index>'; 1996*37748cd8SNickeau } 1997*37748cd8SNickeau 1998*37748cd8SNickeau protected function getEpsilonTarget( 1999*37748cd8SNickeau ATNConfig $config, 2000*37748cd8SNickeau Transition $t, 2001*37748cd8SNickeau bool $collectPredicates, 2002*37748cd8SNickeau bool $inContext, 2003*37748cd8SNickeau bool $fullCtx, 2004*37748cd8SNickeau bool $treatEofAsEpsilon 2005*37748cd8SNickeau ) : ?ATNConfig { 2006*37748cd8SNickeau switch ($t->getSerializationType()) { 2007*37748cd8SNickeau case Transition::RULE: 2008*37748cd8SNickeau if (!$t instanceof RuleTransition) { 2009*37748cd8SNickeau throw new \RuntimeException('Unexpected transition type.'); 2010*37748cd8SNickeau } 2011*37748cd8SNickeau 2012*37748cd8SNickeau return $this->ruleTransition($config, $t); 2013*37748cd8SNickeau 2014*37748cd8SNickeau case Transition::PRECEDENCE: 2015*37748cd8SNickeau if (!$t instanceof PrecedencePredicateTransition) { 2016*37748cd8SNickeau throw new \RuntimeException('Unexpected transition type.'); 2017*37748cd8SNickeau } 2018*37748cd8SNickeau 2019*37748cd8SNickeau return $this->precedenceTransition($config, $t, $collectPredicates, $inContext, $fullCtx); 2020*37748cd8SNickeau 2021*37748cd8SNickeau case Transition::PREDICATE: 2022*37748cd8SNickeau if (!$t instanceof PredicateTransition) { 2023*37748cd8SNickeau throw new \RuntimeException('Unexpected transition type.'); 2024*37748cd8SNickeau } 2025*37748cd8SNickeau 2026*37748cd8SNickeau return $this->predTransition($config, $t, $collectPredicates, $inContext, $fullCtx); 2027*37748cd8SNickeau 2028*37748cd8SNickeau case Transition::ACTION: 2029*37748cd8SNickeau if (!$t instanceof ActionTransition) { 2030*37748cd8SNickeau throw new \RuntimeException('Unexpected transition type.'); 2031*37748cd8SNickeau } 2032*37748cd8SNickeau 2033*37748cd8SNickeau return $this->actionTransition($config, $t); 2034*37748cd8SNickeau 2035*37748cd8SNickeau case Transition::EPSILON: 2036*37748cd8SNickeau return new ATNConfig($config, $t->target); 2037*37748cd8SNickeau 2038*37748cd8SNickeau case Transition::ATOM: 2039*37748cd8SNickeau case Transition::RANGE: 2040*37748cd8SNickeau case Transition::SET: 2041*37748cd8SNickeau // EOF transitions act like epsilon transitions after the first EOF transition is traversed 2042*37748cd8SNickeau 2043*37748cd8SNickeau if ($treatEofAsEpsilon) { 2044*37748cd8SNickeau if ($t->matches(Token::EOF, 0, 1)) { 2045*37748cd8SNickeau return new ATNConfig($config, $t->target); 2046*37748cd8SNickeau } 2047*37748cd8SNickeau } 2048*37748cd8SNickeau 2049*37748cd8SNickeau return null; 2050*37748cd8SNickeau 2051*37748cd8SNickeau default: 2052*37748cd8SNickeau return null; 2053*37748cd8SNickeau } 2054*37748cd8SNickeau } 2055*37748cd8SNickeau 2056*37748cd8SNickeau protected function actionTransition(ATNConfig $config, ActionTransition $t) : ATNConfig 2057*37748cd8SNickeau { 2058*37748cd8SNickeau if (self::$debug) { 2059*37748cd8SNickeau $this->log[] = \sprintf('ACTION edge %d:%d', $t->ruleIndex, $t->actionIndex); 2060*37748cd8SNickeau } 2061*37748cd8SNickeau 2062*37748cd8SNickeau return new ATNConfig($config, $t->target); 2063*37748cd8SNickeau } 2064*37748cd8SNickeau 2065*37748cd8SNickeau public function precedenceTransition( 2066*37748cd8SNickeau ATNConfig $config, 2067*37748cd8SNickeau PrecedencePredicateTransition $pt, 2068*37748cd8SNickeau bool $collectPredicates, 2069*37748cd8SNickeau bool $inContext, 2070*37748cd8SNickeau bool $fullCtx 2071*37748cd8SNickeau ) : ?ATNConfig { 2072*37748cd8SNickeau if (self::$debug) { 2073*37748cd8SNickeau $this->log[] = \sprintf( 2074*37748cd8SNickeau 'PRED (collectPredicates=%s) %d>=_p, ctx dependent=true', 2075*37748cd8SNickeau $collectPredicates, 2076*37748cd8SNickeau $pt->precedence 2077*37748cd8SNickeau ); 2078*37748cd8SNickeau 2079*37748cd8SNickeau if ($this->parser !== null) { 2080*37748cd8SNickeau $this->log[] = \sprintf( 2081*37748cd8SNickeau 'context surrounding pred is [%s]', 2082*37748cd8SNickeau \implode(', ', $this->parser->getRuleInvocationStack()) 2083*37748cd8SNickeau ); 2084*37748cd8SNickeau } 2085*37748cd8SNickeau } 2086*37748cd8SNickeau 2087*37748cd8SNickeau $c = null; 2088*37748cd8SNickeau 2089*37748cd8SNickeau if ($collectPredicates && $inContext) { 2090*37748cd8SNickeau if ($fullCtx) { 2091*37748cd8SNickeau /* In full context mode, we can evaluate predicates on-the-fly 2092*37748cd8SNickeau * during closure, which dramatically reduces the size of 2093*37748cd8SNickeau * the config sets. It also obviates the need to test predicates 2094*37748cd8SNickeau * later during conflict resolution. 2095*37748cd8SNickeau */ 2096*37748cd8SNickeau 2097*37748cd8SNickeau $currentPosition = $this->input->getIndex(); 2098*37748cd8SNickeau 2099*37748cd8SNickeau $this->input->seek($this->startIndex); 2100*37748cd8SNickeau 2101*37748cd8SNickeau $predSucceeds = $this->outerContext !== null ? 2102*37748cd8SNickeau $pt->getPredicate()->eval($this->parser, $this->outerContext) : 2103*37748cd8SNickeau false; 2104*37748cd8SNickeau 2105*37748cd8SNickeau $this->input->seek($currentPosition); 2106*37748cd8SNickeau 2107*37748cd8SNickeau if ($predSucceeds) { 2108*37748cd8SNickeau $c = new ATNConfig($config, $pt->target);// no pred context 2109*37748cd8SNickeau } 2110*37748cd8SNickeau } else { 2111*37748cd8SNickeau $newSemCtx = SemanticContext::andContext($config->semanticContext, $pt->getPredicate()); 2112*37748cd8SNickeau $c = new ATNConfig($config, $pt->target, null, $newSemCtx); 2113*37748cd8SNickeau } 2114*37748cd8SNickeau } else { 2115*37748cd8SNickeau $c = new ATNConfig($config, $pt->target); 2116*37748cd8SNickeau } if (self::$debug) { 2117*37748cd8SNickeau $this->log[] = \sprintf('Config from pred transition=%s', (string) $c); 2118*37748cd8SNickeau } 2119*37748cd8SNickeau 2120*37748cd8SNickeau return $c; 2121*37748cd8SNickeau } 2122*37748cd8SNickeau 2123*37748cd8SNickeau protected function predTransition( 2124*37748cd8SNickeau ATNConfig $config, 2125*37748cd8SNickeau PredicateTransition $pt, 2126*37748cd8SNickeau bool $collectPredicates, 2127*37748cd8SNickeau bool $inContext, 2128*37748cd8SNickeau bool $fullCtx 2129*37748cd8SNickeau ) : ?ATNConfig { 2130*37748cd8SNickeau if (self::$debug) { 2131*37748cd8SNickeau $this->log[] = \sprintf( 2132*37748cd8SNickeau 'PRED (collectPredicates=%s) %d:%d, ctx dependent=%s', 2133*37748cd8SNickeau $collectPredicates, 2134*37748cd8SNickeau $pt->ruleIndex, 2135*37748cd8SNickeau $pt->predIndex, 2136*37748cd8SNickeau $pt->isCtxDependent 2137*37748cd8SNickeau ); 2138*37748cd8SNickeau 2139*37748cd8SNickeau if ($this->parser !== null) { 2140*37748cd8SNickeau $this->log[] = \sprintf( 2141*37748cd8SNickeau 'Context surrounding pred is [%s]', 2142*37748cd8SNickeau \implode(', ', $this->parser->getRuleInvocationStack()) 2143*37748cd8SNickeau ); 2144*37748cd8SNickeau } 2145*37748cd8SNickeau } 2146*37748cd8SNickeau 2147*37748cd8SNickeau $c = null; 2148*37748cd8SNickeau 2149*37748cd8SNickeau if ($collectPredicates && (!$pt->isCtxDependent || $inContext)) { 2150*37748cd8SNickeau if ($fullCtx) { 2151*37748cd8SNickeau // In full context mode, we can evaluate predicates on-the-fly 2152*37748cd8SNickeau // during closure, which dramatically reduces the size of 2153*37748cd8SNickeau // the config sets. It also obviates the need to test predicates 2154*37748cd8SNickeau // later during conflict resolution. 2155*37748cd8SNickeau 2156*37748cd8SNickeau $currentPosition = $this->input->getIndex(); 2157*37748cd8SNickeau 2158*37748cd8SNickeau $this->input->seek($this->startIndex); 2159*37748cd8SNickeau 2160*37748cd8SNickeau $predSucceeds = $this->outerContext !== null ? 2161*37748cd8SNickeau $pt->getPredicate()->eval($this->parser, $this->outerContext) : 2162*37748cd8SNickeau false; 2163*37748cd8SNickeau 2164*37748cd8SNickeau $this->input->seek($currentPosition); 2165*37748cd8SNickeau 2166*37748cd8SNickeau if ($predSucceeds) { 2167*37748cd8SNickeau $c = new ATNConfig($config, $pt->target);// no pred context 2168*37748cd8SNickeau } 2169*37748cd8SNickeau } else { 2170*37748cd8SNickeau $newSemCtx = SemanticContext::andContext($config->semanticContext, $pt->getPredicate()); 2171*37748cd8SNickeau $c = new ATNConfig($config, $pt->target, null, $newSemCtx); 2172*37748cd8SNickeau } 2173*37748cd8SNickeau } else { 2174*37748cd8SNickeau $c = new ATNConfig($config, $pt->target); 2175*37748cd8SNickeau } 2176*37748cd8SNickeau 2177*37748cd8SNickeau if (self::$debug) { 2178*37748cd8SNickeau $this->log[] = \sprintf('Config from pred transition=%s', (string) $c); 2179*37748cd8SNickeau } 2180*37748cd8SNickeau 2181*37748cd8SNickeau return $c; 2182*37748cd8SNickeau } 2183*37748cd8SNickeau 2184*37748cd8SNickeau protected function ruleTransition(ATNConfig $config, RuleTransition $t) : ATNConfig 2185*37748cd8SNickeau { 2186*37748cd8SNickeau if (self::$debug) { 2187*37748cd8SNickeau $this->log[] = \sprintf( 2188*37748cd8SNickeau 'CALL rule %s, ctx=%s', 2189*37748cd8SNickeau $this->getRuleName($t->target->ruleIndex), 2190*37748cd8SNickeau $config->context 2191*37748cd8SNickeau ); 2192*37748cd8SNickeau } 2193*37748cd8SNickeau 2194*37748cd8SNickeau $returnState = $t->followState; 2195*37748cd8SNickeau $newContext = SingletonPredictionContext::create($config->context, $returnState->stateNumber); 2196*37748cd8SNickeau 2197*37748cd8SNickeau return new ATNConfig($config, $t->target, $newContext); 2198*37748cd8SNickeau } 2199*37748cd8SNickeau 2200*37748cd8SNickeau /** 2201*37748cd8SNickeau * Gets a {@see BitSet} containing the alternatives in `configs` 2202*37748cd8SNickeau * which are part of one or more conflicting alternative subsets. 2203*37748cd8SNickeau * 2204*37748cd8SNickeau * @param ATNConfigSet $configs The {@see ATNConfigSet} to analyze. 2205*37748cd8SNickeau * 2206*37748cd8SNickeau * @return BitSet The alternatives in `configs` which are part of one or 2207*37748cd8SNickeau * more conflicting alternative subsets. If `configs` does 2208*37748cd8SNickeau * not contain any conflicting subsets, this method returns 2209*37748cd8SNickeau * an empty {@see BitSet}. 2210*37748cd8SNickeau */ 2211*37748cd8SNickeau protected function getConflictingAlts(ATNConfigSet $configs) : BitSet 2212*37748cd8SNickeau { 2213*37748cd8SNickeau $altsets = PredictionMode::getConflictingAltSubsets($configs); 2214*37748cd8SNickeau 2215*37748cd8SNickeau return PredictionMode::getAlts($altsets); 2216*37748cd8SNickeau } 2217*37748cd8SNickeau 2218*37748cd8SNickeau /** 2219*37748cd8SNickeau Sam pointed out a problem with the previous definition, v3, of 2220*37748cd8SNickeau ambiguous states. If we have another state associated with conflicting 2221*37748cd8SNickeau alternatives, we should keep going. For example, the following grammar 2222*37748cd8SNickeau 2223*37748cd8SNickeau s : (ID | ID ID?) ';' ; 2224*37748cd8SNickeau 2225*37748cd8SNickeau When the ATN simulation reaches the state before ';', it has a DFA 2226*37748cd8SNickeau state that looks like: [12|1|[], 6|2|[], 12|2|[]]. Naturally 2227*37748cd8SNickeau 12|1|[] and 12|2|[] conflict, but we cannot stop processing this node 2228*37748cd8SNickeau because alternative to has another way to continue, via [6|2|[]]. 2229*37748cd8SNickeau The key is that we have a single state that has config's only associated 2230*37748cd8SNickeau with a single alternative, 2, and crucially the state transitions 2231*37748cd8SNickeau among the configurations are all non-epsilon transitions. That means 2232*37748cd8SNickeau we don't consider any conflicts that include alternative 2. So, we 2233*37748cd8SNickeau ignore the conflict between alts 1 and 2. We ignore a set of 2234*37748cd8SNickeau conflicting alts when there is an intersection with an alternative 2235*37748cd8SNickeau associated with a single alt state in the state→config-list map. 2236*37748cd8SNickeau 2237*37748cd8SNickeau It's also the case that we might have two conflicting configurations but 2238*37748cd8SNickeau also a 3rd nonconflicting configuration for a different alternative: 2239*37748cd8SNickeau [1|1|[], 1|2|[], 8|3|[]]. This can come about from grammar: 2240*37748cd8SNickeau 2241*37748cd8SNickeau a : A | A | A B ; 2242*37748cd8SNickeau 2243*37748cd8SNickeau After matching input A, we reach the stop state for rule A, state 1. 2244*37748cd8SNickeau State 8 is the state right before B. Clearly alternatives 1 and 2 2245*37748cd8SNickeau conflict and no amount of further lookahead will separate the two. 2246*37748cd8SNickeau However, alternative 3 will be able to continue and so we do not 2247*37748cd8SNickeau stop working on this state. In the previous example, we're concerned 2248*37748cd8SNickeau with states associated with the conflicting alternatives. Here alt 2249*37748cd8SNickeau 3 is not associated with the conflicting configs, but since we can continue 2250*37748cd8SNickeau looking for input reasonably, I don't declare the state done. We 2251*37748cd8SNickeau ignore a set of conflicting alts when we have an alternative 2252*37748cd8SNickeau that we still need to pursue. 2253*37748cd8SNickeau */ 2254*37748cd8SNickeau protected function getConflictingAltsOrUniqueAlt(ATNConfigSet $configs) : ?BitSet 2255*37748cd8SNickeau { 2256*37748cd8SNickeau if ($configs->uniqueAlt !== ATN::INVALID_ALT_NUMBER) { 2257*37748cd8SNickeau $conflictingAlts = new BitSet(); 2258*37748cd8SNickeau 2259*37748cd8SNickeau $conflictingAlts->add($configs->uniqueAlt); 2260*37748cd8SNickeau 2261*37748cd8SNickeau return $conflictingAlts; 2262*37748cd8SNickeau } 2263*37748cd8SNickeau 2264*37748cd8SNickeau return $configs->getConflictingAlts(); 2265*37748cd8SNickeau } 2266*37748cd8SNickeau 2267*37748cd8SNickeau public function getTokenName(int $t) : string 2268*37748cd8SNickeau { 2269*37748cd8SNickeau if ($t === Token::EOF) { 2270*37748cd8SNickeau return 'EOF'; 2271*37748cd8SNickeau } 2272*37748cd8SNickeau 2273*37748cd8SNickeau $vocabulary = $this->parser !== null ? $this->parser->getVocabulary() : VocabularyImpl::emptyVocabulary(); 2274*37748cd8SNickeau $displayName = $vocabulary->getDisplayName($t); 2275*37748cd8SNickeau 2276*37748cd8SNickeau if ($displayName === (string) $t) { 2277*37748cd8SNickeau return $displayName; 2278*37748cd8SNickeau } 2279*37748cd8SNickeau 2280*37748cd8SNickeau return \sprintf('%s<%d>', $displayName, $t); 2281*37748cd8SNickeau } 2282*37748cd8SNickeau 2283*37748cd8SNickeau public function getLookaheadName(TokenStream $input) : string 2284*37748cd8SNickeau { 2285*37748cd8SNickeau return $this->getTokenName($input->LA(1)); 2286*37748cd8SNickeau } 2287*37748cd8SNickeau 2288*37748cd8SNickeau protected function noViableAlt( 2289*37748cd8SNickeau TokenStream $input, 2290*37748cd8SNickeau $outerContext, 2291*37748cd8SNickeau ?ATNConfigSet $configs, 2292*37748cd8SNickeau int $startIndex 2293*37748cd8SNickeau ) : NoViableAltException { 2294*37748cd8SNickeau return new NoViableAltException( 2295*37748cd8SNickeau $this->parser, 2296*37748cd8SNickeau $input, 2297*37748cd8SNickeau $input->get($startIndex), 2298*37748cd8SNickeau $input->LT(1), 2299*37748cd8SNickeau $configs, 2300*37748cd8SNickeau $outerContext 2301*37748cd8SNickeau ); 2302*37748cd8SNickeau } 2303*37748cd8SNickeau 2304*37748cd8SNickeau protected static function getUniqueAlt(ATNConfigSet $configs) : int 2305*37748cd8SNickeau { 2306*37748cd8SNickeau $alt = ATN::INVALID_ALT_NUMBER; 2307*37748cd8SNickeau 2308*37748cd8SNickeau foreach ($configs->elements() as $c) { 2309*37748cd8SNickeau if ($alt === ATN::INVALID_ALT_NUMBER) { 2310*37748cd8SNickeau $alt = $c->alt; // found first alt 2311*37748cd8SNickeau } elseif ($c->alt !== $alt) { 2312*37748cd8SNickeau return ATN::INVALID_ALT_NUMBER; 2313*37748cd8SNickeau } 2314*37748cd8SNickeau } 2315*37748cd8SNickeau 2316*37748cd8SNickeau return $alt; 2317*37748cd8SNickeau } 2318*37748cd8SNickeau 2319*37748cd8SNickeau /** 2320*37748cd8SNickeau * Add an edge to the DFA, if possible. This method calls 2321*37748cd8SNickeau * {@see ParserATNSimulator::addDFAState()} to ensure the `to` state is 2322*37748cd8SNickeau * present in the DFA. If `from` is `null`, or if `t` is outside the 2323*37748cd8SNickeau * range of edges that can be represented in the DFA tables, this method 2324*37748cd8SNickeau * returns without adding the edge to the DFA. 2325*37748cd8SNickeau * 2326*37748cd8SNickeau * If `to` is `null`, this method returns `null`. Otherwise, this method 2327*37748cd8SNickeau * returns the {@see DFAState} returned by calling 2328*37748cd8SNickeau * {@see ParserATNSimulator::addDFAState()} for the `to` state. 2329*37748cd8SNickeau * 2330*37748cd8SNickeau * @param DFA $dfa The DFA 2331*37748cd8SNickeau * @param DFAState|null $from The source state for the edge 2332*37748cd8SNickeau * @param int $t The input symbol 2333*37748cd8SNickeau * @param DFAState|null $to The target state for the edge 2334*37748cd8SNickeau * 2335*37748cd8SNickeau * @return DFAState If `to` is `null` this method returns `null`, 2336*37748cd8SNickeau * otherwise this method returns the result of calling 2337*37748cd8SNickeau * {@see ParserATNSimulator::addDFAState()} on `to`. 2338*37748cd8SNickeau */ 2339*37748cd8SNickeau protected function addDFAEdge(DFA $dfa, ?DFAState $from, int $t, ?DFAState $to) : ?DFAState 2340*37748cd8SNickeau { 2341*37748cd8SNickeau if (self::$debug) { 2342*37748cd8SNickeau $this->log[] = \sprintf('EDGE %s -> %s upon %s', (string) $from, (string) $to, $this->getTokenName($t)); 2343*37748cd8SNickeau } 2344*37748cd8SNickeau 2345*37748cd8SNickeau if ($to === null) { 2346*37748cd8SNickeau return null; 2347*37748cd8SNickeau } 2348*37748cd8SNickeau 2349*37748cd8SNickeau $to = $this->addDFAState($dfa, $to);// used existing if possible not incoming 2350*37748cd8SNickeau 2351*37748cd8SNickeau if ($from === null || $t < -1 || $t > $this->atn->maxTokenType) { 2352*37748cd8SNickeau return $to; 2353*37748cd8SNickeau } 2354*37748cd8SNickeau 2355*37748cd8SNickeau if ($from->edges === null) { 2356*37748cd8SNickeau $from->edges = new \SplFixedArray($this->atn->maxTokenType + 1 + 1); 2357*37748cd8SNickeau } 2358*37748cd8SNickeau 2359*37748cd8SNickeau $from->edges[$t + 1] = $to; 2360*37748cd8SNickeau 2361*37748cd8SNickeau if (self::$debug) { 2362*37748cd8SNickeau $this->log[] = 'DFA =' . \PHP_EOL . $dfa->toString($this->parser->getVocabulary()); 2363*37748cd8SNickeau } 2364*37748cd8SNickeau 2365*37748cd8SNickeau return $to; 2366*37748cd8SNickeau } 2367*37748cd8SNickeau 2368*37748cd8SNickeau /** 2369*37748cd8SNickeau * Add state `D` to the DFA if it is not already present, and return 2370*37748cd8SNickeau * the actual instance stored in the DFA. If a state equivalent to `D` 2371*37748cd8SNickeau * is already in the DFA, the existing state is returned. Otherwise this 2372*37748cd8SNickeau * method returns `D` after adding it to the DFA. 2373*37748cd8SNickeau * 2374*37748cd8SNickeau * If `D` is {@see ParserATNSimulator::error()}, this method returns 2375*37748cd8SNickeau * {@see ParserATNSimulator::error()} and does not change the DFA. 2376*37748cd8SNickeau * 2377*37748cd8SNickeau * @param DFA $dfa The dfa 2378*37748cd8SNickeau * @param DFAState $D The DFA state to add 2379*37748cd8SNickeau * 2380*37748cd8SNickeau * @return DFAState The state stored in the DFA. This will be either 2381*37748cd8SNickeau * the existing state if `D` is already in the DFA, or `D` 2382*37748cd8SNickeau * itself if the state was not already present. 2383*37748cd8SNickeau * 2384*37748cd8SNickeau * @throws \InvalidArgumentException 2385*37748cd8SNickeau */ 2386*37748cd8SNickeau protected function addDFAState(DFA $dfa, DFAState $D) : DFAState 2387*37748cd8SNickeau { 2388*37748cd8SNickeau if ($D === self::error()) { 2389*37748cd8SNickeau return $D; 2390*37748cd8SNickeau } 2391*37748cd8SNickeau 2392*37748cd8SNickeau $existing = $dfa->states->get($D); 2393*37748cd8SNickeau 2394*37748cd8SNickeau if ($existing !== null && $existing instanceof DFAState) { 2395*37748cd8SNickeau return $existing; 2396*37748cd8SNickeau } 2397*37748cd8SNickeau 2398*37748cd8SNickeau $D->stateNumber = $dfa->states->count(); 2399*37748cd8SNickeau 2400*37748cd8SNickeau if (!$D->configs->isReadOnly()) { 2401*37748cd8SNickeau $D->configs->optimizeConfigs($this); 2402*37748cd8SNickeau $D->configs->setReadonly(true); 2403*37748cd8SNickeau } 2404*37748cd8SNickeau 2405*37748cd8SNickeau $dfa->states->add($D); 2406*37748cd8SNickeau 2407*37748cd8SNickeau if (self::$debug) { 2408*37748cd8SNickeau $this->log[] = \sprintf('Adding new DFA state: %s', (string) $D); 2409*37748cd8SNickeau } 2410*37748cd8SNickeau 2411*37748cd8SNickeau return $D; 2412*37748cd8SNickeau } 2413*37748cd8SNickeau 2414*37748cd8SNickeau protected function reportAttemptingFullContext( 2415*37748cd8SNickeau DFA $dfa, 2416*37748cd8SNickeau ?BitSet $conflictingAlts, 2417*37748cd8SNickeau ATNConfigSet $configs, 2418*37748cd8SNickeau int $startIndex, 2419*37748cd8SNickeau int $stopIndex 2420*37748cd8SNickeau ) : void { 2421*37748cd8SNickeau if (self::$debug || self::$retry_debug) { 2422*37748cd8SNickeau $interval = new Interval($startIndex, $stopIndex); 2423*37748cd8SNickeau $tokenStream = $this->parser->getTokenStream(); 2424*37748cd8SNickeau 2425*37748cd8SNickeau $this->log[] = \sprintf( 2426*37748cd8SNickeau 'reportAttemptingFullContext decision = %d:%s, input = %s', 2427*37748cd8SNickeau $dfa->decision, 2428*37748cd8SNickeau (string) $configs, 2429*37748cd8SNickeau $tokenStream === null ? '' : $tokenStream->getTextByInterval($interval) 2430*37748cd8SNickeau ); 2431*37748cd8SNickeau } 2432*37748cd8SNickeau 2433*37748cd8SNickeau if ($this->parser !== null) { 2434*37748cd8SNickeau $this->parser->getErrorListenerDispatch()->reportAttemptingFullContext( 2435*37748cd8SNickeau $this->parser, 2436*37748cd8SNickeau $dfa, 2437*37748cd8SNickeau $startIndex, 2438*37748cd8SNickeau $stopIndex, 2439*37748cd8SNickeau $conflictingAlts, 2440*37748cd8SNickeau $configs 2441*37748cd8SNickeau ); 2442*37748cd8SNickeau } 2443*37748cd8SNickeau } 2444*37748cd8SNickeau 2445*37748cd8SNickeau protected function reportContextSensitivity( 2446*37748cd8SNickeau DFA $dfa, 2447*37748cd8SNickeau int $prediction, 2448*37748cd8SNickeau ATNConfigSet $configs, 2449*37748cd8SNickeau int $startIndex, 2450*37748cd8SNickeau int $stopIndex 2451*37748cd8SNickeau ) : void { 2452*37748cd8SNickeau if (self::$debug || self::$retry_debug) { 2453*37748cd8SNickeau $interval = new Interval($startIndex, $stopIndex); 2454*37748cd8SNickeau $tokenStream = $this->parser->getTokenStream(); 2455*37748cd8SNickeau 2456*37748cd8SNickeau $this->log[] = \sprintf( 2457*37748cd8SNickeau 'reportContextSensitivity decision = %d:%s, input = %s', 2458*37748cd8SNickeau $dfa->decision, 2459*37748cd8SNickeau (string) $configs, 2460*37748cd8SNickeau $tokenStream === null ? '' : $tokenStream->getTextByInterval($interval) 2461*37748cd8SNickeau ); 2462*37748cd8SNickeau } 2463*37748cd8SNickeau 2464*37748cd8SNickeau if ($this->parser !== null) { 2465*37748cd8SNickeau $this->parser->getErrorListenerDispatch()->reportContextSensitivity( 2466*37748cd8SNickeau $this->parser, 2467*37748cd8SNickeau $dfa, 2468*37748cd8SNickeau $startIndex, 2469*37748cd8SNickeau $stopIndex, 2470*37748cd8SNickeau $prediction, 2471*37748cd8SNickeau $configs 2472*37748cd8SNickeau ); 2473*37748cd8SNickeau } 2474*37748cd8SNickeau } 2475*37748cd8SNickeau 2476*37748cd8SNickeau /** 2477*37748cd8SNickeau * If context sensitive parsing, we know it's ambiguity not conflict. 2478*37748cd8SNickeau */ 2479*37748cd8SNickeau protected function reportAmbiguity( 2480*37748cd8SNickeau DFA $dfa, 2481*37748cd8SNickeau DFAState $D, 2482*37748cd8SNickeau int $startIndex, 2483*37748cd8SNickeau int $stopIndex, 2484*37748cd8SNickeau bool $exact, 2485*37748cd8SNickeau ?BitSet $ambigAlts, 2486*37748cd8SNickeau ATNConfigSet $configs 2487*37748cd8SNickeau ) : void { 2488*37748cd8SNickeau if (self::$debug || self::$retry_debug) { 2489*37748cd8SNickeau $interval = new Interval($startIndex, $stopIndex); 2490*37748cd8SNickeau $tokenStream = $this->parser->getTokenStream(); 2491*37748cd8SNickeau 2492*37748cd8SNickeau $this->log[] = \sprintf( 2493*37748cd8SNickeau 'reportAmbiguity %s:%s, input = %s', 2494*37748cd8SNickeau (string) $ambigAlts, 2495*37748cd8SNickeau (string) $configs, 2496*37748cd8SNickeau $tokenStream === null ? '' : $tokenStream->getTextByInterval($interval) 2497*37748cd8SNickeau ); 2498*37748cd8SNickeau } 2499*37748cd8SNickeau 2500*37748cd8SNickeau if ($this->parser !== null) { 2501*37748cd8SNickeau $this->parser->getErrorListenerDispatch()->reportAmbiguity( 2502*37748cd8SNickeau $this->parser, 2503*37748cd8SNickeau $dfa, 2504*37748cd8SNickeau $startIndex, 2505*37748cd8SNickeau $stopIndex, 2506*37748cd8SNickeau $exact, 2507*37748cd8SNickeau $ambigAlts, 2508*37748cd8SNickeau $configs 2509*37748cd8SNickeau ); 2510*37748cd8SNickeau } 2511*37748cd8SNickeau } 2512*37748cd8SNickeau 2513*37748cd8SNickeau public function setPredictionMode(int $mode) : void 2514*37748cd8SNickeau { 2515*37748cd8SNickeau $this->mode = $mode; 2516*37748cd8SNickeau } 2517*37748cd8SNickeau 2518*37748cd8SNickeau public function getPredictionMode() : int 2519*37748cd8SNickeau { 2520*37748cd8SNickeau return $this->mode; 2521*37748cd8SNickeau } 2522*37748cd8SNickeau 2523*37748cd8SNickeau public function getParser() : Parser 2524*37748cd8SNickeau { 2525*37748cd8SNickeau return $this->parser; 2526*37748cd8SNickeau } 2527*37748cd8SNickeau} 2528