<?php

declare(strict_types=1);

namespace Antlr\Antlr4\Runtime\Atn;

use Antlr\Antlr4\Runtime\Atn\Actions\LexerAction;
use Antlr\Antlr4\Runtime\Atn\Actions\LexerIndexedCustomAction;
use Antlr\Antlr4\Runtime\CharStream;
use Antlr\Antlr4\Runtime\Comparison\Equality;
use Antlr\Antlr4\Runtime\Comparison\Equatable;
use Antlr\Antlr4\Runtime\Comparison\Hasher;
use Antlr\Antlr4\Runtime\Lexer;

/**
 * Represents an executor for a sequence of lexer actions which are traversed
 * during the matching operation of a lexer rule (token).
 *
 * The executor tracks position information for position-dependent lexer actions
 * efficiently, ensuring that actions appearing only at the end of the rule do
 * not cause bloating of the {@see DFA} created for the lexer.
 *
 * @author Sam Harwell
 */
final class LexerActionExecutor implements Equatable
{
    /** @var array<LexerAction> */
    private $lexerActions;

    /**
     * Caches the result of {@see LexerActionExecutor::hashCode()} since
     * the hash code is an element of the performance-critical
     * {@see LexerATNConfig::hashCode()} operation.
     *
     * @var int|null
     */
    private $cachedHashCode;

    /**
     * @param array<LexerAction> $lexerActions The lexer actions to execute,
     *                                         in execution order.
     */
    public function __construct(array $lexerActions)
    {
        $this->lexerActions = $lexerActions;
    }

    /**
     * Creates a {@see LexerActionExecutor} which executes the actions for
     * the input `lexerActionExecutor` followed by a specified `lexerAction`.
     *
     * @param LexerActionExecutor|null $lexerActionExecutor The executor for actions
     *                                                      already traversed by
     *                                                      the lexer while matching
     *                                                      a token within a particular
     *                                                      {@see LexerATNConfig}.
     *                                                      If this is `null`,
     *                                                      the method behaves as
     *                                                      though it were an
     *                                                      empty executor.
     * @param LexerAction              $lexerAction         The lexer action to
     *                                                      execute after the
     *                                                      actions specified in
     *                                                      `lexerActionExecutor`.
     *
     * @return self A {@see LexerActionExecutor} for executing the combined actions
     *              of `lexerActionExecutor` and `lexerAction`.
     */
    public static function append(
        ?LexerActionExecutor $lexerActionExecutor,
        LexerAction $lexerAction
    ) : self {
        if ($lexerActionExecutor === null) {
            return new LexerActionExecutor([$lexerAction]);
        }

        $lexerActions = \array_merge($lexerActionExecutor->lexerActions, [$lexerAction]);

        return new LexerActionExecutor($lexerActions);
    }

    /**
     * Creates a {@see LexerActionExecutor} which encodes the current offset
     * for position-dependent lexer actions.
     *
     * Normally, when the executor encounters lexer actions where
     * {@see LexerAction::isPositionDependent()} returns `true`, it calls
     * {@see IntStream::seek()} on the input {@see CharStream} to set the input
     * position to the <em>end</em> of the current token. This behavior provides
     * for efficient DFA representation of lexer actions which appear at the end
     * of a lexer rule, even when the lexer rule matches a variable number of
     * characters.
     *
     * Prior to traversing a match transition in the ATN, the current offset
     * from the token start index is assigned to all position-dependent lexer
     * actions which have not already been assigned a fixed offset. By storing
     * the offsets relative to the token start index, the DFA representation of
     * lexer actions which appear in the middle of tokens remains efficient due
     * to sharing among tokens of the same length, regardless of their absolute
     * position in the input stream.
     *
     * If the current executor already has offsets assigned to all
     * position-dependent lexer actions, the method returns `$this`.
     *
     * @param int $offset The current offset to assign to all position-dependent
     *                    lexer actions which do not already have offsets assigned.
     *
     * @return self A {@see LexerActionExecutor} which stores input stream offsets
     *              for all position-dependent lexer actions.
     */
    public function fixOffsetBeforeMatch(int $offset) : self
    {
        $updatedLexerActions = null;

        // Iterate by key so that the slot written in the copy is always the
        // slot that was read, whatever keys the constructor was given.
        foreach ($this->lexerActions as $index => $lexerAction) {
            if (!$lexerAction->isPositionDependent()
                || $lexerAction instanceof LexerIndexedCustomAction) {
                continue;
            }

            if ($updatedLexerActions === null) {
                // Arrays are copied on assignment; the copy is only made once
                // a replacement is actually needed.
                $updatedLexerActions = $this->lexerActions;
            }

            $updatedLexerActions[$index] = new LexerIndexedCustomAction($offset, $lexerAction);
        }

        if ($updatedLexerActions === null) {
            return $this;
        }

        return new LexerActionExecutor($updatedLexerActions);
    }

    /**
     * Gets the lexer actions to be executed by this executor.
     *
     * @return array<LexerAction> The lexer actions to be executed by this executor.
     */
    public function getLexerActions() : array
    {
        return $this->lexerActions;
    }

    /**
     * Execute the actions encapsulated by this executor within the context of a
     * particular {@see Lexer}.
     *
     * This method calls {@see IntStream::seek()} to set the position of the
     * `input` {@see CharStream} prior to calling {@see LexerAction::execute()}
     * on a position-dependent action. Before the method returns, the input
     * position will be restored to the same position it was in when the method
     * was invoked.
     *
     * @param Lexer      $lexer      The lexer instance.
     * @param CharStream $input      The input stream which is the source for
     *                               the current token. When this method is called,
     *                               the current {@see IntStream::getIndex()} for
     *                               `input` should be the start of the following
     *                               token, i.e. 1 character past the end of the
     *                               current token.
     * @param int        $startIndex The token start index. This value may be
     *                               passed to {@see IntStream::seek()} to set
     *                               the `input` position to the beginning
     *                               of the token.
     */
    public function execute(Lexer $lexer, CharStream $input, int $startIndex) : void
    {
        // True while the stream has been left somewhere other than $stopIndex.
        $requiresSeek = false;
        $stopIndex = $input->getIndex();

        try {
            foreach ($this->lexerActions as $lexerAction) {
                if ($lexerAction instanceof LexerIndexedCustomAction) {
                    // Indexed actions run at their recorded offset from the
                    // token start; unwrap and execute the underlying action.
                    $offset = $lexerAction->getOffset();
                    $input->seek($startIndex + $offset);
                    $lexerAction = $lexerAction->getAction();
                    $requiresSeek = $startIndex + $offset !== $stopIndex;
                } elseif ($lexerAction->isPositionDependent()) {
                    // Position-dependent actions without an offset run at the
                    // position the stream had when this method was invoked.
                    $input->seek($stopIndex);
                    $requiresSeek = false;
                }

                $lexerAction->execute($lexer);
            }
        } finally {
            // Restore the original position even if an action throws.
            if ($requiresSeek) {
                $input->seek($stopIndex);
            }
        }
    }

    /**
     * Returns the hash code of the action sequence.
     *
     * The value is computed with {@see Hasher::hash()} on first use and then
     * served from {@see LexerActionExecutor::$cachedHashCode}.
     */
    public function hashCode() : int
    {
        if ($this->cachedHashCode === null) {
            $this->cachedHashCode = Hasher::hash($this->lexerActions);
        }

        return $this->cachedHashCode;
    }

    /**
     * Checks whether `$other` is an executor holding an equal action sequence.
     *
     * The hash codes are compared before the action arrays are compared with
     * {@see Equality::equals()}.
     *
     * @param object $other The object to compare with.
     *
     * @return bool `true` if `$other` is this instance, or a
     *              {@see LexerActionExecutor} with the same hash code and
     *              equal lexer actions.
     */
    public function equals(object $other) : bool
    {
        if ($this === $other) {
            return true;
        }

        return $other instanceof self
            && $this->hashCode() === $other->hashCode()
            && Equality::equals($this->lexerActions, $other->lexerActions);
    }

    /**
     * Returns `LexerActionExecutor[...]` with the string forms of the actions
     * joined by `, `.
     */
    public function __toString() : string
    {
        return \sprintf('LexerActionExecutor[%s]', \implode(', ', $this->lexerActions));
    }
}