xref: /plugin/combo/vendor/antlr/antlr4-php-runtime/src/Atn/LexerActionExecutor.php (revision 37748cd8654635afbeca80942126742f0f4cc346)
1*37748cd8SNickeau<?php
2*37748cd8SNickeau
3*37748cd8SNickeaudeclare(strict_types=1);
4*37748cd8SNickeau
5*37748cd8SNickeaunamespace Antlr\Antlr4\Runtime\Atn;
6*37748cd8SNickeau
7*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Atn\Actions\LexerAction;
8*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Atn\Actions\LexerIndexedCustomAction;
9*37748cd8SNickeauuse Antlr\Antlr4\Runtime\CharStream;
10*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Comparison\Equality;
11*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Comparison\Equatable;
12*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Comparison\Hasher;
13*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Lexer;
14*37748cd8SNickeau
/**
 * Executes the sequence of lexer actions that were traversed while a lexer
 * rule (token) was being matched.
 *
 * Position information for position-dependent lexer actions is tracked
 * efficiently, so that actions appearing only at the end of a rule do not
 * bloat the {@see DFA} created for the lexer.
 *
 * @author Sam Harwell
 */
final class LexerActionExecutor implements Equatable
{
    /**
     * The actions run by {@see LexerActionExecutor::execute()}, in iteration order.
     *
     * @var array<LexerAction>
     */
    private $lexerActions;

    /**
     * Memoized result of {@see LexerActionExecutor::hashCode()}; stays `null`
     * until the hash is requested for the first time. The value is cached
     * because it takes part in the performance-critical
     * {@see LexerATNConfig::hashCode()} operation.
     *
     * @var int|null
     */
    private $cachedHashCode;

    /**
     * @param array<LexerAction> $lexerActions The actions this executor runs.
     */
    public function __construct(array $lexerActions)
    {
        $this->lexerActions = $lexerActions;
    }

    /**
     * Builds a new executor that runs every action of `lexerActionExecutor`
     * and then the given `lexerAction`.
     *
     * @param LexerActionExecutor|null $lexerActionExecutor The executor holding the
     *                                                      actions already traversed
     *                                                      by the lexer while matching
     *                                                      a token within a particular
     *                                                      {@see LexerATNConfig}.
     *                                                      `null` is treated like
     *                                                      an executor without
     *                                                      any actions.
     * @param LexerAction              $lexerAction         The action to run after
     *                                                      the actions contained in
     *                                                      `lexerActionExecutor`.
     *
     * @return self A {@see LexerActionExecutor} for executing the combined actions
     *              of `lexerActionExecutor` and `lexerAction`.
     */
    public static function append(
        ?LexerActionExecutor $lexerActionExecutor,
        LexerAction $lexerAction
    ) : self {
        $previousActions = $lexerActionExecutor === null
            ? []
            : $lexerActionExecutor->lexerActions;

        return new self(\array_merge($previousActions, [$lexerAction]));
    }

    /**
     * Returns an executor in which every position-dependent action carries
     * a fixed offset.
     *
     * Normally, when the executor encounters a lexer action for which
     * {@see LexerAction::isPositionDependent()} returns `true`, it calls
     * {@see IntStream::seek()} on the input {@see CharStream} to move the input
     * position to the <em>end</em> of the current token. That keeps the DFA
     * representation efficient for actions located at the end of a lexer rule,
     * even if the rule matches a variable number of characters.
     *
     * Before a match transition in the ATN is traversed, the current offset
     * from the token start index is assigned to all position-dependent actions
     * that have not yet received a fixed offset. Because offsets are stored
     * relative to the token start index, actions in the middle of a token can
     * still be shared among tokens of the same length, regardless of their
     * absolute position in the input stream.
     *
     * When no action needs to be wrapped, the method returns `$this`.
     *
     * @param int $offset The current offset to assign to all position-dependent
     *                    lexer actions which do not already have offsets assigned.
     *
     * @return self A {@see LexerActionExecutor} which stores input stream offsets
     *              for all position-dependent lexer actions.
     */
    public function fixOffsetBeforeMatch(int $offset) : self
    {
        $rewrittenActions = null;
        $total = \count($this->lexerActions);

        for ($index = 0; $index < $total; $index++) {
            $action = $this->lexerActions[$index];

            // Nothing to do for position-independent actions or for actions
            // that have already been wrapped with an offset.
            if (!$action->isPositionDependent() || $action instanceof LexerIndexedCustomAction) {
                continue;
            }

            // Copy the action list lazily, on the first action that has to be wrapped.
            $rewrittenActions = $rewrittenActions ?? \array_merge($this->lexerActions, []);
            $rewrittenActions[$index] = new LexerIndexedCustomAction($offset, $action);
        }

        return $rewrittenActions === null ? $this : new self($rewrittenActions);
    }

    /**
     * Returns the lexer actions held by this executor.
     *
     * @return array<LexerAction> The lexer actions to be executed by this executor.
     */
    public function getLexerActions() : array
    {
        return $this->lexerActions;
    }

    /**
     * Runs the actions held by this executor in the context of the given
     * {@see Lexer}.
     *
     * {@see IntStream::seek()} is called on `input` before
     * {@see LexerAction::execute()} is invoked on a position-dependent action.
     * Before the method returns, the input position is restored to the
     * position it had when the method was invoked.
     *
     * @param Lexer      $lexer      The lexer instance.
     * @param CharStream $input      The input stream which is the source for
     *                               the current token. When this method is called,
     *                               the current {@see IntStream::getIndex()} for
     *                               `input` should be the start of the following
     *                               token, i.e. 1 character past the end of the
     *                               current token.
     * @param int        $startIndex The token start index. This value may be
     *                               passed to {@see IntStream::seek()} to set
     *                               the `input` position to the beginning
     *                               of the token.
     */
    public function execute(Lexer $lexer, CharStream $input, int $startIndex) : void
    {
        // Position of the stream on entry; every seek is measured against it.
        $entryIndex = $input->getIndex();
        $mustRestore = false;

        try {
            foreach ($this->lexerActions as $action) {
                if ($action instanceof LexerIndexedCustomAction) {
                    // Action bound to a fixed offset: move there, then unwrap it.
                    $target = $startIndex + $action->getOffset();
                    $input->seek($target);
                    $action = $action->getAction();
                    $mustRestore = $target !== $entryIndex;
                } elseif ($action->isPositionDependent()) {
                    // Unbound position-dependent action: run it at the entry position.
                    $input->seek($entryIndex);
                    $mustRestore = false;
                }

                $action->execute($lexer);
            }
        } finally {
            // Put the stream back if the last seek left it somewhere else.
            if ($mustRestore) {
                $input->seek($entryIndex);
            }
        }
    }

    /**
     * Returns the hash of the action list, computing it on first use and
     * serving the cached value afterwards.
     */
    public function hashCode() : int
    {
        if ($this->cachedHashCode !== null) {
            return $this->cachedHashCode;
        }

        $this->cachedHashCode = Hasher::hash($this->lexerActions);

        return $this->cachedHashCode;
    }

    /**
     * Tells whether `other` is an executor with an equal list of actions.
     *
     * The hash codes are compared before the action lists themselves.
     */
    public function equals(object $other) : bool
    {
        if ($this === $other) {
            return true;
        }

        if (!$other instanceof self) {
            return false;
        }

        return $this->hashCode() === $other->hashCode()
            && Equality::equals($this->lexerActions, $other->lexerActions);
    }

    /**
     * Renders the executor as `LexerActionExecutor[...]` with the string forms
     * of its actions joined by `, `.
     */
    public function __toString() : string
    {
        $joinedActions = \implode(', ', $this->lexerActions);

        return 'LexerActionExecutor[' . $joinedActions . ']';
    }
}
215