1<?php
2
3declare(strict_types=1);
4
5namespace Antlr\Antlr4\Runtime\Atn;
6
7use Antlr\Antlr4\Runtime\Atn\Actions\LexerAction;
8use Antlr\Antlr4\Runtime\Atn\Actions\LexerIndexedCustomAction;
9use Antlr\Antlr4\Runtime\CharStream;
10use Antlr\Antlr4\Runtime\Comparison\Equality;
11use Antlr\Antlr4\Runtime\Comparison\Equatable;
12use Antlr\Antlr4\Runtime\Comparison\Hasher;
13use Antlr\Antlr4\Runtime\Lexer;
14
/**
 * Represents an executor for a sequence of lexer actions which are traversed
 * during the matching operation of a lexer rule (token).
 *
 * The executor tracks position information for position-dependent lexer actions
 * efficiently, ensuring that actions appearing only at the end of the rule do
 * not cause bloating of the {@see DFA} created for the lexer.
 *
 * @author Sam Harwell
 */
final class LexerActionExecutor implements Equatable
{
    /**
     * The actions to run, in execution order.
     *
     * Always a list (consecutive integer keys starting at 0); the constructor
     * re-indexes whatever array it receives.
     *
     * @var array<int, LexerAction>
     */
    private $lexerActions;

    /**
     * Caches the result of {@see LexerActionExecutor::hashCode()} since
     * the hash code is an element of the performance-critical
     * {@see LexerATNConfig::hashCode()} operation.
     *
     * Stays `null` until the hash code is requested for the first time.
     *
     * @var int|null
     */
    private $cachedHashCode;

    /**
     * Creates an executor for the given actions.
     *
     * The array is re-indexed so that only the order of the actions matters:
     * sparse or string keys supplied by a caller are discarded. Inputs that
     * already are lists are stored unchanged.
     *
     * @param array<LexerAction> $lexerActions The lexer actions to execute,
     *                                         in execution order.
     */
    public function __construct(array $lexerActions)
    {
        // Positional bookkeeping elsewhere in this class relies on list keys.
        $this->lexerActions = \array_values($lexerActions);
    }

    /**
     * Creates a {@see LexerActionExecutor} which executes the actions for
     * the input `lexerActionExecutor` followed by a specified `lexerAction`.
     *
     * The input executor is not modified; a new instance is always returned.
     *
     * @param LexerActionExecutor|null $lexerActionExecutor The executor for actions
     *                                                      already traversed by
     *                                                      the lexer while matching
     *                                                      a token within a particular
     *                                                      {@see LexerATNConfig}.
     *                                                      If this is `null`,
     *                                                      the method behaves as
     *                                                      though it were an
     *                                                      empty executor.
     * @param LexerAction              $lexerAction         The lexer action to
     *                                                      execute after the
     *                                                      actions specified in
     *                                                      `lexerActionExecutor`.
     *
     * @return self A {@see LexerActionExecutor} for executing the combined actions
     *              of `lexerActionExecutor` and `lexerAction`.
     */
    public static function append(
        ?LexerActionExecutor $lexerActionExecutor,
        LexerAction $lexerAction
    ) : self {
        if ($lexerActionExecutor === null) {
            return new self([$lexerAction]);
        }

        // Assignment copies the array (value semantics), so the source
        // executor keeps its own, unchanged action list.
        $lexerActions = $lexerActionExecutor->lexerActions;
        $lexerActions[] = $lexerAction;

        return new self($lexerActions);
    }

    /**
     * Creates a {@see LexerActionExecutor} which encodes the current offset
     * for position-dependent lexer actions.
     *
     * Normally, when the executor encounters lexer actions where
     * {@see LexerAction::isPositionDependent()} returns `true`, it calls
     * {@see IntStream::seek()} on the input {@see CharStream} to set the input
     * position to the <em>end</em> of the current token. This behavior provides
     * for efficient DFA representation of lexer actions which appear at the end
     * of a lexer rule, even when the lexer rule matches a variable number of
     * characters.
     *
     * Prior to traversing a match transition in the ATN, the current offset
     * from the token start index is assigned to all position-dependent lexer
     * actions which have not already been assigned a fixed offset. By storing
     * the offsets relative to the token start index, the DFA representation of
     * lexer actions which appear in the middle of tokens remains efficient due
     * to sharing among tokens of the same length, regardless of their absolute
     * position in the input stream.
     *
     * If the current executor already has offsets assigned to all
     * position-dependent lexer actions, the method returns `$this`.
     *
     * @param int $offset The current offset to assign to all position-dependent
     *                    lexer actions which do not already have offsets assigned.
     *
     * @return self A {@see LexerActionExecutor} which stores input stream offsets
     *              for all position-dependent lexer actions.
     */
    public function fixOffsetBeforeMatch(int $offset) : self
    {
        // Allocated lazily: stays null while no action needs to be wrapped,
        // which lets the common "nothing to fix" case return $this.
        $updatedLexerActions = null;

        foreach ($this->lexerActions as $i => $lexerAction) {
            // Skip actions that do not depend on the input position, and those
            // that already carry a fixed offset.
            if (!$lexerAction->isPositionDependent()
                || $lexerAction instanceof LexerIndexedCustomAction) {
                continue;
            }

            if ($updatedLexerActions === null) {
                // Arrays are copied on assignment; $this->lexerActions is
                // never modified through $updatedLexerActions.
                $updatedLexerActions = $this->lexerActions;
            }

            $updatedLexerActions[$i] = new LexerIndexedCustomAction($offset, $lexerAction);
        }

        if ($updatedLexerActions === null) {
            return $this;
        }

        return new self($updatedLexerActions);
    }

    /**
     * Gets the lexer actions to be executed by this executor.
     *
     * @return array<LexerAction> The lexer actions to be executed by this
     *                            executor, as a list in execution order.
     */
    public function getLexerActions() : array
    {
        return $this->lexerActions;
    }

    /**
     * Execute the actions encapsulated by this executor within the context of a
     * particular {@see Lexer}.
     *
     * This method calls {@see IntStream::seek()} to set the position of the
     * `input` {@see CharStream} prior to calling {@see LexerAction::execute()}
     * on a position-dependent action. Before the method returns, the input
     * position will be restored to the same position it was in when the method
     * was invoked. The restore happens in a `finally` block, so it is also
     * performed when an action throws.
     *
     * @param Lexer      $lexer      The lexer instance.
     * @param CharStream $input      The input stream which is the source for
     *                               the current token. When this method is called,
     *                               the current {@see IntStream::getIndex()} for
     *                               `input` should be the start of the following
     *                               token, i.e. 1 character past the end of the
     *                               current token.
     * @param int        $startIndex The token start index. This value may be
     *                               passed to {@see IntStream::seek()} to set
     *                               the `input` position to the beginning
     *                               of the token.
     */
    public function execute(Lexer $lexer, CharStream $input, int $startIndex) : void
    {
        // True while the most recent seek left the stream somewhere other
        // than $stopIndex, i.e. while a restoring seek is still owed.
        $requiresSeek = false;
        $stopIndex = $input->getIndex();

        try {
            foreach ($this->lexerActions as $lexerAction) {
                if ($lexerAction instanceof LexerIndexedCustomAction) {
                    // Fixed-offset action: position the stream relative to the
                    // token start, then run the wrapped action.
                    $position = $startIndex + $lexerAction->getOffset();
                    $input->seek($position);
                    $lexerAction = $lexerAction->getAction();
                    $requiresSeek = $position !== $stopIndex;
                } elseif ($lexerAction->isPositionDependent()) {
                    // Position-dependent action without an offset: run it at
                    // the position the stream had on entry.
                    $input->seek($stopIndex);
                    $requiresSeek = false;
                }

                $lexerAction->execute($lexer);
            }
        } finally {
            if ($requiresSeek) {
                $input->seek($stopIndex);
            }
        }
    }

    /**
     * Returns the hash code of this executor, derived from its lexer actions.
     *
     * The value is computed on first use and cached in
     * {@see LexerActionExecutor::$cachedHashCode} for subsequent calls.
     *
     * @return int The hash code of the action list.
     */
    public function hashCode() : int
    {
        if ($this->cachedHashCode === null) {
            $this->cachedHashCode = Hasher::hash($this->lexerActions);
        }

        return $this->cachedHashCode;
    }

    /**
     * Tells whether `$other` is an executor holding an equal action list.
     *
     * @param object $other The object to compare with.
     *
     * @return bool `true` if `$other` is this very instance, or another
     *              {@see LexerActionExecutor} with the same hash code whose
     *              actions compare equal through {@see Equality::equals()}.
     */
    public function equals(object $other) : bool
    {
        if ($this === $other) {
            return true;
        }

        // The hash comparison runs before the element-wise comparison, so a
        // hash mismatch short-circuits the Equality::equals() call.
        return $other instanceof self
            && $this->hashCode() === $other->hashCode()
            && Equality::equals($this->lexerActions, $other->lexerActions);
    }

    /**
     * Returns a textual representation of the form
     * `LexerActionExecutor[action1, action2, ...]`.
     *
     * Each action is converted to a string by `implode()`.
     */
    public function __toString() : string
    {
        return \sprintf('LexerActionExecutor[%s]', \implode(', ', $this->lexerActions));
    }
}
215