1<?php
2
3declare(strict_types=1);
4
5namespace Antlr\Antlr4\Runtime;
6
7use Antlr\Antlr4\Runtime\Utils\Set;
8
9/**
10 * This implementation of {@see TokenStream} loads tokens from a
11 * {@see TokenSource} on-demand, and places the tokens in a buffer to provide
12 * access to any previous token by index.
13 *
 * This token stream ignores the value of {@see Token::getChannel()}. If your
 * parser requires the token stream to filter tokens to only those on a
 * particular channel, such as {@see Token::DEFAULT_CHANNEL} or
 * {@see Token::HIDDEN_CHANNEL}, use a filtering token stream such as
 * {@see CommonTokenStream}.
19 */
20class BufferedTokenStream implements TokenStream
21{
    /**
     * The {@see TokenSource} from which tokens for this stream are fetched.
     *
     * @var TokenSource
     */
    protected $tokenSource;

    /**
     * A collection of all tokens fetched from the token source. The list is
     * considered a complete view of the input once
     * {@see BufferedTokenStream::$fetchedEOF} is set to `true`.
     *
     * @var array<Token>
     */
    protected $tokens = [];

    /**
     * The index into {@see BufferedTokenStream::$tokens} of the current token
     * (the next token to {@see BufferedTokenStream::consume()}).
     * `$tokens[$index]` should be the token returned by `LT(1)`.
     *
     * This field is set to -1 when the stream is first constructed or when
     * {@see BufferedTokenStream::setTokenSource()} is called, indicating that
     * the first token has not yet been fetched from the token source. For
     * additional information, see the documentation of {@see IntStream} for
     * a description of Initializing Methods.
     *
     * @var int
     */
    protected $index = -1;

    /**
     * Indicates whether the {@see Token::EOF} token has been fetched from
     * {@see BufferedTokenStream::$tokenSource} and added to
     * {@see BufferedTokenStream::$tokens}. This field improves performance
     * for the following cases:
     *
     * - {@see BufferedTokenStream::consume()}: The lookahead check in
     *    {@see BufferedTokenStream::consume()} to prevent consuming the
     *    EOF symbol is optimized by checking the values of
     *    {@see BufferedTokenStream::$fetchedEOF} and
     *    {@see BufferedTokenStream::$index} instead of calling
     *    {@see BufferedTokenStream::LA()}.
     * - {@see BufferedTokenStream::fetch()}: The check to prevent adding multiple
     *    EOF symbols into {@see BufferedTokenStream::$tokens} is trivial with
     *    this field.
     *
     * @var bool
     */
    protected $fetchedEOF = false;
73
74    public function __construct(TokenSource $tokenSource)
75    {
76        $this->tokenSource = $tokenSource;
77    }
78
79    public function getTokenSource() : TokenSource
80    {
81        return $this->tokenSource;
82    }
83
84    public function getIndex() : int
85    {
86        return $this->index;
87    }
88
89    public function mark() : int
90    {
91        return 0;
92    }
93
94    public function release(int $marker) : void
95    {
96        // no resources to release
97    }
98
99    public function seek(int $index) : void
100    {
101        $this->lazyInit();
102
103        $this->index = $this->adjustSeekIndex($index);
104    }
105
106    public function getLength() : int
107    {
108        return \count($this->tokens);
109    }
110
111    public function consume() : void
112    {
113        $skipEofCheck = false;
114
115        if ($this->index >= 0) {
116            if ($this->fetchedEOF) {
117                // the last token in tokens is EOF. skip check if p indexes any
118                // fetched token except the last.
119                $skipEofCheck = $this->index < \count($this->tokens) - 1;
120            } else {
121                // no EOF token in tokens. skip check if p indexes a fetched token.
122                $skipEofCheck = $this->index < \count($this->tokens);
123            }
124        }
125
126        if (!$skipEofCheck && $this->LA(1) === Token::EOF) {
127            throw new \InvalidArgumentException('Cannot consume EOF.');
128        }
129
130        if ($this->sync($this->index + 1)) {
131            $this->index = $this->adjustSeekIndex($this->index + 1);
132        }
133    }
134
135    /**
136     * Make sure index `i` in tokens has a token.
137     *
138     * @return bool `true` if a token is located at index `i`,
139     *              otherwise `false`.
140     *
141     * @see BufferedTokenStream::get()
142     */
143    public function sync(int $i) : bool
144    {
145        $n = $i - \count($this->tokens) + 1; // how many more elements we need?
146
147        if ($n > 0) {
148            $fetched = $this->fetch($n);
149
150            return $fetched >= $n;
151        }
152
153        return true;
154    }
155
156    public function fetch(int $n) : int
157    {
158        if ($this->fetchedEOF) {
159            return 0;
160        }
161
162        for ($i = 0; $i < $n; $i++) {
163            /** @var WritableToken $token */
164            $token = $this->tokenSource->nextToken();
165            $token->setTokenIndex(\count($this->tokens));
166
167            $this->tokens[] = $token;
168
169            if ($token->getType() === Token::EOF) {
170                $this->fetchedEOF = true;
171
172                return $i + 1;
173            }
174        }
175
176        return $n;
177    }
178
179    public function get(int $index) : Token
180    {
181        $count = \count($this->tokens);
182
183        if ($index < 0 || $index >= $count) {
184            throw new \OutOfBoundsException(\sprintf(
185                'Token index %d out of range 0..%d.',
186                $index,
187                $count
188            ));
189        }
190
191        $this->lazyInit();
192
193        return $this->tokens[$index];
194    }
195
196    public function LA(int $i) : int
197    {
198        $token = $this->LT($i);
199
200        return $token === null ? Token::INVALID_TYPE : $token->getType();
201    }
202
203    protected function LB(int $k) : ?Token
204    {
205        if ($this->index - $k < 0) {
206            return null;
207        }
208
209        return $this->tokens[$this->index - $k];
210    }
211
212    public function LT(int $k) : ?Token
213    {
214        $this->lazyInit();
215
216        if ($k === 0) {
217            return null;
218        }
219
220        if ($k < 0) {
221            return $this->LB(-$k);
222        }
223
224        $i = $this->index + $k - 1;
225
226        $this->sync($i);
227
228        if ($i >= \count($this->tokens)) {
229            // return EOF token
230            // EOF must be last token
231            return $this->tokens[\count($this->tokens) - 1];
232        }
233
234        return $this->tokens[$i];
235    }
236
237    /**
238     * Allowed derived classes to modify the behavior of operations which change
239     * the current stream position by adjusting the target token index of a seek
240     * operation. The default implementation simply returns `i`. If an
241     * exception is thrown in this method, the current stream index should not
242     * be changed.
243     *
244     * For example, {@see CommonTokenStream} overrides this method to ensure
245     * that the seek target is always an on-channel token.
246     *
247     * @param int $i The target token index.
248     *
249     * @return int The adjusted target token index.
250     */
251    public function adjustSeekIndex(int $i) : int
252    {
253        return $i;
254    }
255
256    protected function lazyInit() : void
257    {
258        if ($this->index === -1) {
259            $this->setup();
260        }
261    }
262
263    protected function setup() : void
264    {
265        $this->sync(0);
266
267        $this->index = $this->adjustSeekIndex(0);
268    }
269
270    /**
271     * Reset this token stream by setting its token source.
272     */
273    public function setTokenSource(TokenSource $tokenSource) : void
274    {
275        $this->tokenSource = $tokenSource;
276        $this->tokens = [];
277        $this->index = -1;
278        $this->fetchedEOF = false;
279    }
280
281    /**
282     * @return array<Token>
283     */
284    public function getAllTokens() : array
285    {
286        return $this->tokens;
287    }
288
289    /**
290     * Get all tokens from start..stop inclusively
291     *
292     * @return array<Token>|null
293     */
294    public function getTokens(int $start, int $stop, ?Set $types = null) : ?array
295    {
296        if ($start < 0 || $stop < 0) {
297            return null;
298        }
299
300        $this->lazyInit();
301
302        $subset = [];
303        if ($stop >= \count($this->tokens)) {
304            $stop = \count($this->tokens) - 1;
305        }
306
307        for ($i = $start; $i < $stop; $i++) {
308            $t = $this->tokens[$i];
309
310            if ($t->getType() === Token::EOF) {
311                break;
312            }
313
314            if ($types === null || $types->contains($t->getType())) {
315                $subset[] = $t;
316            }
317        }
318
319        return $subset;
320    }
321
322    /**
323     * Given a starting index, return the index of the next token on channel.
324     * Return `i` if `tokens[i]` is on channel. Return the index of the EOF
325     * token if there are no tokens on channel between `i` and EOF.
326     */
327    protected function nextTokenOnChannel(int $i, int $channel) : int
328    {
329        $this->sync($i);
330
331        if ($i >= \count($this->tokens)) {
332            return $this->getLength() - 1;
333        }
334
335        $token = $this->tokens[$i];
336        while ($token->getChannel() !== $channel) {
337            if ($token->getType() === Token::EOF) {
338                return $i;
339            }
340
341            $i++;
342
343            $this->sync($i);
344
345            $token = $this->tokens[$i];
346        }
347
348        return $i;
349    }
350
351    /**
352     * Given a starting index, return the index of the previous token on channel.
353     * Return `i` if `tokens[i]` is on channel. Return -1 if there are no tokens
354     * on channel between `i` and 0.
355     *
356     * If `i` specifies an index at or after the EOF token, the EOF token
357     * index is returned. This is due to the fact that the EOF token is treated
358     * as though it were on every channel.
359     */
360    protected function previousTokenOnChannel(int $i, int $channel) : int
361    {
362        while ($i >= 0 && $this->tokens[$i]->getChannel() !== $channel) {
363            $i--;
364        }
365
366        return $i;
367    }
368
369    /**
370     * Collect all tokens on specified channel to the right of  the current token
371     * up until we see a token on DEFAULT_TOKEN_CHANNEL or EOF. If channel is -1,
372     * find any non default channel token.
373     *
374     * @return array<Token>
375     */
376    public function getHiddenTokensToRight(int $tokenIndex, int $channel) : ?array
377    {
378        $this->lazyInit();
379
380        if ($tokenIndex < 0 || $tokenIndex >= \count($this->tokens)) {
381            throw new \RuntimeException(\sprintf('%d not in 0..%d', $tokenIndex, \count($this->tokens) - 1));
382        }
383
384        $nextOnChannel = $this->nextTokenOnChannel($tokenIndex + 1, Lexer::DEFAULT_TOKEN_CHANNEL);
385        $from_ = $tokenIndex + 1;
386        // if none onchannel to right, nextOnChannel=-1 so set to = last token
387        $to = $nextOnChannel === -1 ? \count($this->tokens) - 1 : $nextOnChannel;
388
389        return $this->filterForChannel($from_, $to, $channel);
390    }
391
392    /**
393     * Collect all tokens on specified channel to the left of the current token
394     * up until we see a token on DEFAULT_TOKEN_CHANNEL. If channel is -1, find
395     * any non default channel token.
396     *
397     * @return array<Token>
398     */
399    public function getHiddenTokensToLeft(int $tokenIndex, int $channel) : ?array
400    {
401        $this->lazyInit();
402
403        if ($tokenIndex < 0 || $tokenIndex >= \count($this->tokens)) {
404            throw new \RuntimeException(\sprintf('%d not in 0..%d', $tokenIndex, \count($this->tokens) - 1));
405        }
406
407        $prevOnChannel = $this->previousTokenOnChannel($tokenIndex - 1, Lexer::DEFAULT_TOKEN_CHANNEL);
408
409        if ($prevOnChannel === $tokenIndex - 1) {
410            return null;
411        }
412
413        // if none on channel to left, prevOnChannel=-1 then from=0
414        $from = $prevOnChannel + 1;
415        $to = $tokenIndex - 1;
416
417        return $this->filterForChannel($from, $to, $channel);
418    }
419
420    /**
421     * @return array<Token>|null
422     */
423    protected function filterForChannel(int $left, int $right, int $channel) : ?array
424    {
425        $hidden = [];
426        for ($i = $left; $i < $right + 1; $i++) {
427            $t = $this->tokens[$i];
428
429            if ($channel === -1) {
430                if ($t->getChannel() !== Lexer::DEFAULT_TOKEN_CHANNEL) {
431                    $hidden[] = $t;
432                }
433            } elseif ($t->getChannel() === $channel) {
434                $hidden[] = $t;
435            }
436        }
437
438        if (\count($hidden) === 0) {
439            return null;
440        }
441
442        return $hidden;
443    }
444
445    public function getSourceName() : string
446    {
447        return $this->tokenSource->getSourceName();
448    }
449
450    /**
451     * Get the text of all tokens in this buffer.
452     */
453    public function getTextByInterval(Interval $interval) : string
454    {
455        $this->lazyInit();
456        $this->fill();
457
458        if ($interval->start < 0 || $interval->stop < 0) {
459            return '';
460        }
461
462        $stop = $interval->stop;
463
464        if ($stop >= \count($this->tokens)) {
465            $stop = \count($this->tokens) - 1;
466        }
467
468        $s = '';
469        for ($i = $interval->start; $i <= $stop; $i++) {
470            $t = $this->tokens[$i];
471
472            if ($t->getType() === Token::EOF) {
473                break;
474            }
475
476            $s .= $t->getText();
477        }
478
479        return $s;
480    }
481
482    public function getText() : string
483    {
484        return $this->getTextByInterval(new Interval(0, \count($this->tokens) - 1));
485    }
486
487    public function getTextByTokens(?Token $start = null, ?Token $stop = null) : string
488    {
489        $startIndex = $start === null ? 0 : $start->getTokenIndex();
490        $stopIndex = $stop === null ? \count($this->tokens) - 1 : $stop->getTokenIndex();
491
492        return $this->getTextByInterval(new Interval($startIndex, $stopIndex));
493    }
494
495    public function getTextByContext(RuleContext $context) : string
496    {
497        return $this->getTextByInterval($context->getSourceInterval());
498    }
499
500    /**
501     * Get all tokens from lexer until EOF.
502     */
503    public function fill() : void
504    {
505        $this->lazyInit();
506
507        while ($this->fetch(1000) === 1000) {
508            continue;
509        }
510    }
511}
512