<?php

declare(strict_types=1);

namespace Antlr\Antlr4\Runtime;

use Antlr\Antlr4\Runtime\Utils\Set;

/**
 * This implementation of {@see TokenStream} loads tokens from a
 * {@see TokenSource} on-demand, and places the tokens in a buffer to provide
 * access to any previous token by index.
 *
 * This token stream ignores the value of {@see Token::getChannel()}. If your
 * parser requires the token stream to filter tokens to only those on a
 * particular channel, such as {@see Token::DEFAULT_CHANNEL} or
 * {@see Token::HIDDEN_CHANNEL}, use a filtering token stream such as
 * {@see CommonTokenStream}.
 */
class BufferedTokenStream implements TokenStream
{
    /**
     * The {@see TokenSource} from which tokens for this stream are fetched.
     *
     * @var TokenSource
     */
    protected $tokenSource;

    /**
     * A collection of all tokens fetched from the token source. The list is
     * considered a complete view of the input once
     * {@see BufferedTokenStream::$fetchedEOF} is set to `true`.
     *
     * @var array<Token>
     */
    protected $tokens = [];

    /**
     * The index into {@see BufferedTokenStream::$tokens} of the current token
     * (next token to {@see BufferedTokenStream::consume()}).
     * `$tokens[$index]` should be `LT(1)`.
     *
     * This field is set to -1 when the stream is first constructed or when
     * {@see BufferedTokenStream::setTokenSource()} is called, indicating that
     * the first token has not yet been fetched from the token source. For
     * additional information, see the documentation of {@see IntStream} for
     * a description of Initializing Methods.
     *
     * @var int
     */
    protected $index = -1;

    /**
     * Indicates whether the {@see Token::EOF} token has been fetched from
     * {@see BufferedTokenStream::$tokenSource} and added to
     * {@see BufferedTokenStream::$tokens}. This field improves performance
     * for the following cases:
     *
     * - {@see BufferedTokenStream::consume()}: The lookahead check in
     *    {@see BufferedTokenStream::consume()} to prevent consuming the
     *    EOF symbol is optimized by checking the values of
     *    {@see BufferedTokenStream::$fetchedEOF} and
     *    {@see BufferedTokenStream::$index} instead of calling
     *    {@see BufferedTokenStream::LA()}.
     * - {@see BufferedTokenStream::fetch()}: The check to prevent adding multiple
     *    EOF symbols into {@see BufferedTokenStream::$tokens} is trivial with
     *    this field.
     *
     * @var bool
     */
    protected $fetchedEOF = false;

    public function __construct(TokenSource $tokenSource)
    {
        $this->tokenSource = $tokenSource;
    }

    public function getTokenSource() : TokenSource
    {
        return $this->tokenSource;
    }

    /**
     * Returns the index of the current token, or -1 if the stream has not
     * been initialized yet.
     */
    public function getIndex() : int
    {
        return $this->index;
    }

    /**
     * Every token is kept in the buffer, so marks carry no state; this
     * always returns 0.
     */
    public function mark() : int
    {
        return 0;
    }

    public function release(int $marker) : void
    {
        // no resources to release
    }

    /**
     * Moves the current position to `$index`, after giving
     * {@see BufferedTokenStream::adjustSeekIndex()} a chance to adjust it.
     */
    public function seek(int $index) : void
    {
        $this->lazyInit();

        $this->index = $this->adjustSeekIndex($index);
    }

    /**
     * Returns the number of tokens buffered so far (not the total number of
     * tokens in the input, unless EOF has already been fetched).
     */
    public function getLength() : int
    {
        return \count($this->tokens);
    }

    /**
     * Advances the current position by one token.
     *
     * @throws \InvalidArgumentException If the current token is EOF.
     */
    public function consume() : void
    {
        $skipEofCheck = false;

        if ($this->index >= 0) {
            if ($this->fetchedEOF) {
                // The last token in the buffer is EOF. Skip the check if the
                // index points at any fetched token except the last.
                $skipEofCheck = $this->index < \count($this->tokens) - 1;
            } else {
                // No EOF token in the buffer. Skip the check if the index
                // points at a fetched token.
                $skipEofCheck = $this->index < \count($this->tokens);
            }
        }

        if (!$skipEofCheck && $this->LA(1) === Token::EOF) {
            throw new \InvalidArgumentException('Cannot consume EOF.');
        }

        if ($this->sync($this->index + 1)) {
            $this->index = $this->adjustSeekIndex($this->index + 1);
        }
    }

    /**
     * Make sure index `i` in tokens has a token.
     *
     * @return bool `true` if a token is located at index `i`,
     *              otherwise `false`.
     *
     * @see BufferedTokenStream::get()
     */
    public function sync(int $i) : bool
    {
        // How many more elements do we need?
        $needed = $i - \count($this->tokens) + 1;

        if ($needed <= 0) {
            return true;
        }

        return $this->fetch($needed) >= $needed;
    }

    /**
     * Adds up to `$n` tokens from the token source to the buffer, stopping
     * early once the EOF token has been added.
     *
     * @return int The actual number of tokens added to the buffer.
     */
    public function fetch(int $n) : int
    {
        if ($this->fetchedEOF) {
            return 0;
        }

        for ($i = 0; $i < $n; $i++) {
            /** @var WritableToken $token */
            $token = $this->tokenSource->nextToken();
            $token->setTokenIndex(\count($this->tokens));

            $this->tokens[] = $token;

            if ($token->getType() === Token::EOF) {
                $this->fetchedEOF = true;

                return $i + 1;
            }
        }

        return $n;
    }

    /**
     * Returns the already buffered token at `$index`.
     *
     * @throws \OutOfBoundsException If `$index` is not a buffered position.
     */
    public function get(int $index) : Token
    {
        $count = \count($this->tokens);

        if ($index < 0 || $index >= $count) {
            // The last valid index is $count - 1, not $count.
            throw new \OutOfBoundsException(\sprintf(
                'Token index %d out of range 0..%d.',
                $index,
                $count - 1
            ));
        }

        $this->lazyInit();

        return $this->tokens[$index];
    }

    /**
     * Returns the type of the token `$i` positions away from the current
     * one, or {@see Token::INVALID_TYPE} when there is no such token.
     */
    public function LA(int $i) : int
    {
        $token = $this->LT($i);

        return $token === null ? Token::INVALID_TYPE : $token->getType();
    }

    /**
     * Looks backwards `$k` tokens from the current position.
     *
     * @return Token|null The token, or `null` if that position lies before
     *                    the start of the buffer.
     */
    protected function LB(int $k) : ?Token
    {
        $target = $this->index - $k;

        if ($target < 0) {
            return null;
        }

        return $this->tokens[$target];
    }

    /**
     * Returns the token `$k` positions away from the current one:
     * `LT(1)` is the current token, negative values look backwards and
     * `LT(0)` is undefined (`null`). Looking past the end of the input
     * yields the EOF token.
     */
    public function LT(int $k) : ?Token
    {
        $this->lazyInit();

        if ($k === 0) {
            return null;
        }

        if ($k < 0) {
            return $this->LB(-$k);
        }

        $i = $this->index + $k - 1;

        $this->sync($i);

        $count = \count($this->tokens);

        if ($i >= $count) {
            // Past the end of the input: EOF must be the last token.
            return $this->tokens[$count - 1];
        }

        return $this->tokens[$i];
    }

    /**
     * Allows derived classes to modify the behavior of operations which change
     * the current stream position by adjusting the target token index of a seek
     * operation. The default implementation simply returns `i`. If an
     * exception is thrown in this method, the current stream index should not
     * be changed.
     *
     * For example, {@see CommonTokenStream} overrides this method to ensure
     * that the seek target is always an on-channel token.
     *
     * @param int $i The target token index.
     *
     * @return int The adjusted target token index.
     */
    public function adjustSeekIndex(int $i) : int
    {
        return $i;
    }

    /**
     * Runs {@see BufferedTokenStream::setup()} the first time the stream is
     * used (while the index is still -1).
     */
    protected function lazyInit() : void
    {
        if ($this->index === -1) {
            $this->setup();
        }
    }

    /**
     * Fetches the first token and positions the stream on it.
     */
    protected function setup() : void
    {
        $this->sync(0);

        $this->index = $this->adjustSeekIndex(0);
    }

    /**
     * Reset this token stream by setting its token source.
     */
    public function setTokenSource(TokenSource $tokenSource) : void
    {
        $this->tokenSource = $tokenSource;
        $this->tokens = [];
        $this->index = -1;
        $this->fetchedEOF = false;
    }

    /**
     * Returns every token buffered so far.
     *
     * @return array<Token>
     */
    public function getAllTokens() : array
    {
        return $this->tokens;
    }

    /**
     * Get all tokens from start..stop inclusively.
     *
     * `$stop` is clamped to the last buffered token. Collection ends at the
     * EOF token, which is never part of the result. When `$types` is given,
     * only tokens whose type is contained in the set are returned.
     *
     * @return array<Token>|null `null` if `$start` or `$stop` is negative.
     */
    public function getTokens(int $start, int $stop, ?Set $types = null) : ?array
    {
        if ($start < 0 || $stop < 0) {
            return null;
        }

        $this->lazyInit();

        $last = \count($this->tokens) - 1;

        if ($stop > $last) {
            $stop = $last;
        }

        $subset = [];

        // `<=` because the range is documented as inclusive of `$stop`.
        for ($i = $start; $i <= $stop; $i++) {
            $token = $this->tokens[$i];

            if ($token->getType() === Token::EOF) {
                break;
            }

            if ($types === null || $types->contains($token->getType())) {
                $subset[] = $token;
            }
        }

        return $subset;
    }

    /**
     * Given a starting index, return the index of the next token on channel.
     * Return `i` if `tokens[i]` is on channel. Return the index of the EOF
     * token if there are no tokens on channel between `i` and EOF.
     */
    protected function nextTokenOnChannel(int $i, int $channel) : int
    {
        $this->sync($i);

        if ($i >= \count($this->tokens)) {
            return $this->getLength() - 1;
        }

        $token = $this->tokens[$i];

        while ($token->getChannel() !== $channel) {
            if ($token->getType() === Token::EOF) {
                return $i;
            }

            $i++;

            $this->sync($i);

            $token = $this->tokens[$i];
        }

        return $i;
    }

    /**
     * Given a starting index, return the index of the previous token on channel.
     * Return `i` if `tokens[i]` is on channel. Return -1 if there are no tokens
     * on channel between `i` and 0.
     *
     * If `i` specifies an index at or after the EOF token, the EOF token
     * index is returned. This is due to the fact that the EOF token is treated
     * as though it were on every channel.
     */
    protected function previousTokenOnChannel(int $i, int $channel) : int
    {
        $this->sync($i);

        $count = \count($this->tokens);

        if ($i >= $count) {
            // The EOF token is on every channel.
            return $count - 1;
        }

        while ($i >= 0) {
            $token = $this->tokens[$i];

            if ($token->getType() === Token::EOF || $token->getChannel() === $channel) {
                return $i;
            }

            $i--;
        }

        return $i;
    }

    /**
     * Collect all tokens on specified channel to the right of the current token
     * up until we see a token on DEFAULT_TOKEN_CHANNEL or EOF. If channel is -1,
     * find any non default channel token.
     *
     * @return array<Token>|null `null` if no matching token was found.
     *
     * @throws \RuntimeException If `$tokenIndex` is not a buffered position.
     */
    public function getHiddenTokensToRight(int $tokenIndex, int $channel) : ?array
    {
        $this->lazyInit();

        if ($tokenIndex < 0 || $tokenIndex >= \count($this->tokens)) {
            throw new \RuntimeException(\sprintf('%d not in 0..%d', $tokenIndex, \count($this->tokens) - 1));
        }

        $from = $tokenIndex + 1;
        $nextOnChannel = $this->nextTokenOnChannel($from, Lexer::DEFAULT_TOKEN_CHANNEL);

        // If none on channel to the right (nextOnChannel = -1), go up to the
        // last token.
        $to = $nextOnChannel === -1 ? \count($this->tokens) - 1 : $nextOnChannel;

        return $this->filterForChannel($from, $to, $channel);
    }

    /**
     * Collect all tokens on specified channel to the left of the current token
     * up until we see a token on DEFAULT_TOKEN_CHANNEL. If channel is -1, find
     * any non default channel token.
     *
     * @return array<Token>|null `null` if no matching token was found.
     *
     * @throws \RuntimeException If `$tokenIndex` is not a buffered position.
     */
    public function getHiddenTokensToLeft(int $tokenIndex, int $channel) : ?array
    {
        $this->lazyInit();

        if ($tokenIndex < 0 || $tokenIndex >= \count($this->tokens)) {
            throw new \RuntimeException(\sprintf('%d not in 0..%d', $tokenIndex, \count($this->tokens) - 1));
        }

        if ($tokenIndex === 0) {
            // Obviously no tokens can appear before the first token.
            return null;
        }

        $prevOnChannel = $this->previousTokenOnChannel($tokenIndex - 1, Lexer::DEFAULT_TOKEN_CHANNEL);

        if ($prevOnChannel === $tokenIndex - 1) {
            // The immediate neighbour is on the default channel: nothing hidden.
            return null;
        }

        // If none on channel to the left, prevOnChannel = -1 and so from = 0.
        $from = $prevOnChannel + 1;
        $to = $tokenIndex - 1;

        return $this->filterForChannel($from, $to, $channel);
    }

    /**
     * Returns the buffered tokens in `$left..$right` (inclusive) that are on
     * `$channel`, or, if `$channel` is -1, that are on any channel other
     * than the default one.
     *
     * @return array<Token>|null `null` if no token matched.
     */
    protected function filterForChannel(int $left, int $right, int $channel) : ?array
    {
        $hidden = [];

        for ($i = $left; $i <= $right; $i++) {
            $token = $this->tokens[$i];

            if ($channel === -1) {
                if ($token->getChannel() !== Lexer::DEFAULT_TOKEN_CHANNEL) {
                    $hidden[] = $token;
                }
            } elseif ($token->getChannel() === $channel) {
                $hidden[] = $token;
            }
        }

        if (\count($hidden) === 0) {
            return null;
        }

        return $hidden;
    }

    public function getSourceName() : string
    {
        return $this->tokenSource->getSourceName();
    }

    /**
     * Get the concatenated text of the tokens in the given interval
     * (inclusive), stopping before the EOF token.
     *
     * The whole input is fetched first. A negative bound yields an empty
     * string, and a stop index past the end is clamped to the last token.
     */
    public function getTextByInterval(Interval $interval) : string
    {
        // fill() also performs the lazy initialization.
        $this->fill();

        if ($interval->start < 0 || $interval->stop < 0) {
            return '';
        }

        $stop = $interval->stop;
        $last = \count($this->tokens) - 1;

        if ($stop > $last) {
            $stop = $last;
        }

        $text = '';

        for ($i = $interval->start; $i <= $stop; $i++) {
            $token = $this->tokens[$i];

            if ($token->getType() === Token::EOF) {
                break;
            }

            $text .= $token->getText();
        }

        return $text;
    }

    /**
     * Get the text of all tokens in this buffer.
     */
    public function getText() : string
    {
        // The buffer must be complete before its size can serve as the stop
        // index; otherwise a fresh stream would produce the interval 0..-1
        // and an empty string.
        $this->fill();

        return $this->getTextByInterval(new Interval(0, \count($this->tokens) - 1));
    }

    /**
     * Get the text of the tokens from `$start` to `$stop` (inclusive).
     * A `null` start means the first token, a `null` stop the last one.
     */
    public function getTextByTokens(?Token $start = null, ?Token $stop = null) : string
    {
        $startIndex = $start === null ? 0 : $start->getTokenIndex();

        if ($stop === null) {
            // The index of the last token is only known once the whole
            // input has been buffered.
            $this->fill();

            $stopIndex = \count($this->tokens) - 1;
        } else {
            $stopIndex = $stop->getTokenIndex();
        }

        return $this->getTextByInterval(new Interval($startIndex, $stopIndex));
    }

    /**
     * Get the text of the tokens covered by the given rule context.
     */
    public function getTextByContext(RuleContext $context) : string
    {
        return $this->getTextByInterval($context->getSourceInterval());
    }

    /**
     * Get all tokens from lexer until EOF.
     */
    public function fill() : void
    {
        $this->lazyInit();

        // fetch() returns fewer tokens than requested once EOF is reached.
        while ($this->fetch(1000) === 1000) {
            continue;
        }
    }
}