<?php

/**
 * This file is part of FPDI
 *
 * @package   setasign\Fpdi
 * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
 * @license   http://opensource.org/licenses/mit-license The MIT License
 */

namespace setasign\Fpdi\PdfParser;

/**
 * Splits the bytes delivered by a stream reader into tokens.
 *
 * Tokens can be pushed back onto an internal stack; getNextToken() hands those
 * out again (last in, first out) before it reads any further bytes.
 *
 * @package setasign\Fpdi\PdfParser
 */
class Tokenizer
{
    /**
     * The reader all bytes are fetched from.
     *
     * @var StreamReader
     */
    protected $streamReader;

    /**
     * Tokens that were pushed back and are returned before the stream is read again.
     *
     * @var string[]
     */
    protected $stack = [];

    /**
     * Create a tokenizer working on the given stream reader.
     *
     * @param StreamReader $streamReader
     */
    public function __construct(StreamReader $streamReader)
    {
        $this->streamReader = $streamReader;
    }

    /**
     * Return the stream reader this tokenizer reads from.
     *
     * @return StreamReader
     */
    public function getStreamReader()
    {
        return $this->streamReader;
    }

    /**
     * Drop every token that was pushed back.
     */
    public function clearStack()
    {
        $this->stack = [];
    }

    /**
     * Push a token back so that it is returned by the next getNextToken() call.
     *
     * @param string $token
     */
    public function pushStack($token)
    {
        $this->stack[] = $token;
    }

    /**
     * Return the next token.
     *
     * Pushed-back tokens are served first. Otherwise leading white space is
     * skipped, a "%" comment is discarded up to the end of its line, a single
     * delimiter byte is returned on its own, and anything else is returned as
     * the run of bytes up to the next white-space or delimiter byte.
     *
     * @return bool|string The token, or false if no byte could be read.
     */
    public function getNextToken()
    {
        $pushedBack = \array_pop($this->stack);
        if ($pushedBack !== null) {
            return $pushedBack;
        }

        $byte = $this->streamReader->readByte();
        if ($byte === false) {
            return false;
        }

        // A white-space byte: skip the rest of the white-space run and read
        // the first byte that follows it.
        if (\in_array($byte, ["\x20", "\x0A", "\x0D", "\x0C", "\x09", "\x00"], true)) {
            if ($this->leapWhiteSpaces() === false) {
                return false;
            }

            $byte = $this->streamReader->readByte();
        }

        // Delimiter bytes are tokens of their own.
        if (\in_array($byte, ['/', '[', ']', '(', ')', '{', '}', '<', '>'], true)) {
            return $byte;
        }

        // A comment: throw the remainder of the line away and start over.
        if ($byte === '%') {
            $this->streamReader->readLine();

            return $this->getNextToken();
        }

        // Regular token: locate its end with one strcspn() call per buffer
        // instead of inspecting byte by byte.
        $start = $this->streamReader->getOffset();
        while (true) {
            $buffer = $this->streamReader->getBuffer(false);
            $length = \strcspn(
                $buffer,
                "\x00\x09\x0A\x0C\x0D\x20()<>[]{}/%",
                $start
            );

            if ($buffer === false) {
                break;
            }

            // A white-space or delimiter byte was found inside the buffer.
            if ($start + $length !== \strlen($buffer)) {
                break;
            }

            // The token runs up to the end of the buffer: try to enlarge the
            // buffer and scan again, or give up if it cannot be enlarged.
            if (!$this->streamReader->increaseLength()) {
                break;
            }
        }

        // The byte already fetched through readByte() is expected to sit
        // directly before $start, hence the slice begins one byte earlier.
        $token = \substr($buffer, $start - 1, $length + 1);
        $this->streamReader->setOffset($start + $length);

        return $token;
    }

    /**
     * Advance the stream reader past a run of white-space bytes.
     *
     * @return bool False if the reader has no more content, true once the
     *              offset points inside the buffer again.
     */
    public function leapWhiteSpaces()
    {
        while (true) {
            if (!$this->streamReader->ensureContent()) {
                return false;
            }

            $skipped = \strspn(
                $this->streamReader->getBuffer(false),
                "\x20\x0A\x0C\x0D\x09\x00",
                $this->streamReader->getOffset()
            );
            if ($skipped > 0) {
                $this->streamReader->addOffset($skipped);
            }

            // Stop as soon as the offset is inside the buffer; otherwise the
            // whole remainder was white space and more content is required.
            if ($this->streamReader->getOffset() < $this->streamReader->getBufferLength()) {
                return true;
            }
        }
    }
}