<?php

/**
 * This file is part of FPDI
 *
 * @package   setasign\Fpdi
 * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
 * @license   http://opensource.org/licenses/mit-license The MIT License
 */

namespace setasign\Fpdi\PdfParser\Type;

use setasign\Fpdi\PdfParser\CrossReference\CrossReferenceException;
use setasign\Fpdi\PdfParser\Filter\Ascii85;
use setasign\Fpdi\PdfParser\Filter\AsciiHex;
use setasign\Fpdi\PdfParser\Filter\FilterException;
use setasign\Fpdi\PdfParser\Filter\Flate;
use setasign\Fpdi\PdfParser\Filter\Lzw;
use setasign\Fpdi\PdfParser\PdfParser;
use setasign\Fpdi\PdfParser\PdfParserException;
use setasign\Fpdi\PdfParser\StreamReader;
use setasign\FpdiPdfParser\PdfParser\Filter\Predictor;

/**
 * Class representing a PDF stream object
 *
 * The stream dictionary is kept in the value property. The stream data itself is either held as a string or, for
 * parsed streams, as a byte-offset which is turned into data on demand by getStream().
 *
 * @package setasign\Fpdi\PdfParser\Type
 */
class PdfStream extends PdfType
{
    /**
     * Parses a stream from a stream reader.
     *
     * Only the byte-offset of the first data byte is recorded; the data is read later by getStream().
     *
     * @param PdfDictionary $dictionary The stream dictionary.
     * @param StreamReader $reader The reader, expected to be located behind the "stream" keyword.
     * @param PdfParser|null $parser Optional to keep backwards compatibility
     * @return self
     * @throws PdfTypeException If no newline follows the stream keyword.
     */
    public static function parse(PdfDictionary $dictionary, StreamReader $reader, PdfParser $parser = null)
    {
        $v = new self();
        $v->value = $dictionary;
        $v->reader = $reader;
        $v->parser = $parser;

        $offset = $reader->getOffset();

        // Find the first "newline"
        while (($firstByte = $reader->getByte($offset)) !== false) {
            if ($firstByte === "\n" || $firstByte === "\r") {
                break;
            }

            $offset++;
        }

        if ($firstByte === false) {
            throw new PdfTypeException(
                'Unable to parse stream data. No newline after the stream keyword found.',
                PdfTypeException::NO_NEWLINE_AFTER_STREAM_KEYWORD
            );
        }

        // Step over the end-of-line byte that was found ...
        $offset++;

        // ... and over the LINE FEED of a CARRIAGE RETURN + LINE FEED pair.
        $sndByte = $reader->getByte($offset);
        if ($firstByte === "\r" && $sndByte === "\n") {
            $offset++;
        }

        $reader->setOffset($offset);
        // let's only save the byte-offset and read the stream only when needed
        $v->stream = $reader->getPosition() + $reader->getOffset();

        return $v;
    }

    /**
     * Helper method to create an instance.
     *
     * @param PdfDictionary $dictionary The stream dictionary.
     * @param string $stream The stream data (cast to a string).
     * @return self
     */
    public static function create(PdfDictionary $dictionary, $stream)
    {
        $v = new self();
        $v->value = $dictionary;
        $v->stream = (string) $stream;

        return $v;
    }

    /**
     * Ensures that the passed value is a PdfStream instance.
     *
     * @param mixed $stream
     * @return self
     * @throws PdfTypeException
     */
    public static function ensure($stream)
    {
        return PdfType::ensureType(self::class, $stream, 'Stream value expected.');
    }

    /**
     * The stream or its byte-offset position.
     *
     * An integer is a byte-offset that still has to be read, a string is the stream data itself.
     *
     * @var int|string
     */
    protected $stream;

    /**
     * The stream reader instance.
     *
     * Released (set to null) by getStream() once the stream data is cached.
     *
     * @var StreamReader|null
     */
    protected $reader;

    /**
     * The PDF parser instance.
     *
     * @var PdfParser|null
     */
    protected $parser;

    /**
     * Get the stream data.
     *
     * If only a byte-offset is known, the data is read by using the "Length" entry of the stream dictionary. If that
     * entry is unusable, or if the data read by it is not followed by an "endstream" token, the data is extracted
     * manually by searching for the "endstream" keyword.
     *
     * @param bool $cache Whether cache the stream data or not.
     * @return string
     * @throws PdfTypeException
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    public function getStream($cache = false)
    {
        if (!\is_int($this->stream)) {
            return $this->stream;
        }

        $length = PdfDictionary::get($this->value, 'Length');
        if ($this->parser !== null) {
            // the length may be given as an indirect reference
            $length = PdfType::resolve($length, $this->parser);
        }

        // A missing, non-numeric or non-positive length cannot be used as a read length.
        if (!($length instanceof PdfNumeric) || $length->value <= 0) {
            $this->reader->reset($this->stream, 100000);
            $buffer = $this->extractStream();
        } else {
            $this->reader->reset($this->stream, $length->value);
            $buffer = $this->reader->getBuffer(false);
            if ($this->parser !== null) {
                // Verify the length: the next token behind the data has to be "endstream".
                $this->reader->reset($this->stream + \strlen($buffer));
                $this->parser->getTokenizer()->clearStack();
                $token = $this->parser->readValue();
                if ($token === false || !($token instanceof PdfToken) || $token->value !== 'endstream') {
                    $this->reader->reset($this->stream, 100000);
                    $buffer = $this->extractStream();
                    $this->reader->reset($this->stream + \strlen($buffer));
                }
            }
        }

        if ($cache === false) {
            return $buffer;
        }

        // keep the data and release the reader, which is not needed anymore
        $this->stream = $buffer;
        $this->reader = null;

        return $this->stream;
    }

    /**
     * Extract the stream "manually".
     *
     * Searches the reader buffer for the "endstream" keyword and returns everything in front of it, without the
     * end-of-line marker that precedes the keyword.
     *
     * @return string
     * @throws PdfTypeException If the buffer cannot be increased anymore before the keyword was found.
     */
    protected function extractStream()
    {
        $searchFrom = 0;

        while (true) {
            $buffer = $this->reader->getBuffer(false);
            $bufferLength = \strlen($buffer);

            $length = \strpos($buffer, 'endstream', \min($searchFrom, $bufferLength));
            if ($length !== false) {
                break;
            }

            // Bytes which were searched already do not need to be scanned again. Only the last 8 bytes are kept in
            // the search window, because the 9 bytes long keyword may be split between this and the next chunk.
            // (This relies on increaseLength() appending to the buffer.)
            $searchFrom = \max(0, $bufferLength - 8);

            if (!$this->reader->increaseLength(100000)) {
                throw new PdfTypeException('Cannot extract stream.');
            }
        }

        $buffer = \substr($buffer, 0, $length);
        $lastByte = \substr($buffer, -1);

        /* Check for EOL marker =
         *   CARRIAGE RETURN (\r) and a LINE FEED (\n) or just a LINE FEED (\n),
         *   and not by a CARRIAGE RETURN (\r) alone
         */
        if ($lastByte === "\n") {
            $buffer = \substr($buffer, 0, -1);

            $lastByte = \substr($buffer, -1);
            if ($lastByte === "\r") {
                $buffer = \substr($buffer, 0, -1);
            }
        }

        // There are streams in the wild, which have only whitespace in them but need to be parsed manually due
        // to a problem encountered before (e.g. Length === 0). We should set them to empty streams to avoid problems
        // in further processing (e.g. applying of filters).
        if (\trim($buffer) === '') {
            $buffer = '';
        }

        return $buffer;
    }

    /**
     * Get the unfiltered stream data.
     *
     * Applies the filters named in the "Filter" entry of the stream dictionary in their given order. Entries which
     * are not name objects are skipped. The entry of "DecodeParms" at the same index as a filter is used as its
     * parameter dictionary.
     *
     * @return string
     * @throws FilterException If a filter is not supported.
     * @throws PdfParserException If a predictor is required but the class implementing it is not available.
     */
    public function getUnfilteredStream()
    {
        $stream = $this->getStream();
        $filters = PdfDictionary::get($this->value, 'Filter');
        if ($filters instanceof PdfNull) {
            return $stream;
        }

        // a single filter and a list of filters are handled the same way
        if ($filters instanceof PdfArray) {
            $filters = $filters->value;
        } else {
            $filters = [$filters];
        }

        $decodeParams = PdfDictionary::get($this->value, 'DecodeParms');
        if ($decodeParams instanceof PdfArray) {
            $decodeParams = $decodeParams->value;
        } else {
            $decodeParams = [$decodeParams];
        }

        foreach ($filters as $key => $filter) {
            if (!($filter instanceof PdfName)) {
                continue;
            }

            // only a dictionary is accepted as decode parameters of a filter
            $decodeParam = null;
            if (isset($decodeParams[$key]) && $decodeParams[$key] instanceof PdfDictionary) {
                $decodeParam = $decodeParams[$key];
            }

            switch ($filter->value) {
                case 'FlateDecode':
                case 'Fl':
                case 'LZWDecode':
                case 'LZW':
                    if (\strpos($filter->value, 'LZW') === 0) {
                        $filterObject = new Lzw();
                    } else {
                        $filterObject = new Flate();
                    }

                    $stream = $filterObject->decode($stream);

                    if ($decodeParam instanceof PdfDictionary) {
                        $predictor = PdfDictionary::get($decodeParam, 'Predictor', PdfNumeric::create(1));
                        // a predictor value of 1 needs no further processing
                        if ($predictor->value !== 1) {
                            if (!\class_exists(Predictor::class)) {
                                throw new PdfParserException(
                                    'This PDF document makes use of features which are only implemented in the ' .
                                    'commercial "FPDI PDF-Parser" add-on (see https://www.setasign.com/fpdi-pdf-' .
                                    'parser).',
                                    PdfParserException::IMPLEMENTED_IN_FPDI_PDF_PARSER
                                );
                            }

                            $colors = PdfDictionary::get($decodeParam, 'Colors', PdfNumeric::create(1));
                            $bitsPerComponent = PdfDictionary::get(
                                $decodeParam,
                                'BitsPerComponent',
                                PdfNumeric::create(8)
                            );

                            $columns = PdfDictionary::get($decodeParam, 'Columns', PdfNumeric::create(1));

                            $filterObject = new Predictor(
                                $predictor->value,
                                $colors->value,
                                $bitsPerComponent->value,
                                $columns->value
                            );

                            $stream = $filterObject->decode($stream);
                        }
                    }

                    break;

                case 'ASCII85Decode':
                case 'A85':
                    $filterObject = new Ascii85();
                    $stream = $filterObject->decode($stream);
                    break;

                case 'ASCIIHexDecode':
                case 'AHx':
                    $filterObject = new AsciiHex();
                    $stream = $filterObject->decode($stream);
                    break;

                default:
                    throw new FilterException(
                        \sprintf('Unsupported filter "%s".', $filter->value),
                        FilterException::UNSUPPORTED_FILTER
                    );
            }
        }

        return $stream;
    }
}