1<?php
2/**
3 * This file is part of FPDI
4 *
5 * @package   setasign\Fpdi
6 * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
7 * @license   http://opensource.org/licenses/mit-license The MIT License
8 */
9
10namespace setasign\Fpdi\PdfParser\Type;
11
12use setasign\Fpdi\PdfParser\CrossReference\CrossReferenceException;
13use setasign\Fpdi\PdfParser\Filter\Ascii85;
14use setasign\Fpdi\PdfParser\Filter\AsciiHex;
15use setasign\Fpdi\PdfParser\Filter\FilterException;
16use setasign\Fpdi\PdfParser\Filter\Flate;
17use setasign\Fpdi\PdfParser\Filter\Lzw;
18use setasign\Fpdi\PdfParser\PdfParser;
19use setasign\Fpdi\PdfParser\PdfParserException;
20use setasign\Fpdi\PdfParser\StreamReader;
21use setasign\FpdiPdfParser\PdfParser\Filter\Predictor;
22
23/**
24 * Class representing a PDF stream object
25 *
26 * @package setasign\Fpdi\PdfParser\Type
27 */
class PdfStream extends PdfType
{
    /**
     * Parses a stream from a stream reader.
     *
     * Starting at the reader's current offset, the first end-of-line marker is searched and skipped (a single
     * LINE FEED, a single CARRIAGE RETURN or a CARRIAGE RETURN followed by a LINE FEED). The stream data itself
     * is not read here: only the absolute byte position of its first byte is remembered, the data is read
     * lazily by getStream().
     *
     * @param PdfDictionary $dictionary The stream dictionary.
     * @param StreamReader $reader The reader from which the stream data will be read later.
     * @param PdfParser $parser Optional to keep backwards compatibility
     * @return self
     * @throws PdfTypeException If no end-of-line marker can be found from the current offset on.
     */
    public static function parse(PdfDictionary $dictionary, StreamReader $reader, PdfParser $parser = null)
    {
        $v = new self;
        $v->value = $dictionary;
        $v->reader = $reader;
        $v->parser = $parser;

        $offset = $reader->getOffset();

        // Find the first "newline"
        while (($firstByte = $reader->getByte($offset)) !== false) {
            if ($firstByte !== "\n" && $firstByte !== "\r") {
                $offset++;
            } else {
                break;
            }
        }

        // getByte() returned false before any newline byte was seen
        if (false === $firstByte) {
            throw new PdfTypeException(
                'Unable to parse stream data. No newline after the stream keyword found.',
                PdfTypeException::NO_NEWLINE_AFTER_STREAM_KEYWORD
            );
        }

        // Look at the byte following the newline before moving on, to detect a "\r\n" pair
        $sndByte = $reader->getByte($offset + 1);
        if ($firstByte === "\n" || $firstByte === "\r") {
            $offset++;
        }

        // Skip the LINE FEED of a "\r\n" pair, too
        if ($sndByte === "\n" && $firstByte !== "\n") {
            $offset++;
        }

        $reader->setOffset($offset);
        // let's only save the byte-offset and read the stream only when needed
        $v->stream = $reader->getPosition() + $reader->getOffset();

        return $v;
    }

    /**
     * Helper method to create an instance.
     *
     * The instance holds the stream data directly (as a string), so neither a reader nor a parser is attached.
     *
     * @param PdfDictionary $dictionary The stream dictionary.
     * @param string $stream The (possibly still filtered) stream data.
     * @return self
     */
    public static function create(PdfDictionary $dictionary, $stream)
    {
        $v = new self;
        $v->value = $dictionary;
        $v->stream = (string) $stream;

        return $v;
    }

    /**
     * Ensures that the passed value is a PdfStream instance.
     *
     * @param mixed $stream
     * @return self
     * @throws PdfTypeException
     */
    public static function ensure($stream)
    {
        return PdfType::ensureType(self::class, $stream, 'Stream value expected.');
    }

    /**
     * The stream or its byte-offset position.
     *
     * An integer is the byte position recorded by parse(); a string is the stream data itself.
     *
     * @var int|string
     */
    protected $stream;

    /**
     * The stream reader instance.
     *
     * Set by parse() and released (set to null) once the stream data was cached by getStream().
     *
     * @var StreamReader
     */
    protected $reader;

    /**
     * The PDF parser instance.
     *
     * Only set when a parser was passed to parse(); used to resolve indirect references in the stream
     * dictionary and to verify the "endstream" keyword.
     *
     * @var PdfParser
     */
    protected $parser;

    /**
     * Get the stream data.
     *
     * If only the byte position is known, the data is read through the reader: by the /Length entry if it is a
     * positive number, otherwise by searching for the "endstream" keyword. If a parser is available, a read by
     * /Length is verified by checking that the next token is "endstream"; on mismatch the keyword search is
     * used instead.
     *
     * @param bool $cache Whether cache the stream data or not.
     * @return bool|string
     * @throws PdfTypeException
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    public function getStream($cache = false)
    {
        if (\is_int($this->stream)) {
            $length = PdfDictionary::get($this->value, 'Length');
            if ($this->parser !== null) {
                $length = PdfType::resolve($length, $this->parser);
            }

            // A missing, non-numeric, zero or negative length cannot be trusted: extract the stream manually.
            if (!($length instanceof PdfNumeric) || $length->value <= 0) {
                $this->reader->reset($this->stream, 100000);
                $buffer = $this->extractStream();
            } else {
                $this->reader->reset($this->stream, $length->value);
                $buffer = $this->reader->getBuffer(false);
                if ($this->parser !== null) {
                    // Verify the length: the next token behind the data has to be "endstream".
                    $this->reader->reset($this->stream + \strlen($buffer));
                    $this->parser->getTokenizer()->clearStack();
                    $token = $this->parser->readValue();
                    if ($token === false || !($token instanceof PdfToken) || $token->value !== 'endstream') {
                        $this->reader->reset($this->stream, 100000);
                        $buffer = $this->extractStream();
                        $this->reader->reset($this->stream + \strlen($buffer));
                    }
                }
            }

            if ($cache === false) {
                return $buffer;
            }

            // Keep the data and release the reader, it is not needed anymore.
            $this->stream = $buffer;
            $this->reader = null;
        }

        return $this->stream;
    }

    /**
     * Extract the stream "manually".
     *
     * The reader's buffer is searched for the "endstream" keyword and enlarged in steps of 100000 bytes until
     * the keyword is found. Everything before the keyword, without a trailing end-of-line marker, is returned.
     *
     * @return string
     * @throws PdfTypeException If the buffer cannot be enlarged any further and no "endstream" was found.
     */
    protected function extractStream()
    {
        while (true) {
            $buffer = $this->reader->getBuffer(false);
            $length = \strpos($buffer, 'endstream');
            if ($length === false) {
                if (!$this->reader->increaseLength(100000)) {
                    throw new PdfTypeException('Cannot extract stream.');
                }
                continue;
            }
            break;
        }

        $buffer = \substr($buffer, 0, $length);
        $lastByte = \substr($buffer, -1);

        /* Check for EOL marker =
         *   CARRIAGE RETURN (\r) and a LINE FEED (\n) or just a LINE FEED (\n),
         *   and not by a CARRIAGE RETURN (\r) alone
         */
        if ($lastByte === "\n") {
            $buffer = \substr($buffer, 0, -1);

            $lastByte = \substr($buffer, -1);
            if ($lastByte === "\r") {
                $buffer = \substr($buffer, 0, -1);
            }
        }

        // There are streams in the wild, which have only white signs in them but need to be parsed manually due
        // to a problem encountered before (e.g. Length === 0). We should set them to empty streams to avoid problems
        // in further processing (e.g. applying of filters).
        if (\trim($buffer) === '') {
            $buffer = '';
        }

        return $buffer;
    }

    /**
     * Resolves a value through the parser if one is available.
     *
     * Without a parser (e.g. for instances built by create()) the value is returned untouched.
     *
     * @param mixed $value
     * @return mixed
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    private function resolveValue($value)
    {
        if ($this->parser === null || !($value instanceof PdfType)) {
            return $value;
        }

        return PdfType::resolve($value, $this->parser);
    }

    /**
     * Get the unfiltered stream data.
     *
     * The filters named in the /Filter entry are applied in order, each with the entry of /DecodeParms at the
     * same index. If a parser is available, indirect references used for /Filter, /DecodeParms, their array
     * entries and the predictor parameters are resolved first. Entries which are not names are skipped.
     *
     * @return string
     * @throws FilterException If a filter is not supported.
     * @throws CrossReferenceException
     * @throws PdfTypeException
     * @throws PdfParserException
     */
    public function getUnfilteredStream()
    {
        $stream = $this->getStream();
        $filters = $this->resolveValue(PdfDictionary::get($this->value, 'Filter'));
        if ($filters instanceof PdfNull) {
            return $stream;
        }

        // A single filter and an array of filters are handled the same way
        if ($filters instanceof PdfArray) {
            $filters = $filters->value;
        } else {
            $filters = [$filters];
        }

        $decodeParams = $this->resolveValue(PdfDictionary::get($this->value, 'DecodeParms'));
        if ($decodeParams instanceof PdfArray) {
            $decodeParams = $decodeParams->value;
        } else {
            $decodeParams = [$decodeParams];
        }

        foreach ($filters as $key => $filter) {
            $filter = $this->resolveValue($filter);
            if (!($filter instanceof PdfName)) {
                continue;
            }

            // Only a dictionary is a usable decode parameter
            $decodeParam = null;
            if (isset($decodeParams[$key])) {
                $decodeParam = $this->resolveValue($decodeParams[$key]);
                if (!($decodeParam instanceof PdfDictionary)) {
                    $decodeParam = null;
                }
            }

            switch ($filter->value) {
                case 'FlateDecode':
                case 'Fl':
                case 'LZWDecode':
                case 'LZW':
                    if (\strpos($filter->value, 'LZW') === 0) {
                        $filterObject = new Lzw();
                    } else {
                        $filterObject = new Flate();
                    }

                    $stream = $filterObject->decode($stream);

                    if ($decodeParam instanceof PdfDictionary) {
                        $predictor = $this->resolveValue(
                            PdfDictionary::get($decodeParam, 'Predictor', PdfNumeric::create(1))
                        );
                        // A predictor of 1 means that no prediction was used
                        if ($predictor->value !== 1) {
                            if (!\class_exists(Predictor::class)) {
                                throw new PdfParserException(
                                    'This PDF document makes use of features which are only implemented in the ' .
                                    'commercial "FPDI PDF-Parser" add-on (see https://www.setasign.com/fpdi-pdf-' .
                                    'parser).',
                                    PdfParserException::IMPLEMENTED_IN_FPDI_PDF_PARSER
                                );
                            }

                            $colors = $this->resolveValue(
                                PdfDictionary::get($decodeParam, 'Colors', PdfNumeric::create(1))
                            );
                            $bitsPerComponent = $this->resolveValue(
                                PdfDictionary::get($decodeParam, 'BitsPerComponent', PdfNumeric::create(8))
                            );
                            $columns = $this->resolveValue(
                                PdfDictionary::get($decodeParam, 'Columns', PdfNumeric::create(1))
                            );

                            $filterObject = new Predictor(
                                $predictor->value,
                                $colors->value,
                                $bitsPerComponent->value,
                                $columns->value
                            );

                            $stream = $filterObject->decode($stream);
                        }
                    }

                    break;
                case 'ASCII85Decode':
                case 'A85':
                    $filterObject = new Ascii85();
                    $stream = $filterObject->decode($stream);
                    break;

                case 'ASCIIHexDecode':
                case 'AHx':
                    $filterObject = new AsciiHex();
                    $stream = $filterObject->decode($stream);
                    break;

                default:
                    throw new FilterException(
                        \sprintf('Unsupported filter "%s".', $filter->value),
                        FilterException::UNSUPPORTED_FILTER
                    );
            }
        }

        return $stream;
    }
}
328