1<?php
2/**
3 * This file is part of FPDI
4 *
5 * @package   setasign\Fpdi
6 * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
7 * @license   http://opensource.org/licenses/mit-license The MIT License
8 */
9
10namespace setasign\Fpdi\PdfParser\CrossReference;
11
12use setasign\Fpdi\PdfParser\PdfParser;
13use setasign\Fpdi\PdfParser\Type\PdfDictionary;
14use setasign\Fpdi\PdfParser\Type\PdfIndirectObject;
15use setasign\Fpdi\PdfParser\Type\PdfNumeric;
16use setasign\Fpdi\PdfParser\Type\PdfStream;
17use setasign\Fpdi\PdfParser\Type\PdfToken;
18use setasign\Fpdi\PdfParser\Type\PdfTypeException;
19
/**
 * Class CrossReference
 *
 * This class processes the standard cross reference of a PDF document.
 *
 * It looks up the "startxref" pointer, creates one reader instance per cross-reference section by
 * following the "Prev" entries of the trailer dictionaries and resolves object numbers to byte
 * offsets through these readers.
 *
 * @package setasign\Fpdi\PdfParser\CrossReference
 */
class CrossReference
{
    /**
     * The byte length in which the "startxref" keyword should be searched.
     *
     * @var int
     */
    public static $trailerSearchLength = 5500;

    /**
     * An offset which is added to the byte offsets read from the cross-reference.
     *
     * It is reset to zero by the constructor if a cross-reference cannot be read with it.
     *
     * @var int
     */
    protected $fileHeaderOffset = 0;

    /**
     * The parser instance which is used to read values from the document.
     *
     * @var PdfParser
     */
    protected $parser;

    /**
     * The reader instances in the order they were read (the one referenced by "startxref" first).
     *
     * @var ReaderInterface[]
     */
    protected $readers = [];

    /**
     * CrossReference constructor.
     *
     * Reads the cross-reference which is referenced by the "startxref" keyword and all
     * cross-references that are chained to it through the "Prev" entry of their trailers.
     *
     * @param PdfParser $parser
     * @param int $fileHeaderOffset An offset which is added to all byte offsets.
     * @throws CrossReferenceException
     * @throws PdfTypeException
     */
    public function __construct(PdfParser $parser, $fileHeaderOffset = 0)
    {
        $this->parser = $parser;
        $this->fileHeaderOffset = $fileHeaderOffset;

        $offset = $this->findStartXref();
        $reader = null;
        // Offsets that were already processed. A faulty document may chain its "Prev" entries in a
        // circle, which would otherwise result in an endless loop.
        $visitedOffsets = [];

        /** @noinspection TypeUnsafeComparisonInspection */
        while ($offset != false) { // By doing an unsafe comparison we ignore faulty references to byte offset 0
            if (\in_array($offset, $visitedOffsets, true)) {
                // This cross-reference was already read: stop following the chain.
                break;
            }
            $visitedOffsets[] = $offset;

            try {
                $reader = $this->readXref($offset + $this->fileHeaderOffset);
            } catch (CrossReferenceException $e) {
                // sometimes the file header offset is part of the byte offsets, so let's retry by resetting it to zero.
                if ($e->getCode() === CrossReferenceException::INVALID_DATA && $this->fileHeaderOffset !== 0) {
                    $this->fileHeaderOffset = 0;
                    $reader = $this->readXref($offset + $this->fileHeaderOffset);
                } else {
                    throw $e;
                }
            }

            $trailer = $reader->getTrailer();
            $this->checkForEncryption($trailer);
            $this->readers[] = $reader;

            if (isset($trailer->value['Prev'])) {
                $offset = $trailer->value['Prev']->value;
            } else {
                $offset = false;
            }
        }

        // The loop was never entered (e.g. "startxref" points to byte offset 0).
        if ($reader === null) {
            throw new CrossReferenceException('No cross-reference found.', CrossReferenceException::NO_XREF_FOUND);
        }

        // fix faulty sub-section header (only done for the reader that was read last)
        if ($reader instanceof FixedReader) {
            /**
             * @var FixedReader $reader
             */
            $reader->fixFaultySubSectionShift();
        }
    }

    /**
     * Get the size of the cross reference.
     *
     * The value is taken from the "Size" entry of the trailer dictionary.
     *
     * @return integer
     */
    public function getSize()
    {
        return $this->getTrailer()->value['Size']->value;
    }

    /**
     * Get the trailer dictionary.
     *
     * This is the trailer of the first reader, i.e. of the cross-reference referenced by "startxref".
     *
     * @return PdfDictionary
     */
    public function getTrailer()
    {
        return $this->readers[0]->getTrailer();
    }

    /**
     * Get the cross reference reader instances.
     *
     * @return ReaderInterface[]
     */
    public function getReaders()
    {
        return $this->readers;
    }

    /**
     * Get the offset by an object number.
     *
     * The readers are asked in the order they were read and the first offset found is returned.
     *
     * @param int $objectNumber
     * @return integer|bool The offset or false if no reader knows the object number.
     */
    public function getOffsetFor($objectNumber)
    {
        foreach ($this->getReaders() as $reader) {
            $offset = $reader->getOffsetFor($objectNumber);
            if ($offset !== false) {
                return $offset;
            }
        }

        return false;
    }

    /**
     * Get an indirect object by its object number.
     *
     * @param int $objectNumber
     * @return PdfIndirectObject
     * @throws CrossReferenceException If no offset is known for the object number, if no indirect object can be
     *                                 read at the offset or if the object read has a different object number.
     */
    public function getIndirectObject($objectNumber)
    {
        $offset = $this->getOffsetFor($objectNumber);
        if ($offset === false) {
            throw new CrossReferenceException(
                \sprintf('Object (id:%s) not found.', $objectNumber),
                CrossReferenceException::OBJECT_NOT_FOUND
            );
        }

        $parser = $this->parser;

        // Drop already tokenized data and move the stream reader to the object's position.
        $parser->getTokenizer()->clearStack();
        $parser->getStreamReader()->reset($offset + $this->fileHeaderOffset);

        try {
            /** @var PdfIndirectObject $object */
            $object = $parser->readValue(null, PdfIndirectObject::class);
        } catch (PdfTypeException $e) {
            throw new CrossReferenceException(
                \sprintf('Object (id:%s) not found at location (%s).', $objectNumber, $offset),
                CrossReferenceException::OBJECT_NOT_FOUND,
                $e
            );
        }

        // The comparison is type-safe, so the object number has to be passed as an integer.
        if ($object->objectNumber !== $objectNumber) {
            throw new CrossReferenceException(
                \sprintf('Wrong object found, got %s while %s was expected.', $object->objectNumber, $objectNumber),
                CrossReferenceException::OBJECT_NOT_FOUND
            );
        }

        return $object;
    }

    /**
     * Read the cross-reference table at a given offset.
     *
     * Internally the method will try to evaluate the best reader for this cross-reference.
     *
     * @param int $offset
     * @return ReaderInterface
     * @throws CrossReferenceException
     * @throws PdfTypeException
     */
    protected function readXref($offset)
    {
        $this->parser->getStreamReader()->reset($offset);
        $this->parser->getTokenizer()->clearStack();
        // The first value decides which kind of reader is created.
        $initValue = $this->parser->readValue();

        return $this->initReaderInstance($initValue);
    }

    /**
     * Get a cross-reference reader instance.
     *
     * An "xref" token results in a FixedReader instance or, if that fails, in a LineReader instance.
     * An indirect object always results in an exception because cross-reference streams are not supported.
     *
     * @param PdfToken|PdfIndirectObject $initValue
     * @return ReaderInterface
     * @throws CrossReferenceException
     * @throws PdfTypeException
     */
    protected function initReaderInstance($initValue)
    {
        // Remember the position so that the fallback reader can start over from it.
        $position = $this->parser->getStreamReader()->getPosition()
            + $this->parser->getStreamReader()->getOffset() + $this->fileHeaderOffset;

        if ($initValue instanceof PdfToken && $initValue->value === 'xref') {
            try {
                return new FixedReader($this->parser);
            } catch (CrossReferenceException $e) {
                // The fixed reader failed: rewind and retry with the line based reader.
                $this->parser->getStreamReader()->reset($position);
                $this->parser->getTokenizer()->clearStack();

                return new LineReader($this->parser);
            }
        }

        if ($initValue instanceof PdfIndirectObject) {
            try {
                $stream = PdfStream::ensure($initValue->value);
            } catch (PdfTypeException $e) {
                throw new CrossReferenceException(
                    'Invalid object type at xref reference offset.',
                    CrossReferenceException::INVALID_DATA,
                    $e
                );
            }

            $type = PdfDictionary::get($stream->value, 'Type');
            if ($type->value !== 'XRef') {
                throw new CrossReferenceException(
                    'The xref position points to an incorrect object type.',
                    CrossReferenceException::INVALID_DATA
                );
            }

            // An encrypted document is reported as such before the unsupported compression is reported.
            $this->checkForEncryption($stream->value);

            throw new CrossReferenceException(
                'This PDF document probably uses a compression technique which is not supported by the ' .
                'free parser shipped with FPDI. (See https://www.setasign.com/fpdi-pdf-parser for more details)',
                CrossReferenceException::COMPRESSED_XREF
            );
        }

        throw new CrossReferenceException(
            'The xref position points to an incorrect object type.',
            CrossReferenceException::INVALID_DATA
        );
    }

    /**
     * Check for encryption.
     *
     * @param PdfDictionary $dictionary
     * @throws CrossReferenceException If the dictionary has an "Encrypt" entry.
     */
    protected function checkForEncryption(PdfDictionary $dictionary)
    {
        if (isset($dictionary->value['Encrypt'])) {
            throw new CrossReferenceException(
                'This PDF document is encrypted and cannot be processed with FPDI.',
                CrossReferenceException::ENCRYPTED
            );
        }
    }

    /**
     * Find the start position for the first cross-reference.
     *
     * The last occurrence of the "startxref" keyword (or of "startref" as used by some corrupted
     * documents) is searched in a buffer of self::$trailerSearchLength bytes and the numeric value
     * following it is returned.
     *
     * @return int The byte-offset position of the first cross-reference.
     * @throws CrossReferenceException If no keyword is found or if it is not followed by a numeric value.
     */
    protected function findStartXref()
    {
        $reader = $this->parser->getStreamReader();
        // The negative position is presumably resolved relative to the end of the stream.
        $reader->reset(-self::$trailerSearchLength, self::$trailerSearchLength);

        $buffer = $reader->getBuffer(false);
        $pos = \strrpos($buffer, 'startxref');
        $addOffset = 9; // length of "startxref"
        if ($pos === false) {
            // Some corrupted documents use startref, instead of startxref
            $pos = \strrpos($buffer, 'startref');
            if ($pos === false) {
                throw new CrossReferenceException(
                    'Unable to find pointer to xref table',
                    CrossReferenceException::NO_STARTXREF_FOUND
                );
            }
            $addOffset = 8; // length of "startref"
        }

        // Continue reading directly behind the keyword.
        $reader->setOffset($pos + $addOffset);

        try {
            $value = $this->parser->readValue(null, PdfNumeric::class);
        } catch (PdfTypeException $e) {
            throw new CrossReferenceException(
                'Invalid data after startxref keyword.',
                CrossReferenceException::INVALID_DATA,
                $e
            );
        }

        return $value->value;
    }
}
329