1<?php
2/**
3 * This file is part of FPDI
4 *
5 * @package   setasign\Fpdi
6 * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
7 * @license   http://opensource.org/licenses/mit-license The MIT License
8 */
9
10namespace setasign\Fpdi\PdfParser\CrossReference;
11
12use setasign\Fpdi\PdfParser\PdfParser;
13use setasign\Fpdi\PdfParser\StreamReader;
14
15/**
16 * Class FixedReader
17 *
 * This reader parses single entries of the cross-reference with very little overhead, because the entries
 * are only read when they are needed and not all in a single run.
20 *
21 * @package setasign\Fpdi\PdfParser\CrossReference
22 */
class FixedReader extends AbstractReader implements ReaderInterface
{
    /**
     * The stream reader obtained from the parser passed to the constructor.
     *
     * @var StreamReader
     */
    protected $reader;

    /**
     * Data of subsections.
     *
     * The key is the reader position (getPosition() + getOffset()) directly behind the subsection header line,
     * i.e. where the first entry of the subsection is expected. The value is a two element array:
     * [first object number, number of entries].
     *
     * @var array
     */
    protected $subSections;

    /**
     * FixedReader constructor.
     *
     * The subsection headers are read first; the parent constructor is invoked afterwards, when the stream reader
     * has been reset behind the last parsed subsection.
     *
     * @param PdfParser $parser
     * @throws CrossReferenceException
     */
    public function __construct(PdfParser $parser)
    {
        $this->reader = $parser->getStreamReader();
        $this->read();
        parent::__construct($parser);
    }

    /**
     * Get all subsection data.
     *
     * @return array Position of the first entry => [first object number, number of entries]
     */
    public function getSubSections()
    {
        return $this->subSections;
    }

    /**
     * @inheritdoc
     *
     * The entry is located by arithmetic only: every entry is expected to be exactly 20 bytes long, so the entry of
     * an object is found at "subsection position + 20 * (object number - first object number)".
     *
     * False is returned if the object number is not covered by any subsection, if the entry is marked as free ("f")
     * or if the entry could not be read completely.
     */
    public function getOffsetFor($objectNumber)
    {
        foreach ($this->subSections as $offset => list($startObject, $objectCount)) {
            if ($objectNumber >= $startObject && $objectNumber < ($startObject + $objectCount)) {
                $position = $offset + 20 * ($objectNumber - $startObject);
                $this->reader->ensure($position, 20);
                $line = $this->reader->readBytes(20);

                // A truncated or damaged table may not deliver a complete entry. Without this guard the access to
                // index 17 would raise a notice/warning and a bogus offset of 0 would be returned.
                if (!\is_string($line) || \strlen($line) < 18) {
                    return false;
                }

                // Layout of an entry: 10 digits offset, space, 5 digits generation, space, type keyword at index 17.
                if ($line[17] === 'f') {
                    return false;
                }

                return (int) \substr($line, 0, 10);
            }
        }

        return false;
    }

    /**
     * Read the cross-reference.
     *
     * This method only reads the subsection headers ("<first object number> <entry count>"). The offsets of the
     * individual objects are resolved on demand in getOffsetFor() by using this information.
     *
     * The length of the first entry found is verified once: it has to be exactly 20 bytes long, otherwise the
     * position arithmetic of this reader would not work.
     *
     * @throws CrossReferenceException If an entry is not exactly 20 bytes long or if no subsection was found.
     */
    protected function read()
    {
        $subSections = [];

        // $lastLineStart stays null until the first subsection was parsed; it is passed to reset() as it is.
        $startObject = $entryCount = $lastLineStart = null;
        $validityChecked = false;
        while (($line = $this->reader->readLine(20)) !== false) {
            if (\strpos($line, 'trailer') !== false) {
                // go back behind the last parsed subsection, so that the trailer keyword can be read again
                $this->reader->reset($lastLineStart);
                break;
            }

            // jump over if line content doesn't match the expected string
            if (\sscanf($line, '%d %d', $startObject, $entryCount) !== 2) {
                continue;
            }

            // sscanf() accepts signed numbers: a negative count would move the reader backwards (see the
            // calculation of $lastLineStart below) and could let it re-read the same header endlessly.
            if ($entryCount < 0) {
                continue;
            }

            $oldPosition = $this->reader->getPosition();
            $position = $oldPosition + $this->reader->getOffset();

            if (!$validityChecked && $entryCount > 0) {
                $nextLine = $this->reader->readBytes(21);
                /* Check the next line for maximum of 20 bytes and not longer
                 * By catching 21 bytes and trimming the length should be still 21.
                 */
                if (\strlen(\trim($nextLine)) !== 21) {
                    throw new CrossReferenceException(
                        'Cross-reference entries are larger than 20 bytes.',
                        CrossReferenceException::ENTRIES_TOO_LARGE
                    );
                }

                /* Check for less than 20 bytes: cut the line to 20 bytes and trim; have to result in exactly 18 bytes.
                 * If it would have less bytes the substring would get the first bytes of the next line which would
                 * evaluate to a 20 bytes long string after trimming.
                 */
                if (\strlen(\trim(\substr($nextLine, 0, 20))) !== 18) {
                    throw new CrossReferenceException(
                        'Cross-reference entries are less than 20 bytes.',
                        CrossReferenceException::ENTRIES_TOO_SHORT
                    );
                }

                $validityChecked = true;
            }

            $subSections[$position] = [$startObject, $entryCount];

            // skip all entries of this subsection; the next line is expected to be a header or the trailer
            $lastLineStart = $position + $entryCount * 20;
            $this->reader->reset($lastLineStart);
        }

        // reset after the last correct parsed line
        $this->reader->reset($lastLineStart);

        if (\count($subSections) === 0) {
            throw new CrossReferenceException(
                'No entries found in cross-reference.',
                CrossReferenceException::NO_ENTRIES
            );
        }

        $this->subSections = $subSections;
    }

    /**
     * Fixes an invalid object number shift.
     *
     * This method can be used to repair documents with an invalid subsection header:
     *
     * <code>
     * xref
     * 1 7
     * 0000000000 65535 f
     * 0000000009 00000 n
     * 0000412075 00000 n
     * 0000412172 00000 n
     * 0000412359 00000 n
     * 0000412417 00000 n
     * 0000412468 00000 n
     * </code>
     *
     * The fix is only applied if there is exactly one subsection, it starts at object number 1 and no offset can
     * be resolved for object 1 (as for the free entry in the example above). The first object number of the
     * subsection is then decreased by one.
     *
     * It shall only be called on the first table.
     *
     * @return bool True if the subsection was shifted, false otherwise.
     */
    public function fixFaultySubSectionShift()
    {
        $subSections = $this->getSubSections();
        if (\count($subSections) !== 1) {
            return false;
        }

        // reset() instead of current(): the result shall not depend on the internal pointer of the array
        list($startObject, $objectCount) = \reset($subSections);
        if ($startObject !== 1) {
            return false;
        }

        if ($this->getOffsetFor(1) !== false) {
            return false;
        }

        $offset = \key($subSections);
        $this->subSections[$offset] = [$startObject - 1, $objectCount];

        return true;
    }
}
197