xref: /plugin/dw2pdf/vendor/setasign/fpdi/src/PdfParser/CrossReference/FixedReader.php (revision dc4d9dc689082c963d5c1d9ee679553326788c6e)
1*dc4d9dc6SAnna Dabrowska<?php
2*dc4d9dc6SAnna Dabrowska/**
3*dc4d9dc6SAnna Dabrowska * This file is part of FPDI
4*dc4d9dc6SAnna Dabrowska *
5*dc4d9dc6SAnna Dabrowska * @package   setasign\Fpdi
6*dc4d9dc6SAnna Dabrowska * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
7*dc4d9dc6SAnna Dabrowska * @license   http://opensource.org/licenses/mit-license The MIT License
8*dc4d9dc6SAnna Dabrowska */
9*dc4d9dc6SAnna Dabrowska
10*dc4d9dc6SAnna Dabrowskanamespace setasign\Fpdi\PdfParser\CrossReference;
11*dc4d9dc6SAnna Dabrowska
12*dc4d9dc6SAnna Dabrowskause setasign\Fpdi\PdfParser\PdfParser;
13*dc4d9dc6SAnna Dabrowskause setasign\Fpdi\PdfParser\StreamReader;
14*dc4d9dc6SAnna Dabrowska
/**
 * Class FixedReader
 *
 * This reader parses single entries of a cross-reference table with very little overhead: only the subsection
 * headers are read up front, while the individual entries are read on demand instead of in a single run.
 *
 * This only works if every entry is exactly 20 bytes long, which is verified once while reading the subsections.
 *
 * @package setasign\Fpdi\PdfParser\CrossReference
 */
class FixedReader extends AbstractReader implements ReaderInterface
{
    /**
     * @var StreamReader
     */
    protected $reader;

    /**
     * Data of subsections.
     *
     * The key is the position of the first entry of a subsection, the value is a list of the form
     * [first object number, entry count].
     *
     * @var array
     */
    protected $subSections;

    /**
     * FixedReader constructor.
     *
     * The subsections are read before the parent constructor is invoked.
     *
     * @param PdfParser $parser
     * @throws CrossReferenceException
     */
    public function __construct(PdfParser $parser)
    {
        $this->reader = $parser->getStreamReader();
        $this->read();
        parent::__construct($parser);
    }

    /**
     * Get all subsection data.
     *
     * @return array Map of entry start position => [first object number, entry count].
     */
    public function getSubSections()
    {
        return $this->subSections;
    }

    /**
     * @inheritdoc
     *
     * Returns false if the object number is not covered by any subsection, if its entry is flagged as free ("f")
     * or if the entry cannot be read completely.
     */
    public function getOffsetFor($objectNumber)
    {
        foreach ($this->subSections as $offset => list($startObject, $objectCount)) {
            if ($objectNumber < $startObject || $objectNumber >= ($startObject + $objectCount)) {
                continue;
            }

            // every entry has a fixed length of 20 bytes, so the entry position can be calculated directly
            $position = $offset + 20 * ($objectNumber - $startObject);
            $this->reader->ensure($position, 20);
            $line = $this->reader->readBytes(20);

            // a truncated table cannot deliver a whole entry: do not index into an incomplete result
            if (!\is_string($line) || \strlen($line) < 20) {
                return false;
            }

            // byte 17 holds the entry type
            if ($line[17] === 'f') {
                return false;
            }

            // the first 10 bytes hold the zero-padded byte offset
            return (int) \substr($line, 0, 10);
        }

        return false;
    }

    /**
     * Read the cross-reference.
     *
     * This reader will only read the subsection headers in this method. The offsets of individual objects are
     * resolved later, on demand, by use of this information.
     *
     * @throws CrossReferenceException If the entries are not exactly 20 bytes long or if no subsection was found.
     */
    protected function read()
    {
        $subSections = [];

        $startObject = $entryCount = $lastLineStart = null;
        $validityChecked = false;
        while (($line = $this->reader->readLine(20)) !== false) {
            if (\strpos($line, 'trailer') !== false) {
                $this->reader->reset($lastLineStart);
                break;
            }

            // jump over if line content doesn't match the expected string
            if (\sscanf($line, '%d %d', $startObject, $entryCount) !== 2) {
                continue;
            }

            /* "%d" also accepts negative numbers. A negative entry count would move the reader backwards below
             * and could let this loop process the same header again and again, so such a line is handled like
             * any other line that doesn't match.
             */
            if ($startObject < 0 || $entryCount < 0) {
                continue;
            }

            $oldPosition = $this->reader->getPosition();
            // NOTE(review): presumably the position directly behind the header line - confirm in StreamReader
            $position = $oldPosition + $this->reader->getOffset();

            // the entry length is only verified once, with the first subsection that has entries
            if (!$validityChecked && $entryCount > 0) {
                $nextLine = $this->reader->readBytes(21);
                /* Check the next line for maximum of 20 bytes and not longer
                 * By catching 21 bytes and trimming the length should be still 21.
                 */
                if (\strlen(\trim($nextLine)) !== 21) {
                    throw new CrossReferenceException(
                        'Cross-reference entries are larger than 20 bytes.',
                        CrossReferenceException::ENTRIES_TOO_LARGE
                    );
                }

                /* Check for less than 20 bytes: cut the line to 20 bytes and trim; have to result in exactly 18 bytes.
                 * If it would have less bytes the substring would get the first bytes of the next line which would
                 * evaluate to a 20 bytes long string after trimming.
                 */
                if (\strlen(\trim(\substr($nextLine, 0, 20))) !== 18) {
                    throw new CrossReferenceException(
                        'Cross-reference entries are less than 20 bytes.',
                        CrossReferenceException::ENTRIES_TOO_SHORT
                    );
                }

                $validityChecked = true;
            }

            $subSections[$position] = [$startObject, $entryCount];

            // skip all entries of this subsection; the next line is another subsection header or the trailer
            $lastLineStart = $position + $entryCount * 20;
            $this->reader->reset($lastLineStart);
        }

        // reset after the last correct parsed line
        $this->reader->reset($lastLineStart);

        if (\count($subSections) === 0) {
            throw new CrossReferenceException(
                'No entries found in cross-reference.',
                CrossReferenceException::NO_ENTRIES
            );
        }

        $this->subSections = $subSections;
    }

    /**
     * Fixes an invalid object number shift.
     *
     * This method can be used to repair documents with an invalid subsection header, where the header says that
     * the subsection starts with object 1 while its first entry is a free ("f") entry:
     *
     * <code>
     * xref
     * 1 7
     * 0000000000 65535 f
     * 0000000009 00000 n
     * 0000412075 00000 n
     * 0000412172 00000 n
     * 0000412359 00000 n
     * 0000412417 00000 n
     * 0000412468 00000 n
     * </code>
     *
     * If there is exactly one subsection which starts with object 1 and no offset can be resolved for object 1,
     * the start object number of the subsection is decreased by one.
     *
     * It shall only be called on the first table.
     *
     * @return bool True if the subsection was shifted, otherwise false.
     */
    public function fixFaultySubSectionShift()
    {
        $subSections = $this->getSubSections();
        if (\count($subSections) > 1) {
            return false;
        }

        $subSection = \current($subSections);
        if ($subSection[0] != 1) {
            return false;
        }

        // an offset can be resolved for object 1, so there is nothing to repair
        if ($this->getOffsetFor(1) !== false) {
            return false;
        }

        foreach ($subSections as $offset => list($startObject, $objectCount)) {
            $this->subSections[$offset] = [$startObject - 1, $objectCount];
        }

        return true;
    }
}
197