xref: /plugin/dw2pdf/vendor/setasign/fpdi/src/PdfReader/PdfReader.php (revision dc4d9dc689082c963d5c1d9ee679553326788c6e)
1*dc4d9dc6SAnna Dabrowska<?php
2*dc4d9dc6SAnna Dabrowska/**
3*dc4d9dc6SAnna Dabrowska * This file is part of FPDI
4*dc4d9dc6SAnna Dabrowska *
5*dc4d9dc6SAnna Dabrowska * @package   setasign\Fpdi
6*dc4d9dc6SAnna Dabrowska * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
7*dc4d9dc6SAnna Dabrowska * @license   http://opensource.org/licenses/mit-license The MIT License
8*dc4d9dc6SAnna Dabrowska */
9*dc4d9dc6SAnna Dabrowska
10*dc4d9dc6SAnna Dabrowskanamespace setasign\Fpdi\PdfReader;
11*dc4d9dc6SAnna Dabrowska
12*dc4d9dc6SAnna Dabrowskause setasign\Fpdi\PdfParser\CrossReference\CrossReferenceException;
13*dc4d9dc6SAnna Dabrowskause setasign\Fpdi\PdfParser\PdfParser;
14*dc4d9dc6SAnna Dabrowskause setasign\Fpdi\PdfParser\PdfParserException;
15*dc4d9dc6SAnna Dabrowskause setasign\Fpdi\PdfParser\Type\PdfArray;
16*dc4d9dc6SAnna Dabrowskause setasign\Fpdi\PdfParser\Type\PdfDictionary;
17*dc4d9dc6SAnna Dabrowskause setasign\Fpdi\PdfParser\Type\PdfIndirectObject;
18*dc4d9dc6SAnna Dabrowskause setasign\Fpdi\PdfParser\Type\PdfIndirectObjectReference;
19*dc4d9dc6SAnna Dabrowskause setasign\Fpdi\PdfParser\Type\PdfNumeric;
20*dc4d9dc6SAnna Dabrowskause setasign\Fpdi\PdfParser\Type\PdfType;
21*dc4d9dc6SAnna Dabrowskause setasign\Fpdi\PdfParser\Type\PdfTypeException;
22*dc4d9dc6SAnna Dabrowska
23*dc4d9dc6SAnna Dabrowska/**
24*dc4d9dc6SAnna Dabrowska * A PDF reader class
25*dc4d9dc6SAnna Dabrowska *
26*dc4d9dc6SAnna Dabrowska * @package setasign\Fpdi\PdfReader
27*dc4d9dc6SAnna Dabrowska */
class PdfReader
{
    /**
     * The parser used to access the objects of the document.
     *
     * @var PdfParser
     */
    protected $parser;

    /**
     * Cached page count; stays null until getPageCount() was called once.
     *
     * @var int|null
     */
    protected $pageCount;

    /**
     * Indirect objects of resolved pages.
     *
     * Zero-based list in document order, filled by readPages(). An entry may still be an
     * unresolved reference (readPages() stores references for nodes that look like leaves);
     * getPage() resolves such an entry on first access and stores the result back.
     *
     * @var PdfIndirectObjectReference[]|PdfIndirectObject[]
     */
    protected $pages = [];

    /**
     * PdfReader constructor.
     *
     * @param PdfParser $parser
     */
    public function __construct(PdfParser $parser)
    {
        $this->parser = $parser;
    }

    /**
     * PdfReader destructor.
     *
     * Asks the parser to clean up when the reader goes away.
     */
    public function __destruct()
    {
        if ($this->parser !== null) {
            $this->parser->cleanUp();
        }
    }

    /**
     * Get the pdf parser instance.
     *
     * @return PdfParser
     */
    public function getParser()
    {
        return $this->parser;
    }

    /**
     * Get the PDF version.
     *
     * The version parts returned by the parser are joined with a dot.
     *
     * @return string
     * @throws PdfParserException
     */
    public function getPdfVersion()
    {
        return \implode('.', $this->parser->getPdfVersion());
    }

    /**
     * Get the page count.
     *
     * The value is read from the Count entry of the root page tree node on the first call
     * and cached afterwards.
     *
     * @return int
     * @throws PdfTypeException
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    public function getPageCount()
    {
        if ($this->pageCount === null) {
            $catalog = $this->parser->getCatalog();

            $pages = PdfType::resolve(PdfDictionary::get($catalog, 'Pages'), $this->parser);
            $count = PdfType::resolve(PdfDictionary::get($pages, 'Count'), $this->parser);

            $this->pageCount = PdfNumeric::ensure($count)->value;
        }

        return $this->pageCount;
    }

    /**
     * Get a page instance.
     *
     * @param int $pageNumber One-based page number. Numeric strings and floats are accepted and
     *                        truncated to an integer after the range check.
     * @return Page
     * @throws PdfTypeException
     * @throws CrossReferenceException
     * @throws PdfParserException
     * @throws PdfReaderException If the page tree contains a circular reference.
     * @throws \InvalidArgumentException If the page number is not numeric or out of range.
     */
    public function getPage($pageNumber)
    {
        if (!\is_numeric($pageNumber)) {
            throw new \InvalidArgumentException(
                'Page number needs to be a number.'
            );
        }

        if ($pageNumber < 1 || $pageNumber > $this->getPageCount()) {
            throw new \InvalidArgumentException(
                \sprintf(
                    'Page number "%s" out of available page range (1 - %s)',
                    $pageNumber,
                    $this->getPageCount()
                )
            );
        }

        $this->readPages();

        // Compute the index once as a real integer: a float or fractional numeric string would
        // otherwise be used as an array key directly (implicit truncation, deprecated in PHP 8.1).
        $pageIndex = ((int) $pageNumber) - 1;
        $page = $this->pages[$pageIndex];

        if ($page instanceof PdfIndirectObjectReference) {
            // Object numbers of the /Pages nodes already descended into (cycle protection).
            $visited = [];

            // Descends into the first kid until a non-/Pages object is found and returns it.
            $readPages = function ($kids) use (&$readPages, &$visited) {
                $kids = PdfArray::ensure($kids);

                /** @noinspection LoopWhichDoesNotLoopInspection */
                foreach ($kids->value as $reference) {
                    $reference = PdfIndirectObjectReference::ensure($reference);
                    $object = $this->parser->getIndirectObject($reference->value);
                    $type = PdfDictionary::get($object->value, 'Type');

                    if ($type->value === 'Pages') {
                        if (isset($visited[$reference->value])) {
                            throw new PdfReaderException('Page tree contains a circular reference.');
                        }
                        $visited[$reference->value] = true;

                        // Kids may itself be stored as an indirect reference, so resolve it first.
                        return $readPages(
                            PdfType::resolve(PdfDictionary::get($object->value, 'Kids'), $this->parser)
                        );
                    }

                    return $object;
                }

                throw new PdfReaderException(
                    'Kids array cannot be empty.',
                    PdfReaderException::KIDS_EMPTY
                );
            };

            $visited[$page->value] = true;
            $page = $this->parser->getIndirectObject($page->value);
            $dict = PdfType::resolve($page, $this->parser);
            $type = PdfDictionary::get($dict, 'Type');

            if ($type->value === 'Pages') {
                // The entry assumed to be a page is an intermediate node: use its first page.
                $kids = PdfType::resolve(PdfDictionary::get($dict, 'Kids'), $this->parser);
                try {
                    $page = $this->pages[$pageIndex] = $readPages($kids);
                } catch (PdfReaderException $e) {
                    if ($e->getCode() !== PdfReaderException::KIDS_EMPTY) {
                        throw $e;
                    }

                    // let's reset the pages array and read all page objects
                    $this->pages = [];
                    $this->readPages(true);
                    $page = $this->pages[$pageIndex];
                }
            } else {
                $this->pages[$pageIndex] = $page;
            }
        }

        return new Page($page, $this->parser);
    }

    /**
     * Walk the page tree and collect the pages into the internal pages list.
     *
     * Does nothing if the list is already filled. Unless $readAll is set, the kids of a node
     * whose Count equals its number of kids are stored as unresolved references without being
     * read; getPage() resolves them when they are requested.
     *
     * @param bool $readAll Whether to read every object of the tree instead of storing references.
     * @throws CrossReferenceException
     * @throws PdfParserException
     * @throws PdfTypeException
     * @throws PdfReaderException If the page tree contains a circular reference.
     */
    protected function readPages($readAll = false)
    {
        if (\count($this->pages) > 0) {
            return;
        }

        // Object numbers of the /Pages nodes on the current descent path. Only the path is
        // tracked (not every visited node), so only real cycles are rejected.
        $ancestors = [];

        $readPages = function ($kids, $count) use (&$readPages, &$ancestors, $readAll) {
            $kids = PdfArray::ensure($kids);
            $isLeaf = ($count->value === \count($kids->value));

            foreach ($kids->value as $reference) {
                $reference = PdfIndirectObjectReference::ensure($reference);

                if (!$readAll && $isLeaf) {
                    $this->pages[] = $reference;
                    continue;
                }

                $object = $this->parser->getIndirectObject($reference->value);
                $type = PdfDictionary::get($object->value, 'Type');

                if ($type->value === 'Pages') {
                    if (isset($ancestors[$reference->value])) {
                        throw new PdfReaderException('Page tree contains a circular reference.');
                    }

                    $ancestors[$reference->value] = true;
                    // Kids and Count may be stored as indirect references, exactly like on the
                    // root node below, so both are resolved before they are evaluated.
                    $readPages(
                        PdfType::resolve(PdfDictionary::get($object->value, 'Kids'), $this->parser),
                        PdfType::resolve(PdfDictionary::get($object->value, 'Count'), $this->parser)
                    );
                    unset($ancestors[$reference->value]);
                } else {
                    $this->pages[] = $object;
                }
            }
        };

        $catalog = $this->parser->getCatalog();
        $pages = PdfType::resolve(PdfDictionary::get($catalog, 'Pages'), $this->parser);
        $count = PdfType::resolve(PdfDictionary::get($pages, 'Count'), $this->parser);
        $kids = PdfType::resolve(PdfDictionary::get($pages, 'Kids'), $this->parser);
        $readPages($kids, $count);
    }
}
235