1<?php
2/**
3 * This file is part of FPDI
4 *
5 * @package   setasign\Fpdi
6 * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
7 * @license   http://opensource.org/licenses/mit-license The MIT License
8 */
9
10namespace setasign\Fpdi\PdfReader;
11
12use setasign\Fpdi\PdfParser\CrossReference\CrossReferenceException;
13use setasign\Fpdi\PdfParser\PdfParser;
14use setasign\Fpdi\PdfParser\PdfParserException;
15use setasign\Fpdi\PdfParser\Type\PdfArray;
16use setasign\Fpdi\PdfParser\Type\PdfDictionary;
17use setasign\Fpdi\PdfParser\Type\PdfIndirectObject;
18use setasign\Fpdi\PdfParser\Type\PdfIndirectObjectReference;
19use setasign\Fpdi\PdfParser\Type\PdfNumeric;
20use setasign\Fpdi\PdfParser\Type\PdfType;
21use setasign\Fpdi\PdfParser\Type\PdfTypeException;
22
23/**
24 * A PDF reader class
25 *
26 * @package setasign\Fpdi\PdfReader
27 */
class PdfReader
{
    /**
     * The parser used to read objects from the document.
     *
     * @var PdfParser
     */
    protected $parser;

    /**
     * Cached page count. Stays null until getPageCount() has read it from the document.
     *
     * @var int|null
     */
    protected $pageCount;

    /**
     * Indirect objects of resolved pages.
     *
     * Entries are indexed by zero-based page index. An entry may still be an unresolved
     * reference; getPage() replaces it with the resolved object on first access.
     *
     * @var PdfIndirectObjectReference[]|PdfIndirectObject[]
     */
    protected $pages = [];

    /**
     * PdfReader constructor.
     *
     * @param PdfParser $parser
     */
    public function __construct(PdfParser $parser)
    {
        $this->parser = $parser;
    }

    /**
     * PdfReader destructor.
     *
     * Calls cleanUp() on the parser if one is set.
     */
    public function __destruct()
    {
        if ($this->parser !== null) {
            $this->parser->cleanUp();
        }
    }

    /**
     * Get the pdf parser instance.
     *
     * @return PdfParser
     */
    public function getParser()
    {
        return $this->parser;
    }

    /**
     * Get the PDF version.
     *
     * The version parts returned by the parser are joined with a dot.
     *
     * @return string
     * @throws PdfParserException
     */
    public function getPdfVersion()
    {
        return \implode('.', $this->parser->getPdfVersion());
    }

    /**
     * Get the page count.
     *
     * The value is read from the Count entry of the catalog's Pages dictionary on the
     * first call and cached for subsequent calls.
     *
     * @return int
     * @throws PdfTypeException
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    public function getPageCount()
    {
        if ($this->pageCount === null) {
            $catalog = $this->parser->getCatalog();

            $pages = PdfType::resolve(PdfDictionary::get($catalog, 'Pages'), $this->parser);
            $count = PdfType::resolve(PdfDictionary::get($pages, 'Count'), $this->parser);

            $this->pageCount = PdfNumeric::ensure($count)->value;
        }

        return $this->pageCount;
    }

    /**
     * Get a page instance.
     *
     * @param int $pageNumber One-based page number.
     * @return Page
     * @throws PdfTypeException
     * @throws CrossReferenceException
     * @throws PdfParserException
     * @throws PdfReaderException
     * @throws \InvalidArgumentException If the page number is not numeric or out of range.
     */
    public function getPage($pageNumber)
    {
        if (!\is_numeric($pageNumber)) {
            throw new \InvalidArgumentException(
                'Page number needs to be a number.'
            );
        }

        if ($pageNumber < 1 || $pageNumber > $this->getPageCount()) {
            throw new \InvalidArgumentException(
                \sprintf(
                    'Page number "%s" out of available page range (1 - %s)',
                    $pageNumber,
                    $this->getPageCount()
                )
            );
        }

        $this->readPages();

        // $pageNumber only has to be numeric, so "$pageNumber - 1" may be a float. Cast it
        // explicitly instead of relying on the implicit (and for fractional values
        // deprecated) float-to-int conversion of array keys. The truncation is identical.
        $index = (int) ($pageNumber - 1);

        $page = $this->pages[$index];

        if ($page instanceof PdfIndirectObjectReference) {
            // Descends through the first kid of each nested Pages node until an object is
            // reached whose Type is not "Pages".
            $readPages = function ($kids) use (&$readPages) {
                $kids = PdfArray::ensure($kids);

                /** @noinspection LoopWhichDoesNotLoopInspection */
                foreach ($kids->value as $reference) {
                    $reference = PdfIndirectObjectReference::ensure($reference);
                    $object = $this->parser->getIndirectObject($reference->value);
                    $type = PdfDictionary::get($object->value, 'Type');

                    if ($type->value === 'Pages') {
                        // Kids may itself be stored as an indirect reference, so resolve
                        // it the same way as it is done for the top level node below.
                        return $readPages(
                            PdfType::resolve(PdfDictionary::get($object->value, 'Kids'), $this->parser)
                        );
                    }

                    return $object;
                }

                throw new PdfReaderException(
                    'Kids array cannot be empty.',
                    PdfReaderException::KIDS_EMPTY
                );
            };

            $page = $this->parser->getIndirectObject($page->value);
            $dict = PdfType::resolve($page, $this->parser);
            $type = PdfDictionary::get($dict, 'Type');

            if ($type->value === 'Pages') {
                $kids = PdfType::resolve(PdfDictionary::get($dict, 'Kids'), $this->parser);
                try {
                    $page = $this->pages[$index] = $readPages($kids);
                } catch (PdfReaderException $e) {
                    if ($e->getCode() !== PdfReaderException::KIDS_EMPTY) {
                        throw $e;
                    }

                    // let's reset the pages array and read all page objects
                    $this->pages = [];
                    $this->readPages(true);
                    $page = $this->pages[$index];
                }
            } else {
                // cache the resolved object so that it is not read again
                $this->pages[$index] = $page;
            }
        }

        return new Page($page, $this->parser);
    }

    /**
     * Walk the page tree and resolve all indirect objects of all pages.
     *
     * Does nothing if pages were already collected. Unless $readAll is true, the kids of
     * a node whose Count equals its number of kids are stored as unresolved references
     * and are only resolved on demand in getPage().
     *
     * @param bool $readAll Resolve every node of the page tree instead of deferring it.
     * @throws CrossReferenceException
     * @throws PdfParserException
     * @throws PdfTypeException
     */
    protected function readPages($readAll = false)
    {
        if (\count($this->pages) > 0) {
            return;
        }

        $readPages = function ($kids, $count) use (&$readPages, $readAll) {
            $kids = PdfArray::ensure($kids);
            $isLeaf = ($count->value === \count($kids->value));

            foreach ($kids->value as $reference) {
                $reference = PdfIndirectObjectReference::ensure($reference);

                if (!$readAll && $isLeaf) {
                    $this->pages[] = $reference;
                    continue;
                }

                $object = $this->parser->getIndirectObject($reference->value);
                $type = PdfDictionary::get($object->value, 'Type');

                if ($type->value === 'Pages') {
                    // Kids and Count of a nested node may be stored as indirect
                    // references, so resolve them like the entries of the root node.
                    $readPages(
                        PdfType::resolve(PdfDictionary::get($object->value, 'Kids'), $this->parser),
                        PdfType::resolve(PdfDictionary::get($object->value, 'Count'), $this->parser)
                    );
                } else {
                    $this->pages[] = $object;
                }
            }
        };

        $catalog = $this->parser->getCatalog();
        $pages = PdfType::resolve(PdfDictionary::get($catalog, 'Pages'), $this->parser);
        $count = PdfType::resolve(PdfDictionary::get($pages, 'Count'), $this->parser);
        $kids = PdfType::resolve(PdfDictionary::get($pages, 'Kids'), $this->parser);
        $readPages($kids, $count);
    }
}
235