<?php

/**
 * This file is part of FPDI
 *
 * @package setasign\Fpdi
 * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
 * @license http://opensource.org/licenses/mit-license The MIT License
 */

namespace setasign\Fpdi\PdfReader;

use setasign\Fpdi\PdfParser\CrossReference\CrossReferenceException;
use setasign\Fpdi\PdfParser\PdfParser;
use setasign\Fpdi\PdfParser\PdfParserException;
use setasign\Fpdi\PdfParser\Type\PdfArray;
use setasign\Fpdi\PdfParser\Type\PdfDictionary;
use setasign\Fpdi\PdfParser\Type\PdfIndirectObject;
use setasign\Fpdi\PdfParser\Type\PdfIndirectObjectReference;
use setasign\Fpdi\PdfParser\Type\PdfNumeric;
use setasign\Fpdi\PdfParser\Type\PdfType;
use setasign\Fpdi\PdfParser\Type\PdfTypeException;

/**
 * A PDF reader class
 *
 * Wraps a PdfParser instance and offers access to the PDF version, the page count and
 * the individual pages of the document by walking its page tree.
 *
 * @package setasign\Fpdi\PdfReader
 */
class PdfReader
{
    /**
     * The parser used to read objects from the document.
     *
     * @var PdfParser
     */
    protected $parser;

    /**
     * Cached page count; null until getPageCount() was called for the first time.
     *
     * @var int
     */
    protected $pageCount;

    /**
     * Indirect objects of resolved pages.
     *
     * Indexed by zero-based page index. An entry is either a not yet loaded reference or an
     * already loaded indirect object.
     *
     * @var PdfIndirectObjectReference[]|PdfIndirectObject[]
     */
    protected $pages = [];

    /**
     * PdfReader constructor.
     *
     * @param PdfParser $parser
     */
    public function __construct(PdfParser $parser)
    {
        $this->parser = $parser;
    }

    /**
     * PdfReader destructor.
     *
     * Asks the parser to clean up, if a parser instance is still set.
     */
    public function __destruct()
    {
        if ($this->parser !== null) {
            $this->parser->cleanUp();
        }
    }

    /**
     * Get the pdf parser instance.
     *
     * @return PdfParser
     */
    public function getParser()
    {
        return $this->parser;
    }

    /**
     * Get the PDF version.
     *
     * The parts returned by the parser are joined with a dot.
     *
     * @return string
     * @throws PdfParserException
     */
    public function getPdfVersion()
    {
        return \implode('.', $this->parser->getPdfVersion());
    }

    /**
     * Get the page count.
     *
     * The value is read from the "Count" entry of the root "Pages" dictionary on the first
     * call and cached afterwards.
     *
     * @return int
     * @throws PdfTypeException
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    public function getPageCount()
    {
        if ($this->pageCount === null) {
            $catalog = $this->parser->getCatalog();

            $pages = PdfType::resolve(PdfDictionary::get($catalog, 'Pages'), $this->parser);
            $count = PdfType::resolve(PdfDictionary::get($pages, 'Count'), $this->parser);

            $this->pageCount = PdfNumeric::ensure($count)->value;
        }

        return $this->pageCount;
    }

    /**
     * Get a page instance.
     *
     * @param int $pageNumber The one-based page number.
     * @return Page
     * @throws PdfTypeException
     * @throws CrossReferenceException
     * @throws PdfParserException
     * @throws \InvalidArgumentException If the page number is not numeric or out of range.
     */
    public function getPage($pageNumber)
    {
        if (!\is_numeric($pageNumber)) {
            throw new \InvalidArgumentException(
                'Page number needs to be a number.'
            );
        }

        if ($pageNumber < 1 || $pageNumber > $this->getPageCount()) {
            throw new \InvalidArgumentException(
                \sprintf(
                    'Page number "%s" out of available page range (1 - %s)',
                    $pageNumber,
                    $this->getPageCount()
                )
            );
        }

        // is_numeric() also accepts floats and float-like strings. Cast explicitly so that the
        // array offset is always an integer (same truncation as before, but without relying on
        // the implicit float-to-int key conversion, which is deprecated since PHP 8.1).
        $pageIndex = (int) ($pageNumber - 1);

        $this->readPages();

        $page = $this->pages[$pageIndex];

        if ($page instanceof PdfIndirectObjectReference) {
            // Descends through the first kid of nested "Pages" nodes until an object is found
            // that is not of type "Pages".
            $findFirstPage = function ($kids) use (&$findFirstPage) {
                $kids = PdfArray::ensure($kids);

                /** @noinspection LoopWhichDoesNotLoopInspection */
                foreach ($kids->value as $reference) {
                    $reference = PdfIndirectObjectReference::ensure($reference);
                    $object = $this->parser->getIndirectObject($reference->value);
                    $type = PdfDictionary::get($object->value, 'Type');

                    if ($type->value === 'Pages') {
                        // The Kids entry may itself be stored as an indirect reference.
                        return $findFirstPage(
                            PdfType::resolve(PdfDictionary::get($object->value, 'Kids'), $this->parser)
                        );
                    }

                    return $object;
                }

                throw new PdfReaderException(
                    'Kids array cannot be empty.',
                    PdfReaderException::KIDS_EMPTY
                );
            };

            $page = $this->parser->getIndirectObject($page->value);
            $dict = PdfType::resolve($page, $this->parser);
            $type = PdfDictionary::get($dict, 'Type');

            if ($type->value === 'Pages') {
                // The entry that was stored as a page is an intermediate node of the page tree.
                $kids = PdfType::resolve(PdfDictionary::get($dict, 'Kids'), $this->parser);
                try {
                    $page = $this->pages[$pageIndex] = $findFirstPage($kids);
                } catch (PdfReaderException $e) {
                    if ($e->getCode() !== PdfReaderException::KIDS_EMPTY) {
                        throw $e;
                    }

                    // let's reset the pages array and read all page objects
                    $this->pages = [];
                    $this->readPages(true);
                    $page = $this->pages[$pageIndex];
                }
            } else {
                // Cache the loaded object so that the next call does not need to load it again.
                $this->pages[$pageIndex] = $page;
            }
        }

        return new Page($page, $this->parser);
    }

    /**
     * Walk the page tree and resolve all indirect objects of all pages.
     *
     * Does nothing if the pages array is already filled.
     *
     * @param bool $readAll If false, the kids of a node whose "Count" value equals the number of
     *                      its kids are stored as references without loading them. If true, every
     *                      kid is loaded and checked for being a "Pages" node.
     * @throws CrossReferenceException
     * @throws PdfParserException
     * @throws PdfTypeException
     */
    protected function readPages($readAll = false)
    {
        if (\count($this->pages) > 0) {
            return;
        }

        $readPages = function ($kids, $count) use (&$readPages, $readAll) {
            $kids = PdfArray::ensure($kids);
            $isLeaf = ($count->value === \count($kids->value));

            foreach ($kids->value as $reference) {
                $reference = PdfIndirectObjectReference::ensure($reference);

                if (!$readAll && $isLeaf) {
                    $this->pages[] = $reference;
                    continue;
                }

                $object = $this->parser->getIndirectObject($reference->value);
                $type = PdfDictionary::get($object->value, 'Type');

                if ($type->value === 'Pages') {
                    // Kids and Count may be stored as indirect references, so they are resolved
                    // in the same way as it is done for the root node below.
                    $readPages(
                        PdfType::resolve(PdfDictionary::get($object->value, 'Kids'), $this->parser),
                        PdfType::resolve(PdfDictionary::get($object->value, 'Count'), $this->parser)
                    );
                } else {
                    $this->pages[] = $object;
                }
            }
        };

        $catalog = $this->parser->getCatalog();
        $pages = PdfType::resolve(PdfDictionary::get($catalog, 'Pages'), $this->parser);
        $count = PdfType::resolve(PdfDictionary::get($pages, 'Count'), $this->parser);
        $kids = PdfType::resolve(PdfDictionary::get($pages, 'Kids'), $this->parser);
        $readPages($kids, $count);
    }
}