<?php

/**
 * This file is part of FPDI
 *
 * @package   setasign\Fpdi
 * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
 * @license   http://opensource.org/licenses/mit-license The MIT License
 */

namespace setasign\Fpdi\PdfReader;

use setasign\Fpdi\PdfParser\CrossReference\CrossReferenceException;
use setasign\Fpdi\PdfParser\PdfParser;
use setasign\Fpdi\PdfParser\PdfParserException;
use setasign\Fpdi\PdfParser\Type\PdfArray;
use setasign\Fpdi\PdfParser\Type\PdfDictionary;
use setasign\Fpdi\PdfParser\Type\PdfIndirectObject;
use setasign\Fpdi\PdfParser\Type\PdfIndirectObjectReference;
use setasign\Fpdi\PdfParser\Type\PdfNumeric;
use setasign\Fpdi\PdfParser\Type\PdfType;
use setasign\Fpdi\PdfParser\Type\PdfTypeException;

/**
 * A PDF reader class
 *
 * Wraps a PdfParser instance and offers access to the PDF version, the page count and
 * the individual pages of the document's page tree.
 *
 * @package setasign\Fpdi\PdfReader
 */
class PdfReader
{
    /**
     * @var PdfParser
     */
    protected $parser;

    /**
     * The cached page count; null until getPageCount() has been called once.
     *
     * @var int|null
     */
    protected $pageCount;

    /**
     * Indirect objects of resolved pages.
     *
     * Entries are zero-based (page number - 1). An entry is either an unresolved reference
     * or the already loaded indirect object of the page.
     *
     * @var PdfIndirectObjectReference[]|PdfIndirectObject[]
     */
    protected $pages = [];

    /**
     * PdfReader constructor.
     *
     * @param PdfParser $parser
     */
    public function __construct(PdfParser $parser)
    {
        $this->parser = $parser;
    }

    /**
     * PdfReader destructor.
     *
     * Calls cleanUp() on the parser instance, if one is set.
     */
    public function __destruct()
    {
        if ($this->parser !== null) {
            $this->parser->cleanUp();
        }
    }

    /**
     * Get the pdf parser instance.
     *
     * @return PdfParser
     */
    public function getParser()
    {
        return $this->parser;
    }

    /**
     * Get the PDF version.
     *
     * The version parts returned by the parser are joined with a dot.
     *
     * @return string
     * @throws PdfParserException
     */
    public function getPdfVersion()
    {
        return \implode('.', $this->parser->getPdfVersion());
    }

    /**
     * Get the page count.
     *
     * The value is read from the "Count" entry of the root page tree node on the first call
     * and cached for subsequent calls.
     *
     * @return int
     * @throws PdfTypeException
     * @throws CrossReferenceException
     * @throws PdfParserException
     */
    public function getPageCount()
    {
        if ($this->pageCount === null) {
            $catalog = $this->parser->getCatalog();

            $pages = PdfType::resolve(PdfDictionary::get($catalog, 'Pages'), $this->parser);
            $count = PdfType::resolve(PdfDictionary::get($pages, 'Count'), $this->parser);

            $this->pageCount = PdfNumeric::ensure($count)->value;
        }

        return $this->pageCount;
    }

    /**
     * Get a page instance.
     *
     * @param int $pageNumber The one-based page number.
     * @return Page
     * @throws PdfTypeException
     * @throws CrossReferenceException
     * @throws PdfParserException
     * @throws PdfReaderException
     * @throws \InvalidArgumentException If the page number is not numeric or out of range.
     */
    public function getPage($pageNumber)
    {
        if (!\is_numeric($pageNumber)) {
            throw new \InvalidArgumentException(
                'Page number needs to be a number.'
            );
        }

        if ($pageNumber < 1 || $pageNumber > $this->getPageCount()) {
            throw new \InvalidArgumentException(
                \sprintf(
                    'Page number "%s" out of available page range (1 - %s)',
                    $pageNumber,
                    $this->getPageCount()
                )
            );
        }

        $this->readPages();

        $page = $this->pages[$pageNumber - 1];

        // readPages() may have stored only a reference; load the object behind it now.
        if ($page instanceof PdfIndirectObjectReference) {
            // Follows the first kid of each nested "Pages" node until a non-"Pages" object is found.
            $readPages = function ($kids) use (&$readPages) {
                $kids = PdfArray::ensure($kids);

                /** @noinspection LoopWhichDoesNotLoopInspection */
                foreach ($kids->value as $reference) {
                    $reference = PdfIndirectObjectReference::ensure($reference);
                    $object = $this->parser->getIndirectObject($reference->value);
                    $type = PdfDictionary::get($object->value, 'Type');

                    if ($type->value === 'Pages') {
                        // "Kids" may itself be stored as an indirect reference, so resolve it
                        // before it is checked to be an array in the recursive call.
                        return $readPages(
                            PdfType::resolve(PdfDictionary::get($object->value, 'Kids'), $this->parser)
                        );
                    }

                    return $object;
                }

                throw new PdfReaderException(
                    'Kids array cannot be empty.',
                    PdfReaderException::KIDS_EMPTY
                );
            };

            $page = $this->parser->getIndirectObject($page->value);
            $dict = PdfType::resolve($page, $this->parser);
            $type = PdfDictionary::get($dict, 'Type');

            if ($type->value === 'Pages') {
                // The referenced object is a page tree node and not a page: descend into it.
                $kids = PdfType::resolve(PdfDictionary::get($dict, 'Kids'), $this->parser);
                try {
                    $page = $this->pages[$pageNumber - 1] = $readPages($kids);
                } catch (PdfReaderException $e) {
                    if ($e->getCode() !== PdfReaderException::KIDS_EMPTY) {
                        throw $e;
                    }

                    // let's reset the pages array and read all page objects
                    $this->pages = [];
                    $this->readPages(true);
                    $page = $this->pages[$pageNumber - 1];
                }
            } else {
                // Cache the loaded object so the reference is not resolved again.
                $this->pages[$pageNumber - 1] = $page;
            }
        }

        return new Page($page, $this->parser);
    }

    /**
     * Walk the page tree and resolve all indirect objects of all pages.
     *
     * Does nothing if the pages array is already filled. Unless $readAll is set, the kids of
     * a node whose "Count" equals the number of its kids are stored as unresolved references.
     *
     * @param bool $readAll Whether to load every object of the tree instead of storing references.
     * @throws CrossReferenceException
     * @throws PdfParserException
     * @throws PdfTypeException
     */
    protected function readPages($readAll = false)
    {
        if (\count($this->pages) > 0) {
            return;
        }

        $readPages = function ($kids, $count) use (&$readPages, $readAll) {
            $kids = PdfArray::ensure($kids);
            // If "Count" equals the number of kids, the kids are treated as pages.
            $isLeaf = ($count->value === \count($kids->value));

            foreach ($kids->value as $reference) {
                $reference = PdfIndirectObjectReference::ensure($reference);

                if (!$readAll && $isLeaf) {
                    $this->pages[] = $reference;
                    continue;
                }

                $object = $this->parser->getIndirectObject($reference->value);
                $type = PdfDictionary::get($object->value, 'Type');

                if ($type->value === 'Pages') {
                    // Resolve "Kids" and "Count" in the same way as for the root node below:
                    // both entries may be stored as indirect references.
                    $readPages(
                        PdfType::resolve(PdfDictionary::get($object->value, 'Kids'), $this->parser),
                        PdfType::resolve(PdfDictionary::get($object->value, 'Count'), $this->parser)
                    );
                } else {
                    $this->pages[] = $object;
                }
            }
        };

        $catalog = $this->parser->getCatalog();
        $pages = PdfType::resolve(PdfDictionary::get($catalog, 'Pages'), $this->parser);
        $count = PdfType::resolve(PdfDictionary::get($pages, 'Count'), $this->parser);
        $kids = PdfType::resolve(PdfDictionary::get($pages, 'Kids'), $this->parser);
        $readPages($kids, $count);
    }
}