<?php

/**
 * This file is part of FPDI
 *
 * @package setasign\Fpdi
 * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
 * @license http://opensource.org/licenses/mit-license The MIT License
 */

namespace setasign\Fpdi\PdfParser\CrossReference;

use setasign\Fpdi\PdfParser\PdfParser;
use setasign\Fpdi\PdfParser\StreamReader;

/**
 * Class FixedReader
 *
 * This reader parses single entries of the cross-reference with very little overhead: only the subsection headers
 * are read up front, while the individual entries are read on demand instead of in a single run.
 *
 * @package setasign\Fpdi\PdfParser\CrossReference
 */
class FixedReader extends AbstractReader implements ReaderInterface
{
    /**
     * The stream reader the cross-reference is read from.
     *
     * @var StreamReader
     */
    protected $reader;

    /**
     * Data of subsections.
     *
     * Keys are the positions at which the first entry of a subsection starts, values are
     * `[start object number, entry count]` pairs.
     *
     * @var array
     */
    protected $subSections;

    /**
     * FixedReader constructor.
     *
     * @param PdfParser $parser
     * @throws CrossReferenceException
     */
    public function __construct(PdfParser $parser)
    {
        $this->reader = $parser->getStreamReader();
        // The subsections are read BEFORE the parent constructor runs. read() leaves the reader positioned behind
        // the last parsed subsection.
        // NOTE(review): presumably AbstractReader::__construct() continues parsing from that position - confirm.
        $this->read();
        parent::__construct($parser);
    }

    /**
     * Get all subsection data.
     *
     * @return array
     */
    public function getSubSections()
    {
        return $this->subSections;
    }

    /**
     * @inheritdoc
     */
    public function getOffsetFor($objectNumber)
    {
        foreach ($this->subSections as $offset => list($startObject, $objectCount)) {
            if ($objectNumber >= $startObject && $objectNumber < ($startObject + $objectCount)) {
                // every entry is exactly 20 bytes long, so the entry can be addressed directly
                $position = $offset + 20 * ($objectNumber - $startObject);
                $this->reader->ensure($position, 20);
                $line = $this->reader->readBytes(20);

                // The entry may not be completely readable (e.g. a truncated file). Treat this as "not found"
                // instead of indexing into an incomplete or non-string value and returning a bogus offset of 0.
                if (!\is_string($line) || \strlen($line) < 20) {
                    return false;
                }

                // Entry layout: 10 digits offset, space, 5 digits generation, space, type keyword (index 17).
                // An "f" marks a free entry, which has no offset.
                if ($line[17] === 'f') {
                    return false;
                }

                return (int) \substr($line, 0, 10);
            }
        }

        return false;
    }

    /**
     * Read the cross-reference.
     *
     * This reader only reads the subsection headers in this method. The offsets of individual objects are resolved
     * on demand by using this information.
     *
     * @throws CrossReferenceException
     */
    protected function read()
    {
        $subSections = [];

        $startObject = $entryCount = $lastLineStart = null;
        $validityChecked = false;
        while (($line = $this->reader->readLine(20)) !== false) {
            if (\strpos($line, 'trailer') !== false) {
                $this->reader->reset($lastLineStart);
                break;
            }

            // jump over if line content doesn't match the expected string
            if (\sscanf($line, '%d %d', $startObject, $entryCount) !== 2) {
                continue;
            }

            // "%d" also accepts negative numbers. A negative entry count would move $lastLineStart in front of
            // this header, so that the same header could be parsed again and again. Handle such a line like any
            // other line that doesn't match.
            if ($startObject < 0 || $entryCount < 0) {
                continue;
            }

            // position at which the first entry of this subsection starts
            $oldPosition = $this->reader->getPosition();
            $position = $oldPosition + $this->reader->getOffset();

            // the entry length is verified only once, on the first subsection that has entries
            if (!$validityChecked && $entryCount > 0) {
                $nextLine = $this->reader->readBytes(21);
                /* Check that the next line has a maximum of 20 bytes and is not longer:
                 * When reading 21 bytes, the trimmed length has to be still 21.
                 */
                if (\strlen(\trim($nextLine)) !== 21) {
                    throw new CrossReferenceException(
                        'Cross-reference entries are larger than 20 bytes.',
                        CrossReferenceException::ENTRIES_TOO_LARGE
                    );
                }

                /* Check for less than 20 bytes: cut the line to 20 bytes and trim; this has to result in exactly
                 * 18 bytes. If the entry had fewer bytes, the substring would include the first bytes of the next
                 * line, which would evaluate to a 20 bytes long string after trimming.
                 */
                if (\strlen(\trim(\substr($nextLine, 0, 20))) !== 18) {
                    throw new CrossReferenceException(
                        'Cross-reference entries are less than 20 bytes.',
                        CrossReferenceException::ENTRIES_TOO_SHORT
                    );
                }

                $validityChecked = true;
            }

            $subSections[$position] = [$startObject, $entryCount];

            // skip all entries of this subsection and continue with the line following them
            $lastLineStart = $position + $entryCount * 20;
            $this->reader->reset($lastLineStart);
        }

        // reset after the last correct parsed line
        $this->reader->reset($lastLineStart);

        if (\count($subSections) === 0) {
            throw new CrossReferenceException(
                'No entries found in cross-reference.',
                CrossReferenceException::NO_ENTRIES
            );
        }

        $this->subSections = $subSections;
    }

    /**
     * Fixes an invalid object number shift.
     *
     * This method can be used to repair documents with an invalid subsection header, in which the subsection
     * claims to start at object 1 although its first entry is the free entry:
     *
     * <code>
     * xref
     * 1 7
     * 0000000000 65535 f
     * 0000000009 00000 n
     * 0000412075 00000 n
     * 0000412172 00000 n
     * 0000412359 00000 n
     * 0000412417 00000 n
     * 0000412468 00000 n
     * </code>
     *
     * It shall only be called on the first table.
     *
     * @return bool True if the subsection was shifted, false if nothing was changed.
     */
    public function fixFaultySubSectionShift()
    {
        $subSections = $this->getSubSections();
        // only a table with a single subsection is repaired
        if (\count($subSections) > 1) {
            return false;
        }

        $subSection = \current($subSections);
        if ($subSection[0] != 1) {
            return false;
        }

        // no offset for object 1 (e.g. its entry is a free one): move the start object number down by one
        if ($this->getOffsetFor(1) === false) {
            foreach ($subSections as $offset => list($startObject, $objectCount)) {
                $this->subSections[$offset] = [$startObject - 1, $objectCount];
            }
            return true;
        }

        return false;
    }
}