<?php

/**
 * This file is part of FPDI
 *
 * @package   setasign\Fpdi
 * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
 * @license   http://opensource.org/licenses/mit-license The MIT License
 */

namespace setasign\Fpdi\PdfParser\CrossReference;

use setasign\Fpdi\PdfParser\PdfParser;
use setasign\Fpdi\PdfParser\Type\PdfDictionary;
use setasign\Fpdi\PdfParser\Type\PdfIndirectObject;
use setasign\Fpdi\PdfParser\Type\PdfNumeric;
use setasign\Fpdi\PdfParser\Type\PdfStream;
use setasign\Fpdi\PdfParser\Type\PdfToken;
use setasign\Fpdi\PdfParser\Type\PdfTypeException;

/**
 * Class CrossReference
 *
 * This class processes the standard cross reference of a PDF document.
 *
 * It locates the "startxref" pointer, reads the cross-reference section it points to and then follows
 * the "Prev" entries of the trailer dictionaries, keeping one reader instance per section.
 *
 * @package setasign\Fpdi\PdfParser\CrossReference
 */
class CrossReference
{
    /**
     * The byte length in which the "startxref" keyword should be searched.
     *
     * @var int
     */
    public static $trailerSearchLength = 5500;

    /**
     * Value that is added to cross-reference and object byte offsets before the stream reader is positioned.
     *
     * @var int
     */
    protected $fileHeaderOffset = 0;

    /**
     * @var PdfParser
     */
    protected $parser;

    /**
     * The readers in the order they were read: index 0 belongs to the section referenced by "startxref".
     *
     * @var ReaderInterface[]
     */
    protected $readers = [];

    /**
     * CrossReference constructor.
     *
     * Reads the cross-reference section referenced by "startxref" and every section reachable through the
     * "Prev" entries of the trailers.
     *
     * @param PdfParser $parser
     * @param int $fileHeaderOffset Value added to the byte offsets found in the document. It is reset to zero
     *                              if a section cannot be read with it but can be read without it.
     * @throws CrossReferenceException
     * @throws PdfTypeException
     */
    public function __construct(PdfParser $parser, $fileHeaderOffset = 0)
    {
        $this->parser = $parser;
        $this->fileHeaderOffset = $fileHeaderOffset;

        $offset = $this->findStartXref();
        $reader = null;
        // Offsets that were already processed. A "Prev" entry that points back to one of them would
        // otherwise keep this loop running (and the readers array growing) forever.
        $visitedOffsets = [];

        /** @noinspection TypeUnsafeComparisonInspection */
        while ($offset != false) { // By doing an unsafe comparison we ignore faulty references to byte offset 0
            if (\in_array($offset, $visitedOffsets, true)) {
                // Circular "Prev" chain: everything reachable is already read, so stop here.
                break;
            }
            $visitedOffsets[] = $offset;

            try {
                $reader = $this->readXref($offset + $this->fileHeaderOffset);
            } catch (CrossReferenceException $e) {
                // sometimes the file header offset is part of the byte offsets, so let's retry by resetting it to zero.
                if ($e->getCode() === CrossReferenceException::INVALID_DATA && $this->fileHeaderOffset !== 0) {
                    $this->fileHeaderOffset = 0;
                    $reader = $this->readXref($offset + $this->fileHeaderOffset);
                } else {
                    throw $e;
                }
            }

            $trailer = $reader->getTrailer();
            $this->checkForEncryption($trailer);
            $this->readers[] = $reader;

            if (isset($trailer->value['Prev'])) {
                $offset = $trailer->value['Prev']->value;
            } else {
                $offset = false;
            }
        }

        // The loop body never ran (e.g. "startxref" points to byte offset 0).
        if ($reader === null) {
            throw new CrossReferenceException('No cross-reference found.', CrossReferenceException::NO_XREF_FOUND);
        }

        // fix faulty sub-section header (applied to the reader that was read last)
        if ($reader instanceof FixedReader) {
            /**
             * @var FixedReader $reader
             */
            $reader->fixFaultySubSectionShift();
        }
    }

    /**
     * Get the size of the cross reference.
     *
     * The value is taken from the "Size" entry of the trailer dictionary.
     *
     * @return int
     */
    public function getSize()
    {
        return $this->getTrailer()->value['Size']->value;
    }

    /**
     * Get the trailer dictionary.
     *
     * This is the trailer of the first reader, i.e. of the section referenced by "startxref".
     *
     * @return PdfDictionary
     */
    public function getTrailer()
    {
        return $this->readers[0]->getTrailer();
    }

    /**
     * Get the cross reference reader instances.
     *
     * @return ReaderInterface[]
     */
    public function getReaders()
    {
        return $this->readers;
    }

    /**
     * Get the offset by an object number.
     *
     * The readers are asked in the order they were read and the first offset found wins.
     *
     * @param int $objectNumber
     * @return int|bool The offset or false if no reader knows the object number.
     */
    public function getOffsetFor($objectNumber)
    {
        foreach ($this->getReaders() as $reader) {
            $offset = $reader->getOffsetFor($objectNumber);
            if ($offset !== false) {
                return $offset;
            }
        }

        return false;
    }

    /**
     * Get an indirect object by its object number.
     *
     * @param int $objectNumber
     * @return PdfIndirectObject
     * @throws CrossReferenceException If no offset is known for the object number, if no indirect object can be
     *                                 read at that offset or if the object found there has another number.
     */
    public function getIndirectObject($objectNumber)
    {
        $offset = $this->getOffsetFor($objectNumber);
        if ($offset === false) {
            throw new CrossReferenceException(
                \sprintf('Object (id:%s) not found.', $objectNumber),
                CrossReferenceException::OBJECT_NOT_FOUND
            );
        }

        $parser = $this->parser;

        $parser->getTokenizer()->clearStack();
        $parser->getStreamReader()->reset($offset + $this->fileHeaderOffset);

        try {
            /** @var PdfIndirectObject $object */
            $object = $parser->readValue(null, PdfIndirectObject::class);
        } catch (PdfTypeException $e) {
            throw new CrossReferenceException(
                \sprintf('Object (id:%s) not found at location (%s).', $objectNumber, $offset),
                CrossReferenceException::OBJECT_NOT_FOUND,
                $e
            );
        }

        // The comparison is strict, so the requested number has to have the same type as the parsed one.
        if ($object->objectNumber !== $objectNumber) {
            throw new CrossReferenceException(
                \sprintf('Wrong object found, got %s while %s was expected.', $object->objectNumber, $objectNumber),
                CrossReferenceException::OBJECT_NOT_FOUND
            );
        }

        return $object;
    }

    /**
     * Read the cross-reference table at a given offset.
     *
     * Internally the method will try to evaluate the best reader for this cross-reference.
     *
     * @param int $offset
     * @return ReaderInterface
     * @throws CrossReferenceException
     * @throws PdfTypeException
     */
    protected function readXref($offset)
    {
        $this->parser->getStreamReader()->reset($offset);
        $this->parser->getTokenizer()->clearStack();
        $initValue = $this->parser->readValue();

        return $this->initReaderInstance($initValue);
    }

    /**
     * Get a cross-reference reader instance.
     *
     * An "xref" token leads to a FixedReader or, if that one fails, to a LineReader. An indirect object is only
     * inspected to raise the most specific exception, because it cannot be processed by this class.
     *
     * @param PdfToken|PdfIndirectObject $initValue The value read at the cross-reference offset.
     * @return ReaderInterface
     * @throws CrossReferenceException
     * @throws PdfTypeException
     */
    protected function initReaderInstance($initValue)
    {
        // Remember where the reader stands now, so that a second reader can start from the same place.
        $position = $this->parser->getStreamReader()->getPosition()
            + $this->parser->getStreamReader()->getOffset() + $this->fileHeaderOffset;

        if ($initValue instanceof PdfToken && $initValue->value === 'xref') {
            try {
                return new FixedReader($this->parser);
            } catch (CrossReferenceException $e) {
                // Rewind and let the line based reader try it.
                $this->parser->getStreamReader()->reset($position);
                $this->parser->getTokenizer()->clearStack();

                return new LineReader($this->parser);
            }
        }

        if ($initValue instanceof PdfIndirectObject) {
            try {
                $stream = PdfStream::ensure($initValue->value);
            } catch (PdfTypeException $e) {
                throw new CrossReferenceException(
                    'Invalid object type at xref reference offset.',
                    CrossReferenceException::INVALID_DATA,
                    $e
                );
            }

            $type = PdfDictionary::get($stream->value, 'Type');
            if ($type->value !== 'XRef') {
                throw new CrossReferenceException(
                    'The xref position points to an incorrect object type.',
                    CrossReferenceException::INVALID_DATA
                );
            }

            // An encrypted document is reported as such instead of as a compressed one.
            $this->checkForEncryption($stream->value);

            throw new CrossReferenceException(
                'This PDF document probably uses a compression technique which is not supported by the ' .
                'free parser shipped with FPDI. (See https://www.setasign.com/fpdi-pdf-parser for more details)',
                CrossReferenceException::COMPRESSED_XREF
            );
        }

        throw new CrossReferenceException(
            'The xref position points to an incorrect object type.',
            CrossReferenceException::INVALID_DATA
        );
    }

    /**
     * Check for encryption.
     *
     * @param PdfDictionary $dictionary
     * @throws CrossReferenceException If the dictionary has an "Encrypt" entry.
     */
    protected function checkForEncryption(PdfDictionary $dictionary)
    {
        if (isset($dictionary->value['Encrypt'])) {
            throw new CrossReferenceException(
                'This PDF document is encrypted and cannot be processed with FPDI.',
                CrossReferenceException::ENCRYPTED
            );
        }
    }

    /**
     * Find the start position for the first cross-reference.
     *
     * The last occurrence of the "startxref" keyword (or of the misspelled "startref") in the searched buffer
     * is used and the numeric value following it is returned.
     *
     * @return int The byte-offset position of the first cross-reference.
     * @throws CrossReferenceException If no keyword is found or if it is not followed by a numeric value.
     */
    protected function findStartXref()
    {
        $reader = $this->parser->getStreamReader();
        // NOTE(review): presumably this loads the last $trailerSearchLength bytes of the document - confirm
        // against StreamReader::reset().
        $reader->reset(-self::$trailerSearchLength, self::$trailerSearchLength);

        $buffer = $reader->getBuffer(false);
        $pos = \strrpos($buffer, 'startxref');
        $addOffset = 9; // length of "startxref"
        if ($pos === false) {
            // Some corrupted documents use startref instead of startxref
            $pos = \strrpos($buffer, 'startref');
            if ($pos === false) {
                throw new CrossReferenceException(
                    'Unable to find pointer to xref table',
                    CrossReferenceException::NO_STARTXREF_FOUND
                );
            }
            $addOffset = 8; // length of "startref"
        }

        // Continue reading directly behind the keyword.
        $reader->setOffset($pos + $addOffset);

        try {
            $value = $this->parser->readValue(null, PdfNumeric::class);
        } catch (PdfTypeException $e) {
            throw new CrossReferenceException(
                'Invalid data after startxref keyword.',
                CrossReferenceException::INVALID_DATA,
                $e
            );
        }

        return $value->value;
    }
}