<?php

/**
 * This file is part of FPDI
 *
 * @package   setasign\Fpdi
 * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
 * @license   http://opensource.org/licenses/mit-license The MIT License
 */

namespace setasign\Fpdi\PdfParser\CrossReference;

use setasign\Fpdi\PdfParser\PdfParser;
use setasign\Fpdi\PdfParser\StreamReader;

/**
 * Class LineReader
 *
 * This reader class reads all cross-reference entries in a single run.
 * It supports reading cross-references with e.g. invalid data (e.g. entries with a length < or > 20 bytes).
 *
 * @package setasign\Fpdi\PdfParser\CrossReference
 */
class LineReader extends AbstractReader implements ReaderInterface
{
    /**
     * The object offsets.
     *
     * Keyed by object number; every value is a two element list built from the first two
     * fields of an in-use ("n") entry, i.e. [byte offset, generation number].
     *
     * @var array
     */
    protected $offsets;

    /**
     * LineReader constructor.
     *
     * The cross-reference table is extracted and parsed before the parent constructor is invoked.
     * NOTE(review): presumably the parent continues reading at the position extract() leaves the
     * stream reader at (the "trailer" keyword) - confirm against AbstractReader.
     *
     * @param PdfParser $parser
     * @throws CrossReferenceException
     */
    public function __construct(PdfParser $parser)
    {
        $this->read($this->extract($parser->getStreamReader()));
        parent::__construct($parser);
    }

    /**
     * @inheritdoc
     */
    public function getOffsetFor($objectNumber)
    {
        if (isset($this->offsets[$objectNumber])) {
            // index 0 holds the byte offset, index 1 the generation number
            return $this->offsets[$objectNumber][0];
        }

        return false;
    }

    /**
     * Get all found offsets.
     *
     * @return array
     */
    public function getOffsets()
    {
        return $this->offsets;
    }

    /**
     * Extracts the cross reference data from the stream reader.
     *
     * The reader's buffer is enlarged in steps of 100 bytes until it contains the "trailer" keyword.
     * Everything in the buffer in front of that keyword is returned and the reader is reset to the
     * position of the keyword.
     *
     * @param StreamReader $reader
     * @return string The buffer content preceding the "trailer" keyword.
     * @throws CrossReferenceException If the buffer cannot be enlarged any further and no "trailer" keyword was found.
     */
    protected function extract(StreamReader $reader)
    {
        $cycles = -1;
        $bytesPerCycle = 100;

        $reader->reset(null, $bytesPerCycle);

        // The search offset is max(100 * $cycles, 0) with $cycles post-incremented from -1, which yields
        // 0, 0, 100, 200, ... - it lags behind the growing buffer, so bytes that were already scanned are
        // partially scanned again (presumably to catch a keyword that straddles two chunks).
        while (
            ($trailerPos = \strpos($reader->getBuffer(false), 'trailer', \max($bytesPerCycle * $cycles++, 0))) === false
        ) {
            if ($reader->increaseLength($bytesPerCycle) === false) {
                break;
            }
        }

        if ($trailerPos === false) {
            throw new CrossReferenceException(
                'Unexpected end of cross reference. "trailer"-keyword not found.',
                CrossReferenceException::NO_TRAILER_FOUND
            );
        }

        $xrefContent = \substr($reader->getBuffer(false), 0, $trailerPos);
        $reader->reset($reader->getPosition() + $trailerPos);

        return $xrefContent;
    }

    /**
     * Read the cross-reference entries.
     *
     * The content is split into lines which are interpreted by their number of space separated pieces:
     * - 2 pieces: a subsection header; the first piece is the object number of the next entry.
     * - 3 pieces ending with "n": an in-use entry; its first two pieces are stored for the current
     *   object number, which is incremented afterwards.
     * - 3 pieces ending with "f": a free entry; only the current object number is incremented.
     * Anything else is rejected. The result is stored in $this->offsets.
     *
     * @param string $xrefContent
     * @throws CrossReferenceException If no line ending is found in the first 100 bytes, the content
     *                                 cannot be split into lines or a line has an unexpected format.
     */
    protected function read($xrefContent)
    {
        // get eol markers in the first 100 bytes
        \preg_match_all("/(\r\n|\n|\r)/", \substr($xrefContent, 0, 100), $m);

        if (\count($m[0]) === 0) {
            throw new CrossReferenceException(
                'No data found in cross-reference.',
                CrossReferenceException::INVALID_DATA
            );
        }

        // count(array_count_values()) is faster than count(array_unique())
        // @see https://github.com/symfony/symfony/pull/23731
        // can be reverted for php7.2
        $differentLineEndings = \count(\array_count_values($m[0]));
        if ($differentLineEndings > 1) {
            // mixed line endings: split on any of them
            $lines = \preg_split("/(\r\n|\n|\r)/", $xrefContent, -1, \PREG_SPLIT_NO_EMPTY);
        } else {
            // a single kind of line ending: a plain explode() is sufficient
            $lines = \explode($m[0][0], $xrefContent);
        }

        unset($differentLineEndings, $m);

        // preg_split() returns false on failure - do not pass that on to the loop below
        if (!\is_array($lines)) {
            throw new CrossReferenceException(
                'Unable to split cross-reference into lines.',
                CrossReferenceException::INVALID_DATA
            );
        }

        // Start at object number 0, so that entries which are not preceded by a subsection header
        // get an integer key instead of a null (= "") key.
        $start = 0;
        $offsets = [];

        foreach ($lines as $rawLine) {
            $line = \trim($rawLine);
            if (!$line) {
                // empty lines are ignored (this truthiness check also skips a line consisting of "0" only)
                continue;
            }

            $pieces = \explode(' ', $line);

            switch (\count($pieces)) {
                case 2:
                    // subsection header: "<first object number> <number of entries>"
                    $start = (int) $pieces[0];
                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case 3:
                    switch ($pieces[2]) {
                        case 'n':
                            $offsets[$start] = [(int) $pieces[0], (int) $pieces[1]];
                            $start++;
                            break 2;
                        case 'f':
                            $start++;
                            break 2;
                    }
                    // fall through if pieces doesn't match

                default:
                    throw new CrossReferenceException(
                        \sprintf('Unexpected data in xref table (%s)', \implode(' ', $pieces)),
                        CrossReferenceException::INVALID_DATA
                    );
            }
        }

        $this->offsets = $offsets;
    }
}