<?php

// xref: /plugin/dw2pdf/vendor/setasign/fpdi/src/PdfParser/Tokenizer.php (revision dc4d9dc689082c963d5c1d9ee679553326788c6e)

/**
 * This file is part of FPDI
 *
 * @package   setasign\Fpdi
 * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
 * @license   http://opensource.org/licenses/mit-license The MIT License
 */

namespace setasign\Fpdi\PdfParser;

/**
 * A tokenizer class.
 *
 * Splits the bytes delivered by a stream reader into tokens: single delimiter
 * characters and runs of regular characters. Tokens can also be pushed onto an
 * internal stack; stacked tokens are handed out before anything new is read
 * from the stream.
 *
 * @package setasign\Fpdi\PdfParser
 */
class Tokenizer
{
    /**
     * The reader all bytes are pulled from.
     *
     * @var StreamReader
     */
    protected $streamReader;

    /**
     * A token stack.
     *
     * Filled through pushStack() and drained (last in, first out) by getNextToken().
     *
     * @var string[]
     */
    protected $stack = [];

    /**
     * Tokenizer constructor.
     *
     * @param StreamReader $streamReader The reader to tokenize.
     */
    public function __construct(StreamReader $streamReader)
    {
        $this->streamReader = $streamReader;
    }

    /**
     * Get the stream reader instance.
     *
     * @return StreamReader The instance passed to the constructor.
     */
    public function getStreamReader()
    {
        return $this->streamReader;
    }

    /**
     * Clear the token stack.
     *
     * Discards every token that was pushed but not yet fetched.
     */
    public function clearStack()
    {
        $this->stack = [];
    }

    /**
     * Push a token onto the stack.
     *
     * The token pushed last is the first one returned by getNextToken().
     *
     * @param string $token
     */
    public function pushStack($token)
    {
        $this->stack[] = $token;
    }

    /**
     * Get next token.
     *
     * Stacked tokens are returned first. Otherwise leading white space is
     * skipped and one of the following is returned:
     *  - a single delimiter character (one of "/[](){}<>"),
     *  - the token following a comment: on "%" the rest of the line is consumed
     *    through the reader's readLine() and tokenizing starts over,
     *  - a run of characters reaching up to (not including) the next white-space
     *    or delimiter character.
     *
     * @return bool|string The token, or false if the reader has no more bytes.
     */
    public function getNextToken()
    {
        // array_pop() yields null for an empty stack, so null means "nothing pushed back".
        $token = \array_pop($this->stack);
        if ($token !== null) {
            return $token;
        }

        if (($byte = $this->streamReader->readByte()) === false) {
            return false;
        }

        if (
            $byte === "\x20" ||
            $byte === "\x0A" ||
            $byte === "\x0D" ||
            $byte === "\x0C" ||
            $byte === "\x09" ||
            $byte === "\x00"
        ) {
            // Skip the remaining white space; only white space left means no token.
            if ($this->leapWhiteSpaces() === false) {
                return false;
            }
            $byte = $this->streamReader->readByte();
        }

        switch ($byte) {
            case '/':
            case '[':
            case ']':
            case '(':
            case ')':
            case '{':
            case '}':
            case '<':
            case '>':
                // Delimiters are tokens of their own.
                return $byte;
            case '%':
                // Comment: drop the rest of the line and start over.
                $this->streamReader->readLine();
                return $this->getNextToken();
        }

        /* This way is faster than checking single bytes.
         */
        // The offset already points behind $byte, i.e. at the second byte of the token.
        $bufferOffset = $this->streamReader->getOffset();
        do {
            // NOTE(review): getBuffer(false) is presumably the whole buffer, not the
            // part starting at the current offset - confirm against StreamReader.
            $lastBuffer = $this->streamReader->getBuffer(false);
            // Count the bytes from $bufferOffset up to the next white-space/delimiter.
            $pos = \strcspn(
                $lastBuffer,
                "\x00\x09\x0A\x0C\x0D\x20()<>[]{}/%",
                $bufferOffset
            );
        } while (
            // Break the loop if a delimiter or white space char is matched
            // in the current buffer or increase the buffers length
            $lastBuffer !== false &&
            (
                $bufferOffset + $pos === \strlen($lastBuffer) &&
                $this->streamReader->increaseLength()
            )
        );

        // Start one byte earlier so the already consumed first byte is part of the token.
        $result = \substr($lastBuffer, $bufferOffset - 1, $pos + 1);
        // Leave the terminating white-space/delimiter byte unread for the next call.
        $this->streamReader->setOffset($bufferOffset + $pos);

        return $result;
    }

    /**
     * Leap white spaces.
     *
     * Advances the reader's offset past every consecutive white-space byte
     * (space, LF, FF, CR, TAB, NUL) starting at the current offset.
     *
     * @return boolean True once the offset is in front of a byte inside the buffer;
     *                 false as soon as the reader's ensureContent() reports failure.
     */
    public function leapWhiteSpaces()
    {
        do {
            if (!$this->streamReader->ensureContent()) {
                return false;
            }

            $buffer = $this->streamReader->getBuffer(false);
            $matches = \strspn($buffer, "\x20\x0A\x0C\x0D\x09\x00", $this->streamReader->getOffset());
            if ($matches > 0) {
                $this->streamReader->addOffset($matches);
            }
            // Offset at/after the buffer end: the white space may continue, so go on.
        } while ($this->streamReader->getOffset() >= $this->streamReader->getBufferLength());

        return true;
    }
}