1<?php
2/**
3 * This file is part of FPDI
4 *
5 * @package   setasign\Fpdi
6 * @copyright Copyright (c) 2020 Setasign GmbH & Co. KG (https://www.setasign.com)
7 * @license   http://opensource.org/licenses/mit-license The MIT License
8 */
9
10namespace setasign\Fpdi\PdfParser;
11
/**
 * A tokenizer class.
 *
 * Splits the bytes delivered by a StreamReader into tokens: single delimiter
 * characters are returned on their own, every other run of bytes is returned
 * up to (but not including) the next white-space or delimiter byte.
 * Tokens can also be pushed back onto an internal stack; they are handed out
 * again (last in, first out) before the stream is read any further.
 *
 * @package setasign\Fpdi\PdfParser
 */
class Tokenizer
{
    /**
     * The reader the tokens are extracted from.
     *
     * @var StreamReader
     */
    protected $streamReader;

    /**
     * A token stack.
     *
     * Tokens on this stack take precedence over the stream content in getNextToken().
     *
     * @var string[]
     */
    protected $stack = [];

    /**
     * Tokenizer constructor.
     *
     * @param StreamReader $streamReader The reader to take the bytes from.
     */
    public function __construct(StreamReader $streamReader)
    {
        $this->streamReader = $streamReader;
    }

    /**
     * Get the stream reader instance.
     *
     * @return StreamReader
     */
    public function getStreamReader()
    {
        return $this->streamReader;
    }

    /**
     * Clear the token stack.
     */
    public function clearStack()
    {
        $this->stack = [];
    }

    /**
     * Push a token onto the stack.
     *
     * The token pushed last is the first one returned by getNextToken().
     *
     * @param string $token
     */
    public function pushStack($token)
    {
        $this->stack[] = $token;
    }

    /**
     * Get next token.
     *
     * A token from the stack is returned first, if there is one. Otherwise leading
     * white-space bytes and comments (introduced by "%") are skipped and the next
     * token is read from the stream reader.
     *
     * @return string|false The token or false if no further byte can be read.
     */
    public function getNextToken()
    {
        $token = \array_pop($this->stack);
        if ($token !== null) {
            return $token;
        }

        // Comments are skipped in a loop instead of by recursion, so that a long run
        // of comment lines cannot grow the call stack.
        while (true) {
            $byte = $this->streamReader->readByte();
            if ($byte === false) {
                return false;
            }

            if (\in_array($byte, ["\x20", "\x0A", "\x0D", "\x0C", "\x09", "\x00"], true)) {
                if ($this->leapWhiteSpaces() === false) {
                    return false;
                }

                $byte = $this->streamReader->readByte();
                if ($byte === false) {
                    // Nothing left to read after the white spaces.
                    return false;
                }
            }

            if ($byte !== '%') {
                break;
            }

            // A comment: let the reader consume the line, then look for the next token.
            $this->streamReader->readLine();
        }

        // Delimiter characters are tokens on their own.
        if (\in_array($byte, ['/', '[', ']', '(', ')', '{', '}', '<', '>'], true)) {
            return $byte;
        }

        /* Regular token: search the buffer for the next white-space or delimiter byte.
         * This way is faster than checking single bytes.
         */
        $bufferOffset = $this->streamReader->getOffset();
        do {
            $lastBuffer = $this->streamReader->getBuffer(false);
            $pos = \strcspn(
                $lastBuffer,
                "\x00\x09\x0A\x0C\x0D\x20()<>[]{}/%",
                $bufferOffset
            );
        } while (
            // Break the loop if a delimiter or white space char is matched
            // in the current buffer or increase the buffers length
            $lastBuffer !== false &&
            (
                $bufferOffset + $pos === \strlen($lastBuffer) &&
                $this->streamReader->increaseLength()
            )
        );

        // The first byte of the token was already read above, so the token starts
        // one byte before $bufferOffset and is one byte longer than the matched span.
        $result = \substr($lastBuffer, $bufferOffset - 1, $pos + 1);
        $this->streamReader->setOffset($bufferOffset + $pos);

        return $result;
    }

    /**
     * Leap white spaces.
     *
     * Advances the offset of the stream reader past all consecutive white-space
     * bytes, asking the reader for more content whenever the offset reaches the
     * end of the current buffer.
     *
     * @return bool False if the stream reader cannot ensure any content, true otherwise.
     */
    public function leapWhiteSpaces()
    {
        do {
            if (!$this->streamReader->ensureContent()) {
                return false;
            }

            $buffer = $this->streamReader->getBuffer(false);
            $matches = \strspn($buffer, "\x20\x0A\x0C\x0D\x09\x00", $this->streamReader->getOffset());
            if ($matches > 0) {
                $this->streamReader->addOffset($matches);
            }

            // When the white spaces reach the end of the buffer, more may follow
            // in the next portion of content.
        } while ($this->streamReader->getOffset() >= $this->streamReader->getBufferLength());

        return true;
    }
}
162