1<?php
2/**
3 * Handlebars tokenizer (based on mustache)
4 *
5 * @category  Xamin
6 * @package   Handlebars
7 * @author    Justin Hileman <dontknow@example.org>
8 * @author    fzerorubigd <fzerorubigd@gmail.com>
9 * @author    Behrooz Shabani <everplays@gmail.com>
10 * @author    Mardix <https://github.com/mardix>
11 * @copyright 2012 (c) ParsPooyesh Co
12 * @copyright 2013 (c) Behrooz Shabani
13 * @copyright 2013 (c) Mardix
14 * @license   MIT
15 * @link      http://voodoophp.org/docs/handlebars
16 */
17
18namespace Handlebars;
19
/**
 * Character-by-character tokenizer for Handlebars/Mustache templates.
 *
 * The tokenizer is a small finite state machine: it walks the template one
 * byte at a time, collecting plain text into a buffer and emitting a token
 * whenever it reaches a tag boundary or a newline. Whitespace-only lines that
 * contain nothing but non-interpolated tags ("standalone" tags) have their
 * text tokens replaced with null so they do not show up in the output.
 */
class Tokenizer
{

    // Finite state machine states
    const IN_TEXT = 0;
    const IN_TAG_TYPE = 1;
    const IN_TAG = 2;

    // Token types
    const T_SECTION = '#';
    const T_INVERTED = '^';
    const T_END_SECTION = '/';
    const T_COMMENT = '!';
    // XXX: remove partials support from tokenizer and make it a helper?
    const T_PARTIAL = '>';
    const T_PARTIAL_2 = '<';
    const T_DELIM_CHANGE = '=';
    const T_ESCAPED = '_v';
    const T_UNESCAPED = '{';
    const T_UNESCAPED_2 = '&';
    const T_TEXT = '_t';

    // Valid token types (used as a lookup set: the keys matter, not the values)
    private $tagTypes = [
        self::T_SECTION => true,
        self::T_INVERTED => true,
        self::T_END_SECTION => true,
        self::T_COMMENT => true,
        self::T_PARTIAL => true,
        self::T_PARTIAL_2 => true,
        self::T_DELIM_CHANGE => true,
        self::T_ESCAPED => true,
        self::T_UNESCAPED => true,
        self::T_UNESCAPED_2 => true,
    ];

    // Interpolated tags: tags that produce output, so a line holding one of
    // them is never treated as a whitespace-only (standalone) line.
    private $interpolatedTags = [
        self::T_ESCAPED => true,
        self::T_UNESCAPED => true,
        self::T_UNESCAPED_2 => true,
    ];

    // Token properties (keys of the token arrays returned by scan())
    const TYPE = 'type';
    const NAME = 'name';
    const OTAG = 'otag';
    const CTAG = 'ctag';
    const INDEX = 'index';
    const END = 'end';
    const INDENT = 'indent';
    const NODES = 'nodes';
    const VALUE = 'value';
    const ARGS = 'args';

    // Current state of the state machine (one of the IN_* constants)
    protected $state;
    // Type of the tag currently being read (one of the T_* constants)
    protected $tagType;
    // Not used by the tokenizer itself; only cleared by reset()
    protected $tag;
    // Characters collected since the last emitted token
    protected $buffer;
    // Tokens emitted so far
    protected $tokens;
    // Offset recorded for the last tag on the current line, or false if none
    protected $seenTag;
    // Index into $tokens of the first token belonging to the current line
    protected $lineStart;
    // Current opening delimiter
    protected $otag;
    // Current closing delimiter
    protected $ctag;

    /**
     * Scan and tokenize template source.
     *
     * Text tokens carry TYPE and VALUE. Tag tokens carry TYPE, NAME, OTAG,
     * CTAG and INDEX; section and partial tags additionally carry ARGS
     * (everything after the first space inside the tag, or an empty string).
     * INDEX is the offset just past the closing delimiter, except for
     * end-section tags where it is the offset at which the tag starts.
     * The returned list may contain null entries where whitespace around
     * standalone tags was filtered out.
     *
     * @param string $text       Mustache template source to tokenize
     * @param string $delimiters Optional, pass opening and closing delimiters
     *                           separated by a single space
     *
     * @return array Set of Mustache tokens
     */
    public function scan($text, $delimiters = null)
    {
        if ($text instanceof HandlebarsString) {
            $text = $text->getString();
        }

        $this->reset();

        if ($delimiters !== null && $delimiters = trim($delimiters)) {
            list($otag, $ctag) = explode(' ', $delimiters);
            $this->otag = $otag;
            $this->ctag = $ctag;
        }

        // Cached so the hot loop can reject most characters with a single
        // comparison; refreshed whenever the delimiters change.
        $openingTagLength = strlen($this->otag);
        $closingTagLength = strlen($this->ctag);
        $firstOpeningTagCharacter = $this->otag[0];
        $firstClosingTagCharacter = $this->ctag[0];

        $len = strlen($text);

        for ($i = 0; $i < $len; $i++) {

            $character = $text[$i];

            switch ($this->state) {

                case self::IN_TEXT:
                    if ($character === $firstOpeningTagCharacter
                        && $this->tagChange($this->otag, $text, $i, $openingTagLength)
                    ) {
                        // Step back so the next iteration lands on the first
                        // character of the opening tag again, now in the
                        // IN_TAG_TYPE state.
                        $i--;
                        $this->flushBuffer();
                        $this->state = self::IN_TAG_TYPE;
                    } elseif ($character === "\n") {
                        $this->filterLine();
                    } else {
                        $this->buffer .= $character;
                    }
                    break;

                case self::IN_TAG_TYPE:

                    // Move to the last character of the opening tag.
                    $i += $openingTagLength - 1;

                    // Look at the character right after the opening tag. The
                    // template may end immediately after the delimiter, in
                    // which case there is nothing to read.
                    $nextCharacter = ($i + 1 < $len) ? $text[$i + 1] : '';

                    if ($nextCharacter !== '' && isset($this->tagTypes[$nextCharacter])) {
                        $tag = $nextCharacter;
                        $this->tagType = $tag;
                    } else {
                        // No type marker: a plain escaped variable tag.
                        $tag = null;
                        $this->tagType = self::T_ESCAPED;
                    }

                    if ($this->tagType === self::T_DELIM_CHANGE) {
                        $i = $this->changeDelimiters($text, $i);
                        $openingTagLength = strlen($this->otag);
                        $closingTagLength = strlen($this->ctag);
                        $firstOpeningTagCharacter = $this->otag[0];
                        $firstClosingTagCharacter = $this->ctag[0];

                        $this->state = self::IN_TEXT;
                    } else {
                        if ($tag !== null) {
                            // Skip the type marker itself.
                            $i++;
                        }
                        $this->state = self::IN_TAG;
                    }
                    $this->seenTag = $i;
                    break;

                default:
                    if ($character === $firstClosingTagCharacter
                        && $this->tagChange($this->ctag, $text, $i, $closingTagLength)
                    ) {
                        // Sections (Helpers) can accept parameters
                        // Same thing for Partials (little known fact)
                        $acceptsArguments = in_array(
                            $this->tagType,
                            [self::T_SECTION, self::T_PARTIAL, self::T_PARTIAL_2],
                            true
                        );
                        if ($acceptsArguments) {
                            // First word is the name, the remainder the args.
                            $newBuffer = explode(' ', trim($this->buffer), 2);
                            $args = '';
                            if (count($newBuffer) === 2) {
                                $args = $newBuffer[1];
                            }
                            $this->buffer = $newBuffer[0];
                        }

                        $t = [
                            self::TYPE => $this->tagType,
                            self::NAME => trim($this->buffer),
                            self::OTAG => $this->otag,
                            self::CTAG => $this->ctag,
                            self::INDEX => ($this->tagType === self::T_END_SECTION) ?
                                $this->seenTag - $openingTagLength :
                                $i + $closingTagLength,
                        ];
                        // $args only exists for tags that accept arguments; it
                        // is unset again below so it cannot leak to the next tag.
                        if (isset($args)) {
                            $t[self::ARGS] = $args;
                        }
                        $this->tokens[] = $t;
                        unset($t);
                        unset($args);
                        $this->buffer = '';
                        // Move to the last character of the closing tag.
                        $i += $closingTagLength - 1;
                        $this->state = self::IN_TEXT;

                        if ($this->tagType === self::T_UNESCAPED) {
                            if ($this->ctag === '}}') {
                                // Skip the third brace of `}}}`.
                                $i++;
                            } else {
                                // Clean up `{{{ tripleStache }}}` style tokens.
                                $lastIndex = count($this->tokens) - 1;
                                $lastName = $this->tokens[$lastIndex][self::NAME];
                                if (substr($lastName, -1) === '}') {
                                    $this->tokens[$lastIndex][self::NAME] = trim(
                                        substr($lastName, 0, -1)
                                    );
                                }
                            }
                        }
                    } else {
                        $this->buffer .= $character;
                    }
                    break;
            }

        }

        // Flush whatever is left and filter the final line; no newline token
        // is appended for it.
        $this->filterLine(true);

        return $this->tokens;
    }

    /**
     * Helper function to reset tokenizer internal state.
     *
     * Restores the default `{{` / `}}` delimiters and clears all tokens and
     * buffers so the instance can be reused for another scan.
     *
     * @return void
     */
    protected function reset()
    {
        $this->state = self::IN_TEXT;
        $this->tagType = null;
        $this->tag = null;
        $this->buffer = '';
        $this->tokens = [];
        $this->seenTag = false;
        $this->lineStart = 0;
        $this->otag = '{{';
        $this->ctag = '}}';
    }

    /**
     * Flush the current buffer to a token.
     *
     * Emits a text token holding the buffered characters and clears the
     * buffer. Nothing is emitted when the buffer holds no characters.
     *
     * @return void
     */
    protected function flushBuffer()
    {
        // Compare against the empty string rather than using empty():
        // empty('0') is true, which would silently keep a literal "0" in the
        // buffer and glue it onto whatever is read next.
        if ((string) $this->buffer !== '') {
            $this->tokens[] = [
                self::TYPE => self::T_TEXT,
                self::VALUE => $this->buffer
            ];
            $this->buffer = '';
        }
    }

    /**
     * Test whether the current line is entirely made up of whitespace.
     *
     * Inspects the tokens emitted since the start of the current line. The
     * line does not count as whitespace when it holds an interpolated tag or
     * a text token containing a non-whitespace character.
     *
     * @return boolean True if the current line is all whitespace
     */
    protected function lineIsWhitespace()
    {
        $tokensCount = count($this->tokens);
        for ($j = $this->lineStart; $j < $tokensCount; $j++) {
            $token = $this->tokens[$j];
            if (isset($this->tagTypes[$token[self::TYPE]])) {
                if (isset($this->interpolatedTags[$token[self::TYPE]])) {
                    return false;
                }
            } elseif ($token[self::TYPE] === self::T_TEXT) {
                if (preg_match('/\S/', $token[self::VALUE])) {
                    return false;
                }
            }
        }

        return true;
    }

    /**
     * Filter out whitespace-only lines and store indent levels for partials.
     *
     * When the current line contains a tag and is otherwise whitespace, every
     * text token on it is replaced with null; text that directly precedes a
     * `>` partial is first recorded on that partial as its INDENT. Otherwise
     * a newline text token is appended (unless suppressed).
     *
     * @param bool $noNewLine Suppress the newline? (default: false)
     *
     * @return void
     */
    protected function filterLine($noNewLine = false)
    {
        $this->flushBuffer();
        if ($this->seenTag && $this->lineIsWhitespace()) {
            $tokensCount = count($this->tokens);
            for ($j = $this->lineStart; $j < $tokensCount; $j++) {
                if ($this->tokens[$j][self::TYPE] === self::T_TEXT) {
                    if (isset($this->tokens[$j + 1])
                        && $this->tokens[$j + 1][self::TYPE] === self::T_PARTIAL
                    ) {
                        $this->tokens[$j + 1][self::INDENT]
                            = $this->tokens[$j][self::VALUE];
                    }

                    // The slot is kept (as null) so token positions stay stable.
                    $this->tokens[$j] = null;
                }
            }
        } elseif (!$noNewLine) {
            $this->tokens[] = [self::TYPE => self::T_TEXT, self::VALUE => "\n"];
        }

        // Start a fresh line.
        $this->seenTag = false;
        $this->lineStart = count($this->tokens);
    }

    /**
     * Change the current Mustache delimiters. Set new `otag` and `ctag` values.
     *
     * Reads the text between the `=` that follows the opening tag and the
     * `=` + current closing tag sequence, and splits it on a single space
     * into the new opening and closing delimiters.
     *
     * NOTE(review): a delimiter tag that is never closed, or that does not
     * contain exactly one space-separated pair, is not validated here.
     *
     * @param string $text  Mustache template source
     * @param int    $index Current tokenizer index
     *
     * @return int New index value (last character of the delimiter-change tag)
     */
    protected function changeDelimiters($text, $index)
    {
        $startIndex = strpos($text, '=', $index) + 1;
        $close = '=' . $this->ctag;
        $closeIndex = strpos($text, $close, $index);

        list($otag, $ctag) = explode(
            ' ',
            trim(substr($text, $startIndex, $closeIndex - $startIndex))
        );
        $this->otag = $otag;
        $this->ctag = $ctag;

        return $closeIndex + strlen($close) - 1;
    }

    /**
     * Test whether it's time to change tags.
     *
     * @param string $tag       Delimiter to look for (opening or closing tag)
     * @param string $text      Mustache template source
     * @param int    $index     Current tokenizer index
     * @param int    $tagLength Length of the opening/closing tag string
     *
     * @return boolean True if $text contains $tag exactly at $index
     */
    protected function tagChange($tag, $text, $index, $tagLength)
    {
        return substr($text, $index, $tagLength) === $tag;
    }

}
354