<?php
/**
 * Handlebars tokenizer (based on mustache)
 *
 * @category  Xamin
 * @package   Handlebars
 * @author    Justin Hileman <dontknow@example.org>
 * @author    fzerorubigd <fzerorubigd@gmail.com>
 * @author    Behrooz Shabani <everplays@gmail.com>
 * @author    Mardix <https://github.com/mardix>
 * @copyright 2012 (c) ParsPooyesh Co
 * @copyright 2013 (c) Behrooz Shabani
 * @copyright 2013 (c) Mardix
 * @license   MIT
 * @link      http://voodoophp.org/docs/handlebars
 */

namespace Handlebars;

/**
 * Splits a template string into a flat list of tokens.
 *
 * The tokenizer is a small finite state machine that walks the template one
 * byte at a time. Plain text is collected into text tokens; everything between
 * the opening and closing delimiters (`{{` and `}}` by default) becomes a tag
 * token whose type is taken from the character following the opening
 * delimiter.
 */
class Tokenizer
{

    // Finite state machine states
    const IN_TEXT = 0;
    const IN_TAG_TYPE = 1;
    const IN_TAG = 2;

    // Token types
    const T_SECTION = '#';
    const T_INVERTED = '^';
    const T_END_SECTION = '/';
    const T_COMMENT = '!';
    // XXX: remove partials support from tokenizer and make it a helper?
    const T_PARTIAL = '>';
    const T_PARTIAL_2 = '<';
    const T_DELIM_CHANGE = '=';
    const T_ESCAPED = '_v';
    const T_UNESCAPED = '{';
    const T_UNESCAPED_2 = '&';
    const T_TEXT = '_t';

    // Valid token types (used as a lookup set, the values are irrelevant)
    private $tagTypes = [
        self::T_SECTION => true,
        self::T_INVERTED => true,
        self::T_END_SECTION => true,
        self::T_COMMENT => true,
        self::T_PARTIAL => true,
        self::T_PARTIAL_2 => true,
        self::T_DELIM_CHANGE => true,
        self::T_ESCAPED => true,
        self::T_UNESCAPED => true,
        self::T_UNESCAPED_2 => true,
    ];

    // Interpolated tags: tags that produce output, so a line containing one
    // is never treated as a whitespace-only line
    private $interpolatedTags = [
        self::T_ESCAPED => true,
        self::T_UNESCAPED => true,
        self::T_UNESCAPED_2 => true,
    ];

    // Token properties (keys of the token arrays returned by scan())
    const TYPE = 'type';
    const NAME = 'name';
    const OTAG = 'otag';
    const CTAG = 'ctag';
    const INDEX = 'index';
    const END = 'end';
    const INDENT = 'indent';
    const NODES = 'nodes';
    const VALUE = 'value';
    const ARGS = 'args';

    protected $state;
    protected $tagType;
    protected $tag;
    protected $buffer;
    protected $tokens;
    protected $seenTag;
    protected $lineStart;
    protected $otag;
    protected $ctag;

    /**
     * Scan and tokenize template source.
     *
     * All internal state is reset at the start of every call, so one instance
     * can be reused for several templates.
     *
     * Text tokens carry TYPE and VALUE. Tag tokens carry TYPE, NAME, OTAG,
     * CTAG and INDEX; section and partial tags additionally carry ARGS (the
     * part of the tag after the first space, or an empty string). Text tokens
     * that sit on a line made up only of whitespace and non-interpolated tags
     * are replaced by null entries in the returned list.
     *
     * @param string $text       Mustache template source to tokenize; an
     *                           object that is an instance of HandlebarsString
     *                           is unwrapped through getString()
     * @param string $delimiters Optional, pass opening and closing delimiters
     *                           separated by a single space
     *
     * @return array Set of Mustache tokens
     */
    public function scan($text, $delimiters = null)
    {
        if ($text instanceof HandlebarsString) {
            $text = $text->getString();
        }

        $this->reset();

        // The assignment is intentional: a blank delimiter string is ignored.
        if ($delimiters !== null && $delimiters = trim($delimiters)) {
            list($otag, $ctag) = explode(' ', $delimiters);
            $this->otag = $otag;
            $this->ctag = $ctag;
        }

        // Cached per-delimiter values; refreshed after a delimiter change tag.
        $openingTagLength = strlen($this->otag);
        $closingTagLength = strlen($this->ctag);
        $firstOpeningTagCharacter = $this->otag[0];
        $firstClosingTagCharacter = $this->ctag[0];

        $len = strlen($text);

        for ($i = 0; $i < $len; $i++) {

            $character = $text[$i];

            switch ($this->state) {

                case self::IN_TEXT:
                    if ($character === $firstOpeningTagCharacter
                        && $this->tagChange($this->otag, $text, $i, $openingTagLength)
                    ) {
                        // Step back so the next iteration re-reads the start
                        // of the opening tag in the IN_TAG_TYPE state.
                        $i--;
                        $this->flushBuffer();
                        $this->state = self::IN_TAG_TYPE;
                    } else {
                        if ($character == "\n") {
                            $this->filterLine();
                        } else {
                            $this->buffer .= $character;
                        }
                    }
                    break;

                case self::IN_TAG_TYPE:

                    // Move to the last character of the opening tag.
                    $i += $openingTagLength - 1;

                    // The template may end right after the opening tag, so
                    // the look-ahead has to be bounds checked.
                    $next = ($i + 1 < $len) ? $text[$i + 1] : '';
                    if (isset($this->tagTypes[$next])) {
                        $tag = $next;
                        $this->tagType = $tag;
                    } else {
                        $tag = null;
                        $this->tagType = self::T_ESCAPED;
                    }

                    if ($this->tagType === self::T_DELIM_CHANGE) {
                        $i = $this->changeDelimiters($text, $i);
                        $openingTagLength = strlen($this->otag);
                        $closingTagLength = strlen($this->ctag);
                        $firstOpeningTagCharacter = $this->otag[0];
                        $firstClosingTagCharacter = $this->ctag[0];

                        $this->state = self::IN_TEXT;
                    } else {
                        if ($tag !== null) {
                            // Skip the tag type character itself.
                            $i++;
                        }
                        $this->state = self::IN_TAG;
                    }
                    $this->seenTag = $i;
                    break;

                default:
                    // self::IN_TAG: collect the tag content until the closing
                    // delimiter shows up.
                    if ($character === $firstClosingTagCharacter
                        && $this->tagChange($this->ctag, $text, $i, $closingTagLength)
                    ) {
                        // Sections (Helpers) can accept parameters
                        // Same thing for Partials (little known fact)
                        $takesArguments = in_array(
                            $this->tagType,
                            [
                                self::T_SECTION,
                                self::T_PARTIAL,
                                self::T_PARTIAL_2,
                            ],
                            true
                        );
                        if ($takesArguments) {
                            $newBuffer = explode(' ', trim($this->buffer), 2);
                            $args = '';
                            if (count($newBuffer) == 2) {
                                $args = $newBuffer[1];
                            }
                            $this->buffer = $newBuffer[0];
                        }
                        $t = [
                            self::TYPE => $this->tagType,
                            self::NAME => trim($this->buffer),
                            self::OTAG => $this->otag,
                            self::CTAG => $this->ctag,
                            // End-section tags record where the tag starts,
                            // every other tag records where it ends.
                            self::INDEX => ($this->tagType == self::T_END_SECTION) ?
                                $this->seenTag - $openingTagLength :
                                $i + strlen($this->ctag),
                        ];
                        if (isset($args)) {
                            $t[self::ARGS] = $args;
                        }
                        $this->tokens[] = $t;
                        unset($t);
                        unset($args);
                        $this->buffer = '';
                        $i += strlen($this->ctag) - 1;
                        $this->state = self::IN_TEXT;
                        if ($this->tagType == self::T_UNESCAPED) {
                            if ($this->ctag == '}}') {
                                // Swallow the third brace of `}}}`.
                                $i++;
                            } else {
                                // Clean up `{{{ tripleStache }}}` style tokens.
                                $lastIndex = count($this->tokens) - 1;
                                $lastName = $this->tokens[$lastIndex][self::NAME];
                                if (substr($lastName, -1) === '}') {
                                    $this->tokens[$lastIndex][self::NAME] = trim(
                                        substr($lastName, 0, -1)
                                    );
                                }
                            }
                        }
                    } else {
                        $this->buffer .= $character;
                    }
                    break;
            }

        }

        // Flush whatever is left without appending a trailing newline token.
        $this->filterLine(true);

        return $this->tokens;
    }

    /**
     * Helper function to reset tokenizer internal state.
     *
     * Restores the default `{{` / `}}` delimiters as well.
     *
     * @return void
     */
    protected function reset()
    {
        $this->state = self::IN_TEXT;
        $this->tagType = null;
        $this->tag = null;
        $this->buffer = '';
        $this->tokens = [];
        $this->seenTag = false;
        $this->lineStart = 0;
        $this->otag = '{{';
        $this->ctag = '}}';
    }

    /**
     * Flush the current buffer to a token.
     *
     * Appends a text token holding the buffered characters and clears the
     * buffer. Nothing happens when the buffer holds no characters.
     *
     * @return void
     */
    protected function flushBuffer()
    {
        // Compare against the empty string instead of using empty(): empty()
        // treats the string "0" as empty, which would keep a literal "0" in
        // the buffer and leak it into the following token.
        if ((string) $this->buffer !== '') {
            $this->tokens[] = [
                self::TYPE => self::T_TEXT,
                self::VALUE => $this->buffer
            ];
            $this->buffer = '';
        }
    }

    /**
     * Test whether the current line is entirely made up of whitespace.
     *
     * Only tokens added since the start of the current line are inspected.
     * Tags that are not interpolated (sections, comments, partials, ...) do
     * not count as content.
     *
     * @return boolean True if the current line is all whitespace
     */
    protected function lineIsWhitespace()
    {
        $tokensCount = count($this->tokens);
        for ($j = $this->lineStart; $j < $tokensCount; $j++) {
            $token = $this->tokens[$j];
            if (isset($this->tagTypes[$token[self::TYPE]])) {
                if (isset($this->interpolatedTags[$token[self::TYPE]])) {
                    return false;
                }
            } elseif ($token[self::TYPE] == self::T_TEXT) {
                if (preg_match('/\S/', $token[self::VALUE])) {
                    return false;
                }
            }
        }

        return true;
    }

    /**
     * Filter out whitespace-only lines and store indent levels for partials.
     *
     * When the current line contains a tag and is otherwise whitespace, its
     * text tokens are replaced by null, and text directly preceding a
     * `>` partial is stored as that partial's INDENT. Otherwise a newline
     * text token is appended unless $noNewLine is set. In both cases the
     * next line starts after the tokens collected so far.
     *
     * @param bool $noNewLine Suppress the newline? (default: false)
     *
     * @return void
     */
    protected function filterLine($noNewLine = false)
    {
        $this->flushBuffer();
        if ($this->seenTag && $this->lineIsWhitespace()) {
            $tokensCount = count($this->tokens);
            for ($j = $this->lineStart; $j < $tokensCount; $j++) {
                if ($this->tokens[$j][self::TYPE] == self::T_TEXT) {
                    if (isset($this->tokens[$j + 1])
                        && $this->tokens[$j + 1][self::TYPE] == self::T_PARTIAL
                    ) {
                        $this->tokens[$j + 1][self::INDENT]
                            = $this->tokens[$j][self::VALUE];
                    }

                    $this->tokens[$j] = null;
                }
            }
        } elseif (!$noNewLine) {
            $this->tokens[] = [self::TYPE => self::T_TEXT, self::VALUE => "\n"];
        }

        $this->seenTag = false;
        $this->lineStart = count($this->tokens);
    }

    /**
     * Change the current Mustache delimiters. Set new `otag` and `ctag` values.
     *
     * Reads the delimiter pair found between the first `=` at or after $index
     * and the closing `=` followed by the current closing tag.
     *
     * NOTE(review): a missing closing `=ctag` (strpos() returning false) and a
     * pair that is not separated by exactly one space are not handled here.
     *
     * @param string $text  Mustache template source
     * @param int    $index Current tokenizer index
     *
     * @return int New index value
     */
    protected function changeDelimiters($text, $index)
    {
        $startIndex = strpos($text, '=', $index) + 1;
        $close = '=' . $this->ctag;
        $closeIndex = strpos($text, $close, $index);

        list($otag, $ctag) = explode(
            ' ',
            trim(substr($text, $startIndex, $closeIndex - $startIndex))
        );
        $this->otag = $otag;
        $this->ctag = $ctag;

        return $closeIndex + strlen($close) - 1;
    }

    /**
     * Test whether it's time to change tags.
     *
     * @param string $tag       Delimiter (opening or closing tag) to look for
     * @param string $text      Mustache template source
     * @param int    $index     Current tokenizer index
     * @param int    $tagLength Length of the opening/closing tag string
     *
     * @return boolean True if $text contains $tag at position $index
     */
    protected function tagChange($tag, $text, $index, $tagLength)
    {
        return substr($text, $index, $tagLength) === $tag;
    }

}