<?php

/*
 * This file is part of Mustache.php.
 *
 * (c) 2010-2017 Justin Hileman
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

/**
 * Mustache Tokenizer class.
 *
 * This class is responsible for turning raw template source into a set of Mustache tokens.
 *
 * The tokenizer is a small finite state machine: it is either copying plain
 * text into a buffer (IN_TEXT), looking at the character right after an
 * opening delimiter to decide what kind of tag follows (IN_TAG_TYPE), or
 * collecting the contents of a tag until the closing delimiter (IN_TAG).
 */
class Mustache_Tokenizer
{
    // Finite state machine states
    const IN_TEXT     = 0;
    const IN_TAG_TYPE = 1;
    const IN_TAG      = 2;

    // Token types
    const T_SECTION      = '#';
    const T_INVERTED     = '^';
    const T_END_SECTION  = '/';
    const T_COMMENT      = '!';
    const T_PARTIAL      = '>';
    const T_PARENT       = '<';
    const T_DELIM_CHANGE = '=';
    const T_ESCAPED      = '_v';
    const T_UNESCAPED    = '{';
    const T_UNESCAPED_2  = '&';
    const T_TEXT         = '_t';
    const T_PRAGMA       = '%';
    const T_BLOCK_VAR    = '$';
    const T_BLOCK_ARG    = '$arg';

    // Valid token types
    private static $tagTypes = array(
        self::T_SECTION      => true,
        self::T_INVERTED     => true,
        self::T_END_SECTION  => true,
        self::T_COMMENT      => true,
        self::T_PARTIAL      => true,
        self::T_PARENT       => true,
        self::T_DELIM_CHANGE => true,
        self::T_ESCAPED      => true,
        self::T_UNESCAPED    => true,
        self::T_UNESCAPED_2  => true,
        self::T_PRAGMA       => true,
        self::T_BLOCK_VAR    => true,
    );

    // Token properties
    const TYPE    = 'type';
    const NAME    = 'name';
    const OTAG    = 'otag';
    const CTAG    = 'ctag';
    const LINE    = 'line';
    const INDEX   = 'index';
    const END     = 'end';
    const INDENT  = 'indent';
    const NODES   = 'nodes';
    const VALUE   = 'value';
    const FILTERS = 'filters';

    // Current state machine state (one of the IN_* constants).
    private $state;
    // Type of the tag currently being scanned (one of the T_* constants).
    private $tagType;
    // Characters collected so far for the current text run or tag body.
    private $buffer;
    // Tokens emitted so far.
    private $tokens;
    // Scanner index recorded right after the most recent tag type was processed.
    private $seenTag;
    // Zero-based line counter, advanced on every "\n" seen in text.
    private $line;

    // Opening delimiter, its first character, and its length in bytes.
    private $otag;
    private $otagChar;
    private $otagLen;

    // Closing delimiter, its first character, and its length in bytes.
    private $ctag;
    private $ctagChar;
    private $ctagLen;

    /**
     * Scan and tokenize template source.
     *
     * @throws Mustache_Exception_SyntaxException when mismatched section tags are encountered,
     *                                            or when a tag is never closed
     * @throws Mustache_Exception_InvalidArgumentException when $delimiters string is invalid
     *
     * @param string $text       Mustache template source to tokenize
     * @param string $delimiters Optionally, pass initial opening and closing delimiters (default: null)
     *
     * @return array Set of Mustache tokens
     */
    public function scan($text, $delimiters = null)
    {
        // Setting mbstring.func_overload makes things *really* slow.
        // Let's do everyone a favor and scan this string as ASCII instead.
        //
        // @codeCoverageIgnoreStart
        $encoding = null;
        if (function_exists('mb_internal_encoding') && (ini_get('mbstring.func_overload') & 2)) {
            $encoding = mb_internal_encoding();
            mb_internal_encoding('ASCII');
        }
        // @codeCoverageIgnoreEnd

        try {
            $tokens = $this->tokenize($text, $delimiters);
        } catch (Exception $e) {
            // Do not leave the process-wide encoding switched to ASCII when
            // the template turns out to be invalid.
            // @codeCoverageIgnoreStart
            if ($encoding) {
                mb_internal_encoding($encoding);
            }
            // @codeCoverageIgnoreEnd

            throw $e;
        }

        // Restore the user's encoding...
        // @codeCoverageIgnoreStart
        if ($encoding) {
            mb_internal_encoding($encoding);
        }
        // @codeCoverageIgnoreEnd

        return $tokens;
    }

    /**
     * Run the state machine over the template source.
     *
     * @throws Mustache_Exception_SyntaxException when delimiters are mismatched or a tag is never closed
     * @throws Mustache_Exception_InvalidArgumentException when $delimiters string is invalid
     *
     * @param string      $text       Mustache template source to tokenize
     * @param string|null $delimiters Initial opening and closing delimiters, or null for the defaults
     *
     * @return array Set of Mustache tokens
     */
    private function tokenize($text, $delimiters)
    {
        $this->reset();

        // Passing null to trim() is deprecated as of PHP 8.1, so only trim real values.
        if ($delimiters !== null) {
            $delimiters = trim($delimiters);
            if ($delimiters) {
                $this->setDelimiters($delimiters);
            }
        }

        $len      = strlen($text);
        $tagStart = 0; // offset of the opening delimiter of the tag being scanned
        for ($i = 0; $i < $len; $i++) {
            switch ($this->state) {
                case self::IN_TEXT:
                    $char = $text[$i];
                    // Test whether it's time to change tags.
                    if ($char === $this->otagChar && substr($text, $i, $this->otagLen) === $this->otag) {
                        $tagStart = $i;
                        // Step back so the next iteration sees the same index in the IN_TAG_TYPE state.
                        $i--;
                        $this->flushBuffer();
                        $this->state = self::IN_TAG_TYPE;
                    } else {
                        $this->buffer .= $char;
                        if ($char === "\n") {
                            // Text tokens never span lines.
                            $this->flushBuffer();
                            $this->line++;
                        }
                    }
                    break;

                case self::IN_TAG_TYPE:
                    // Skip to the last character of the opening delimiter.
                    $i += $this->otagLen - 1;
                    // The template may end right after the opening delimiter; do not read past the end.
                    $char = ($i + 1 < $len) ? $text[$i + 1] : '';
                    if (isset(self::$tagTypes[$char])) {
                        $tag = $char;
                        $this->tagType = $tag;
                    } else {
                        $tag = null;
                        $this->tagType = self::T_ESCAPED;
                    }

                    if ($this->tagType === self::T_DELIM_CHANGE) {
                        $i = $this->changeDelimiters($text, $i);
                        $this->state = self::IN_TEXT;
                    } elseif ($this->tagType === self::T_PRAGMA) {
                        $i = $this->addPragma($text, $i);
                        $this->state = self::IN_TEXT;
                    } else {
                        if ($tag !== null) {
                            // Consume the tag type character.
                            $i++;
                        }
                        $this->state = self::IN_TAG;
                    }
                    $this->seenTag = $i;
                    break;

                default:
                    $char = $text[$i];
                    // Test whether it's time to change tags.
                    if ($char === $this->ctagChar && substr($text, $i, $this->ctagLen) === $this->ctag) {
                        $token = array(
                            self::TYPE  => $this->tagType,
                            self::NAME  => trim($this->buffer),
                            self::OTAG  => $this->otag,
                            self::CTAG  => $this->ctag,
                            self::LINE  => $this->line,
                            self::INDEX => ($this->tagType === self::T_END_SECTION) ? $this->seenTag - $this->otagLen : $i + $this->ctagLen,
                        );

                        if ($this->tagType === self::T_UNESCAPED) {
                            // Clean up `{{{ tripleStache }}}` style tokens.
                            if ($this->ctag === '}}') {
                                if (($i + 2 < $len) && $text[$i + 2] === '}') {
                                    // Swallow the third closing brace.
                                    $i++;
                                } else {
                                    $msg = sprintf(
                                        'Mismatched tag delimiters: %s on line %d',
                                        $token[self::NAME],
                                        $token[self::LINE]
                                    );

                                    throw new Mustache_Exception_SyntaxException($msg, $token);
                                }
                            } else {
                                // With custom delimiters the extra `}` ends up at the end of the name.
                                $lastName = $token[self::NAME];
                                if (substr($lastName, -1) === '}') {
                                    $token[self::NAME] = trim(substr($lastName, 0, -1));
                                } else {
                                    $msg = sprintf(
                                        'Mismatched tag delimiters: %s on line %d',
                                        $token[self::NAME],
                                        $token[self::LINE]
                                    );

                                    throw new Mustache_Exception_SyntaxException($msg, $token);
                                }
                            }
                        }

                        $this->buffer = '';
                        $i += $this->ctagLen - 1;
                        $this->state = self::IN_TEXT;
                        $this->tokens[] = $token;
                    } else {
                        $this->buffer .= $char;
                    }
                    break;
            }
        }

        // Running out of input while still inside a tag means the tag was
        // never closed. Emitting its contents as text would silently drop
        // the opening delimiter, so report it instead.
        if ($this->state !== self::IN_TEXT) {
            $this->throwUnclosedTagException($tagStart);
        }

        $this->flushBuffer();

        return $this->tokens;
    }

    /**
     * Helper function to reset tokenizer internal state.
     */
    private function reset()
    {
        $this->state   = self::IN_TEXT;
        $this->tagType = null;
        $this->buffer  = '';
        $this->tokens  = array();
        $this->seenTag = false;
        $this->line    = 0;

        $this->otag     = '{{';
        $this->otagChar = '{';
        $this->otagLen  = 2;

        $this->ctag     = '}}';
        $this->ctagChar = '}';
        $this->ctagLen  = 2;
    }

    /**
     * Flush the current buffer to a token.
     *
     * Nothing is emitted when the buffer is empty.
     */
    private function flushBuffer()
    {
        if (strlen($this->buffer) > 0) {
            $this->tokens[] = array(
                self::TYPE  => self::T_TEXT,
                self::LINE  => $this->line,
                self::VALUE => $this->buffer,
            );
            $this->buffer = '';
        }
    }

    /**
     * Change the current Mustache delimiters. Set new `otag` and `ctag` values.
     *
     * @throws Mustache_Exception_SyntaxException when delimiter string is invalid or the tag is never closed
     *
     * @param string $text  Mustache template source
     * @param int    $index Current tokenizer index (last character of the opening delimiter)
     *
     * @return int New index value
     */
    private function changeDelimiters($text, $index)
    {
        $startIndex = strpos($text, '=', $index) + 1;
        $close      = '=' . $this->ctag;
        $closeIndex = strpos($text, $close, $index);

        // Without a closing `=}}` there is nothing sensible to parse; the
        // index arithmetic below would otherwise run on `false`.
        if ($closeIndex === false) {
            $this->throwUnclosedTagException($index - $this->otagLen + 1);
        }

        $token = array(
            self::TYPE => self::T_DELIM_CHANGE,
            self::LINE => $this->line,
        );

        try {
            $this->setDelimiters(trim(substr($text, $startIndex, $closeIndex - $startIndex)));
        } catch (Mustache_Exception_InvalidArgumentException $e) {
            throw new Mustache_Exception_SyntaxException($e->getMessage(), $token);
        }

        $this->tokens[] = $token;

        // Index of the last character of the closing `=}}` sequence.
        return $closeIndex + strlen($close) - 1;
    }

    /**
     * Set the current Mustache `otag` and `ctag` delimiters.
     *
     * @throws Mustache_Exception_InvalidArgumentException when delimiter string is invalid
     *
     * @param string $delimiters Two whitespace-separated, whitespace-free delimiter strings
     */
    private function setDelimiters($delimiters)
    {
        if (!preg_match('/^\s*(\S+)\s+(\S+)\s*$/', $delimiters, $matches)) {
            throw new Mustache_Exception_InvalidArgumentException(sprintf('Invalid delimiters: %s', $delimiters));
        }

        list($_, $otag, $ctag) = $matches;

        $this->otag     = $otag;
        $this->otagChar = $otag[0];
        $this->otagLen  = strlen($otag);

        $this->ctag     = $ctag;
        $this->ctagChar = $ctag[0];
        $this->ctagLen  = strlen($ctag);
    }

    /**
     * Add pragma token.
     *
     * Pragmas are hoisted to the front of the template, so all pragma tokens
     * will appear at the front of the token list.
     *
     * @throws Mustache_Exception_SyntaxException when the pragma tag is never closed
     *
     * @param string $text  Mustache template source
     * @param int    $index Current tokenizer index (last character of the opening delimiter)
     *
     * @return int New index value
     */
    private function addPragma($text, $index)
    {
        $end = strpos($text, $this->ctag, $index);

        // An unclosed pragma used to yield a return value that pointed
        // *before* the tag, making the scanner re-read the same pragma forever.
        if ($end === false) {
            $this->throwUnclosedTagException($index - $this->otagLen + 1);
        }

        // Skip the last delimiter character and the `%` marker.
        $pragma = trim(substr($text, $index + 2, $end - $index - 2));

        // Pragmas are hoisted to the front of the template.
        array_unshift($this->tokens, array(
            self::TYPE => self::T_PRAGMA,
            self::NAME => $pragma,
            self::LINE => 0,
        ));

        return $end + $this->ctagLen - 1;
    }

    /**
     * Report a tag whose closing delimiter was never found.
     *
     * @throws Mustache_Exception_SyntaxException always
     *
     * @param int $tagStart Offset of the tag's opening delimiter in the template source
     */
    private function throwUnclosedTagException($tagStart)
    {
        $name = trim($this->buffer);
        if ($name !== '') {
            $msg = sprintf('Unclosed tag: %s on line %d', $name, $this->line);
        } else {
            $msg = sprintf('Unclosed tag on line %d', $this->line);
        }

        throw new Mustache_Exception_SyntaxException($msg, array(
            self::TYPE  => $this->tagType,
            self::NAME  => $name,
            self::OTAG  => $this->otag,
            self::CTAG  => $this->ctag,
            self::LINE  => $this->line,
            self::INDEX => $tagStart,
        ));
    }
}