1'use strict'; 2 3var lib = require('./lib'); 4var whitespaceChars = " \n\t\r\xA0"; 5var delimChars = '()[]{}%*-+~/#,:|.<>=!'; 6var intChars = '0123456789'; 7var BLOCK_START = '{%'; 8var BLOCK_END = '%}'; 9var VARIABLE_START = '{{'; 10var VARIABLE_END = '}}'; 11var COMMENT_START = '{#'; 12var COMMENT_END = '#}'; 13var TOKEN_STRING = 'string'; 14var TOKEN_WHITESPACE = 'whitespace'; 15var TOKEN_DATA = 'data'; 16var TOKEN_BLOCK_START = 'block-start'; 17var TOKEN_BLOCK_END = 'block-end'; 18var TOKEN_VARIABLE_START = 'variable-start'; 19var TOKEN_VARIABLE_END = 'variable-end'; 20var TOKEN_COMMENT = 'comment'; 21var TOKEN_LEFT_PAREN = 'left-paren'; 22var TOKEN_RIGHT_PAREN = 'right-paren'; 23var TOKEN_LEFT_BRACKET = 'left-bracket'; 24var TOKEN_RIGHT_BRACKET = 'right-bracket'; 25var TOKEN_LEFT_CURLY = 'left-curly'; 26var TOKEN_RIGHT_CURLY = 'right-curly'; 27var TOKEN_OPERATOR = 'operator'; 28var TOKEN_COMMA = 'comma'; 29var TOKEN_COLON = 'colon'; 30var TOKEN_TILDE = 'tilde'; 31var TOKEN_PIPE = 'pipe'; 32var TOKEN_INT = 'int'; 33var TOKEN_FLOAT = 'float'; 34var TOKEN_BOOLEAN = 'boolean'; 35var TOKEN_NONE = 'none'; 36var TOKEN_SYMBOL = 'symbol'; 37var TOKEN_SPECIAL = 'special'; 38var TOKEN_REGEX = 'regex'; 39function token(type, value, lineno, colno) { 40 return { 41 type: type, 42 value: value, 43 lineno: lineno, 44 colno: colno 45 }; 46} 47var Tokenizer = /*#__PURE__*/function () { 48 function Tokenizer(str, opts) { 49 this.str = str; 50 this.index = 0; 51 this.len = str.length; 52 this.lineno = 0; 53 this.colno = 0; 54 this.in_code = false; 55 opts = opts || {}; 56 var tags = opts.tags || {}; 57 this.tags = { 58 BLOCK_START: tags.blockStart || BLOCK_START, 59 BLOCK_END: tags.blockEnd || BLOCK_END, 60 VARIABLE_START: tags.variableStart || VARIABLE_START, 61 VARIABLE_END: tags.variableEnd || VARIABLE_END, 62 COMMENT_START: tags.commentStart || COMMENT_START, 63 COMMENT_END: tags.commentEnd || COMMENT_END 64 }; 65 this.trimBlocks = !!opts.trimBlocks; 66 this.lstripBlocks = !!opts.lstripBlocks; 67 } 68 var _proto = Tokenizer.prototype; 69 _proto.nextToken = function nextToken() { 70 var lineno = this.lineno; 71 var colno = this.colno; 72 var tok; 73 if (this.in_code) { 74 // Otherwise, if we are in a block parse it as code 75 var cur = this.current(); 76 if (this.isFinished()) { 77 // We have nothing else to parse 78 return null; 79 } else if (cur === '"' || cur === '\'') { 80 // We've hit a string 81 return token(TOKEN_STRING, this._parseString(cur), lineno, colno); 82 } else if (tok = this._extract(whitespaceChars)) { 83 // We hit some whitespace 84 return token(TOKEN_WHITESPACE, tok, lineno, colno); 85 } else if ((tok = this._extractString(this.tags.BLOCK_END)) || (tok = this._extractString('-' + this.tags.BLOCK_END))) { 86 // Special check for the block end tag 87 // 88 // It is a requirement that start and end tags are composed of 89 // delimiter characters (%{}[] etc), and our code always 90 // breaks on delimiters so we can assume the token parsing 91 // doesn't consume these elsewhere 92 this.in_code = false; 93 if (this.trimBlocks) { 94 cur = this.current(); 95 if (cur === '\n') { 96 // Skip newline 97 this.forward(); 98 } else if (cur === '\r') { 99 // Skip CRLF newline 100 this.forward(); 101 cur = this.current(); 102 if (cur === '\n') { 103 this.forward(); 104 } else { 105 // Was not a CRLF, so go back 106 this.back(); 107 } 108 } 109 } 110 return token(TOKEN_BLOCK_END, tok, lineno, colno); 111 } else if ((tok = this._extractString(this.tags.VARIABLE_END)) || (tok = this._extractString('-' + this.tags.VARIABLE_END))) { 112 // Special check for variable end tag (see above) 113 this.in_code = false; 114 return token(TOKEN_VARIABLE_END, tok, lineno, colno); 115 } else if (cur === 'r' && this.str.charAt(this.index + 1) === '/') { 116 // Skip past 'r/'. 117 this.forwardN(2); 118 119 // Extract until the end of the regex -- / ends it, \/ does not. 120 var regexBody = ''; 121 while (!this.isFinished()) { 122 if (this.current() === '/' && this.previous() !== '\\') { 123 this.forward(); 124 break; 125 } else { 126 regexBody += this.current(); 127 this.forward(); 128 } 129 } 130 131 // Check for flags. 132 // The possible flags are according to https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Global_Objects/RegExp) 133 var POSSIBLE_FLAGS = ['g', 'i', 'm', 'y']; 134 var regexFlags = ''; 135 while (!this.isFinished()) { 136 var isCurrentAFlag = POSSIBLE_FLAGS.indexOf(this.current()) !== -1; 137 if (isCurrentAFlag) { 138 regexFlags += this.current(); 139 this.forward(); 140 } else { 141 break; 142 } 143 } 144 return token(TOKEN_REGEX, { 145 body: regexBody, 146 flags: regexFlags 147 }, lineno, colno); 148 } else if (delimChars.indexOf(cur) !== -1) { 149 // We've hit a delimiter (a special char like a bracket) 150 this.forward(); 151 var complexOps = ['==', '===', '!=', '!==', '<=', '>=', '//', '**']; 152 var curComplex = cur + this.current(); 153 var type; 154 if (lib.indexOf(complexOps, curComplex) !== -1) { 155 this.forward(); 156 cur = curComplex; 157 158 // See if this is a strict equality/inequality comparator 159 if (lib.indexOf(complexOps, curComplex + this.current()) !== -1) { 160 cur = curComplex + this.current(); 161 this.forward(); 162 } 163 } 164 switch (cur) { 165 case '(': 166 type = TOKEN_LEFT_PAREN; 167 break; 168 case ')': 169 type = TOKEN_RIGHT_PAREN; 170 break; 171 case '[': 172 type = TOKEN_LEFT_BRACKET; 173 break; 174 case ']': 175 type = TOKEN_RIGHT_BRACKET; 176 break; 177 case '{': 178 type = TOKEN_LEFT_CURLY; 179 break; 180 case '}': 181 type = TOKEN_RIGHT_CURLY; 182 break; 183 case ',': 184 type = TOKEN_COMMA; 185 break; 186 case ':': 187 type = TOKEN_COLON; 188 break; 189 case '~': 190 type = TOKEN_TILDE; 191 break; 192 case '|': 193 type = TOKEN_PIPE; 194 break; 195 default: 196 type = TOKEN_OPERATOR; 197 } 198 return token(type, cur, lineno, colno); 199 } else { 200 // We are not at whitespace or a delimiter, so extract the 201 // text and parse it 202 tok = this._extractUntil(whitespaceChars + delimChars); 203 if (tok.match(/^[-+]?[0-9]+$/)) { 204 if (this.current() === '.') { 205 this.forward(); 206 var dec = this._extract(intChars); 207 return token(TOKEN_FLOAT, tok + '.' + dec, lineno, colno); 208 } else { 209 return token(TOKEN_INT, tok, lineno, colno); 210 } 211 } else if (tok.match(/^(true|false)$/)) { 212 return token(TOKEN_BOOLEAN, tok, lineno, colno); 213 } else if (tok === 'none') { 214 return token(TOKEN_NONE, tok, lineno, colno); 215 /* 216 * Added to make the test `null is null` evaluate truthily. 217 * Otherwise, Nunjucks will look up null in the context and 218 * return `undefined`, which is not what we want. This *may* have 219 * consequences is someone is using null in their templates as a 220 * variable. 221 */ 222 } else if (tok === 'null') { 223 return token(TOKEN_NONE, tok, lineno, colno); 224 } else if (tok) { 225 return token(TOKEN_SYMBOL, tok, lineno, colno); 226 } else { 227 throw new Error('Unexpected value while parsing: ' + tok); 228 } 229 } 230 } else { 231 // Parse out the template text, breaking on tag 232 // delimiters because we need to look for block/variable start 233 // tags (don't use the full delimChars for optimization) 234 var beginChars = this.tags.BLOCK_START.charAt(0) + this.tags.VARIABLE_START.charAt(0) + this.tags.COMMENT_START.charAt(0) + this.tags.COMMENT_END.charAt(0); 235 if (this.isFinished()) { 236 return null; 237 } else if ((tok = this._extractString(this.tags.BLOCK_START + '-')) || (tok = this._extractString(this.tags.BLOCK_START))) { 238 this.in_code = true; 239 return token(TOKEN_BLOCK_START, tok, lineno, colno); 240 } else if ((tok = this._extractString(this.tags.VARIABLE_START + '-')) || (tok = this._extractString(this.tags.VARIABLE_START))) { 241 this.in_code = true; 242 return token(TOKEN_VARIABLE_START, tok, lineno, colno); 243 } else { 244 tok = ''; 245 var data; 246 var inComment = false; 247 if (this._matches(this.tags.COMMENT_START)) { 248 inComment = true; 249 tok = this._extractString(this.tags.COMMENT_START); 250 } 251 252 // Continually consume text, breaking on the tag delimiter 253 // characters and checking to see if it's a start tag. 254 // 255 // We could hit the end of the template in the middle of 256 // our looping, so check for the null return value from 257 // _extractUntil 258 while ((data = this._extractUntil(beginChars)) !== null) { 259 tok += data; 260 if ((this._matches(this.tags.BLOCK_START) || this._matches(this.tags.VARIABLE_START) || this._matches(this.tags.COMMENT_START)) && !inComment) { 261 if (this.lstripBlocks && this._matches(this.tags.BLOCK_START) && this.colno > 0 && this.colno <= tok.length) { 262 var lastLine = tok.slice(-this.colno); 263 if (/^\s+$/.test(lastLine)) { 264 // Remove block leading whitespace from beginning of the string 265 tok = tok.slice(0, -this.colno); 266 if (!tok.length) { 267 // All data removed, collapse to avoid unnecessary nodes 268 // by returning next token (block start) 269 return this.nextToken(); 270 } 271 } 272 } 273 // If it is a start tag, stop looping 274 break; 275 } else if (this._matches(this.tags.COMMENT_END)) { 276 if (!inComment) { 277 throw new Error('unexpected end of comment'); 278 } 279 tok += this._extractString(this.tags.COMMENT_END); 280 break; 281 } else { 282 // It does not match any tag, so add the character and 283 // carry on 284 tok += this.current(); 285 this.forward(); 286 } 287 } 288 if (data === null && inComment) { 289 throw new Error('expected end of comment, got end of file'); 290 } 291 return token(inComment ? TOKEN_COMMENT : TOKEN_DATA, tok, lineno, colno); 292 } 293 } 294 }; 295 _proto._parseString = function _parseString(delimiter) { 296 this.forward(); 297 var str = ''; 298 while (!this.isFinished() && this.current() !== delimiter) { 299 var cur = this.current(); 300 if (cur === '\\') { 301 this.forward(); 302 switch (this.current()) { 303 case 'n': 304 str += '\n'; 305 break; 306 case 't': 307 str += '\t'; 308 break; 309 case 'r': 310 str += '\r'; 311 break; 312 default: 313 str += this.current(); 314 } 315 this.forward(); 316 } else { 317 str += cur; 318 this.forward(); 319 } 320 } 321 this.forward(); 322 return str; 323 }; 324 _proto._matches = function _matches(str) { 325 if (this.index + str.length > this.len) { 326 return null; 327 } 328 var m = this.str.slice(this.index, this.index + str.length); 329 return m === str; 330 }; 331 _proto._extractString = function _extractString(str) { 332 if (this._matches(str)) { 333 this.forwardN(str.length); 334 return str; 335 } 336 return null; 337 }; 338 _proto._extractUntil = function _extractUntil(charString) { 339 // Extract all non-matching chars, with the default matching set 340 // to everything 341 return this._extractMatching(true, charString || ''); 342 }; 343 _proto._extract = function _extract(charString) { 344 // Extract all matching chars (no default, so charString must be 345 // explicit) 346 return this._extractMatching(false, charString); 347 }; 348 _proto._extractMatching = function _extractMatching(breakOnMatch, charString) { 349 // Pull out characters until a breaking char is hit. 350 // If breakOnMatch is false, a non-matching char stops it. 351 // If breakOnMatch is true, a matching char stops it. 352 353 if (this.isFinished()) { 354 return null; 355 } 356 var first = charString.indexOf(this.current()); 357 358 // Only proceed if the first character doesn't meet our condition 359 if (breakOnMatch && first === -1 || !breakOnMatch && first !== -1) { 360 var t = this.current(); 361 this.forward(); 362 363 // And pull out all the chars one at a time until we hit a 364 // breaking char 365 var idx = charString.indexOf(this.current()); 366 while ((breakOnMatch && idx === -1 || !breakOnMatch && idx !== -1) && !this.isFinished()) { 367 t += this.current(); 368 this.forward(); 369 idx = charString.indexOf(this.current()); 370 } 371 return t; 372 } 373 return ''; 374 }; 375 _proto._extractRegex = function _extractRegex(regex) { 376 var matches = this.currentStr().match(regex); 377 if (!matches) { 378 return null; 379 } 380 381 // Move forward whatever was matched 382 this.forwardN(matches[0].length); 383 return matches; 384 }; 385 _proto.isFinished = function isFinished() { 386 return this.index >= this.len; 387 }; 388 _proto.forwardN = function forwardN(n) { 389 for (var i = 0; i < n; i++) { 390 this.forward(); 391 } 392 }; 393 _proto.forward = function forward() { 394 this.index++; 395 if (this.previous() === '\n') { 396 this.lineno++; 397 this.colno = 0; 398 } else { 399 this.colno++; 400 } 401 }; 402 _proto.backN = function backN(n) { 403 for (var i = 0; i < n; i++) { 404 this.back(); 405 } 406 }; 407 _proto.back = function back() { 408 this.index--; 409 if (this.current() === '\n') { 410 this.lineno--; 411 var idx = this.src.lastIndexOf('\n', this.index - 1); 412 if (idx === -1) { 413 this.colno = this.index; 414 } else { 415 this.colno = this.index - idx; 416 } 417 } else { 418 this.colno--; 419 } 420 } 421 422 // current returns current character 423 ; 424 _proto.current = function current() { 425 if (!this.isFinished()) { 426 return this.str.charAt(this.index); 427 } 428 return ''; 429 } 430 431 // currentStr returns what's left of the unparsed string 432 ; 433 _proto.currentStr = function currentStr() { 434 if (!this.isFinished()) { 435 return this.str.substr(this.index); 436 } 437 return ''; 438 }; 439 _proto.previous = function previous() { 440 return this.str.charAt(this.index - 1); 441 }; 442 return Tokenizer; 443}(); 444module.exports = { 445 lex: function lex(src, opts) { 446 return new Tokenizer(src, opts); 447 }, 448 TOKEN_STRING: TOKEN_STRING, 449 TOKEN_WHITESPACE: TOKEN_WHITESPACE, 450 TOKEN_DATA: TOKEN_DATA, 451 TOKEN_BLOCK_START: TOKEN_BLOCK_START, 452 TOKEN_BLOCK_END: TOKEN_BLOCK_END, 453 TOKEN_VARIABLE_START: TOKEN_VARIABLE_START, 454 TOKEN_VARIABLE_END: TOKEN_VARIABLE_END, 455 TOKEN_COMMENT: TOKEN_COMMENT, 456 TOKEN_LEFT_PAREN: TOKEN_LEFT_PAREN, 457 TOKEN_RIGHT_PAREN: TOKEN_RIGHT_PAREN, 458 TOKEN_LEFT_BRACKET: TOKEN_LEFT_BRACKET, 459 TOKEN_RIGHT_BRACKET: TOKEN_RIGHT_BRACKET, 460 TOKEN_LEFT_CURLY: TOKEN_LEFT_CURLY, 461 TOKEN_RIGHT_CURLY: TOKEN_RIGHT_CURLY, 462 TOKEN_OPERATOR: TOKEN_OPERATOR, 463 TOKEN_COMMA: TOKEN_COMMA, 464 TOKEN_COLON: TOKEN_COLON, 465 TOKEN_TILDE: TOKEN_TILDE, 466 TOKEN_PIPE: TOKEN_PIPE, 467 TOKEN_INT: TOKEN_INT, 468 TOKEN_FLOAT: TOKEN_FLOAT, 469 TOKEN_BOOLEAN: TOKEN_BOOLEAN, 470 TOKEN_NONE: TOKEN_NONE, 471 TOKEN_SYMBOL: TOKEN_SYMBOL, 472 TOKEN_SPECIAL: TOKEN_SPECIAL, 473 TOKEN_REGEX: TOKEN_REGEX 474};