1'use strict';
2
3var lib = require('./lib');
// ---------------------------------------------------------------------------
// Character classes consulted by the tokenizer while it scans code.
// ---------------------------------------------------------------------------

// Space, LF, tab, CR and the no-break space (U+00A0).
var whitespaceChars = ' \n\t\r\u00A0';
// Characters that always terminate a symbol/number while lexing code.
var delimChars = '()[]{}%*-+~/#,:|.<>=!';
// Decimal digits, used to read the fractional part of a float.
var intChars = '0123456789';

// ---------------------------------------------------------------------------
// Default tag delimiters. The Tokenizer constructor falls back to these when
// the corresponding entry is not supplied through `opts.tags`.
// ---------------------------------------------------------------------------
var BLOCK_START = '{%';
var BLOCK_END = '%}';
var VARIABLE_START = '{{';
var VARIABLE_END = '}}';
var COMMENT_START = '{#';
var COMMENT_END = '#}';

// ---------------------------------------------------------------------------
// Token type identifiers (the `type` field of every emitted token).
// ---------------------------------------------------------------------------

// Template-level tokens.
var TOKEN_STRING = 'string';
var TOKEN_WHITESPACE = 'whitespace';
var TOKEN_DATA = 'data';
var TOKEN_BLOCK_START = 'block-start';
var TOKEN_BLOCK_END = 'block-end';
var TOKEN_VARIABLE_START = 'variable-start';
var TOKEN_VARIABLE_END = 'variable-end';
var TOKEN_COMMENT = 'comment';

// Punctuation and operators.
var TOKEN_LEFT_PAREN = 'left-paren';
var TOKEN_RIGHT_PAREN = 'right-paren';
var TOKEN_LEFT_BRACKET = 'left-bracket';
var TOKEN_RIGHT_BRACKET = 'right-bracket';
var TOKEN_LEFT_CURLY = 'left-curly';
var TOKEN_RIGHT_CURLY = 'right-curly';
var TOKEN_OPERATOR = 'operator';
var TOKEN_COMMA = 'comma';
var TOKEN_COLON = 'colon';
var TOKEN_TILDE = 'tilde';
var TOKEN_PIPE = 'pipe';

// Literals and identifiers.
var TOKEN_INT = 'int';
var TOKEN_FLOAT = 'float';
var TOKEN_BOOLEAN = 'boolean';
var TOKEN_NONE = 'none';
var TOKEN_SYMBOL = 'symbol';
var TOKEN_SPECIAL = 'special';
var TOKEN_REGEX = 'regex';
/**
 * Create a token record.
 *
 * @param {string} type   One of the TOKEN_* type identifiers.
 * @param {*}      value  The token payload (text, or an object for regex tokens).
 * @param {number} lineno Line on which the token starts.
 * @param {number} colno  Column at which the token starts.
 * @returns {{type: string, value: *, lineno: number, colno: number}}
 */
function token(type, value, lineno, colno) {
  var record = {};
  record.type = type;
  record.value = value;
  record.lineno = lineno;
  record.colno = colno;
  return record;
}
var Tokenizer = /*#__PURE__*/function () {
  // Multi-character operators recognised while lexing code. Built once here
  // rather than on every nextToken() call.
  var COMPLEX_OPS = ['==', '===', '!=', '!==', '<=', '>=', '//', '**'];

  // Flags accepted after an r/.../ regex literal, according to
  // https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Global_Objects/RegExp
  var REGEX_FLAGS = ['g', 'i', 'm', 'y'];

  /**
   * Incremental lexer over a template string. Call nextToken() repeatedly;
   * it returns null once the input is exhausted.
   *
   * @param {string} str   Template source to tokenize.
   * @param {Object} [opts]
   * @param {Object} [opts.tags] Custom delimiters: blockStart, blockEnd,
   *   variableStart, variableEnd, commentStart, commentEnd. Missing entries
   *   fall back to the module defaults.
   * @param {boolean} [opts.trimBlocks]   Skip one newline (LF or CRLF) that
   *   directly follows a block end tag.
   * @param {boolean} [opts.lstripBlocks] Drop whitespace-only text between
   *   the start of a line and a block start tag.
   */
  function Tokenizer(str, opts) {
    this.str = str;
    this.index = 0;
    this.len = str.length;
    // lineno and colno both start at 0 and are maintained by forward()/back().
    this.lineno = 0;
    this.colno = 0;
    // True while between a block/variable start tag and its end tag.
    this.in_code = false;
    opts = opts || {};
    var tags = opts.tags || {};
    this.tags = {
      BLOCK_START: tags.blockStart || BLOCK_START,
      BLOCK_END: tags.blockEnd || BLOCK_END,
      VARIABLE_START: tags.variableStart || VARIABLE_START,
      VARIABLE_END: tags.variableEnd || VARIABLE_END,
      COMMENT_START: tags.commentStart || COMMENT_START,
      COMMENT_END: tags.commentEnd || COMMENT_END
    };
    this.trimBlocks = !!opts.trimBlocks;
    this.lstripBlocks = !!opts.lstripBlocks;
  }
  var _proto = Tokenizer.prototype;

  /**
   * Read and return the next token, or null when the input is finished.
   *
   * Outside of tags the result is a block-start, variable-start, comment or
   * data token. Inside a tag (in_code) the result is a code-level token:
   * string, whitespace, block/variable end, regex, punctuation/operator,
   * int, float, boolean, none or symbol.
   *
   * @returns {?{type: string, value: *, lineno: number, colno: number}}
   * @throws {Error} On a comment end tag outside a comment, on a comment that
   *   is still open at end of input, or when no token text can be extracted
   *   in code mode.
   */
  _proto.nextToken = function nextToken() {
    // Position of the token start, captured before anything is consumed.
    var lineno = this.lineno;
    var colno = this.colno;
    var tok;
    if (this.in_code) {
      // We are inside a block or variable tag, so parse the input as code
      var cur = this.current();
      if (this.isFinished()) {
        // We have nothing else to parse
        return null;
      } else if (cur === '"' || cur === '\'') {
        // We've hit a string
        return token(TOKEN_STRING, this._parseString(cur), lineno, colno);
      } else if ((tok = this._extract(whitespaceChars))) {
        // We hit some whitespace
        return token(TOKEN_WHITESPACE, tok, lineno, colno);
      } else if ((tok = this._extractString(this.tags.BLOCK_END)) || (tok = this._extractString('-' + this.tags.BLOCK_END))) {
        // Special check for the block end tag
        //
        // It is a requirement that start and end tags are composed of
        // delimiter characters (%{}[] etc), and our code always
        // breaks on delimiters so we can assume the token parsing
        // doesn't consume these elsewhere
        this.in_code = false;
        if (this.trimBlocks) {
          cur = this.current();
          if (cur === '\n') {
            // Skip newline
            this.forward();
          } else if (cur === '\r') {
            // Skip CRLF newline
            this.forward();
            cur = this.current();
            if (cur === '\n') {
              this.forward();
            } else {
              // Was not a CRLF, so go back
              this.back();
            }
          }
        }
        return token(TOKEN_BLOCK_END, tok, lineno, colno);
      } else if ((tok = this._extractString(this.tags.VARIABLE_END)) || (tok = this._extractString('-' + this.tags.VARIABLE_END))) {
        // Special check for variable end tag (see above)
        this.in_code = false;
        return token(TOKEN_VARIABLE_END, tok, lineno, colno);
      } else if (cur === 'r' && this.str.charAt(this.index + 1) === '/') {
        // Skip past 'r/'.
        this.forwardN(2);

        // Extract until the end of the regex -- / ends it, \/ does not.
        var regexBody = '';
        while (!this.isFinished()) {
          if (this.current() === '/' && this.previous() !== '\\') {
            this.forward();
            break;
          } else {
            regexBody += this.current();
            this.forward();
          }
        }

        // Collect any trailing flag characters.
        var regexFlags = '';
        while (!this.isFinished()) {
          var isCurrentAFlag = REGEX_FLAGS.indexOf(this.current()) !== -1;
          if (isCurrentAFlag) {
            regexFlags += this.current();
            this.forward();
          } else {
            break;
          }
        }
        return token(TOKEN_REGEX, {
          body: regexBody,
          flags: regexFlags
        }, lineno, colno);
      } else if (delimChars.indexOf(cur) !== -1) {
        // We've hit a delimiter (a special char like a bracket)
        this.forward();
        var curComplex = cur + this.current();
        var type;
        if (lib.indexOf(COMPLEX_OPS, curComplex) !== -1) {
          this.forward();
          cur = curComplex;

          // See if this is a strict equality/inequality comparator
          if (lib.indexOf(COMPLEX_OPS, curComplex + this.current()) !== -1) {
            cur = curComplex + this.current();
            this.forward();
          }
        }
        switch (cur) {
          case '(':
            type = TOKEN_LEFT_PAREN;
            break;
          case ')':
            type = TOKEN_RIGHT_PAREN;
            break;
          case '[':
            type = TOKEN_LEFT_BRACKET;
            break;
          case ']':
            type = TOKEN_RIGHT_BRACKET;
            break;
          case '{':
            type = TOKEN_LEFT_CURLY;
            break;
          case '}':
            type = TOKEN_RIGHT_CURLY;
            break;
          case ',':
            type = TOKEN_COMMA;
            break;
          case ':':
            type = TOKEN_COLON;
            break;
          case '~':
            type = TOKEN_TILDE;
            break;
          case '|':
            type = TOKEN_PIPE;
            break;
          default:
            type = TOKEN_OPERATOR;
        }
        return token(type, cur, lineno, colno);
      } else {
        // We are not at whitespace or a delimiter, so extract the
        // text and parse it
        tok = this._extractUntil(whitespaceChars + delimChars);
        if (tok.match(/^[-+]?[0-9]+$/)) {
          if (this.current() === '.') {
            // '.' is a delimiter, so the fractional digits are read separately
            this.forward();
            var dec = this._extract(intChars);
            return token(TOKEN_FLOAT, tok + '.' + dec, lineno, colno);
          } else {
            return token(TOKEN_INT, tok, lineno, colno);
          }
        } else if (tok.match(/^(true|false)$/)) {
          return token(TOKEN_BOOLEAN, tok, lineno, colno);
        } else if (tok === 'none') {
          return token(TOKEN_NONE, tok, lineno, colno);
          /*
           * Added to make the test `null is null` evaluate truthily.
           * Otherwise, Nunjucks will look up null in the context and
           * return `undefined`, which is not what we want. This *may* have
           * consequences if someone is using null in their templates as a
           * variable.
           */
        } else if (tok === 'null') {
          return token(TOKEN_NONE, tok, lineno, colno);
        } else if (tok) {
          return token(TOKEN_SYMBOL, tok, lineno, colno);
        } else {
          throw new Error('Unexpected value while parsing: ' + tok);
        }
      }
    } else {
      // Parse out the template text, breaking on tag
      // delimiters because we need to look for block/variable start
      // tags (don't use the full delimChars for optimization)
      var beginChars = this.tags.BLOCK_START.charAt(0) + this.tags.VARIABLE_START.charAt(0) + this.tags.COMMENT_START.charAt(0) + this.tags.COMMENT_END.charAt(0);
      if (this.isFinished()) {
        return null;
      } else if ((tok = this._extractString(this.tags.BLOCK_START + '-')) || (tok = this._extractString(this.tags.BLOCK_START))) {
        this.in_code = true;
        return token(TOKEN_BLOCK_START, tok, lineno, colno);
      } else if ((tok = this._extractString(this.tags.VARIABLE_START + '-')) || (tok = this._extractString(this.tags.VARIABLE_START))) {
        this.in_code = true;
        return token(TOKEN_VARIABLE_START, tok, lineno, colno);
      } else {
        tok = '';
        var data;
        var inComment = false;
        if (this._matches(this.tags.COMMENT_START)) {
          inComment = true;
          tok = this._extractString(this.tags.COMMENT_START);
        }

        // Continually consume text, breaking on the tag delimiter
        // characters and checking to see if it's a start tag.
        //
        // We could hit the end of the template in the middle of
        // our looping, so check for the null return value from
        // _extractUntil
        while ((data = this._extractUntil(beginChars)) !== null) {
          tok += data;
          if ((this._matches(this.tags.BLOCK_START) || this._matches(this.tags.VARIABLE_START) || this._matches(this.tags.COMMENT_START)) && !inComment) {
            if (this.lstripBlocks && this._matches(this.tags.BLOCK_START) && this.colno > 0 && this.colno <= tok.length) {
              // The last `colno` characters of tok are the text on the
              // current line that precedes the block start tag.
              var lastLine = tok.slice(-this.colno);
              if (/^\s+$/.test(lastLine)) {
                // Remove block leading whitespace from beginning of the string
                tok = tok.slice(0, -this.colno);
                if (!tok.length) {
                  // All data removed, collapse to avoid unnecessary nodes
                  // by returning next token (block start)
                  return this.nextToken();
                }
              }
            }
            // If it is a start tag, stop looping
            break;
          } else if (this._matches(this.tags.COMMENT_END)) {
            if (!inComment) {
              throw new Error('unexpected end of comment');
            }
            tok += this._extractString(this.tags.COMMENT_END);
            break;
          } else {
            // It does not match any tag, so add the character and
            // carry on
            tok += this.current();
            this.forward();
          }
        }
        if (data === null && inComment) {
          throw new Error('expected end of comment, got end of file');
        }
        return token(inComment ? TOKEN_COMMENT : TOKEN_DATA, tok, lineno, colno);
      }
    }
  };

  /**
   * Consume a quoted string starting at the current opening quote and return
   * its contents with \n, \t and \r escapes decoded; any other escaped
   * character is taken literally. Reading stops at the closing delimiter or
   * at end of input, whichever comes first.
   *
   * @param {string} delimiter The quote character that opened the string.
   * @returns {string}
   */
  _proto._parseString = function _parseString(delimiter) {
    // Step over the opening quote.
    this.forward();
    var str = '';
    while (!this.isFinished() && this.current() !== delimiter) {
      var cur = this.current();
      if (cur === '\\') {
        this.forward();
        switch (this.current()) {
          case 'n':
            str += '\n';
            break;
          case 't':
            str += '\t';
            break;
          case 'r':
            str += '\r';
            break;
          default:
            str += this.current();
        }
        this.forward();
      } else {
        str += cur;
        this.forward();
      }
    }
    // Step over the closing quote.
    this.forward();
    return str;
  };

  /**
   * Test whether the unparsed input starts with `str`.
   *
   * @param {string} str
   * @returns {?boolean} true/false, or null when fewer than str.length
   *   characters remain (callers only rely on truthiness).
   */
  _proto._matches = function _matches(str) {
    if (this.index + str.length > this.len) {
      return null;
    }
    var m = this.str.slice(this.index, this.index + str.length);
    return m === str;
  };

  /**
   * If the unparsed input starts with `str`, consume it and return it;
   * otherwise consume nothing and return null.
   *
   * @param {string} str
   * @returns {?string}
   */
  _proto._extractString = function _extractString(str) {
    if (this._matches(str)) {
      this.forwardN(str.length);
      return str;
    }
    return null;
  };

  /**
   * Consume and return characters up to (not including) the first one that
   * appears in `charString`. With no charString, everything is consumed.
   *
   * @param {string} [charString]
   * @returns {?string} The extracted text, '' if the current character is
   *   already a stop character, or null at end of input.
   */
  _proto._extractUntil = function _extractUntil(charString) {
    // Extract all non-matching chars, with the default matching set
    // to everything
    return this._extractMatching(true, charString || '');
  };

  /**
   * Consume and return the run of characters that all appear in `charString`.
   *
   * @param {string} charString
   * @returns {?string} The extracted text, '' if the current character is
   *   not in charString, or null at end of input.
   */
  _proto._extract = function _extract(charString) {
    // Extract all matching chars (no default, so charString must be
    // explicit)
    return this._extractMatching(false, charString);
  };

  /**
   * Shared implementation of _extract and _extractUntil.
   *
   * @param {boolean} breakOnMatch true: stop at the first character found in
   *   charString; false: stop at the first character NOT found in it.
   * @param {string} charString
   * @returns {?string} Extracted text, '' when nothing qualifies, or null at
   *   end of input.
   */
  _proto._extractMatching = function _extractMatching(breakOnMatch, charString) {
    // Pull out characters until a breaking char is hit.
    // If breakOnMatch is false, a non-matching char stops it.
    // If breakOnMatch is true, a matching char stops it.

    if (this.isFinished()) {
      return null;
    }
    var first = charString.indexOf(this.current());

    // Only proceed if the first character doesn't meet our condition
    if (breakOnMatch && first === -1 || !breakOnMatch && first !== -1) {
      var t = this.current();
      this.forward();

      // And pull out all the chars one at a time until we hit a
      // breaking char
      var idx = charString.indexOf(this.current());
      while ((breakOnMatch && idx === -1 || !breakOnMatch && idx !== -1) && !this.isFinished()) {
        t += this.current();
        this.forward();
        idx = charString.indexOf(this.current());
      }
      return t;
    }
    return '';
  };

  /**
   * Match `regex` against the unparsed input and, on success, advance past
   * the length of the matched text.
   *
   * @param {RegExp} regex
   * @returns {?Array} The match array, or null when there is no match.
   */
  _proto._extractRegex = function _extractRegex(regex) {
    var matches = this.currentStr().match(regex);
    if (!matches) {
      return null;
    }

    // Move forward whatever was matched
    this.forwardN(matches[0].length);
    return matches;
  };

  /**
   * @returns {boolean} true once the read position is at or past the end.
   */
  _proto.isFinished = function isFinished() {
    return this.index >= this.len;
  };

  /**
   * Advance `n` characters, keeping lineno/colno in sync.
   *
   * @param {number} n
   */
  _proto.forwardN = function forwardN(n) {
    for (var i = 0; i < n; i++) {
      this.forward();
    }
  };

  /**
   * Advance one character. Moving past a '\n' starts a new line
   * (lineno + 1, colno 0); otherwise colno is incremented.
   */
  _proto.forward = function forward() {
    this.index++;
    if (this.previous() === '\n') {
      this.lineno++;
      this.colno = 0;
    } else {
      this.colno++;
    }
  };

  /**
   * Step back `n` characters, keeping lineno/colno in sync.
   *
   * @param {number} n
   */
  _proto.backN = function backN(n) {
    for (var i = 0; i < n; i++) {
      this.back();
    }
  };

  /**
   * Step back one character, undoing the bookkeeping done by forward().
   * Landing on a '\n' means we returned to the previous line, so lineno is
   * decremented and colno is recomputed as the newline's offset from the
   * start of its line.
   */
  _proto.back = function back() {
    this.index--;
    if (this.current() === '\n') {
      this.lineno--;
      // The line starts right after the preceding newline, or at 0 when
      // there is none. index 0 is handled explicitly because lastIndexOf
      // clamps a negative fromIndex to 0 and would find the newline itself.
      var lineStart = this.index === 0 ? 0 : this.str.lastIndexOf('\n', this.index - 1) + 1;
      this.colno = this.index - lineStart;
    } else {
      this.colno--;
    }
  };

  /**
   * @returns {string} The character at the read position, or '' at the end.
   */
  _proto.current = function current() {
    if (!this.isFinished()) {
      return this.str.charAt(this.index);
    }
    return '';
  };

  /**
   * @returns {string} The not-yet-parsed remainder of the input, or '' at
   *   the end.
   */
  _proto.currentStr = function currentStr() {
    if (!this.isFinished()) {
      return this.str.substr(this.index);
    }
    return '';
  };

  /**
   * @returns {string} The character just before the read position
   *   ('' when the position is 0).
   */
  _proto.previous = function previous() {
    return this.str.charAt(this.index - 1);
  };
  return Tokenizer;
}();
// Public API: a `lex` factory plus every token type identifier, so that
// consumers can compare token.type against named constants.
var lexerExports = {};

/**
 * Create a Tokenizer over `src`.
 *
 * @param {string} src  Template source.
 * @param {Object} [opts] Options forwarded to the Tokenizer constructor.
 * @returns {Tokenizer}
 */
lexerExports.lex = function lex(src, opts) {
  return new Tokenizer(src, opts);
};

lexerExports.TOKEN_STRING = TOKEN_STRING;
lexerExports.TOKEN_WHITESPACE = TOKEN_WHITESPACE;
lexerExports.TOKEN_DATA = TOKEN_DATA;
lexerExports.TOKEN_BLOCK_START = TOKEN_BLOCK_START;
lexerExports.TOKEN_BLOCK_END = TOKEN_BLOCK_END;
lexerExports.TOKEN_VARIABLE_START = TOKEN_VARIABLE_START;
lexerExports.TOKEN_VARIABLE_END = TOKEN_VARIABLE_END;
lexerExports.TOKEN_COMMENT = TOKEN_COMMENT;
lexerExports.TOKEN_LEFT_PAREN = TOKEN_LEFT_PAREN;
lexerExports.TOKEN_RIGHT_PAREN = TOKEN_RIGHT_PAREN;
lexerExports.TOKEN_LEFT_BRACKET = TOKEN_LEFT_BRACKET;
lexerExports.TOKEN_RIGHT_BRACKET = TOKEN_RIGHT_BRACKET;
lexerExports.TOKEN_LEFT_CURLY = TOKEN_LEFT_CURLY;
lexerExports.TOKEN_RIGHT_CURLY = TOKEN_RIGHT_CURLY;
lexerExports.TOKEN_OPERATOR = TOKEN_OPERATOR;
lexerExports.TOKEN_COMMA = TOKEN_COMMA;
lexerExports.TOKEN_COLON = TOKEN_COLON;
lexerExports.TOKEN_TILDE = TOKEN_TILDE;
lexerExports.TOKEN_PIPE = TOKEN_PIPE;
lexerExports.TOKEN_INT = TOKEN_INT;
lexerExports.TOKEN_FLOAT = TOKEN_FLOAT;
lexerExports.TOKEN_BOOLEAN = TOKEN_BOOLEAN;
lexerExports.TOKEN_NONE = TOKEN_NONE;
lexerExports.TOKEN_SYMBOL = TOKEN_SYMBOL;
lexerExports.TOKEN_SPECIAL = TOKEN_SPECIAL;
lexerExports.TOKEN_REGEX = TOKEN_REGEX;

module.exports = lexerExports;