123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542 |
- 'use strict';
-
- var lib = require('./lib');
-
// Character classes consulted while scanning code inside a tag.
var whitespaceChars = " \n\t\r\xA0",
    delimChars = '()[]{}%*-+~/#,:|.<>=!',
    intChars = '0123456789';

// Default tag delimiters; the Tokenizer falls back to these when the
// caller does not override them through `opts.tags`.
var BLOCK_START = '{%',
    BLOCK_END = '%}',
    VARIABLE_START = '{{',
    VARIABLE_END = '}}',
    COMMENT_START = '{#',
    COMMENT_END = '#}';

// Token types for template text, tags and comments.
var TOKEN_STRING = 'string',
    TOKEN_WHITESPACE = 'whitespace',
    TOKEN_DATA = 'data',
    TOKEN_BLOCK_START = 'block-start',
    TOKEN_BLOCK_END = 'block-end',
    TOKEN_VARIABLE_START = 'variable-start',
    TOKEN_VARIABLE_END = 'variable-end',
    TOKEN_COMMENT = 'comment';

// Token types for punctuation met inside a tag.
var TOKEN_LEFT_PAREN = 'left-paren',
    TOKEN_RIGHT_PAREN = 'right-paren',
    TOKEN_LEFT_BRACKET = 'left-bracket',
    TOKEN_RIGHT_BRACKET = 'right-bracket',
    TOKEN_LEFT_CURLY = 'left-curly',
    TOKEN_RIGHT_CURLY = 'right-curly',
    TOKEN_OPERATOR = 'operator',
    TOKEN_COMMA = 'comma',
    TOKEN_COLON = 'colon',
    TOKEN_TILDE = 'tilde',
    TOKEN_PIPE = 'pipe';

// Token types for literals and names.
var TOKEN_INT = 'int',
    TOKEN_FLOAT = 'float',
    TOKEN_BOOLEAN = 'boolean',
    TOKEN_NONE = 'none',
    TOKEN_SYMBOL = 'symbol',
    TOKEN_SPECIAL = 'special',
    TOKEN_REGEX = 'regex';
-
/**
 * Build a plain token record.
 *
 * @param {string} type - One of the TOKEN_* type names.
 * @param {*} value - The token payload, stored as given.
 * @param {number} lineno - Line on which the token starts.
 * @param {number} colno - Column at which the token starts.
 * @returns {{type: string, value: *, lineno: number, colno: number}}
 */
function token(type, value, lineno, colno) {
  var record = {};
  record.type = type;
  record.value = value;
  record.lineno = lineno;
  record.colno = colno;
  return record;
}
-
var Tokenizer = /*#__PURE__*/function () {
  // Multi-character operators recognised inside a tag. Built once for the
  // class rather than on every delimiter token.
  var COMPLEX_OPS = ['==', '===', '!=', '!==', '<=', '>=', '//', '**'];

  // Flags accepted after the closing slash of an r/.../ literal.
  // The possible flags are according to https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Global_Objects/RegExp)
  var REGEX_FLAGS = ['g', 'i', 'm', 'y'];

  /**
   * Incremental lexer over a template string. Call `nextToken()` repeatedly
   * until it returns null.
   *
   * @param {string} str - The template source.
   * @param {Object} [opts]
   * @param {Object} [opts.tags] - Optional overrides `blockStart`,
   *   `blockEnd`, `variableStart`, `variableEnd`, `commentStart`,
   *   `commentEnd`; any missing (or falsy) entry uses the module default.
   * @param {boolean} [opts.trimBlocks] - Skip one newline (LF or CRLF)
   *   directly after a block end tag.
   * @param {boolean} [opts.lstripBlocks] - Drop whitespace between the start
   *   of a line and a block start tag.
   */
  function Tokenizer(str, opts) {
    var options = opts || {};
    var tags = options.tags || {};

    this.str = str;
    this.index = 0;
    this.len = str.length;
    // Both counters start at 0 and are maintained by forward()/back().
    this.lineno = 0;
    this.colno = 0;
    // false while reading template text, true between a start and end tag.
    this.in_code = false;
    this.tags = {
      BLOCK_START: tags.blockStart || BLOCK_START,
      BLOCK_END: tags.blockEnd || BLOCK_END,
      VARIABLE_START: tags.variableStart || VARIABLE_START,
      VARIABLE_END: tags.variableEnd || VARIABLE_END,
      COMMENT_START: tags.commentStart || COMMENT_START,
      COMMENT_END: tags.commentEnd || COMMENT_END
    };
    this.trimBlocks = !!options.trimBlocks;
    this.lstripBlocks = !!options.lstripBlocks;
  }

  var _proto = Tokenizer.prototype;

  /**
   * Read the next token from the current position.
   *
   * @returns {?Object} A token record stamped with the line/column at which
   *   this call started, or null once the input is exhausted.
   * @throws {Error} On a comment end tag outside a comment, or a comment
   *   left open at end of input.
   */
  _proto.nextToken = function nextToken() {
    var lineno = this.lineno;
    var colno = this.colno;

    if (this.isFinished()) {
      // We have nothing else to parse
      return null;
    }

    return this.in_code ? this._nextCodeToken(lineno, colno) : this._nextDataToken(lineno, colno);
  };

  /**
   * Read one token while inside a block or variable tag. The order of the
   * checks matters: end tags are tried before the generic delimiter branch
   * so that their characters are not consumed as operators.
   */
  _proto._nextCodeToken = function _nextCodeToken(lineno, colno) {
    var cur = this.current();
    var tok;

    if (cur === '"' || cur === '\'') {
      // We've hit a string
      return token(TOKEN_STRING, this._parseString(cur), lineno, colno);
    }

    tok = this._extract(whitespaceChars);

    if (tok) {
      // We hit some whitespace
      return token(TOKEN_WHITESPACE, tok, lineno, colno);
    }

    // Special check for the block end tag
    //
    // It is a requirement that start and end tags are composed of
    // delimiter characters (%{}[] etc), and our code always
    // breaks on delimiters so we can assume the token parsing
    // doesn't consume these elsewhere
    tok = this._extractString(this.tags.BLOCK_END) || this._extractString('-' + this.tags.BLOCK_END);

    if (tok) {
      this.in_code = false;

      if (this.trimBlocks) {
        this._skipNewline();
      }

      return token(TOKEN_BLOCK_END, tok, lineno, colno);
    }

    // Special check for variable end tag (see above)
    tok = this._extractString(this.tags.VARIABLE_END) || this._extractString('-' + this.tags.VARIABLE_END);

    if (tok) {
      this.in_code = false;
      return token(TOKEN_VARIABLE_END, tok, lineno, colno);
    }

    if (cur === 'r' && this.str.charAt(this.index + 1) === '/') {
      return this._parseRegex(lineno, colno);
    }

    if (delimChars.indexOf(cur) !== -1) {
      // We've hit a delimiter (a special char like a bracket)
      return this._parseDelimiter(cur, lineno, colno);
    }

    // We are not at whitespace or a delimiter, so extract the
    // text and parse it
    return this._parseWord(lineno, colno);
  };

  /**
   * Skip a single newline at the current position: "\n" or "\r\n". A lone
   * "\r" is left in place.
   */
  _proto._skipNewline = function _skipNewline() {
    var cur = this.current();

    if (cur === '\n') {
      this.forward();
    } else if (cur === '\r') {
      this.forward();

      if (this.current() === '\n') {
        this.forward();
      } else {
        // Was not a CRLF, so go back
        this.back();
      }
    }
  };

  /**
   * Read an r/body/flags literal; the cursor must be on the leading "r".
   * The token value is `{body, flags}`. An unterminated body runs to the
   * end of the input.
   */
  _proto._parseRegex = function _parseRegex(lineno, colno) {
    // Skip past 'r/'.
    this.forwardN(2);

    // Extract until the end of the regex -- / ends it, \/ does not.
    var regexBody = '';

    while (!this.isFinished()) {
      if (this.current() === '/' && this.previous() !== '\\') {
        this.forward();
        break;
      }

      regexBody += this.current();
      this.forward();
    }

    // Check for flags.
    var regexFlags = '';

    while (!this.isFinished() && REGEX_FLAGS.indexOf(this.current()) !== -1) {
      regexFlags += this.current();
      this.forward();
    }

    return token(TOKEN_REGEX, {
      body: regexBody,
      flags: regexFlags
    }, lineno, colno);
  };

  /**
   * Read a delimiter token starting at `cur` (the current character),
   * extending it to a two- or three-character operator when one matches.
   */
  _proto._parseDelimiter = function _parseDelimiter(cur, lineno, colno) {
    var type;

    this.forward();
    var curComplex = cur + this.current();

    if (lib.indexOf(COMPLEX_OPS, curComplex) !== -1) {
      this.forward();
      cur = curComplex;

      // See if this is a strict equality/inequality comparator
      if (lib.indexOf(COMPLEX_OPS, curComplex + this.current()) !== -1) {
        cur = curComplex + this.current();
        this.forward();
      }
    }

    switch (cur) {
      case '(':
        type = TOKEN_LEFT_PAREN;
        break;

      case ')':
        type = TOKEN_RIGHT_PAREN;
        break;

      case '[':
        type = TOKEN_LEFT_BRACKET;
        break;

      case ']':
        type = TOKEN_RIGHT_BRACKET;
        break;

      case '{':
        type = TOKEN_LEFT_CURLY;
        break;

      case '}':
        type = TOKEN_RIGHT_CURLY;
        break;

      case ',':
        type = TOKEN_COMMA;
        break;

      case ':':
        type = TOKEN_COLON;
        break;

      case '~':
        type = TOKEN_TILDE;
        break;

      case '|':
        type = TOKEN_PIPE;
        break;

      default:
        type = TOKEN_OPERATOR;
    }

    return token(type, cur, lineno, colno);
  };

  /**
   * Read a run of non-whitespace, non-delimiter characters and classify it
   * as an int, float, boolean, none or symbol token.
   *
   * @throws {Error} If nothing could be extracted.
   */
  _proto._parseWord = function _parseWord(lineno, colno) {
    var tok = this._extractUntil(whitespaceChars + delimChars);

    if (tok.match(/^[-+]?[0-9]+$/)) {
      if (this.current() === '.') {
        this.forward();

        // _extract returns null at end of input; treat that as "no
        // fractional digits" instead of concatenating the text "null".
        var dec = this._extract(intChars) || '';

        return token(TOKEN_FLOAT, tok + '.' + dec, lineno, colno);
      }

      return token(TOKEN_INT, tok, lineno, colno);
    }

    if (tok.match(/^(true|false)$/)) {
      return token(TOKEN_BOOLEAN, tok, lineno, colno);
    }

    if (tok === 'none') {
      return token(TOKEN_NONE, tok, lineno, colno);
    }

    /*
     * Added to make the test `null is null` evaluate truthily.
     * Otherwise, Nunjucks will look up null in the context and
     * return `undefined`, which is not what we want. This *may* have
     * consequences if someone is using null in their templates as a
     * variable.
     */
    if (tok === 'null') {
      return token(TOKEN_NONE, tok, lineno, colno);
    }

    if (tok) {
      return token(TOKEN_SYMBOL, tok, lineno, colno);
    }

    throw new Error('Unexpected value while parsing: ' + tok);
  };

  /**
   * Read one token while outside any tag: a block/variable start tag, a
   * whole comment, or a run of template text up to the next start tag.
   */
  _proto._nextDataToken = function _nextDataToken(lineno, colno) {
    var tags = this.tags;

    // Parse out the template text, breaking on tag
    // delimiters because we need to look for block/variable start
    // tags (don't use the full delimChars for optimization)
    var beginChars = tags.BLOCK_START.charAt(0) + tags.VARIABLE_START.charAt(0) + tags.COMMENT_START.charAt(0) + tags.COMMENT_END.charAt(0);

    // The "-" suffixed form is tried first so it is not split in two.
    var tok = this._extractString(tags.BLOCK_START + '-') || this._extractString(tags.BLOCK_START);

    if (tok) {
      this.in_code = true;
      return token(TOKEN_BLOCK_START, tok, lineno, colno);
    }

    tok = this._extractString(tags.VARIABLE_START + '-') || this._extractString(tags.VARIABLE_START);

    if (tok) {
      this.in_code = true;
      return token(TOKEN_VARIABLE_START, tok, lineno, colno);
    }

    tok = '';
    var data;
    var inComment = false;

    if (this._matches(tags.COMMENT_START)) {
      inComment = true;
      tok = this._extractString(tags.COMMENT_START);
    }

    // Continually consume text, breaking on the tag delimiter
    // characters and checking to see if it's a start tag.
    //
    // We could hit the end of the template in the middle of
    // our looping, so check for the null return value from
    // _extractUntil
    while ((data = this._extractUntil(beginChars)) !== null) {
      tok += data;

      if ((this._matches(tags.BLOCK_START) || this._matches(tags.VARIABLE_START) || this._matches(tags.COMMENT_START)) && !inComment) {
        if (this.lstripBlocks && this._matches(tags.BLOCK_START) && this.colno > 0 && this.colno <= tok.length) {
          // The last `colno` characters of tok are the part of the current
          // line that precedes the block tag.
          var lastLine = tok.slice(-this.colno);

          if (/^\s+$/.test(lastLine)) {
            // Remove block leading whitespace from beginning of the string
            tok = tok.slice(0, -this.colno);

            if (!tok.length) {
              // All data removed, collapse to avoid unnecessary nodes
              // by returning next token (block start)
              return this.nextToken();
            }
          }
        }

        // If it is a start tag, stop looping
        break;
      } else if (this._matches(tags.COMMENT_END)) {
        if (!inComment) {
          throw new Error('unexpected end of comment');
        }

        tok += this._extractString(tags.COMMENT_END);
        break;
      } else {
        // It does not match any tag, so add the character and
        // carry on
        tok += this.current();
        this.forward();
      }
    }

    if (data === null && inComment) {
      throw new Error('expected end of comment, got end of file');
    }

    return token(inComment ? TOKEN_COMMENT : TOKEN_DATA, tok, lineno, colno);
  };

  /**
   * Read a quoted string; the cursor must be on the opening quote.
   * The escapes \n, \t and \r are translated; any other escaped character
   * is kept literally. An unterminated string runs to the end of the input.
   *
   * @param {string} delimiter - The opening quote character.
   * @returns {string} The string contents without the quotes.
   */
  _proto._parseString = function _parseString(delimiter) {
    // Step over the opening quote.
    this.forward();
    var str = '';

    while (!this.isFinished() && this.current() !== delimiter) {
      var cur = this.current();

      if (cur === '\\') {
        this.forward();

        switch (this.current()) {
          case 'n':
            str += '\n';
            break;

          case 't':
            str += '\t';
            break;

          case 'r':
            str += '\r';
            break;

          default:
            str += this.current();
        }

        this.forward();
      } else {
        str += cur;
        this.forward();
      }
    }

    // Step over the closing quote.
    this.forward();
    return str;
  };

  /**
   * Test whether the input at the current position starts with `str`,
   * without consuming anything.
   *
   * @returns {?boolean} null when fewer than `str.length` characters
   *   remain, otherwise whether the text matches.
   */
  _proto._matches = function _matches(str) {
    if (this.index + str.length > this.len) {
      return null;
    }

    var m = this.str.slice(this.index, this.index + str.length);
    return m === str;
  };

  /**
   * Consume `str` if the input continues with it.
   *
   * @returns {?string} `str` when consumed, otherwise null.
   */
  _proto._extractString = function _extractString(str) {
    if (this._matches(str)) {
      this.forwardN(str.length);
      return str;
    }

    return null;
  };

  /**
   * Consume characters up to (not including) the first one contained in
   * `charString`. With no `charString`, everything left is consumed.
   */
  _proto._extractUntil = function _extractUntil(charString) {
    // Extract all non-matching chars, with the default matching set
    // to everything
    return this._extractMatching(true, charString || '');
  };

  /**
   * Consume the leading run of characters that are contained in
   * `charString`.
   */
  _proto._extract = function _extract(charString) {
    // Extract all matching chars (no default, so charString must be
    // explicit)
    return this._extractMatching(false, charString);
  };

  /**
   * Pull out characters until a breaking char is hit.
   *
   * @param {boolean} breakOnMatch - If true, a character found in
   *   `charString` stops the run; if false, a character not found in it
   *   stops the run.
   * @param {string} charString - The character set to test against.
   * @returns {?string} The consumed text, '' when the first character
   *   already stops the run, or null at end of input.
   */
  _proto._extractMatching = function _extractMatching(breakOnMatch, charString) {
    if (this.isFinished()) {
      return null;
    }

    var first = charString.indexOf(this.current());

    // Only proceed if the first character doesn't meet our condition
    if ((breakOnMatch && first === -1) || (!breakOnMatch && first !== -1)) {
      var t = this.current();
      this.forward();

      // And pull out all the chars one at a time until we hit a
      // breaking char
      var idx = charString.indexOf(this.current());

      while (((breakOnMatch && idx === -1) || (!breakOnMatch && idx !== -1)) && !this.isFinished()) {
        t += this.current();
        this.forward();
        idx = charString.indexOf(this.current());
      }

      return t;
    }

    return '';
  };

  /**
   * Match `regex` against the unparsed remainder and consume the matched
   * text.
   *
   * @returns {?Array} The result of `String.prototype.match`, or null when
   *   nothing matched.
   */
  _proto._extractRegex = function _extractRegex(regex) {
    var matches = this.currentStr().match(regex);

    if (!matches) {
      return null;
    }

    // Move forward whatever was matched
    this.forwardN(matches[0].length);
    return matches;
  };

  /** @returns {boolean} true once the cursor is at or past the end. */
  _proto.isFinished = function isFinished() {
    return this.index >= this.len;
  };

  /** Advance the cursor `n` characters, updating line/column. */
  _proto.forwardN = function forwardN(n) {
    for (var i = 0; i < n; i++) {
      this.forward();
    }
  };

  /**
   * Advance the cursor one character. Stepping over "\n" starts a new line
   * (lineno + 1, colno 0); anything else advances the column.
   */
  _proto.forward = function forward() {
    this.index++;

    if (this.previous() === '\n') {
      this.lineno++;
      this.colno = 0;
    } else {
      this.colno++;
    }
  };

  /** Move the cursor back `n` characters, updating line/column. */
  _proto.backN = function backN(n) {
    for (var i = 0; i < n; i++) {
      this.back();
    }
  };

  /**
   * Move the cursor back one character; the inverse of forward().
   * Landing on a "\n" means the previous line is re-entered, so its column
   * is recomputed from the position of the newline before it.
   */
  _proto.back = function back() {
    this.index--;

    if (this.current() === '\n') {
      this.lineno--;

      // Newline that ends the line before this one; -1 when this is the
      // first line. At index 0 nothing precedes the cursor, so do not search.
      var idx = this.index > 0 ? this.str.lastIndexOf('\n', this.index - 1) : -1;

      // Number of characters between that newline and the cursor, which is
      // the column forward() reported when it stood here.
      this.colno = this.index - idx - 1;
    } else {
      this.colno--;
    }
  };

  /** @returns {string} The character under the cursor, or '' at the end. */
  _proto.current = function current() {
    if (!this.isFinished()) {
      return this.str.charAt(this.index);
    }

    return '';
  };

  /** @returns {string} What is left of the unparsed string ('' at the end). */
  _proto.currentStr = function currentStr() {
    if (!this.isFinished()) {
      return this.str.substr(this.index);
    }

    return '';
  };

  /** @returns {string} The character just before the cursor ('' at index 0). */
  _proto.previous = function previous() {
    return this.str.charAt(this.index - 1);
  };

  return Tokenizer;
}();
-
- module.exports = {
- lex: function lex(src, opts) {
- return new Tokenizer(src, opts);
- },
- TOKEN_STRING: TOKEN_STRING,
- TOKEN_WHITESPACE: TOKEN_WHITESPACE,
- TOKEN_DATA: TOKEN_DATA,
- TOKEN_BLOCK_START: TOKEN_BLOCK_START,
- TOKEN_BLOCK_END: TOKEN_BLOCK_END,
- TOKEN_VARIABLE_START: TOKEN_VARIABLE_START,
- TOKEN_VARIABLE_END: TOKEN_VARIABLE_END,
- TOKEN_COMMENT: TOKEN_COMMENT,
- TOKEN_LEFT_PAREN: TOKEN_LEFT_PAREN,
- TOKEN_RIGHT_PAREN: TOKEN_RIGHT_PAREN,
- TOKEN_LEFT_BRACKET: TOKEN_LEFT_BRACKET,
- TOKEN_RIGHT_BRACKET: TOKEN_RIGHT_BRACKET,
- TOKEN_LEFT_CURLY: TOKEN_LEFT_CURLY,
- TOKEN_RIGHT_CURLY: TOKEN_RIGHT_CURLY,
- TOKEN_OPERATOR: TOKEN_OPERATOR,
- TOKEN_COMMA: TOKEN_COMMA,
- TOKEN_COLON: TOKEN_COLON,
- TOKEN_TILDE: TOKEN_TILDE,
- TOKEN_PIPE: TOKEN_PIPE,
- TOKEN_INT: TOKEN_INT,
- TOKEN_FLOAT: TOKEN_FLOAT,
- TOKEN_BOOLEAN: TOKEN_BOOLEAN,
- TOKEN_NONE: TOKEN_NONE,
- TOKEN_SYMBOL: TOKEN_SYMBOL,
- TOKEN_SPECIAL: TOKEN_SPECIAL,
- TOKEN_REGEX: TOKEN_REGEX
- };
|