|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682 |
- ;(function (clarinet) {
- "use strict";
-
// Where the CDEBUG switch is read from: process.env under Node.js, otherwise
// the root object `self`, on which non-Node hosts have to set CDEBUG themselves.
var env;
if (typeof process === 'object' && process.env) {
  env = process.env;
} else {
  env = self;
}
-
// Public API attached to the export object.
clarinet.parser = function (opt) {
  return new CParser(opt);
};
clarinet.CParser = CParser;
clarinet.CStream = CStream;
clarinet.createStream = createStream;

// Threshold used by the buffer length check.
clarinet.MAX_BUFFER_LENGTH = 64 * 1024;

// CDEBUG=debug switches on both flags, CDEBUG=info switches on INFO only.
clarinet.DEBUG = (env.CDEBUG === 'debug');
clarinet.INFO = (clarinet.DEBUG || env.CDEBUG === 'info');

// Names of the events a parser or stream can emit.
clarinet.EVENTS = [
  "value",
  "string",
  "key",
  "openobject",
  "closeobject",
  "openarray",
  "closearray",
  "error",
  "end",
  "ready"
];
-
// Names of the per-parser accumulation buffers, mapped to their reset values.
var buffers = {
  textNode: undefined,
  numberNode: ""
};

// Every event except "error" and "end"; streams expose these as on<event>
// accessors.
var streamWraps = clarinet.EVENTS.filter(function (ev) {
  return !(ev === "error" || ev === "end");
});

// Running counter used to number the parser states; it is rebound to the
// state table itself once the table has been built.
var S = 0;

// Base stream constructor; assigned further down.
var Stream;
-
// Parser states, numbered consecutively from 0 in declaration order.
// For the literal states (TRUE*, FALSE*, NULL*) the comment shows the
// character the state expects next.
clarinet.STATE = {
  BEGIN: S++,
  VALUE: S++,                // general stuff
  OPEN_OBJECT: S++,          // {
  CLOSE_OBJECT: S++,         // }
  OPEN_ARRAY: S++,           // [
  CLOSE_ARRAY: S++,          // ]
  TEXT_ESCAPE: S++,          // \ stuff
  STRING: S++,               // ""
  BACKSLASH: S++,
  END: S++,                  // No more stack
  OPEN_KEY: S++,             // , "a"
  CLOSE_KEY: S++,            // :
  TRUE: S++,                 // r
  TRUE2: S++,                // u
  TRUE3: S++,                // e
  FALSE: S++,                // a
  FALSE2: S++,               // l
  FALSE3: S++,               // s
  FALSE4: S++,               // e
  NULL: S++,                 // u
  NULL2: S++,                // l
  NULL3: S++,                // l
  NUMBER_DECIMAL_POINT: S++, // .
  NUMBER_DIGIT: S++          // [0-9]
};

// Add the reverse lookup (state number -> state name). The names are
// snapshotted first: the loop adds properties to the very object it walks,
// and whether properties added during a for...in enumeration are visited is
// left to the engine.
Object.keys(clarinet.STATE).forEach(function (name) {
  clarinet.STATE[clarinet.STATE[name]] = name;
});

// switcharoo: from here on S is shorthand for the state table
S = clarinet.STATE;
-
// Character codes the tokenizer compares against, derived from the
// characters themselves.
const Char = (function () {
  function code (ch) {
    return ch.charCodeAt(0);
  }

  return {
    tab: code("\t"),
    lineFeed: code("\n"),
    carriageReturn: code("\r"),
    space: code(" "),

    doubleQuote: code("\""),
    plus: code("+"),
    comma: code(","),
    minus: code("-"),
    period: code("."),

    _0: code("0"),
    _9: code("9"),

    colon: code(":"),

    E: code("E"),

    openBracket: code("["),
    backslash: code("\\"),
    closeBracket: code("]"),

    a: code("a"),
    b: code("b"),
    e: code("e"),
    f: code("f"),
    l: code("l"),
    n: code("n"),
    r: code("r"),
    s: code("s"),
    t: code("t"),
    u: code("u"),

    openBrace: code("{"),
    closeBrace: code("}")
  };
})();
-
// Minimal stand-ins for Object.create, Object.getPrototypeOf and Object.keys.
// Each one is installed only when the host does not already provide the method.
if (!Object.create) {
  Object.create = function (o) {
    function Surrogate () {
      this["__proto__"] = o;
    }
    Surrogate.prototype = o;
    return new Surrogate();
  };
}

if (!Object.getPrototypeOf) {
  Object.getPrototypeOf = function (o) {
    return o["__proto__"];
  };
}

if (!Object.keys) {
  Object.keys = function (o) {
    var names = [];
    for (var key in o) {
      // own properties only
      if (o.hasOwnProperty(key)) names.push(key);
    }
    return names;
  };
}
-
/**
 * Checks every accumulation buffer of the parser against the length limit
 * and schedules the next check.
 *
 * A buffer longer than max(clarinet.MAX_BUFFER_LENGTH, 10) is reported through
 * error(). Afterwards parser.bufferCheckPosition is moved forward by the room
 * left in the fullest buffer, so the next check happens no later than the
 * point at which that buffer could reach the limit.
 *
 * @param {Object} parser parser whose buffers are inspected
 */
function checkBufferLength (parser) {
  var maxAllowed = Math.max(clarinet.MAX_BUFFER_LENGTH, 10);
  var maxActual = 0;

  for (var buffer in buffers) {
    // a buffer that has not been started yet is undefined and counts as empty
    var len = parser[buffer] === undefined ? 0 : parser[buffer].length;
    if (len > maxAllowed) {
      // Every buffer is treated the same way. The former special case for a
      // buffer named "text" could never match (the buffers are "textNode" and
      // "numberNode") and referred to a function that does not exist.
      error(parser, "Max buffer length exceeded: " + buffer);
    }
    maxActual = Math.max(maxActual, len);
  }

  parser.bufferCheckPosition = (clarinet.MAX_BUFFER_LENGTH - maxActual)
    + parser.position;
}
-
/**
 * Resets every accumulation buffer on the parser to the initial value
 * recorded in `buffers`.
 *
 * @param {Object} parser object whose buffer properties are overwritten
 */
function clearBuffers (parser) {
  var name;
  for (name in buffers) {
    parser[name] = buffers[name];
  }
}
-
// Matches the next backslash, double quote or line feed. It is global so that
// a search can be started from an arbitrary offset by setting lastIndex.
var stringTokenPattern = new RegExp('[\\\\"\\n]', 'g');
-
/**
 * Parser constructor; also works when called without `new`.
 *
 * Puts every piece of parser state into its initial configuration and then
 * emits "onready".
 *
 * @param {Object} [opt] options object; defaults to an empty object
 */
function CParser (opt) {
  if (!(this instanceof CParser)) {
    return new CParser(opt);
  }

  var self = this;

  clearBuffers(self);
  self.bufferCheckPosition = clarinet.MAX_BUFFER_LENGTH;

  self.p = "";
  self.c = "";
  self.q = "";

  self.opt = opt || {};

  self.sawRoot = false;
  self.closedRoot = false;
  self.closed = false;

  self.error = null;
  self.tag = null;

  self.state = S.BEGIN;
  self.stack = [];

  // mostly just for error reporting
  self.column = 0;
  self.position = 0;
  self.line = 1;

  // string-parsing state that has to survive between write() calls
  self.slashed = false;
  self.unicodeI = 0;
  self.unicodeS = null;

  self.depth = 0;

  emit(self, "onready");
}
-
// Instance API of the parser.
CParser.prototype = {
  // Finishes parsing. Returns the parser, like close(), write() and resume()
  // do, so that calls can be chained.
  end: function () { return end(this); },

  // Feeds a chunk of text to the parser.
  write: write,

  // Clears a recorded error so that write() accepts input again.
  resume: function () { this.error = null; return this; },

  // Equivalent to end(): write() treats a null chunk as end of input.
  close: function () { return this.write(null); }
};
-
// Use the `stream` module's Stream as base class when it can be loaded;
// otherwise fall back to an empty constructor so that CStream can still be
// defined.
try {
  var streamModule = require("stream");
  Stream = streamModule.Stream;
} catch (ex) {
  Stream = function () {};
}
-
/**
 * Factory counterpart of `new CStream(opt)`.
 *
 * @param {Object} [opt] options handed to the CStream constructor
 * @returns {CStream} the newly created stream
 */
function createStream (opt) {
  var stream = new CStream(opt);
  return stream;
}
-
/**
 * Stream front end for CParser; also works when called without `new`.
 *
 * Incoming chunks are decoded from UTF-8 and handed to an internal parser.
 * Parser events are re-emitted on the stream, and the events listed in
 * streamWraps are additionally exposed as on<event> accessors.
 *
 * @param {Object} [opt] options handed to the internal CParser
 */
function CStream (opt) {
  if (!(this instanceof CStream)) return new CStream(opt);

  this._parser = new CParser(opt);
  this.writable = true;
  this.readable = true;

  // State for reassembling a multi-byte UTF-8 character that was split
  // across two or more chunks.
  this.bytes_remaining = 0;   // bytes still missing from the split character
  this.bytes_in_sequence = 0; // total byte length of the character being read
  // scratch buffers, keyed by sequence length, in which a split character is rebuilt
  this.temp_buffs = { "2": allocBuffer(2), "3": allocBuffer(3), "4": allocBuffer(4) };
  this.string = '';

  var me = this;
  Stream.apply(me);

  this._parser.onend = function () { me.emit("end"); };
  this._parser.onerror = function (er) {
    me.emit("error", er);
    // clear the error so that the next write() does not throw
    me._parser.error = null;
  };

  streamWraps.forEach(function (ev) {
    Object.defineProperty(me, "on" + ev, {
      get: function () { return me._parser["on" + ev]; },
      set: function (h) {
        // assigning a falsy value removes every listener for the event
        if (!h) {
          me.removeAllListeners(ev);
          me._parser["on" + ev] = h;
          return h;
        }
        me.on(ev, h);
      },
      enumerable: true,
      configurable: false
    });
  });
}

// Allocates a zero-filled Buffer, using the non-deprecated API when available.
function allocBuffer (size) {
  return typeof Buffer.alloc === "function" ? Buffer.alloc(size) : new Buffer(size);
}

// Converts write() input to a Buffer without the deprecated constructor.
// Unlike `new Buffer(n)`, Buffer.from() rejects a number instead of
// allocating n bytes.
function toBuffer (data) {
  if (Buffer.isBuffer(data)) return data;
  return typeof Buffer.alloc === "function" ? Buffer.from(data) : new Buffer(data);
}

CStream.prototype = Object.create(Stream.prototype,
  { constructor: { value: CStream } });

/**
 * Decodes a chunk of UTF-8 data and feeds it to the parser.
 *
 * Each decoded piece is written to the parser and emitted as a "data" event.
 * A multi-byte character cut off at the end of the chunk is kept back and
 * completed by the following write() call(s).
 *
 * @param {Buffer|string} data chunk to parse
 * @returns {boolean} always true
 */
CStream.prototype.write = function (data) {
  data = toBuffer(data);
  for (var i = 0; i < data.length; i++) {
    var n = data[i];

    // A multi-byte character was cut off at the end of the previous chunk:
    // append as many of the missing bytes as this chunk can provide.
    if (this.bytes_remaining > 0) {
      var pending = this.temp_buffs[this.bytes_in_sequence];
      var offset = this.bytes_in_sequence - this.bytes_remaining;
      var available = Math.min(this.bytes_remaining, data.length - i);
      for (var j = 0; j < available; j++) {
        pending[offset + j] = data[i + j];
      }
      this.bytes_remaining -= available;

      // this chunk ran out too: keep waiting for the rest of the character
      if (this.bytes_remaining > 0) return true;

      this.string = pending.toString();
      this.bytes_in_sequence = 0;

      // move the iterator past the bytes consumed while completing the character
      i = i + available - 1;

      this._parser.write(this.string);
      this.emit("data", this.string);
      continue;
    }

    // multi-byte (>= 128) characters are handled one at a time
    if (n >= 128) {
      if (n >= 194 && n <= 223) this.bytes_in_sequence = 2;
      else if (n >= 224 && n <= 239) this.bytes_in_sequence = 3;
      else if (n >= 240 && n <= 244) this.bytes_in_sequence = 4;
      // Not a valid lead byte (stray continuation byte or out-of-range
      // value): consume it on its own so that the loop always advances.
      else this.bytes_in_sequence = 1;

      if ((this.bytes_in_sequence + i) > data.length) {
        // The character continues in the next chunk: stash the bytes that
        // are available and wait for the rest.
        for (var k = 0; k <= (data.length - 1 - i); k++) {
          this.temp_buffs[this.bytes_in_sequence][k] = data[i + k];
        }
        this.bytes_remaining = (i + this.bytes_in_sequence) - data.length;
        return true;
      }

      this.string = data.toString("utf8", i, i + this.bytes_in_sequence);
      i = i + this.bytes_in_sequence - 1;

      this._parser.write(this.string);
      this.emit("data", this.string);
      continue;
    }

    // hand over the longest run of single-byte characters in one go
    for (var p = i; p < data.length; p++) {
      if (data[p] >= 128) break;
    }
    this.string = data.toString("utf8", i, p);
    this._parser.write(this.string);
    this.emit("data", this.string);
    // anything after the run is picked up by the multi-byte logic above
    i = p - 1;
  }
  return true;
};

/**
 * Ends the stream, optionally after writing a final chunk.
 *
 * The final chunk goes through write(), so it may complete a character that
 * was split across chunks.
 *
 * @param {Buffer|string} [chunk] last piece of input
 * @returns {boolean} always true
 */
CStream.prototype.end = function (chunk) {
  if (chunk && chunk.length) this.write(chunk);
  this._parser.end();
  return true;
};

/**
 * Registers a listener. For a wrapped parser event this also installs, once,
 * a parser handler that re-emits the event on the stream.
 *
 * @param {string} ev event name
 * @param {Function} handler listener
 * @returns {*} whatever Stream.prototype.on returns
 */
CStream.prototype.on = function (ev, handler) {
  var me = this;
  if (!me._parser["on" + ev] && streamWraps.indexOf(ev) !== -1) {
    me._parser["on" + ev] = function () {
      // a single argument is wrapped by hand because Array.apply(null, [n])
      // would create an array of length n for a numeric argument
      var args = arguments.length === 1 ? [arguments[0]]
        : Array.apply(null, arguments);
      args.splice(0, 0, ev);
      me.emit.apply(me, args);
    };
  }
  return Stream.prototype.on.call(me, ev, handler);
};

/**
 * Drops buffered parser input and emits "close".
 */
CStream.prototype.destroy = function () {
  clearBuffers(this._parser);
  this.emit("close");
};
-
/**
 * Invokes the handler stored on the parser under `event`, if there is one.
 *
 * @param {Object} parser object carrying the handlers
 * @param {string} event handler property name, e.g. "onvalue"
 * @param {*} [data] payload passed to the handler
 */
function emit (parser, event, data) {
  if (clarinet.INFO) {
    console.log('-- emit', event, data);
  }
  if (!parser[event]) return;
  parser[event](data);
}
-
/**
 * Emits an event after flushing any pending text value.
 *
 * @param {Object} parser parser to emit on
 * @param {string} event handler property name
 * @param {*} [data] payload for the handler
 */
function emitNode (parser, event, data) {
  // the buffered text value goes out first
  closeValue(parser);

  emit(parser, event, data);
}
-
/**
 * Emits the buffered text, if any, and clears the text buffer.
 *
 * The text first passes through textopts(). It is emitted under `event`, or
 * under "onvalue" when no event is given.
 *
 * @param {Object} parser parser holding textNode and opt
 * @param {string} [event] handler property name to emit under
 */
function closeValue (parser, event) {
  var text = textopts(parser.opt, parser.textNode);
  parser.textNode = text;

  if (text !== undefined) {
    emit(parser, event || "onvalue", text);
  }

  parser.textNode = undefined;
}
-
/**
 * Emits the buffered number, if any, as an "onvalue" event and clears the
 * number buffer. The collected text is converted with parseFloat.
 *
 * @param {Object} parser parser holding numberNode
 */
function closeNumber (parser) {
  var digits = parser.numberNode;
  if (digits) {
    emit(parser, "onvalue", parseFloat(digits));
  }
  parser.numberNode = "";
}
-
/**
 * Applies the text options to a string value.
 *
 * @param {Object} opt options; `trim` strips surrounding whitespace and
 *   `normalize` collapses every whitespace run into a single space
 * @param {string|undefined} text value to process
 * @returns {string|undefined} the processed text; undefined is passed through
 */
function textopts (opt, text) {
  var result = text;
  if (result !== undefined) {
    if (opt.trim) result = result.trim();
    if (opt.normalize) result = result.replace(/\s+/g, " ");
  }
  return result;
}
-
/**
 * Records a parse error on the parser and reports it through "onerror".
 *
 * Pending text is flushed first. The message is extended with the current
 * line, column and character, wrapped in an Error, stored in parser.error and
 * emitted.
 *
 * @param {Object} parser parser on which the error occurred
 * @param {string} er description of the problem
 * @returns {Object} the parser
 */
function error (parser, er) {
  closeValue(parser);

  // parser.c holds a character code while a chunk is being processed (NaN
  // once the chunk is exhausted) and "" otherwise; report the character
  // itself rather than its numeric code.
  var current = parser.c;
  if (typeof current === "number") {
    current = isNaN(current) ? "" : String.fromCharCode(current);
  }

  er += "\nLine: " + parser.line +
        "\nColumn: " + parser.column +
        "\nChar: " + current;
  er = new Error(er);
  parser.error = er;
  emit(parser, "onerror", er);
  return parser;
}
-
/**
 * Finishes parsing.
 *
 * Reports "Unexpected end" unless the parser is in the VALUE state at depth 0,
 * flushes pending text, marks the parser closed, emits "onend" and finally
 * re-initialises the parser by running the constructor on it again.
 *
 * @param {Object} parser parser to finish
 * @returns {Object} the parser
 */
function end (parser) {
  var finishedCleanly = parser.state === S.VALUE && parser.depth === 0;
  if (!finishedCleanly) {
    error(parser, "Unexpected end");
  }

  closeValue(parser);
  parser.c = "";
  parser.closed = true;
  emit(parser, "onend");

  // reset for reuse, keeping the options
  CParser.call(parser, parser.opt);
  return parser;
}
-
/**
 * Tells whether a character code is carriage return, line feed, space or tab.
 *
 * @param {number} c character code
 * @returns {boolean} true for one of the four whitespace codes
 */
function isWhitespace (c) {
  switch (c) {
    case Char.carriageReturn:
    case Char.lineFeed:
    case Char.space:
    case Char.tab:
      return true;
    default:
      return false;
  }
}
-
/**
 * Feeds one chunk of JSON text to the parser (used as CParser.prototype.write).
 *
 * The chunk is processed one character code at a time by a state machine
 * driven by parser.state and parser.stack, emitting events as tokens are
 * recognised. All state needed to continue in the next chunk is kept on the
 * parser.
 *
 * @this {CParser}
 * @param {string|null} chunk text to parse; null ends parsing
 * @returns {Object} the parser
 * @throws {Error} the pending parser.error, if one has not been cleared
 */
function write (chunk) {
  var parser = this;
  if (this.error) throw this.error;
  if (parser.closed) return error(parser,
    "Cannot write after close. Assign an onready handler.");
  if (chunk === null) return end(parser);
  var i = 0, c = chunk.charCodeAt(0), p = parser.p;
  if (clarinet.DEBUG) console.log('write -> [' + chunk + ']');
  // charCodeAt() returns NaN past the end of the chunk, which ends the loop.
  // NOTE: a NUL character (code 0) is falsy as well and ends it the same way.
  while (c) {
    p = c;
    parser.c = c = chunk.charCodeAt(i++);
    // if chunk doesnt have next, like streaming char by char
    // this way we need to check if previous is really previous
    // if not we need to reset to what the parser says is the previous
    // from buffer
    if (p !== c) parser.p = p;
    else p = parser.p;

    if (!c) break;

    if (clarinet.DEBUG) console.log(i, c, clarinet.STATE[parser.state]);
    parser.position++;
    if (c === Char.lineFeed) {
      parser.line++;
      parser.column = 0;
    } else parser.column++;
    switch (parser.state) {

      case S.BEGIN:
        if (c === Char.openBrace) parser.state = S.OPEN_OBJECT;
        else if (c === Char.openBracket) parser.state = S.OPEN_ARRAY;
        else if (!isWhitespace(c))
          error(parser, "Non-whitespace before {[.");
        continue;

      case S.OPEN_KEY:
      case S.OPEN_OBJECT:
        if (isWhitespace(c)) continue;
        if (parser.state === S.OPEN_KEY) parser.stack.push(S.CLOSE_KEY);
        else {
          if (c === Char.closeBrace) {
            // empty object: open and close it right away
            emit(parser, 'onopenobject');
            parser.depth++;
            emit(parser, 'oncloseobject');
            parser.depth--;
            parser.state = parser.stack.pop() || S.VALUE;
            continue;
          } else parser.stack.push(S.CLOSE_OBJECT);
        }
        if (c === Char.doubleQuote) parser.state = S.STRING;
        else error(parser, "Malformed object key should start with \"");
        continue;

      case S.CLOSE_KEY:
      case S.CLOSE_OBJECT:
        if (isWhitespace(c)) continue;
        if (c === Char.colon) {
          if (parser.state === S.CLOSE_OBJECT) {
            // first key of an object: it is delivered with the open event
            parser.stack.push(S.CLOSE_OBJECT);
            closeValue(parser, 'onopenobject');
            parser.depth++;
          } else closeValue(parser, 'onkey');
          parser.state = S.VALUE;
        } else if (c === Char.closeBrace) {
          emitNode(parser, 'oncloseobject');
          parser.depth--;
          parser.state = parser.stack.pop() || S.VALUE;
        } else if (c === Char.comma) {
          if (parser.state === S.CLOSE_OBJECT)
            parser.stack.push(S.CLOSE_OBJECT);
          closeValue(parser);
          parser.state = S.OPEN_KEY;
        } else error(parser, 'Bad object');
        continue;

      case S.OPEN_ARRAY: // after an array there always a value
      case S.VALUE:
        if (isWhitespace(c)) continue;
        if (parser.state === S.OPEN_ARRAY) {
          emit(parser, 'onopenarray');
          parser.depth++;
          parser.state = S.VALUE;
          if (c === Char.closeBracket) {
            emit(parser, 'onclosearray');
            parser.depth--;
            parser.state = parser.stack.pop() || S.VALUE;
            continue;
          } else {
            parser.stack.push(S.CLOSE_ARRAY);
          }
        }
        if (c === Char.doubleQuote) parser.state = S.STRING;
        else if (c === Char.openBrace) parser.state = S.OPEN_OBJECT;
        else if (c === Char.openBracket) parser.state = S.OPEN_ARRAY;
        else if (c === Char.t) parser.state = S.TRUE;
        else if (c === Char.f) parser.state = S.FALSE;
        else if (c === Char.n) parser.state = S.NULL;
        else if (c === Char.minus) { // keep and continue
          parser.numberNode += "-";
        } else if (Char._0 <= c && c <= Char._9) {
          parser.numberNode += String.fromCharCode(c);
          parser.state = S.NUMBER_DIGIT;
        } else error(parser, "Bad value");
        continue;

      case S.CLOSE_ARRAY:
        if (c === Char.comma) {
          parser.stack.push(S.CLOSE_ARRAY);
          closeValue(parser, 'onvalue');
          parser.state = S.VALUE;
        } else if (c === Char.closeBracket) {
          emitNode(parser, 'onclosearray');
          parser.depth--;
          parser.state = parser.stack.pop() || S.VALUE;
        } else if (isWhitespace(c))
          continue;
        else error(parser, 'Bad array');
        continue;

      case S.STRING:
        if (parser.textNode === undefined) {
          parser.textNode = "";
        }

        // thanks thejh, this is an about 50% performance improvement.
        // starti marks the start of the literal run that has not been copied
        // into textNode yet.
        var starti = i - 1
          , slashed = parser.slashed
          , unicodeI = parser.unicodeI
          ;
        STRING_BIGLOOP: while (true) {
          if (clarinet.DEBUG)
            console.log(i, c, clarinet.STATE[parser.state]
              , slashed);
          // zero means "no unicode active". 1-4 mean "parse some more". end after 4.
          while (unicodeI > 0) {
            parser.unicodeS += String.fromCharCode(c);
            c = chunk.charCodeAt(i++);
            parser.position++;
            if (unicodeI === 4) {
              // TODO this might be slow? well, probably not used too often anyway
              parser.textNode += String.fromCharCode(parseInt(parser.unicodeS, 16));
              unicodeI = 0;
              starti = i - 1;
            } else {
              unicodeI++;
            }
            // we can just break here: no stuff we skipped that still has to be sliced out or so
            if (!c) break STRING_BIGLOOP;
          }
          if (c === Char.doubleQuote && !slashed) {
            // closing quote: copy the pending run and return to the outer state
            parser.state = parser.stack.pop() || S.VALUE;
            parser.textNode += chunk.substring(starti, i - 1);
            parser.position += i - 1 - starti;
            break;
          }
          if (c === Char.backslash && !slashed) {
            slashed = true;
            parser.textNode += chunk.substring(starti, i - 1);
            parser.position += i - 1 - starti;
            c = chunk.charCodeAt(i++);
            parser.position++;
            // the escaped character arrives with the next chunk
            if (!c) break;
          }
          if (slashed) {
            slashed = false;
            if (c === Char.n) { parser.textNode += '\n'; }
            else if (c === Char.r) { parser.textNode += '\r'; }
            else if (c === Char.t) { parser.textNode += '\t'; }
            else if (c === Char.f) { parser.textNode += '\f'; }
            else if (c === Char.b) { parser.textNode += '\b'; }
            else if (c === Char.u) {
              // \uxxxx. meh!
              unicodeI = 1;
              parser.unicodeS = '';
            } else {
              parser.textNode += String.fromCharCode(c);
            }
            c = chunk.charCodeAt(i++);
            parser.position++;
            starti = i - 1;
            if (!c) break;
            else continue;
          }

          // jump straight to the next character that needs special handling
          stringTokenPattern.lastIndex = i;
          var reResult = stringTokenPattern.exec(chunk);
          if (reResult === null) {
            // none left in this chunk: take the rest as literal text
            i = chunk.length + 1;
            parser.textNode += chunk.substring(starti, i - 1);
            parser.position += i - 1 - starti;
            break;
          }
          i = reResult.index + 1;
          c = chunk.charCodeAt(reResult.index);
          if (!c) {
            parser.textNode += chunk.substring(starti, i - 1);
            parser.position += i - 1 - starti;
            break;
          }
        }
        // keep escape state so that the next chunk can continue the string
        parser.slashed = slashed;
        parser.unicodeI = unicodeI;
        continue;

      case S.TRUE:
        if (c === Char.r) parser.state = S.TRUE2;
        else error(parser, 'Invalid true started with t' + String.fromCharCode(c));
        continue;

      case S.TRUE2:
        if (c === Char.u) parser.state = S.TRUE3;
        else error(parser, 'Invalid true started with tr' + String.fromCharCode(c));
        continue;

      case S.TRUE3:
        if (c === Char.e) {
          emit(parser, "onvalue", true);
          parser.state = parser.stack.pop() || S.VALUE;
        } else error(parser, 'Invalid true started with tru' + String.fromCharCode(c));
        continue;

      case S.FALSE:
        if (c === Char.a) parser.state = S.FALSE2;
        else error(parser, 'Invalid false started with f' + String.fromCharCode(c));
        continue;

      case S.FALSE2:
        if (c === Char.l) parser.state = S.FALSE3;
        else error(parser, 'Invalid false started with fa' + String.fromCharCode(c));
        continue;

      case S.FALSE3:
        if (c === Char.s) parser.state = S.FALSE4;
        else error(parser, 'Invalid false started with fal' + String.fromCharCode(c));
        continue;

      case S.FALSE4:
        if (c === Char.e) {
          emit(parser, "onvalue", false);
          parser.state = parser.stack.pop() || S.VALUE;
        } else error(parser, 'Invalid false started with fals' + String.fromCharCode(c));
        continue;

      case S.NULL:
        if (c === Char.u) parser.state = S.NULL2;
        else error(parser, 'Invalid null started with n' + String.fromCharCode(c));
        continue;

      case S.NULL2:
        if (c === Char.l) parser.state = S.NULL3;
        else error(parser, 'Invalid null started with nu' + String.fromCharCode(c));
        continue;

      case S.NULL3:
        if (c === Char.l) {
          emit(parser, "onvalue", null);
          parser.state = parser.stack.pop() || S.VALUE;
        } else error(parser, 'Invalid null started with nul' + String.fromCharCode(c));
        continue;

      case S.NUMBER_DECIMAL_POINT:
        if (c === Char.period) {
          parser.numberNode += ".";
          parser.state = S.NUMBER_DIGIT;
        } else error(parser, 'Leading zero not followed by .');
        continue;

      case S.NUMBER_DIGIT:
        if (Char._0 <= c && c <= Char._9) parser.numberNode += String.fromCharCode(c);
        else if (c === Char.period) {
          if (parser.numberNode.indexOf('.') !== -1)
            error(parser, 'Invalid number has two dots');
          parser.numberNode += ".";
        } else if (c === Char.e || c === Char.E) {
          if (parser.numberNode.indexOf('e') !== -1 ||
              parser.numberNode.indexOf('E') !== -1)
            error(parser, 'Invalid number has two exponential');
          parser.numberNode += "e";
        } else if (c === Char.plus || c === Char.minus) {
          // a sign is only valid directly after the exponent marker
          if (!(p === Char.e || p === Char.E))
            error(parser, 'Invalid symbol in number');
          parser.numberNode += String.fromCharCode(c);
        } else {
          // the number ended: emit it and let the outer state see this character
          closeNumber(parser);
          i--; // go back one
          parser.state = parser.stack.pop() || S.VALUE;
        }
        continue;

      default:
        error(parser, "Unknown state: " + parser.state);
    }
  }
  if (parser.position >= parser.bufferCheckPosition)
    checkBufferLength(parser);
  return parser;
}
-
- })(typeof exports === "undefined" ? clarinet = {} : exports);
|