You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

multipart_parser.js 8.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332
  var Buffer = require('buffer').Buffer;

  // Parser states, numbered consecutively from 0 in the order listed here.
  var S = {};
  [
    'PARSER_UNINITIALIZED',
    'START',
    'START_BOUNDARY',
    'HEADER_FIELD_START',
    'HEADER_FIELD',
    'HEADER_VALUE_START',
    'HEADER_VALUE',
    'HEADER_VALUE_ALMOST_DONE',
    'HEADERS_ALMOST_DONE',
    'PART_DATA_START',
    'PART_DATA',
    'PART_END',
    'END'
  ].forEach(function(stateName, ordinal) {
    S[stateName] = ordinal;
  });

  // Bit flags recording which kind of boundary is currently being matched.
  var F = {
    PART_BOUNDARY: 1,
    LAST_BOUNDARY: 2
  };

  // Byte values the state machine compares against.
  var LF = 10;      // '\n'
  var CR = 13;      // '\r'
  var SPACE = 32;   // ' '
  var HYPHEN = 45;  // '-'
  var COLON = 58;   // ':'
  var A = 97;       // 'a'
  var Z = 122;      // 'z'

  /**
   * Sets bit 0x20 of a byte value, which maps ASCII 'A'-'Z' onto 'a'-'z'.
   *
   * @param {number} c Byte value.
   * @returns {number} The value with bit 0x20 set.
   */
  function lower(c) {
    return c | 32;
  }

  // Publish every state constant on the module's exports.
  for (var exportedState in S) {
    exports[exportedState] = S[exportedState];
  }
  /**
   * Streaming multipart parser. A new instance starts in the
   * PARSER_UNINITIALIZED state with no boundary, no lookbehind buffer,
   * a null index and no flags set.
   *
   * @constructor
   */
  function MultipartParser() {
    var parser = this;

    // Boundary-related buffers; all empty until a boundary is supplied.
    parser.boundary = null;
    parser.boundaryChars = null;
    parser.lookbehind = null;

    // State-machine bookkeeping.
    parser.state = S.PARSER_UNINITIALIZED;
    parser.index = null;
    parser.flags = 0;
  }

  exports.MultipartParser = MultipartParser;
  /**
   * Looks up the symbolic name of a numeric parser state.
   *
   * @param {number} stateNumber Numeric state value.
   * @returns {string|undefined} The matching key of the state table, or
   *   undefined when no state has that value.
   */
  MultipartParser.stateToString = function(stateNumber) {
    var matchedName;
    for (var candidate in S) {
      if (S[candidate] === stateNumber) {
        matchedName = candidate;
        break;
      }
    }
    return matchedName;
  };
  /**
   * Prepares the parser for a body delimited by the given boundary and moves
   * it to the START state.
   *
   * The boundary is stored with a leading "\r\n--" prefix. The lookbehind
   * buffer is allocated 8 bytes longer than the stored boundary, and
   * boundaryChars records every byte value that occurs in the stored boundary.
   *
   * @param {string} str Boundary token, without the leading dashes.
   */
  MultipartParser.prototype.initWithBoundary = function(str) {
    // Buffer.from sizes the buffer by encoded byte length, so a boundary with
    // multi-byte characters is no longer truncated; it also replaces the
    // deprecated `new Buffer(size)` constructor.
    this.boundary = Buffer.from('\r\n--' + str);
    // Buffer.alloc guarantees zero-filled memory for the lookbehind scratch area.
    this.lookbehind = Buffer.alloc(this.boundary.length + 8);
    this.state = S.START;

    this.boundaryChars = {};
    for (var i = 0; i < this.boundary.length; i++) {
      this.boundaryChars[this.boundary[i]] = true;
    }
  };
  /**
   * Feeds one chunk of the multipart body through the state machine.
   *
   * While parsing, the handlers below are invoked if they exist on the parser:
   * onPartBegin, onHeaderField, onHeaderValue, onHeaderEnd, onHeadersEnd,
   * onPartData, onPartEnd and onEnd. Data handlers (headerField, headerValue,
   * partData) receive (buffer, start, end) describing a slice; the others are
   * called without meaningful arguments.
   *
   * @param {Buffer} buffer Chunk of bytes to parse.
   * @returns {number} `buffer.length` when the whole chunk was consumed;
   *   otherwise the offset of the byte at which parsing stopped. A parser
   *   that has not been given a boundary consumes nothing and returns 0.
   */
  MultipartParser.prototype.write = function(buffer) {
    // Without a boundary there is nothing to match against. Bail out before
    // `boundary.length` is read, which would otherwise throw a TypeError.
    if (this.state === S.PARSER_UNINITIALIZED) {
      return 0;
    }

    var self = this;
    var i = 0;
    var len = buffer.length;
    var prevIndex = this.index;
    var index = this.index;
    var state = this.state;
    var flags = this.flags;
    var lookbehind = this.lookbehind;
    var boundary = this.boundary;
    var boundaryChars = this.boundaryChars;
    var boundaryLength = boundary.length;
    var boundaryEnd = boundaryLength - 1;
    var c;
    var cl;

    // Remember the current offset as the start of a pending data slice.
    var mark = function(name) {
      self[name + 'Mark'] = i;
    };

    // Forget a pending data slice without emitting it.
    var clear = function(name) {
      delete self[name + 'Mark'];
    };

    // Invoke the `on<Name>` handler if present; empty slices are not reported.
    var callback = function(name, chunk, start, end) {
      if (start !== undefined && start === end) {
        return;
      }

      var callbackSymbol = 'on' + name.charAt(0).toUpperCase() + name.slice(1);
      if (callbackSymbol in self) {
        self[callbackSymbol](chunk, start, end);
      }
    };

    // Emit the slice that started at the mark for `name`, if there is one.
    // With `consumeMark` the slice ends at the current offset and the mark is
    // removed; without it the slice runs to the end of the chunk and the mark
    // is reset to 0 so the slice continues at the start of the next chunk.
    var dataCallback = function(name, consumeMark) {
      var markSymbol = name + 'Mark';
      if (!(markSymbol in self)) {
        return;
      }

      if (!consumeMark) {
        callback(name, buffer, self[markSymbol], buffer.length);
        self[markSymbol] = 0;
      } else {
        callback(name, buffer, self[markSymbol], i);
        delete self[markSymbol];
      }
    };

    for (i = 0; i < len; i++) {
      c = buffer[i];
      switch (state) {
        case S.PARSER_UNINITIALIZED:
          return i;
        case S.START:
          index = 0;
          state = S.START_BOUNDARY;
          // falls through
        case S.START_BOUNDARY:
          // The stored boundary starts with "\r\n", which the very first
          // boundary of a body does not have; hence the +2 / -2 offsets.
          if (index === boundary.length - 2) {
            if (c === HYPHEN) {
              flags |= F.LAST_BOUNDARY;
            } else if (c !== CR) {
              return i;
            }
            index++;
            break;
          } else if (index - 1 === boundary.length - 2) {
            if (flags & F.LAST_BOUNDARY && c === HYPHEN) {
              callback('end');
              state = S.END;
              flags = 0;
            } else if (!(flags & F.LAST_BOUNDARY) && c === LF) {
              index = 0;
              callback('partBegin');
              state = S.HEADER_FIELD_START;
            } else {
              return i;
            }
            break;
          }

          // On a mismatch restart the match, then re-test the same byte
          // against the first boundary byte.
          if (c !== boundary[index + 2]) {
            index = -2;
          }
          if (c === boundary[index + 2]) {
            index++;
          }
          break;
        case S.HEADER_FIELD_START:
          state = S.HEADER_FIELD;
          mark('headerField');
          index = 0;
          // falls through
        case S.HEADER_FIELD:
          if (c === CR) {
            // A CR where a header name would start ends the header section.
            clear('headerField');
            state = S.HEADERS_ALMOST_DONE;
            break;
          }

          index++;
          if (c === HYPHEN) {
            break;
          }

          if (c === COLON) {
            if (index === 1) {
              // empty header field
              return i;
            }
            dataCallback('headerField', true);
            state = S.HEADER_VALUE_START;
            break;
          }

          // Apart from '-' and ':', only ASCII letters are accepted.
          cl = lower(c);
          if (cl < A || cl > Z) {
            return i;
          }
          break;
        case S.HEADER_VALUE_START:
          if (c === SPACE) {
            // Skip spaces between the colon and the value.
            break;
          }

          mark('headerValue');
          state = S.HEADER_VALUE;
          // falls through
        case S.HEADER_VALUE:
          if (c === CR) {
            dataCallback('headerValue', true);
            callback('headerEnd');
            state = S.HEADER_VALUE_ALMOST_DONE;
          }
          break;
        case S.HEADER_VALUE_ALMOST_DONE:
          if (c !== LF) {
            return i;
          }
          state = S.HEADER_FIELD_START;
          break;
        case S.HEADERS_ALMOST_DONE:
          if (c !== LF) {
            return i;
          }

          callback('headersEnd');
          state = S.PART_DATA_START;
          break;
        case S.PART_DATA_START:
          state = S.PART_DATA;
          mark('partData');
          // falls through
        case S.PART_DATA:
          prevIndex = index;

          if (index === 0) {
            // Boyer-Moore derived algorithm to safely skip non-boundary data:
            // a byte that does not occur in the boundary cannot be part of a
            // match, so jump ahead a whole boundary length at a time.
            i += boundaryEnd;
            while (i < len && !(buffer[i] in boundaryChars)) {
              i += boundaryLength;
            }
            i -= boundaryEnd;
            c = buffer[i];
          }

          if (index < boundary.length) {
            if (boundary[index] === c) {
              if (index === 0) {
                // Possible boundary start: flush the data seen so far.
                dataCallback('partData', true);
              }
              index++;
            } else {
              index = 0;
            }
          } else if (index === boundary.length) {
            index++;
            if (c === CR) {
              // CR = part boundary
              flags |= F.PART_BOUNDARY;
            } else if (c === HYPHEN) {
              // HYPHEN = end boundary
              flags |= F.LAST_BOUNDARY;
            } else {
              index = 0;
            }
          } else if (index - 1 === boundary.length) {
            if (flags & F.PART_BOUNDARY) {
              index = 0;
              if (c === LF) {
                // unset the PART_BOUNDARY flag
                flags &= ~F.PART_BOUNDARY;
                callback('partEnd');
                callback('partBegin');
                state = S.HEADER_FIELD_START;
                break;
              }
            } else if (flags & F.LAST_BOUNDARY) {
              if (c === HYPHEN) {
                callback('partEnd');
                callback('end');
                state = S.END;
                flags = 0;
              } else {
                index = 0;
              }
            } else {
              index = 0;
            }
          }

          if (index > 0) {
            // When matching a possible boundary, keep a lookbehind reference
            // in case it turns out to be a false lead.
            lookbehind[index - 1] = c;
          } else if (prevIndex > 0) {
            // If our boundary turned out to be rubbish, the captured
            // lookbehind belongs to partData.
            callback('partData', lookbehind, 0, prevIndex);
            prevIndex = 0;
            mark('partData');

            // Reconsider the current character even though it interrupted the
            // sequence; it could be the beginning of a new sequence.
            i--;
          }

          break;
        case S.END:
          break;
        default:
          return i;
      }
    }

    // Emit whatever is still pending up to the end of this chunk.
    dataCallback('headerField');
    dataCallback('headerValue');
    dataCallback('partData');

    // Persist the machine so the next chunk resumes where this one stopped.
    this.index = index;
    this.state = state;
    this.flags = flags;

    return len;
  };
  /**
   * Signals that no more data will be written.
   *
   * If the parser stopped right at the start of a header field (index 0) or
   * in part data with the full boundary matched, the onPartEnd and onEnd
   * handlers are invoked (when present). If the parser is in any other state
   * except END, an Error describing the state is returned, not thrown.
   *
   * @returns {Error|undefined} An Error when the stream ended unexpectedly,
   *   otherwise undefined.
   */
  MultipartParser.prototype.end = function() {
    var parser = this;

    // Call the `on<Name>` handler without arguments, if the parser has one.
    function emit(eventName) {
      var handlerName = 'on' + eventName.substr(0, 1).toUpperCase() + eventName.substr(1);
      if (handlerName in parser) {
        parser[handlerName]();
      }
    }

    var atHeaderStart = parser.state == S.HEADER_FIELD_START && parser.index === 0;
    var boundaryMatched = atHeaderStart ||
      (parser.state == S.PART_DATA && parser.index == parser.boundary.length);

    if (boundaryMatched) {
      emit('partEnd');
      emit('end');
      return;
    }

    if (parser.state != S.END) {
      return new Error('MultipartParser.end(): stream ended unexpectedly: ' + parser.explain());
    }
  };
  /**
   * Describes the parser's current state for use in diagnostics.
   *
   * @returns {string} "state = " followed by the symbolic state name.
   */
  MultipartParser.prototype.explain = function() {
    var stateName = MultipartParser.stateToString(this.state);
    return 'state = ' + stateName;
  };