Ohm-Management - Projektarbeit B-ME
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

string_decoder.js 8.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. 'use strict';
  2. var Buffer = require('safe-buffer').Buffer;
  // Encoding-name predicate. Uses `Buffer.isEncoding` when the Buffer
  // implementation provides it; otherwise falls back to the local check below.
  var isEncoding = Buffer.isEncoding || function (encoding) {
    // Names accepted by the fallback. The argument is coerced to a string and
    // lower-cased before it is compared against this list.
    var known = ['hex', 'utf8', 'utf-8', 'ascii', 'binary', 'base64', 'ucs2', 'ucs-2', 'utf16le', 'utf-16le', 'raw'];
    var name = ('' + encoding).toLowerCase();
    return known.indexOf(name) !== -1;
  };
  // Maps an encoding name onto the canonical spelling used in this file:
  //   'utf8' / 'utf-8'                          -> 'utf8'
  //   'ucs2' / 'ucs-2' / 'utf16le' / 'utf-16le' -> 'utf16le'
  //   'latin1' / 'binary'                       -> 'latin1'
  //   'base64' / 'ascii' / 'hex'                -> returned unchanged
  // A falsy argument yields 'utf8'. The value is first compared as given; if
  // nothing matches it is converted to a lower-cased string and compared once
  // more. Returns undefined when the second comparison fails as well.
  function _normalizeEncoding(enc) {
    if (!enc) return 'utf8';
    var candidate = enc;
    var lowered = false;
    for (;;) {
      if (candidate === 'utf8' || candidate === 'utf-8') return 'utf8';
      if (candidate === 'ucs2' || candidate === 'ucs-2' || candidate === 'utf16le' || candidate === 'utf-16le') return 'utf16le';
      if (candidate === 'latin1' || candidate === 'binary') return 'latin1';
      if (candidate === 'base64' || candidate === 'ascii' || candidate === 'hex') return candidate;
      // Already retried with the lower-cased spelling: the name is unknown.
      if (lowered) return undefined;
      candidate = ('' + candidate).toLowerCase();
      lowered = true;
    }
  }
  // Resolves `enc` to the encoding name stored on a decoder.
  //
  // Names known to _normalizeEncoding() come back in canonical form. Any other
  // name is accepted (and returned as given) only when an encoding check other
  // than the `isEncoding` captured at load time vouches for it.
  //
  // Do not cache `Buffer.isEncoding` when checking encoding names as some
  // modules monkey-patch it to support additional encodings. For that reason
  // the function currently installed on Buffer is the one consulted here; the
  // captured `isEncoding` is used only when Buffer has no `isEncoding` at all
  // (in which case it is the local fallback list).
  //
  // Throws Error('Unknown encoding: ' + enc) when the name is not recognised.
  function normalizeEncoding(enc) {
    var nenc = _normalizeEncoding(enc);
    if (typeof nenc === 'string') return nenc;
    var live = Buffer.isEncoding;
    // Same function as captured at load time: nothing was patched in, and
    // _normalizeEncoding() has already rejected the name.
    if (live === isEncoding) throw new Error('Unknown encoding: ' + enc);
    var check = typeof live === 'function' ? live : isEncoding;
    if (!check(enc)) throw new Error('Unknown encoding: ' + enc);
    return enc;
  }
  // StringDecoder turns a sequence of Buffers into a sequence of JS strings
  // while keeping multi-byte characters that straddle two Buffers intact.
  exports.StringDecoder = StringDecoder;

  // Constructor. `encoding` is resolved through normalizeEncoding(), which
  // throws for names it does not recognise.
  //
  // Per-instance methods are installed depending on the resolved encoding:
  //   utf16le -> text/end replaced by utf16Text/utf16End, 4-byte carry buffer
  //   utf8    -> fillLast replaced by utf8FillLast, 4-byte carry buffer
  //   base64  -> text/end replaced by base64Text/base64End, 3-byte carry buffer
  //   other   -> write/end replaced by simpleWrite/simpleEnd; the constructor
  //              returns early and no carry-over state is created
  function StringDecoder(encoding) {
    var resolved = normalizeEncoding(encoding);
    this.encoding = resolved;
    var carrySize;
    if (resolved === 'utf16le') {
      this.text = utf16Text;
      this.end = utf16End;
      carrySize = 4;
    } else if (resolved === 'utf8') {
      this.fillLast = utf8FillLast;
      carrySize = 4;
    } else if (resolved === 'base64') {
      this.text = base64Text;
      this.end = base64End;
      carrySize = 3;
    } else {
      this.write = simpleWrite;
      this.end = simpleEnd;
      return;
    }
    // Carry-over state for a character split across writes: bytes still
    // missing, total bytes of the character, and the bytes collected so far.
    this.lastNeed = 0;
    this.lastTotal = 0;
    this.lastChar = Buffer.allocUnsafe(carrySize);
  }
  // Decodes `buf` and returns the text that can be emitted so far.
  //
  // When bytes of a partial character are pending (`lastNeed` is non-zero),
  // `fillLast` is asked to complete it first. If it returns undefined the
  // character is still incomplete and '' is returned. Otherwise decoding of
  // the rest of `buf` resumes at the offset left in `lastNeed`, which is then
  // reset to 0.
  StringDecoder.prototype.write = function (buf) {
    if (buf.length === 0) return '';
    var head;
    var start = 0;
    if (this.lastNeed) {
      head = this.fillLast(buf);
      if (head === undefined) return '';
      start = this.lastNeed;
      this.lastNeed = 0;
    }
    // Nothing left in `buf` after completing the pending character.
    if (start >= buf.length) return head || '';
    var tail = this.text(buf, start);
    return head ? head + tail : tail;
  };
  // Prototype default for `end`: utf8End. The constructor installs a different
  // `end` on the instance for utf16le, base64 and the pass-through encodings.
  StringDecoder.prototype.end = utf8End;
  // Returns only complete characters in a Buffer
  // Prototype default for `text`: utf8Text. The constructor installs a
  // different `text` on the instance for utf16le and base64.
  StringDecoder.prototype.text = utf8Text;
  // Attempts to complete a partial non-UTF-8 character using bytes from a
  // Buffer (the constructor installs utf8FillLast on UTF-8 decoders instead).
  //
  // Bytes from the start of `buf` are appended to `lastChar` after the bytes
  // already collected. If `buf` holds every missing byte, the completed
  // character is decoded and returned; `lastNeed` is left as is so the caller
  // knows how many bytes of `buf` were used. Otherwise all of `buf` is
  // absorbed, `lastNeed` shrinks by its length and undefined is returned.
  StringDecoder.prototype.fillLast = function (buf) {
    var collected = this.lastTotal - this.lastNeed;
    if (buf.length < this.lastNeed) {
      buf.copy(this.lastChar, collected, 0, buf.length);
      this.lastNeed -= buf.length;
      return undefined;
    }
    buf.copy(this.lastChar, collected, 0, this.lastNeed);
    return this.lastChar.toString(this.encoding, 0, this.lastTotal);
  };
  // Classifies a single UTF-8 byte by its leading bits.
  // Returns 0 for an ASCII byte (<= 0x7F); 2, 3 or 4 for the lead byte of a
  // two-, three- or four-byte sequence (110xxxxx, 1110xxxx, 11110xxx); and -1
  // for anything else, which includes continuation bytes.
  function utf8CheckByte(byte) {
    return byte <= 0x7F ? 0
      : byte >> 5 === 0x06 ? 2
      : byte >> 4 === 0x0E ? 3
      : byte >> 3 === 0x1E ? 4
      : -1;
  }
  // Looks at up to the last three bytes of `buf` (never before index `i`),
  // walking backwards, for the first byte that utf8CheckByte() does not
  // report as -1.
  //
  // When that byte is a lead byte, `self.lastNeed` is set to the number of
  // bytes the character is still missing and the character's total length
  // (2, 3 or 4) is returned. Returns 0 when the byte is ASCII, when no such
  // byte is found within range, or when a two-byte lead sits three bytes from
  // the end (`self.lastNeed` is not written in that last case).
  function utf8CheckIncomplete(self, buf, i) {
    var lastIndex = buf.length - 1;
    for (var back = 0; back < 3; back++) {
      var pos = lastIndex - back;
      if (pos < i) return 0;
      var nb = utf8CheckByte(buf[pos]);
      if (nb < 0) continue;
      if (nb > 0) {
        // A two-byte lead followed by two more bytes has no pending tail.
        if (back === 2 && nb === 2) return 0;
        // `back + 1` bytes of the character (lead included) are present.
        self.lastNeed = nb - (back + 1);
      }
      return nb;
    }
    return 0;
  }
  // Validates the continuation bytes at the start of `buf` that would complete
  // a buffered multi-byte UTF-8 character. `p` is the number of bytes of that
  // character already buffered.
  //
  // buf[0] is always inspected; buf[1] and buf[2] are inspected only while
  // `self.lastNeed` and `buf.length` both exceed their index. On the first
  // byte at index k that is not a continuation byte (10xxxxxx),
  // `self.lastNeed` is set to k and a string of `p + k` replacement
  // characters ('\ufffd') is returned - one for each byte seen so far.
  // Returns undefined when every inspected byte is a continuation byte.
  function utf8CheckExtraBytes(self, buf, p) {
    for (var k = 0; k < 3; k++) {
      // Past the first byte, stop once the character needs no more bytes or
      // the buffer has run out.
      if (k > 0 && !(self.lastNeed > k && buf.length > k)) return undefined;
      if ((buf[k] & 0xC0) !== 0x80) {
        self.lastNeed = k;
        return '\ufffd'.repeat(p + k);
      }
    }
    return undefined;
  }
  // Attempts to complete a buffered multi-byte UTF-8 character using bytes
  // from the start of `buf`.
  //
  // The incoming bytes are validated by utf8CheckExtraBytes() first; if it
  // returns a string, that string is returned as is. Otherwise the bytes are
  // appended to `lastChar`: when `buf` supplies everything still missing, the
  // completed character is decoded and returned, else all of `buf` is
  // absorbed, `lastNeed` shrinks by its length and undefined is returned.
  function utf8FillLast(buf) {
    var have = this.lastTotal - this.lastNeed;
    var replacement = utf8CheckExtraBytes(this, buf, have);
    if (replacement !== undefined) return replacement;
    if (buf.length < this.lastNeed) {
      buf.copy(this.lastChar, have, 0, buf.length);
      this.lastNeed -= buf.length;
      return undefined;
    }
    buf.copy(this.lastChar, have, 0, this.lastNeed);
    return this.lastChar.toString(this.encoding, 0, this.lastTotal);
  }
  // Decodes `buf` from index `i` as UTF-8 and returns the complete characters.
  // If utf8CheckIncomplete() leaves `lastNeed` non-zero, the Buffer ends in a
  // partial character: its bytes are copied to the start of `lastChar`,
  // `lastTotal` records the character's full length, and only the text before
  // it is returned.
  function utf8Text(buf, i) {
    var charLength = utf8CheckIncomplete(this, buf, i);
    if (!this.lastNeed) return buf.toString('utf8', i);
    this.lastTotal = charLength;
    // Number of bytes of the partial character present at the end of `buf`.
    var present = charLength - this.lastNeed;
    var cut = buf.length - present;
    buf.copy(this.lastChar, 0, cut);
    return buf.toString('utf8', i, cut);
  }
  // Finishes decoding. An optional final `buf` is passed through write()
  // first. If a partial character is still pending afterwards, one replacement
  // character ('\ufffd') is appended for each of its buffered bytes.
  function utf8End(buf) {
    var out = '';
    if (buf && buf.length) out = this.write(buf);
    if (!this.lastNeed) return out;
    var buffered = this.lastTotal - this.lastNeed;
    return out + '\ufffd'.repeat(buffered);
  }
  // Decodes `buf` from index `i` as UTF-16LE.
  //
  // Odd number of bytes: the trailing byte is stashed in `lastChar` (one more
  // byte needed out of two) and the bytes before it are decoded.
  // Even number of bytes: everything is decoded, but if the last code unit is
  // a leading/high surrogate (0xD800-0xDBFF) its two bytes are stashed (two
  // more needed out of four) and it is dropped from the returned string so it
  // can be joined with the next two bytes.
  function utf16Text(buf, i) {
    var lastIndex = buf.length - 1;
    if ((buf.length - i) % 2 !== 0) {
      this.lastNeed = 1;
      this.lastTotal = 2;
      this.lastChar[0] = buf[lastIndex];
      return buf.toString('utf16le', i, lastIndex);
    }
    var decoded = buf.toString('utf16le', i);
    if (!decoded) return decoded;
    var finalUnit = decoded.charCodeAt(decoded.length - 1);
    if (finalUnit < 0xD800 || finalUnit > 0xDBFF) return decoded;
    this.lastNeed = 2;
    this.lastTotal = 4;
    this.lastChar[0] = buf[lastIndex - 1];
    this.lastChar[1] = buf[lastIndex];
    return decoded.slice(0, -1);
  }
  // Finishes UTF-16LE decoding. An optional final `buf` is passed through
  // write() first. If a partial character is still pending, its buffered bytes
  // are decoded as they are and appended; no replacement characters are added
  // explicitly here - that is left to the underlying decoder.
  function utf16End(buf) {
    var out = '';
    if (buf && buf.length) out = this.write(buf);
    if (!this.lastNeed) return out;
    var buffered = this.lastTotal - this.lastNeed;
    return out + this.lastChar.toString('utf16le', 0, buffered);
  }
  // Base64-encodes `buf` from index `i`, emitting only whole 3-byte groups.
  // When the byte count is not a multiple of three, the leftover one or two
  // trailing bytes are stashed in `lastChar`, `lastNeed` records how many
  // bytes are missing to fill the group, and only the preceding bytes are
  // encoded.
  function base64Text(buf, i) {
    var leftover = (buf.length - i) % 3;
    if (leftover === 0) return buf.toString('base64', i);
    this.lastNeed = 3 - leftover;
    this.lastTotal = 3;
    var lastIndex = buf.length - 1;
    if (leftover !== 1) {
      this.lastChar[0] = buf[lastIndex - 1];
      this.lastChar[1] = buf[lastIndex];
    } else {
      this.lastChar[0] = buf[lastIndex];
    }
    return buf.toString('base64', i, buf.length - leftover);
  }
  // Finishes base64 decoding. An optional final `buf` is passed through
  // write() first. Any one or two bytes still waiting in `lastChar` for a full
  // 3-byte group are then encoded on their own and appended.
  function base64End(buf) {
    var out = '';
    if (buf && buf.length) out = this.write(buf);
    if (!this.lastNeed) return out;
    var buffered = 3 - this.lastNeed;
    return out + this.lastChar.toString('base64', 0, buffered);
  }
  // Pass-through write for single-byte encodings (e.g. ascii, latin1, hex):
  // the whole Buffer is converted with the decoder's encoding and nothing is
  // carried over between calls.
  function simpleWrite(buf) {
    var encoding = this.encoding;
    return buf.toString(encoding);
  }
  // Pass-through end: returns write(buf) for a non-empty final `buf`, and ''
  // when `buf` is missing or empty.
  function simpleEnd(buf) {
    if (buf && buf.length) return this.write(buf);
    return '';
  }