var fs = require('fs'); var utf8 = require('./encoding/utf8'), unicode = require('./encoding/unicode'), mbcs = require('./encoding/mbcs'), sbcs = require('./encoding/sbcs'), iso2022 = require('./encoding/iso2022'); var self = this; var recognisers = [ new utf8, new unicode.UTF_16BE, new unicode.UTF_16LE, new unicode.UTF_32BE, new unicode.UTF_32LE, new mbcs.sjis, new mbcs.big5, new mbcs.euc_jp, new mbcs.euc_kr, new mbcs.gb_18030, new iso2022.ISO_2022_JP, new iso2022.ISO_2022_KR, new iso2022.ISO_2022_CN, new sbcs.ISO_8859_1, new sbcs.ISO_8859_2, new sbcs.ISO_8859_5, new sbcs.ISO_8859_6, new sbcs.ISO_8859_7, new sbcs.ISO_8859_8, new sbcs.ISO_8859_9, new sbcs.windows_1251, new sbcs.windows_1256, new sbcs.KOI8_R ]; module.exports.detect = function(buffer, opts) { // Tally up the byte occurence statistics. var fByteStats = []; for (var i = 0; i < 256; i++) fByteStats[i] = 0; for (var i = buffer.length - 1; i >= 0; i--) fByteStats[buffer[i] & 0x00ff]++; var fC1Bytes = false; for (var i = 0x80; i <= 0x9F; i += 1) { if (fByteStats[i] != 0) { fC1Bytes = true; break; } } var context = { fByteStats: fByteStats, fC1Bytes: fC1Bytes, fRawInput: buffer, fRawLength: buffer.length, fInputBytes: buffer, fInputLen: buffer.length }; var matches = recognisers.map(function(rec) { return rec.match(context); }).filter(function(match) { return !!match; }).sort(function(a, b) { return b.confidence - a.confidence; }); if (opts && opts.returnAllMatches === true) { return matches; } else { return matches.length > 0 ? matches[0].name : null; } }; module.exports.detectFile = function(filepath, opts, cb) { if (typeof opts === 'function') { cb = opts; opts = undefined; } var fd; var handler = function(err, buffer) { if (fd) { fs.closeSync(fd); } if (err) return cb(err, null); cb(null, self.detect(buffer, opts)); }; if (opts && opts.sampleSize) { fd = fs.openSync(filepath, 'r'), sample = Buffer.allocUnsafe(opts.sampleSize); fs.read(fd, sample, 0, opts.sampleSize, null, function(err) { handler(err, sample); }); return; } fs.readFile(filepath, handler); }; module.exports.detectFileSync = function(filepath, opts) { if (opts && opts.sampleSize) { var fd = fs.openSync(filepath, 'r'), sample = Buffer.allocUnsafe(opts.sampleSize); fs.readSync(fd, sample, 0, opts.sampleSize); fs.closeSync(fd); return self.detect(sample, opts); } return self.detect(fs.readFileSync(filepath), opts); }; // Wrappers for the previous functions to return all encodings module.exports.detectAll = function(buffer, opts) { if (typeof opts !== 'object') { opts = {}; } opts.returnAllMatches = true; return self.detect(buffer, opts); } module.exports.detectFileAll = function(filepath, opts, cb) { if (typeof opts === 'function') { cb = opts; opts = undefined; } if (typeof opts !== 'object') { opts = {}; } opts.returnAllMatches = true; self.detectFile(filepath, opts, cb); } module.exports.detectFileAllSync = function(filepath, opts) { if (typeof opts !== 'object') { opts = {}; } opts.returnAllMatches = true; return self.detectFileSync(filepath, opts); }