diff --git a/.gitignore b/.gitignore index df4d7d12ef..1fed2813ff 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,8 @@ b-cov *.pid *.gz +!core/test/utils/fixtures/**/*.csv + pids logs results diff --git a/core/server/api/subscribers.js b/core/server/api/subscribers.js index 677bf042c1..d7f0ac8882 100644 --- a/core/server/api/subscribers.js +++ b/core/server/api/subscribers.js @@ -288,7 +288,7 @@ subscribers = { return serverUtils.readCSV({ path: filePath, - columnsToExtract: ['email'] + columnsToExtract: [{name: 'email', lookup: /email/i}] }).then(function (result) { return Promise.all(result.map(function (entry) { return subscribers.add( diff --git a/core/server/utils/read-csv.js b/core/server/utils/read-csv.js index 505b0774b4..bd74b31256 100644 --- a/core/server/utils/read-csv.js +++ b/core/server/utils/read-csv.js @@ -1,64 +1,57 @@ -var readline = require('readline'), - Promise = require('bluebird'), - lodash = require('lodash'), - errors = require('../errors'), +var Promise = require('bluebird'), + csvParser = require('csv-parser'), + _ = require('lodash'), fs = require('fs'); function readCSV(options) { - var path = options.path, - columnsToExtract = options.columnsToExtract || [], - firstLine = true, - mapping = {}, - toReturn = [], - rl; + var columnsToExtract = options.columnsToExtract || [], + results = [], rows = []; return new Promise(function (resolve, reject) { - rl = readline.createInterface({ - input: fs.createReadStream(path), - terminal: false - }); + var readFile = fs.createReadStream(options.path); - rl.on('line', function (line) { - var values = line.split(','), - entry = {}; - - // CASE: column headers - if (firstLine) { - if (values.length === 1) { - mapping[columnsToExtract[0]] = 0; - } else { - try { - lodash.each(columnsToExtract, function (columnToExtract) { - mapping[columnToExtract] = lodash.findIndex(values, function (value) { - if (value.match(columnToExtract)) { - return true; - } - }); - - // CASE: column does not exist - if (mapping[columnToExtract] === -1) { - throw new errors.ValidationError( - 'Column header missing: "{{column}}".'.replace('{{column}}', columnToExtract) - ); - } - }); - } catch (err) { - reject(err); - } - } - - firstLine = false; - } else { - lodash.each(mapping, function (index, columnName) { - entry[columnName] = values[index]; + readFile.on('err', function (err) { + reject(err); + }) + .pipe(csvParser()) + .on('data', function (row) { + rows.push(row); + }) + .on('end', function () { + // If CSV is single column - return all values including header + var headers = _.keys(rows[0]), result = {}, columnMap = {}; + if (columnsToExtract.length === 1 && headers.length === 1) { + results = _.map(rows, function (value) { + result = {}; + result[columnsToExtract[0].name] = value[headers[0]]; + return result; }); - toReturn.push(entry); - } - }); + // Add first row + result = {}; + result[columnsToExtract[0].name] = headers[0]; + results = [result].concat(results); + } else { + // If there are multiple columns in csv file + // try to match headers using lookup value - rl.on('close', function () { - resolve(toReturn); + _.map(columnsToExtract, function findMatches(column) { + _.each(headers, function checkheader(header) { + if (column.lookup.test(header)) { + columnMap[column.name] = header; + } + }); + }); + + results = _.map(rows, function evaluateRow(row) { + var result = {}; + _.each(columnMap, function returnMatches(value, key) { + result[key] = row[value]; + }); + return result; + }); + } + resolve(results); }); }); } diff --git a/core/test/unit/utils/read-csv_spec.js b/core/test/unit/utils/read-csv_spec.js index 49d647cb52..9e16a05e14 100644 --- a/core/test/unit/utils/read-csv_spec.js +++ b/core/test/unit/utils/read-csv_spec.js @@ -1,71 +1,29 @@ -/*globals describe, beforeEach, afterEach, it*/ +/*globals describe, it*/ var utils = require('../../../server/utils'), - errors = require('../../../server/errors'), - sinon = require('sinon'), should = require('should'), - fs = require('fs'), - lodash = require('lodash'), - readline = require('readline'); + path = require ('path'), + csvPath = path.join(__dirname, '../../utils/fixtures/csv/'); describe('read csv', function () { - var scope = {}; - - beforeEach(function () { - sinon.stub(fs, 'createReadStream'); - - sinon.stub(readline, 'createInterface', function () { - return { - on: function (eventName, cb) { - switch (eventName) { - case 'line': - lodash.each(scope.csv, function (line) { - cb(line); - }); - break; - case 'close': - cb(); - break; - } - } - }; - }); - }); - - afterEach(function () { - fs.createReadStream.restore(); - readline.createInterface.restore(); - }); - it('read csv: one column', function (done) { - scope.csv = [ - 'email', - 'hannah@ghost.org', - 'kate@ghost.org' - ]; - utils.readCSV({ - path: 'read-file-is-mocked', - columnsToExtract: ['email'] + path: csvPath + 'single-column-with-header.csv', + columnsToExtract: [{name: 'email', lookup: /email/i}] }).then(function (result) { should.exist(result); - result.length.should.eql(2); - result[0].email.should.eql('hannah@ghost.org'); - result[1].email.should.eql('kate@ghost.org'); + result.length.should.eql(3); + result[0].email.should.eql('email'); + result[1].email.should.eql('hannah@ghost.org'); + result[2].email.should.eql('kate@ghost.org'); done(); }).catch(done); }); - it('read csv: two columns', function (done) { - scope.csv = [ - 'id,email', - '1,hannah@ghost.org', - '1,kate@ghost.org' - ]; - + it('read csv: two columns, 1 filter', function (done) { utils.readCSV({ - path: 'read-file-is-mocked', - columnsToExtract: ['email'] + path: csvPath + 'two-columns-with-header.csv', + columnsToExtract: [{name: 'email', lookup: /email/i}] }).then(function (result) { should.exist(result); result.length.should.eql(2); @@ -77,16 +35,13 @@ describe('read csv', function () { }).catch(done); }); - it('read csv: two columns', function (done) { - scope.csv = [ - 'id,email', - '1,hannah@ghost.org', - '2,kate@ghost.org' - ]; - + it('read csv: two columns, 2 filters', function (done) { utils.readCSV({ - path: 'read-file-is-mocked', - columnsToExtract: ['email', 'id'] + path: csvPath + 'two-columns-obscure-header.csv', + columnsToExtract: [ + {name: 'email', lookup: /email/i}, + {name: 'id', lookup: /id/i} + ] }).then(function (result) { should.exist(result); result.length.should.eql(2); @@ -97,77 +52,4 @@ describe('read csv', function () { done(); }).catch(done); }); - - it('read csv: test email regex', function (done) { - scope.csv = [ - 'email_address', - 'hannah@ghost.org', - 'kate@ghost.org' - ]; - - utils.readCSV({ - path: 'read-file-is-mocked', - columnsToExtract: ['email'] - }).then(function (result) { - should.exist(result); - result.length.should.eql(2); - result[0].email.should.eql('hannah@ghost.org'); - result[1].email.should.eql('kate@ghost.org'); - done(); - }).catch(done); - }); - - it('read csv: support single column use case', function (done) { - scope.csv = [ - 'a_column', - 'hannah@ghost.org', - 'kate@ghost.org' - ]; - - utils.readCSV({ - path: 'read-file-is-mocked', - columnsToExtract: ['email'] - }).then(function (result) { - should.exist(result); - result.length.should.eql(2); - result[0].email.should.eql('hannah@ghost.org'); - result[1].email.should.eql('kate@ghost.org'); - done(); - }).catch(done); - }); - - it('read csv: support single column use case (we would loose the first entry)', function (done) { - scope.csv = [ - 'hannah@ghost.org', - 'kate@ghost.org' - ]; - - utils.readCSV({ - path: 'read-file-is-mocked', - columnsToExtract: ['email'] - }).then(function (result) { - should.exist(result); - result.length.should.eql(1); - result[0].email.should.eql('kate@ghost.org'); - done(); - }).catch(done); - }); - - it('read csv: broken', function (done) { - scope.csv = [ - 'id,test', - '1,2', - '1,2' - ]; - - utils.readCSV({ - path: 'read-file-is-mocked', - columnsToExtract: ['email', 'id'] - }).then(function () { - return done(new Error('we expected an error from read csv!')); - }).catch(function (err) { - (err instanceof errors.ValidationError).should.eql(true); - done(); - }); - }); }); diff --git a/core/test/utils/fixtures/csv/single-column-with-header.csv b/core/test/utils/fixtures/csv/single-column-with-header.csv new file mode 100644 index 0000000000..98d2a7afe4 --- /dev/null +++ b/core/test/utils/fixtures/csv/single-column-with-header.csv @@ -0,0 +1,3 @@ +email +hannah@ghost.org +kate@ghost.org \ No newline at end of file diff --git a/core/test/utils/fixtures/csv/two-columns-obscure-header.csv b/core/test/utils/fixtures/csv/two-columns-obscure-header.csv new file mode 100644 index 0000000000..a0df5e5992 --- /dev/null +++ b/core/test/utils/fixtures/csv/two-columns-obscure-header.csv @@ -0,0 +1,3 @@ +id,Email Address +1,"hannah@ghost.org" +2,kate@ghost.org diff --git a/core/test/utils/fixtures/csv/two-columns-with-header.csv b/core/test/utils/fixtures/csv/two-columns-with-header.csv new file mode 100644 index 0000000000..e3a3691eb8 --- /dev/null +++ b/core/test/utils/fixtures/csv/two-columns-with-header.csv @@ -0,0 +1,3 @@ +id,email +1,"hannah@ghost.org" +1,kate@ghost.org diff --git a/package.json b/package.json index 9e6d824e0d..dfd9814d20 100644 --- a/package.json +++ b/package.json @@ -36,6 +36,7 @@ "connect-slashes": "1.3.1", "cookie-session": "1.2.0", "cors": "2.7.1", + "csv-parser": "1.9.3", "downsize": "0.0.8", "express": "4.13.4", "express-hbs": "1.0.1",