Add CSV parser for csv read utility

closes #6865

- switch csv-read to use csv-parser for greater reliability and better handling of strings when importing a CSV
This commit is contained in:
cobbspur 2016-06-03 12:35:17 +01:00
parent db3df16c21
commit 0f0ca5a304
8 changed files with 76 additions and 189 deletions

2
.gitignore vendored
View File

@ -7,6 +7,8 @@ b-cov
*.pid
*.gz
!core/test/utils/fixtures/**/*.csv
pids
logs
results

View File

@ -288,7 +288,7 @@ subscribers = {
return serverUtils.readCSV({
path: filePath,
columnsToExtract: ['email']
columnsToExtract: [{name: 'email', lookup: /email/i}]
}).then(function (result) {
return Promise.all(result.map(function (entry) {
return subscribers.add(

View File

@ -1,64 +1,57 @@
var readline = require('readline'),
Promise = require('bluebird'),
lodash = require('lodash'),
errors = require('../errors'),
var Promise = require('bluebird'),
csvParser = require('csv-parser'),
_ = require('lodash'),
fs = require('fs');
function readCSV(options) {
var path = options.path,
columnsToExtract = options.columnsToExtract || [],
firstLine = true,
mapping = {},
toReturn = [],
rl;
var columnsToExtract = options.columnsToExtract || [],
results = [], rows = [];
return new Promise(function (resolve, reject) {
rl = readline.createInterface({
input: fs.createReadStream(path),
terminal: false
});
var readFile = fs.createReadStream(options.path);
rl.on('line', function (line) {
var values = line.split(','),
entry = {};
// CASE: column headers
if (firstLine) {
if (values.length === 1) {
mapping[columnsToExtract[0]] = 0;
} else {
try {
lodash.each(columnsToExtract, function (columnToExtract) {
mapping[columnToExtract] = lodash.findIndex(values, function (value) {
if (value.match(columnToExtract)) {
return true;
}
});
// CASE: column does not exist
if (mapping[columnToExtract] === -1) {
throw new errors.ValidationError(
'Column header missing: "{{column}}".'.replace('{{column}}', columnToExtract)
);
}
});
} catch (err) {
reject(err);
}
}
firstLine = false;
} else {
lodash.each(mapping, function (index, columnName) {
entry[columnName] = values[index];
// Node streams report failures through the 'error' event (there is no 'err'
// event). Listening on 'error' lets an unreadable or missing file reject the
// promise instead of surfacing as an unhandled 'error' event.
readFile.on('error', function (err) {
    reject(err);
})
.pipe(csvParser())
.on('data', function (row) {
rows.push(row);
})
.on('end', function () {
// If CSV is single column - return all values including header
var headers = _.keys(rows[0]), result = {}, columnMap = {};
if (columnsToExtract.length === 1 && headers.length === 1) {
results = _.map(rows, function (value) {
result = {};
result[columnsToExtract[0].name] = value[headers[0]];
return result;
});
toReturn.push(entry);
}
});
// Add first row
result = {};
result[columnsToExtract[0].name] = headers[0];
results = [result].concat(results);
} else {
// If there are multiple columns in csv file
// try to match headers using lookup value
rl.on('close', function () {
resolve(toReturn);
// Build columnMap: requested column name -> actual CSV header it matched.
// Iterating for side effects only, so use _.each rather than _.map (whose
// returned array was being discarded).
// If several headers satisfy a column's lookup, the last matching header wins
// because each match overwrites the previous entry. A column whose lookup
// matches no header gets no entry in columnMap.
_.each(columnsToExtract, function findMatches(column) {
    _.each(headers, function checkHeader(header) {
        if (column.lookup.test(header)) {
            columnMap[column.name] = header;
        }
    });
});
// Project every parsed row onto the requested column names: for each
// (name -> header) pair in columnMap, copy the row's value under that header
// into a fresh object keyed by the requested name.
results = _.map(rows, function evaluateRow(row) {
    return _.reduce(columnMap, function collectMatch(picked, header, name) {
        picked[name] = row[header];
        return picked;
    }, {});
});
}
resolve(results);
});
});
}

View File

@ -1,71 +1,29 @@
/*globals describe, beforeEach, afterEach, it*/
/*globals describe, it*/
var utils = require('../../../server/utils'),
errors = require('../../../server/errors'),
sinon = require('sinon'),
should = require('should'),
fs = require('fs'),
lodash = require('lodash'),
readline = require('readline');
path = require ('path'),
csvPath = path.join(__dirname, '../../utils/fixtures/csv/');
describe('read csv', function () {
var scope = {};
beforeEach(function () {
sinon.stub(fs, 'createReadStream');
sinon.stub(readline, 'createInterface', function () {
return {
on: function (eventName, cb) {
switch (eventName) {
case 'line':
lodash.each(scope.csv, function (line) {
cb(line);
});
break;
case 'close':
cb();
break;
}
}
};
});
});
afterEach(function () {
fs.createReadStream.restore();
readline.createInterface.restore();
});
it('read csv: one column', function (done) {
scope.csv = [
'email',
'hannah@ghost.org',
'kate@ghost.org'
];
utils.readCSV({
path: 'read-file-is-mocked',
columnsToExtract: ['email']
path: csvPath + 'single-column-with-header.csv',
columnsToExtract: [{name: 'email', lookup: /email/i}]
}).then(function (result) {
should.exist(result);
result.length.should.eql(2);
result[0].email.should.eql('hannah@ghost.org');
result[1].email.should.eql('kate@ghost.org');
result.length.should.eql(3);
result[0].email.should.eql('email');
result[1].email.should.eql('hannah@ghost.org');
result[2].email.should.eql('kate@ghost.org');
done();
}).catch(done);
});
it('read csv: two columns', function (done) {
scope.csv = [
'id,email',
'1,hannah@ghost.org',
'1,kate@ghost.org'
];
it('read csv: two columns, 1 filter', function (done) {
utils.readCSV({
path: 'read-file-is-mocked',
columnsToExtract: ['email']
path: csvPath + 'two-columns-with-header.csv',
columnsToExtract: [{name: 'email', lookup: /email/i}]
}).then(function (result) {
should.exist(result);
result.length.should.eql(2);
@ -77,16 +35,13 @@ describe('read csv', function () {
}).catch(done);
});
it('read csv: two columns', function (done) {
scope.csv = [
'id,email',
'1,hannah@ghost.org',
'2,kate@ghost.org'
];
it('read csv: two columns, 2 filters', function (done) {
utils.readCSV({
path: 'read-file-is-mocked',
columnsToExtract: ['email', 'id']
path: csvPath + 'two-columns-obscure-header.csv',
columnsToExtract: [
{name: 'email', lookup: /email/i},
{name: 'id', lookup: /id/i}
]
}).then(function (result) {
should.exist(result);
result.length.should.eql(2);
@ -97,77 +52,4 @@ describe('read csv', function () {
done();
}).catch(done);
});
it('read csv: test email regex', function (done) {
scope.csv = [
'email_address',
'hannah@ghost.org',
'kate@ghost.org'
];
utils.readCSV({
path: 'read-file-is-mocked',
columnsToExtract: ['email']
}).then(function (result) {
should.exist(result);
result.length.should.eql(2);
result[0].email.should.eql('hannah@ghost.org');
result[1].email.should.eql('kate@ghost.org');
done();
}).catch(done);
});
it('read csv: support single column use case', function (done) {
scope.csv = [
'a_column',
'hannah@ghost.org',
'kate@ghost.org'
];
utils.readCSV({
path: 'read-file-is-mocked',
columnsToExtract: ['email']
}).then(function (result) {
should.exist(result);
result.length.should.eql(2);
result[0].email.should.eql('hannah@ghost.org');
result[1].email.should.eql('kate@ghost.org');
done();
}).catch(done);
});
it('read csv: support single column use case (we would loose the first entry)', function (done) {
scope.csv = [
'hannah@ghost.org',
'kate@ghost.org'
];
utils.readCSV({
path: 'read-file-is-mocked',
columnsToExtract: ['email']
}).then(function (result) {
should.exist(result);
result.length.should.eql(1);
result[0].email.should.eql('kate@ghost.org');
done();
}).catch(done);
});
it('read csv: broken', function (done) {
scope.csv = [
'id,test',
'1,2',
'1,2'
];
utils.readCSV({
path: 'read-file-is-mocked',
columnsToExtract: ['email', 'id']
}).then(function () {
return done(new Error('we expected an error from read csv!'));
}).catch(function (err) {
(err instanceof errors.ValidationError).should.eql(true);
done();
});
});
});

View File

@ -0,0 +1,3 @@
email
hannah@ghost.org
kate@ghost.org
1 email
2 hannah@ghost.org
3 kate@ghost.org

View File

@ -0,0 +1,3 @@
id,Email Address
1,"hannah@ghost.org"
2,kate@ghost.org
1 id Email Address
2 1 hannah@ghost.org
3 2 kate@ghost.org

View File

@ -0,0 +1,3 @@
id,email
1,"hannah@ghost.org"
1,kate@ghost.org
1 id email
2 1 hannah@ghost.org
3 1 kate@ghost.org

View File

@ -36,6 +36,7 @@
"connect-slashes": "1.3.1",
"cookie-session": "1.2.0",
"cors": "2.7.1",
"csv-parser": "1.9.3",
"downsize": "0.0.8",
"express": "4.13.4",
"express-hbs": "1.0.1",