Add CSV parser for csv read utility

closes #6865 - switch the read-csv utility to csv-parser for more reliable parsing and better handling of strings when importing a CSV
2024-12-23 02:41:50 +03:00 · 2016-06-03 12:35:17 +01:00 · 2016-06-03 12:35:17 +01:00 · 0f0ca5a304
commit 0f0ca5a304
parent db3df16c21
8 changed files with 76 additions and 189 deletions
--- a/.gitignore
+++ b/.gitignore
@ -7,6 +7,8 @@ b-cov
 *.pid
 *.gz

+!core/test/utils/fixtures/**/*.csv
+
 pids
 logs
 results
--- a/core/server/api/subscribers.js
+++ b/core/server/api/subscribers.js
@ -288,7 +288,7 @@ subscribers = {

            return serverUtils.readCSV({
                path: filePath,
-                columnsToExtract: ['email']
+                columnsToExtract: [{name: 'email', lookup: /email/i}]
            }).then(function (result) {
                return Promise.all(result.map(function (entry) {
                    return subscribers.add(
--- a/core/server/utils/read-csv.js
+++ b/core/server/utils/read-csv.js
@ -1,64 +1,57 @@
-var readline = require('readline'),
-    Promise = require('bluebird'),
-    lodash = require('lodash'),
-    errors = require('../errors'),
+var Promise = require('bluebird'),
+    csvParser = require('csv-parser'),
+    _ = require('lodash'),
    fs = require('fs');

 function readCSV(options) {
-    var path = options.path,
-        columnsToExtract = options.columnsToExtract || [],
-        firstLine = true,
-        mapping = {},
-        toReturn = [],
-        rl;
+    var columnsToExtract = options.columnsToExtract || [],
+        results = [], rows = [];

    return new Promise(function (resolve, reject) {
-        rl = readline.createInterface({
-            input: fs.createReadStream(path),
-            terminal: false
-        });
+        var readFile = fs.createReadStream(options.path);

-        rl.on('line', function (line) {
-            var values = line.split(','),
-                entry = {};
-
-            // CASE: column headers
-            if (firstLine) {
-                if (values.length === 1) {
-                    mapping[columnsToExtract[0]] = 0;
-                } else {
-                    try {
-                        lodash.each(columnsToExtract, function (columnToExtract) {
-                            mapping[columnToExtract] = lodash.findIndex(values, function (value) {
-                                if (value.match(columnToExtract)) {
-                                    return true;
-                                }
-                            });
-
-                            // CASE: column does not exist
-                            if (mapping[columnToExtract] === -1) {
-                                throw new errors.ValidationError(
-                                    'Column header missing: "{{column}}".'.replace('{{column}}', columnToExtract)
-                                );
-                            }
-                        });
-                    } catch (err) {
-                        reject(err);
-                    }
-                }
-
-                firstLine = false;
-            } else {
-                lodash.each(mapping, function (index, columnName) {
-                    entry[columnName] = values[index];
+        readFile.on('err', function (err) {
+            reject(err);
+        })
+        .pipe(csvParser())
+        .on('data', function (row) {
+            rows.push(row);
+        })
+        .on('end', function () {
+            // If CSV is single column - return all values including header
+            var headers = _.keys(rows[0]), result = {}, columnMap = {};
+            if (columnsToExtract.length === 1 && headers.length === 1) {
+                results = _.map(rows, function (value) {
+                    result = {};
+                    result[columnsToExtract[0].name] = value[headers[0]];
+                    return result;
                });

-                toReturn.push(entry);
-            }
-        });
+                // Add first row
+                result = {};
+                result[columnsToExtract[0].name] = headers[0];
+                results = [result].concat(results);
+            } else {
+                // If there are multiple columns in csv file
+                // try to match headers using lookup value

-        rl.on('close', function () {
-            resolve(toReturn);
+                _.map(columnsToExtract, function findMatches(column) {
+                    _.each(headers, function checkheader(header) {
+                        if (column.lookup.test(header)) {
+                            columnMap[column.name] = header;
+                        }
+                    });
+                });
+
+                results = _.map(rows, function evaluateRow(row) {
+                    var result = {};
+                    _.each(columnMap, function returnMatches(value, key) {
+                        result[key] = row[value];
+                    });
+                    return result;
+                });
+            }
+            resolve(results);
        });
    });
 }
--- a/core/test/unit/utils/read-csv_spec.js
+++ b/core/test/unit/utils/read-csv_spec.js
@ -1,71 +1,29 @@
-/*globals describe, beforeEach, afterEach, it*/
+/*globals describe, it*/

 var utils = require('../../../server/utils'),
-    errors = require('../../../server/errors'),
-    sinon = require('sinon'),
    should = require('should'),
-    fs = require('fs'),
-    lodash = require('lodash'),
-    readline = require('readline');
+    path = require ('path'),
+    csvPath = path.join(__dirname, '../../utils/fixtures/csv/');

 describe('read csv', function () {
-    var scope = {};
-
-    beforeEach(function () {
-        sinon.stub(fs, 'createReadStream');
-
-        sinon.stub(readline, 'createInterface', function () {
-            return {
-                on: function (eventName, cb) {
-                    switch (eventName) {
-                        case 'line':
-                            lodash.each(scope.csv, function (line) {
-                                cb(line);
-                            });
-                            break;
-                        case 'close':
-                            cb();
-                            break;
-                    }
-                }
-            };
-        });
-    });
-
-    afterEach(function () {
-        fs.createReadStream.restore();
-        readline.createInterface.restore();
-    });
-
    it('read csv: one column', function (done) {
-        scope.csv = [
-            'email',
-            'hannah@ghost.org',
-            'kate@ghost.org'
-        ];
-
        utils.readCSV({
-            path: 'read-file-is-mocked',
-            columnsToExtract: ['email']
+            path: csvPath + 'single-column-with-header.csv',
+            columnsToExtract: [{name: 'email', lookup: /email/i}]
        }).then(function (result) {
            should.exist(result);
-            result.length.should.eql(2);
-            result[0].email.should.eql('hannah@ghost.org');
-            result[1].email.should.eql('kate@ghost.org');
+            result.length.should.eql(3);
+            result[0].email.should.eql('email');
+            result[1].email.should.eql('hannah@ghost.org');
+            result[2].email.should.eql('kate@ghost.org');
            done();
        }).catch(done);
    });

-    it('read csv: two columns', function (done) {
-        scope.csv = [
-            'id,email',
-            '1,hannah@ghost.org',
-            '1,kate@ghost.org'
-        ];
-
+    it('read csv: two columns, 1 filter', function (done) {
        utils.readCSV({
-            path: 'read-file-is-mocked',
-            columnsToExtract: ['email']
+            path: csvPath + 'two-columns-with-header.csv',
+            columnsToExtract: [{name: 'email', lookup: /email/i}]
        }).then(function (result) {
            should.exist(result);
            result.length.should.eql(2);
@ -77,16 +35,13 @@ describe('read csv', function () {
        }).catch(done);
    });

-    it('read csv: two columns', function (done) {
-        scope.csv = [
-            'id,email',
-            '1,hannah@ghost.org',
-            '2,kate@ghost.org'
-        ];
-
+    it('read csv: two columns, 2 filters', function (done) {
        utils.readCSV({
-            path: 'read-file-is-mocked',
-            columnsToExtract: ['email', 'id']
+            path: csvPath + 'two-columns-obscure-header.csv',
+            columnsToExtract: [
+                {name: 'email', lookup: /email/i},
+                {name: 'id', lookup: /id/i}
+            ]
        }).then(function (result) {
            should.exist(result);
            result.length.should.eql(2);
@ -97,77 +52,4 @@ describe('read csv', function () {
            done();
        }).catch(done);
    });
-
-    it('read csv: test email regex', function (done) {
-        scope.csv = [
-            'email_address',
-            'hannah@ghost.org',
-            'kate@ghost.org'
-        ];
-
-        utils.readCSV({
-            path: 'read-file-is-mocked',
-            columnsToExtract: ['email']
-        }).then(function (result) {
-            should.exist(result);
-            result.length.should.eql(2);
-            result[0].email.should.eql('hannah@ghost.org');
-            result[1].email.should.eql('kate@ghost.org');
-            done();
-        }).catch(done);
-    });
-
-    it('read csv: support single column use case', function (done) {
-        scope.csv = [
-            'a_column',
-            'hannah@ghost.org',
-            'kate@ghost.org'
-        ];
-
-        utils.readCSV({
-            path: 'read-file-is-mocked',
-            columnsToExtract: ['email']
-        }).then(function (result) {
-            should.exist(result);
-            result.length.should.eql(2);
-            result[0].email.should.eql('hannah@ghost.org');
-            result[1].email.should.eql('kate@ghost.org');
-            done();
-        }).catch(done);
-    });
-
-    it('read csv: support single column use case (we would loose the first entry)', function (done) {
-        scope.csv = [
-            'hannah@ghost.org',
-            'kate@ghost.org'
-        ];
-
-        utils.readCSV({
-            path: 'read-file-is-mocked',
-            columnsToExtract: ['email']
-        }).then(function (result) {
-            should.exist(result);
-            result.length.should.eql(1);
-            result[0].email.should.eql('kate@ghost.org');
-            done();
-        }).catch(done);
-    });
-
-    it('read csv: broken', function (done) {
-        scope.csv = [
-            'id,test',
-            '1,2',
-            '1,2'
-        ];
-
-        utils.readCSV({
-            path: 'read-file-is-mocked',
-            columnsToExtract: ['email', 'id']
-        }).then(function () {
-            return done(new Error('we expected an error from read csv!'));
-        }).catch(function (err) {
-            (err instanceof errors.ValidationError).should.eql(true);
-            done();
-        });
-    });
 });
--- a/core/test/utils/fixtures/csv/single-column-with-header.csv
+++ b/core/test/utils/fixtures/csv/single-column-with-header.csv
@ -0,0 +1,3 @@
+email
+hannah@ghost.org
+kate@ghost.org
--- a/core/test/utils/fixtures/csv/two-columns-obscure-header.csv
+++ b/core/test/utils/fixtures/csv/two-columns-obscure-header.csv
@ -0,0 +1,3 @@
+id,Email Address
+1,"hannah@ghost.org"
+2,kate@ghost.org
--- a/core/test/utils/fixtures/csv/two-columns-with-header.csv
+++ b/core/test/utils/fixtures/csv/two-columns-with-header.csv
@ -0,0 +1,3 @@
+id,email
+1,"hannah@ghost.org"
+1,kate@ghost.org
--- a/package.json
+++ b/package.json
@ -36,6 +36,7 @@
    "connect-slashes": "1.3.1",
    "cookie-session": "1.2.0",
    "cors": "2.7.1",
+    "csv-parser": "1.9.3",
    "downsize": "0.0.8",
    "express": "4.13.4",
    "express-hbs": "1.0.1",