Store email batch and recipient records when sending newsletters (#12195)

requires https://github.com/TryGhost/Ghost/pull/12192

- added initial `EmailBatch` and `EmailRecipient` model definitions with defaults and relationships
- added missing `post` relationship function to email model
- fetch member list without bookshelf (see the first sketch after this list)
    - bookshelf can add around 3x overhead when fetching the members list for an email
    - we don't need full member models at this point; the plain row data is enough
    - if we need full models later on, model hydration can be pushed into the background jobs that fetch recipient batches ready for an email to be sent
    - instantiating many bookshelf models blocks the event loop, whereas using knex directly keeps concurrent requests fast
- store recipient list before sending email
    - chunk the already-fetched members list into batches and insert records into the `email_recipients` table via knex (see the second sketch after this list)
    - chunked into batches of 1000 to match the number of emails Mailgun accepts in a single API request, though this may not be the absolute fastest batch size for recipient insertion:
        | Batch size | Batch time | Total time |
        | ---------- | ---------- | ---------- |
        |        500 |       20ms |     4142ms |
        |       1000 |       50ms |     4651ms |
        |       5000 |      170ms |     3540ms |
        |      10000 |      370ms |     3684ms |
    - create an email_batch record before inserting recipient rows so we can efficiently fetch recipients by batch and store the overall batch status
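
For illustration, a minimal sketch of the knex-level member fetch described above, condensed from the mega service diff further down; `models`, `options` and `_` are assumed to be the variables already in scope in that service:

```js
// Sketch only — condensed from the diff below, not a drop-in snippet.
const knexOptions = _.pick(options, ['transacting', 'forUpdate']);
const filterOptions = Object.assign({}, knexOptions, {filter: 'subscribed:true'});

// .query() exposes the underlying knex query builder, so awaiting it returns
// plain row objects rather than hydrated bookshelf models
const memberQuery = models.Member.getFilteredCollection(filterOptions).query();

// transactions are normally attached by bookshelf's `onFetching` hook, so they
// have to be applied manually when dropping down to knex
if (options.transacting) {
    memberQuery.transacting(options.transacting);

    if (options.forUpdate) {
        memberQuery.forUpdate();
    }
}

const memberRows = await memberQuery; // e.g. [{id, uuid, email, name, ...}]
```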
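
And a sketch of the batched recipient storage, again mirroring the diff below; `memberRows`, `emailModel`, `knexOptions`, `db` and `ObjectId` (`bson-objectid`) come from the surrounding service code, and `Promise.each` is Bluebird's:

```js
// Sketch only — mirrors the recipient storage added to the mega service below.
const storeRecipientBatch = async function (recipients) {
    // one email_batches row per chunk so recipients can be fetched per batch
    // and the batch's overall status tracked
    const batchModel = await models.EmailBatch.add({email_id: emailModel.id}, knexOptions);

    // build plain objects and insert via knex, avoiding the cost of
    // instantiating thousands of bookshelf models
    const recipientData = recipients.map((memberRow) => {
        return {
            id: ObjectId.generate(),
            email_id: emailModel.id,
            batch_id: batchModel.id,
            member_id: memberRow.id,
            member_uuid: memberRow.uuid,
            member_email: memberRow.email,
            member_name: memberRow.name
        };
    });

    return db.knex('email_recipients').insert(recipientData);
};

// 1000 recipients per chunk matches Mailgun's per-request limit
await Promise.each(_.chunk(memberRows, 1000), storeRecipientBatch);
```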
Kevin Ansfield, 2020-09-14 15:40:00 +01:00 (committed via GitHub)
parent 4e2d3e3505, commit 80af56b530
5 changed files with 117 additions and 18 deletions

New file: email-batch model

@@ -0,0 +1,30 @@
const ghostBookshelf = require('./base');

const EmailBatch = ghostBookshelf.Model.extend({
    tableName: 'email_batches',

    defaults() {
        return {
            status: 'pending'
        };
    },

    email() {
        return this.belongsTo('Email', 'email_id');
    },

    recipients() {
        return this.hasMany('EmailRecipient', 'batch_id');
    },

    members() {
        return this.belongsToMany('Member', 'email_recipients', 'batch_id', 'member_id');
    }
});

const EmailBatches = ghostBookshelf.Collection.extend({
    model: EmailBatch
});

module.exports = {
    EmailBatch: ghostBookshelf.model('EmailBatch', EmailBatch),
    EmailBatches: ghostBookshelf.model('EmailBatches', EmailBatches)
};
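
A hypothetical usage sketch (not part of this diff) of the relationships defined above, assuming Ghost's standard `findOne` class method and bookshelf's `load`/`related` relation helpers:

```js
// Hypothetical usage — illustration only, not code from this commit.
const batch = await models.EmailBatch.findOne({id: batchId});

// load the relations defined on the model on demand
await batch.load(['email', 'recipients']);

const recipientCount = batch.related('recipients').length;
const emailStatus = batch.related('email').get('status');
```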

New file: email-recipient model

@@ -0,0 +1,24 @@
const ghostBookshelf = require('./base');

const EmailRecipient = ghostBookshelf.Model.extend({
    tableName: 'email_recipients',

    email() {
        return this.belongsTo('Email', 'email_id');
    },

    emailBatch() {
        return this.belongsTo('EmailBatch', 'batch_id');
    },

    member() {
        return this.belongsTo('Member', 'member_id');
    }
});

const EmailRecipients = ghostBookshelf.Collection.extend({
    model: EmailRecipient
});

module.exports = {
    EmailRecipient: ghostBookshelf.model('EmailRecipient', EmailRecipient),
    EmailRecipients: ghostBookshelf.model('EmailRecipients', EmailRecipients)
};

Changed file: email model

@@ -19,6 +19,16 @@ const Email = ghostBookshelf.Model.extend({
        };
    },
    post() {
        return this.belongsTo('Post', 'post_id');
    },
    emailBatches() {
        return this.hasMany('EmailBatch', 'email_id');
    },
    recipients() {
        return this.hasMany('EmailRecipient', 'email_id');
    },
    emitChange: function emitChange(event, options) {
        const eventToTrigger = 'email' + '.' + event;
        ghostBookshelf.Model.prototype.emitChange.bind(this)(this, eventToTrigger, options);

Changed file: models index

@@ -33,6 +33,8 @@ const models = [
    'member-stripe-customer',
    'stripe-customer-subscription',
    'email',
    'email-batch',
    'email-recipient',
    'label'
];

Changed file: mega service

@@ -2,6 +2,7 @@ const _ = require('lodash');
const debug = require('ghost-ignition').debug('mega');
const url = require('url');
const moment = require('moment');
const ObjectId = require('bson-objectid');
const errors = require('@tryghost/errors');
const {events, i18n} = require('../../lib/common');
const logging = require('../../../shared/logging');
@@ -9,11 +10,12 @@ const membersService = require('../members');
const bulkEmailService = require('../bulk-email');
const jobService = require('../jobs');
const models = require('../../models');
const db = require('../../data/db');
const postEmailSerializer = require('./post-email-serializer');
const getEmailData = async (postModel, memberModels = []) => {
const getEmailData = async (postModel, memberRows = []) => {
const startTime = Date.now();
debug(`getEmailData: starting for ${memberModels.length} members`);
debug(`getEmailData: starting for ${memberRows.length} members`);
const {emailTmpl, replacements} = await postEmailSerializer.serialize(postModel);
emailTmpl.from = membersService.config.getEmailFromAddress();
@@ -29,33 +31,33 @@ const getEmailData = async (postModel, memberModels = []) => {
const emails = [];
const emailData = {};
memberModels.forEach((memberModel) => {
emails.push(memberModel.get('email'));
memberRows.forEach((memberRow) => {
emails.push(memberRow.email);
// first_name is a computed property only used here for now
// TODO: move into model computed property or output serializer?
memberModel.first_name = (memberModel.get('name') || '').split(' ')[0];
memberRow.first_name = (memberRow.name || '').split(' ')[0];
// add static data to mailgun template variables
const data = {
unique_id: memberModel.uuid,
unsubscribe_url: postEmailSerializer.createUnsubscribeUrl(memberModel.get('uuid'))
unique_id: memberRow.uuid,
unsubscribe_url: postEmailSerializer.createUnsubscribeUrl(memberRow.uuid)
};
// add replacement data/requested fallback to mailgun template variables
replacements.forEach(({id, memberProp, fallback}) => {
data[id] = memberModel[memberProp] || fallback || '';
data[id] = memberRow[memberProp] || fallback || '';
});
emailData[memberModel.get('email')] = data;
emailData[memberRow.email] = data;
});
debug(`getEmailData: done (${Date.now() - startTime}ms)`);
return {emailTmpl, emails, emailData};
};
const sendEmail = async (postModel, memberModels) => {
const {emailTmpl, emails, emailData} = await getEmailData(postModel, memberModels);
const sendEmail = async (postModel, memberRows) => {
const {emailTmpl, emails, emailData} = await getEmailData(postModel, memberRows);
return bulkEmailService.send(emailTmpl, emails, emailData);
};
@@ -197,9 +199,9 @@ async function sendEmailJob({emailModel, options}) {
// Check host limit for allowed member count and throw error if over limit
await membersService.checkHostLimit();
// No need to fetch list until after we've passed the check
const knexOptions = _.pick(options, ['transacting', 'forUpdate']);
const filterOptions = Object.assign({}, knexOptions, {filter: 'subscribed:true', limit: 'all'});
// TODO: this will clobber a user-assigned filter if/when we allow emails to be sent to filtered member lists
const filterOptions = Object.assign({}, knexOptions, {filter: 'subscribed:true'});
if (postModel.get('visibility') === 'paid') {
filterOptions.paid = true;
@@ -207,10 +209,18 @@ async function sendEmailJob({emailModel, options}) {
const startRetrieve = Date.now();
debug('pendingEmailHandler: retrieving members list');
const {data: members} = await membersService.api.members.list(Object.assign({}, knexOptions, filterOptions));
debug(`pendingEmailHandler: retrieved members list - ${members.length} members (${Date.now() - startRetrieve}ms)`);
const memberQuery = await models.Member.getFilteredCollection(filterOptions).query();
// TODO: how to apply this more elegantly? Normally done by `onFetching` bookshelf hook
if (options.transacting) {
memberQuery.transacting(options.transacting);
if (options.forUpdate) {
memberQuery.forUpdate();
}
}
const memberRows = await memberQuery;
debug(`pendingEmailHandler: retrieved members list - ${memberRows.length} members (${Date.now() - startRetrieve}ms)`);
if (!members.length) {
if (!memberRows.length) {
return;
}
@@ -220,11 +230,34 @@ async function sendEmailJob({emailModel, options}) {
id: emailModel.id
});
// NOTE: meta can contains an array which can be a mix of successful and error responses
debug('pendingEmailHandler: storing recipient list');
const startOfRecipientStorage = Date.now();
const storeRecipientBatch = async function (recipients) {
let batchModel = await models.EmailBatch.add({email_id: emailModel.id}, knexOptions);
// use knex rather than bookshelf to avoid overhead and event loop blocking
// when instantiating large numbers of bookshelf model objects
const recipientData = recipients.map((memberRow) => {
return {
id: ObjectId.generate(),
email_id: emailModel.id,
member_id: memberRow.id,
batch_id: batchModel.id,
member_uuid: memberRow.uuid,
member_email: memberRow.email,
member_name: memberRow.name
};
});
return await db.knex('email_recipients').insert(recipientData);
};
await Promise.each(_.chunk(memberRows, 1000), storeRecipientBatch);
debug(`pendingEmailHandler: stored recipient list (${Date.now() - startOfRecipientStorage}ms)`);
// NOTE: meta contains an array which can be a mix of successful and error responses
// needs filtering and saving objects of {error, batchData} form to separate property
debug('pendingEmailHandler: sending email');
startEmailSend = Date.now();
meta = await sendEmail(postModel, members);
meta = await sendEmail(postModel, memberRows);
debug(`pendingEmailHandler: sent email (${Date.now() - startEmailSend}ms)`);
} catch (err) {
if (startEmailSend) {