const TableImporter = require('./TableImporter');
const {faker} = require('@faker-js/faker');
const generateEvents = require('../utils/event-generator');
const dateToDatabaseString = require('../utils/database-date');
const debug = require('@tryghost/debug')('EmailRecipientsImporter');
// Sentinel states for a generated email recipient. The symbol descriptions
// make debug/inspect output readable (`Symbol(delivered)` instead of the
// anonymous `Symbol()`); each value remains unique, so identity comparisons
// in generate() are unaffected.
const emailStatus = {
    delivered: Symbol('delivered'),
    opened: Symbol('opened'),
    failed: Symbol('failed'),
    none: Symbol('none')
};
/**
 * Binary search (lower bound): returns the first (lowest) index in a sorted
 * array whose value is greater than or equal to `target`.
 *
 * @param {number[]} arr - Array sorted in ascending order.
 * @param {number|Date} target - Comparison value; a Date coerces to epoch ms
 *   under the relational operator.
 * @returns {number} Index of the first element >= target, or -1 when every
 *   element is smaller (or the array is empty).
 */
function findFirstHigherIndex(arr, target) {
    let lo = 0;
    let hi = arr.length - 1;
    let firstMatch = -1;

    while (lo <= hi) {
        const mid = (lo + hi) >> 1;
        if (arr[mid] < target) {
            // Everything up to and including mid is too small
            lo = mid + 1;
        } else {
            // Candidate found; keep narrowing towards the left
            firstMatch = mid;
            hi = mid - 1;
        }
    }

    return firstMatch;
}
/**
 * Generates `email_recipients` rows for every email batch, honouring each
 * email's delivered/opened/failed counts.
 */
class EmailRecipientsImporter extends TableImporter {
    static table = 'email_recipients';
    static dependencies = ['emails', 'email_batches', 'members', 'members_subscribe_events'];

    constructor(knex, transaction) {
        super(EmailRecipientsImporter.table, knex, transaction);
    }

    /**
     * Loads all dependent tables into in-memory maps, precomputes per-batch
     * indexes and a shared event curve, then generates recipients for each
     * email batch.
     *
     * @param {number} quantity - Total recipients to create; 0 is a no-op.
     */
    async import(quantity) {
        if (quantity === 0) {
            return;
        }

        const now = Date.now();

        const emails = await this.transaction
            .select(
                'id',
                'newsletter_id',
                'email_count',
                'delivered_count',
                'opened_count',
                'failed_count')
            .from('emails');

        // Map email id -> email row for O(1) lookup in setReferencedModel
        this.emails = new Map();
        for (const email of emails) {
            this.emails.set(email.id, email);
        }

        this.emailBatches = await this.transaction.select('id', 'email_id', 'updated_at').from('email_batches').orderBy('email_id');
        const members = await this.transaction.select('id', 'uuid', 'email', 'name').from('members');
        // Ascending created_at order is required by the binary search in setReferencedModel
        this.membersSubscribeEvents = await this.transaction.select('id', 'newsletter_id', 'created_at', 'member_id').from('members_subscribe_events').orderBy('created_at', 'asc');

        // Map member id -> member row for O(1) lookup in generate()
        this.members = new Map();
        for (const member of members) {
            this.members.set(member.id, member);
        }

        // Save each batch's position within its email up front (remarkably
        // faster than doing a findIndex on every generate call)
        let lastEmailId = null;
        let lastIndex = 0;
        for (const batch of this.emailBatches) {
            if (batch.email_id !== lastEmailId) {
                lastIndex = 0;
                lastEmailId = batch.email_id;
            }
            batch.index = lastIndex;
            lastIndex += 1;
        }

        debug(`Prepared data for ${this.name} in ${Date.now() - now}ms`);

        // We use the same event curve for all emails to speed up the generation
        // Spread over 14 days
        this.eventStartTimeUsed = new Date();
        const endTime = new Date(this.eventStartTimeUsed.getTime() + 1000 * 60 * 60 * 24 * 14);
        this.eventCurve = generateEvents({
            shape: 'ease-out',
            trend: 'negative',
            total: 1000,
            startTime: this.eventStartTimeUsed,
            endTime
        });

        // Group subscribe events per newsletter, alongside a parallel array of
        // their created_at timestamps (epoch ms) for binary searching.
        this.membersSubscribeEventsByNewsletterId = new Map();
        this.membersSubscribeEventsCreatedAtsByNewsletterId = new Map();
        for (const memberSubscribeEvent of this.membersSubscribeEvents) {
            if (!this.membersSubscribeEventsByNewsletterId.has(memberSubscribeEvent.newsletter_id)) {
                this.membersSubscribeEventsByNewsletterId.set(memberSubscribeEvent.newsletter_id, []);
            }
            this.membersSubscribeEventsByNewsletterId.get(memberSubscribeEvent.newsletter_id).push(memberSubscribeEvent);

            if (!this.membersSubscribeEventsCreatedAtsByNewsletterId.has(memberSubscribeEvent.newsletter_id)) {
                this.membersSubscribeEventsCreatedAtsByNewsletterId.set(memberSubscribeEvent.newsletter_id, []);
            }
            if (!(memberSubscribeEvent.created_at instanceof Date)) {
                // SQLite returns date columns as strings rather than Dates
                memberSubscribeEvent.created_at = new Date(memberSubscribeEvent.created_at);
            }
            this.membersSubscribeEventsCreatedAtsByNewsletterId.get(memberSubscribeEvent.newsletter_id).push(memberSubscribeEvent.created_at.getTime());
        }

        await this.importForEach(this.emailBatches, quantity ? quantity / emails.length : 1000);
    }

    /**
     * Prepares per-batch state before generate() is called for each recipient:
     * the slice of members this batch addresses, the shuffled event timestamps,
     * and the failed/opened/delivered budgets still left for this batch.
     *
     * @param {object} model - An email_batches row (with the precomputed `index`).
     */
    setReferencedModel(model) {
        this.batch = model;
        this.model = this.emails.get(this.batch.email_id);
        this.batchIndex = this.batch.index;

        const batchSentAt = new Date(this.batch.updated_at);

        // Get all members that were subscribed to this newsletter BEFORE the
        // batch was sent, using binary search to speed it up. The first index
        // at-or-after the send time bounds the eligible prefix; -1 means every
        // subscription predates the send, so all subscribers are eligible.
        const subscribeEvents = this.membersSubscribeEventsByNewsletterId.get(this.model.newsletter_id) ?? [];
        const createdAts = this.membersSubscribeEventsCreatedAtsByNewsletterId.get(this.model.newsletter_id) ?? [];
        const firstAfterIndex = findFirstHigherIndex(createdAts, batchSentAt.getTime());
        const eligibleCount = firstAfterIndex === -1 ? subscribeEvents.length : firstAfterIndex;

        // Each batch covers a window of up to 1000 eligible subscribers
        this.membersList = subscribeEvents.slice(0, eligibleCount)
            .slice(this.batchIndex * 1000, (this.batchIndex + 1) * 1000)
            .map(memberSubscribeEvent => memberSubscribeEvent.member_id);
        this.events = faker.helpers.shuffle(this.eventCurve.slice(0, this.membersList.length));
        this.eventIndex = 0;

        this.emailMeta = {
            // delivered and not opened
            deliveredCount: this.model.delivered_count - this.model.opened_count,
            openedCount: this.model.opened_count,
            failedCount: this.model.failed_count
        };

        // We always first create the failures, then the opened, then the
        // delivered, so we need to remove what earlier batches have already
        // consumed so we don't generate the same outcomes multiple times
        const offset = this.batchIndex * 1000;
        this.emailMeta = {
            failedCount: Math.max(0, this.emailMeta.failedCount - offset),
            openedCount: Math.max(0, this.emailMeta.openedCount - Math.max(0, offset - this.emailMeta.failedCount)),
            deliveredCount: Math.max(0, this.emailMeta.deliveredCount - Math.max(0, offset - this.emailMeta.failedCount - this.emailMeta.openedCount))
        };
    }

    /**
     * Generates a single email_recipients row, or undefined once the event
     * budget for the current batch is exhausted (which ends the batch).
     *
     * @returns {object|undefined}
     */
    generate() {
        let timestamp = this.events.pop();
        if (!timestamp) {
            return;
        }

        // The events were generated against a shared reference curve, so we
        // shift them onto this batch's send time (clamped to "now")
        timestamp = new Date(timestamp.getTime() - this.eventStartTimeUsed.getTime() + new Date(this.batch.updated_at).getTime());
        if (timestamp > new Date()) {
            timestamp = new Date();
        }

        const memberId = this.membersList[this.events.length];
        const member = this.members.get(memberId);

        // Consume the failure budget first, then opened, then delivered —
        // mirroring the order the budgets were adjusted in setReferencedModel
        let status = emailStatus.none;
        if (this.emailMeta.failedCount > 0) {
            status = emailStatus.failed;
            this.emailMeta.failedCount -= 1;
        } else if (this.emailMeta.openedCount > 0) {
            status = emailStatus.opened;
            this.emailMeta.openedCount -= 1;
        } else if (this.emailMeta.deliveredCount > 0) {
            status = emailStatus.delivered;
            this.emailMeta.deliveredCount -= 1;
        }

        // An opened email must have been delivered somewhere between the send
        // time and the open time
        let deliveredTime;
        if (status === emailStatus.opened) {
            const startDate = this.batch.updated_at;
            const endDate = timestamp;
            deliveredTime = faker.date.between(startDate, endDate);
        }

        return {
            // Using sorted ids are much much faster (35% as far as my testing goes) for huge imports
            id: this.fastFakeObjectId(),
            email_id: this.model.id,
            batch_id: this.batch.id,
            member_id: member.id,
            processed_at: dateToDatabaseString(this.batch.updated_at),
            delivered_at: status === emailStatus.opened ? dateToDatabaseString(deliveredTime) : status === emailStatus.delivered ? dateToDatabaseString(timestamp) : null,
            opened_at: status === emailStatus.opened ? dateToDatabaseString(timestamp) : null,
            failed_at: status === emailStatus.failed ? dateToDatabaseString(timestamp) : null,
            member_uuid: member.uuid,
            member_email: member.email,
            member_name: member.name
        };
    }
}
module . exports = EmailRecipientsImporter ;