5015 make gmail filters work for partial sync (#5695)

Closes #5015

---------

Co-authored-by: Charles Bochet <charles@twenty.com>
This commit is contained in:
bosiraphael 2024-05-31 23:20:57 +02:00 committed by GitHub
parent c960d2e8ef
commit e0103bbcdc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 216 additions and 133 deletions

View File

@ -1,8 +0,0 @@
/**
 * Tells whether an email address looks like it belongs to a person rather
 * than to an automated or shared mailbox.
 *
 * An address is rejected when it contains a "no reply" marker anywhere, or
 * when it starts with one of the known role/automation prefixes. Matching is
 * case-insensitive so that addresses such as `NoReply@…` or `Info@…` are
 * rejected as well.
 *
 * @param email - Address to check; may be undefined.
 * @returns `false` for a missing/empty or non-personal address, `true` otherwise.
 */
export const isPersonEmail = (email: string | undefined): boolean => {
  if (!email) return false;

  const nonPersonalPattern =
    /noreply|no-reply|do_not_reply|no\.reply|^(accounts@|info@|admin@|contact@|hello@|support@|sales@|feedback@|service@|help@|mailer-daemon|notifications?|digest|auto|apps|assign|comments|customer-success|enterprise|esign|express|forum|gc@|learn|mailer|marketing|messages|news|notification|payments|receipts|recrutement|security|service|support|team)/i;

  return !nonPersonalPattern.test(email);
};

View File

@ -0,0 +1 @@
/**
 * Gmail categories whose messages are excluded from the message sync.
 */
export const GMAIL_EXCLUDED_CATEGORIES: string[] = [
  'promotions',
  'social',
  'forums',
];

View File

@ -1 +0,0 @@
/**
 * Batch size constant.
 * NOTE(review): judging by the name, this is the number of messages removed
 * from the cache per batch — confirm against the callers.
 */
export const MESSAGES_TO_DELETE_FROM_CACHE_BATCH_SIZE = 1_000;

View File

@ -186,6 +186,8 @@ export class FetchMessagesByBatchesService {
const bodyData = this.getBodyData(message);
const text = bodyData ? Buffer.from(bodyData, 'base64').toString() : '';
const attachments = this.getAttachmentData(message);
return {
id,
headerMessageId: messageId,
@ -199,7 +201,7 @@ export class FetchMessagesByBatchesService {
cc: rawCc ? addressparser(rawCc) : undefined,
bcc: rawBcc ? addressparser(rawBcc) : undefined,
text,
attachments: [],
attachments,
};
}
@ -214,6 +216,19 @@ export class FetchMessagesByBatchesService {
?.body?.data;
}
/**
 * Lists the attachments found among the top-level parts of a Gmail message payload.
 *
 * A part counts as an attachment only when it has both a filename and a
 * body attachment id.
 *
 * @param message - Gmail message whose payload parts are inspected.
 * @returns One descriptor (filename, attachment id, MIME type, size) per
 * attachment part; an empty array when the payload has no parts.
 */
private getAttachmentData(message: gmail_v1.Schema$Message) {
  const payloadParts = message.payload?.parts;

  if (!payloadParts) {
    return [];
  }

  const attachmentParts = payloadParts.filter(
    (payloadPart) => payloadPart.filename && payloadPart.body?.attachmentId,
  );

  return attachmentParts.map((attachmentPart) => {
    // Missing values fall back to an empty string / zero.
    const filename = attachmentPart.filename || '';
    const id = attachmentPart.body?.attachmentId || '';
    const mimeType = attachmentPart.mimeType || '';
    const size = attachmentPart.body?.size || 0;

    return { filename, id, mimeType, size };
  });
}
private getPropertyFromHeaders(
message: gmail_v1.Schema$Message,
property: string,

View File

@ -21,6 +21,8 @@ import {
GmailErrorHandlingService,
} from 'src/modules/messaging/services/gmail-error-handling/gmail-error-handling.service';
import { MessageChannelSyncStatusService } from 'src/modules/messaging/services/message-channel-sync-status/message-channel-sync-status.service';
import { computeGmailCategoryExcludeSearchFilter } from 'src/modules/messaging/utils/compute-gmail-category-excude-search-filter';
import { GMAIL_EXCLUDED_CATEGORIES } from 'src/modules/messaging/constants/gmail-excluded-categories';
@Injectable()
export class GmailFullMessageListFetchV2Service {
@ -97,6 +99,7 @@ export class GmailFullMessageListFetchV2Service {
userId: 'me',
maxResults: GMAIL_USERS_MESSAGES_LIST_MAX_RESULT,
pageToken,
q: computeGmailCategoryExcludeSearchFilter(GMAIL_EXCLUDED_CATEGORIES),
});
} catch (error) {
return {

View File

@ -26,7 +26,8 @@ import {
MessageChannelSyncStatus,
} from 'src/modules/messaging/standard-objects/message-channel.workspace-entity';
import { WorkspaceDataSourceService } from 'src/engine/workspace-datasource/workspace-datasource.service';
import { gmailSearchFilterEmailAdresses } from 'src/modules/messaging/utils/gmail-search-filter.util';
import { computeGmailCategoryExcludeSearchFilter } from 'src/modules/messaging/utils/compute-gmail-category-excude-search-filter';
import { GMAIL_EXCLUDED_CATEGORIES } from 'src/modules/messaging/constants/gmail-excluded-categories';
@Injectable()
export class GmailFullMessageListFetchService {
@ -167,7 +168,7 @@ export class GmailFullMessageListFetchService {
userId: 'me',
maxResults: GMAIL_USERS_MESSAGES_LIST_MAX_RESULT,
pageToken,
q: gmailSearchFilterEmailAdresses(includedEmails, blocklistedEmails),
q: computeGmailCategoryExcludeSearchFilter(GMAIL_EXCLUDED_CATEGORIES),
});
if (response.data?.messages) {

View File

@ -18,6 +18,10 @@ import { MessageChannelSyncStatusService } from 'src/modules/messaging/services/
import { GoogleAPIRefreshAccessTokenService } from 'src/modules/connected-account/services/google-api-refresh-access-token/google-api-refresh-access-token.service';
import { GmailMessagesImportService } from 'src/modules/messaging/services/gmail-messages-import/gmail-messages-import.service';
import { MessagingTelemetryService } from 'src/modules/messaging/services/telemetry/messaging-telemetry.service';
import { BlocklistWorkspaceEntity } from 'src/modules/connected-account/standard-objects/blocklist.workspace-entity';
import { InjectObjectMetadataRepository } from 'src/engine/object-metadata-repository/object-metadata-repository.decorator';
import { BlocklistRepository } from 'src/modules/connected-account/repositories/blocklist.repository';
import { filterEmails } from 'src/modules/messaging/services/utils/filter-emails.util';
@Injectable()
export class GmailMessagesImportV2Service {
@ -32,6 +36,8 @@ export class GmailMessagesImportV2Service {
private readonly gmailErrorHandlingService: GmailErrorHandlingService,
private readonly googleAPIsRefreshAccessTokenService: GoogleAPIRefreshAccessTokenService,
private readonly messagingTelemetryService: MessagingTelemetryService,
@InjectObjectMetadataRepository(BlocklistWorkspaceEntity)
private readonly blocklistRepository: BlocklistRepository,
) {}
async processMessageBatchImport(
@ -88,7 +94,7 @@ export class GmailMessagesImportV2Service {
const messageQueries = createQueriesFromMessageIds(messageIdsToFetch);
try {
const messagesToSave =
const allMessages =
await this.fetchMessagesByBatchesService.fetchAllMessages(
messageQueries,
connectedAccount.accessToken,
@ -96,6 +102,16 @@ export class GmailMessagesImportV2Service {
connectedAccount.id,
);
const blocklist = await this.blocklistRepository.getByWorkspaceMemberId(
connectedAccount.accountOwnerId,
workspaceId,
);
const messagesToSave = filterEmails(
allMessages,
blocklist.map((blocklistItem) => blocklistItem.handle),
);
if (!messagesToSave.length) {
await this.messageChannelSyncStatusService.markAsCompletedAndSchedulePartialMessageListFetch(
messageChannel.id,

View File

@ -0,0 +1,46 @@
import { Injectable } from '@nestjs/common';
import { gmail_v1 } from 'googleapis';
import { GMAIL_EXCLUDED_CATEGORIES } from 'src/modules/messaging/constants/gmail-excluded-categories';
import { GmailGetHistoryService } from 'src/modules/messaging/services/gmail-partial-message-list-fetch/gmail-get-history.service';
import { computeGmailCategoryLabelId } from 'src/modules/messaging/utils/compute-gmail-category-label-id';
import { assertNotNull } from 'src/utils/assert';
/**
 * Collects the ids of messages added to any of the excluded Gmail categories
 * since a given history id.
 */
@Injectable()
export class GmailFetchMessageIdsToExcludeService {
  constructor(
    private readonly gmailGetHistoryService: GmailGetHistoryService,
  ) {}

  /**
   * Queries the Gmail history once per excluded category (restricted to
   * `messageAdded` events and to the category's label id) and gathers the ids
   * of the messages that were added.
   *
   * @param gmailClient - Gmail API client passed through to the history service.
   * @param lastSyncHistoryId - History id the history lookup starts from.
   * @returns The ids of the added messages across all excluded categories.
   * @throws The error reported by the history service for any category.
   */
  public async fetchEmailIdsToExcludeOrThrow(
    gmailClient: gmail_v1.Gmail,
    lastSyncHistoryId: string,
  ): Promise<string[]> {
    const excludedEmailIds: string[] = [];

    // Categories are queried one after the other; the first error aborts the whole lookup.
    for (const excludedCategory of GMAIL_EXCLUDED_CATEGORIES) {
      const { history: categoryHistory, error: categoryError } =
        await this.gmailGetHistoryService.getHistory(
          gmailClient,
          lastSyncHistoryId,
          ['messageAdded'],
          computeGmailCategoryLabelId(excludedCategory),
        );

      if (categoryError) {
        throw categoryError;
      }

      const addedEmailIds = categoryHistory
        .flatMap((historyEntry) => historyEntry.messagesAdded)
        .map((addedMessage) => addedMessage?.message?.id)
        .filter((emailId) => emailId)
        .filter(assertNotNull);

      excludedEmailIds.push(...addedEmailIds);
    }

    return excludedEmailIds;
  }
}

View File

@ -13,6 +13,8 @@ export class GmailGetHistoryService {
public async getHistory(
gmailClient: gmail_v1.Gmail,
lastSyncHistoryId: string,
historyTypes?: ('messageAdded' | 'messageDeleted')[],
labelId?: string,
): Promise<{
history: gmail_v1.Schema$History[];
historyId?: string | null;
@ -31,7 +33,8 @@ export class GmailGetHistoryService {
maxResults: GMAIL_USERS_HISTORY_MAX_RESULT,
pageToken,
startHistoryId: lastSyncHistoryId,
historyTypes: ['messageAdded', 'messageDeleted'],
historyTypes: historyTypes || ['messageAdded', 'messageDeleted'],
labelId,
});
} catch (error) {
return {

View File

@ -16,6 +16,7 @@ import { GmailGetHistoryService } from 'src/modules/messaging/services/gmail-par
import { ObjectRecord } from 'src/engine/workspace-manager/workspace-sync-metadata/types/object-record';
import { GmailErrorHandlingService } from 'src/modules/messaging/services/gmail-error-handling/gmail-error-handling.service';
import { MessageChannelSyncStatusService } from 'src/modules/messaging/services/message-channel-sync-status/message-channel-sync-status.service';
import { GmailFetchMessageIdsToExcludeService } from 'src/modules/messaging/services/gmail-partial-message-list-fetch/gmail-fetch-messages-ids-to-exclude.service';
@Injectable()
export class GmailPartialMessageListFetchV2Service {
@ -36,6 +37,7 @@ export class GmailPartialMessageListFetchV2Service {
private readonly gmailErrorHandlingService: GmailErrorHandlingService,
private readonly gmailGetHistoryService: GmailGetHistoryService,
private readonly messageChannelSyncStatusService: MessageChannelSyncStatusService,
private readonly gmailFetchMessageIdsToExcludeService: GmailFetchMessageIdsToExcludeService,
) {}
public async processMessageListFetch(
@ -94,13 +96,36 @@ export class GmailPartialMessageListFetchV2Service {
const { messagesAdded, messagesDeleted } =
await this.gmailGetHistoryService.getMessageIdsFromHistory(history);
let messageIdsToFilter: string[] = [];
try {
messageIdsToFilter =
await this.gmailFetchMessageIdsToExcludeService.fetchEmailIdsToExcludeOrThrow(
gmailClient,
lastSyncHistoryId,
);
} catch (error) {
await this.gmailErrorHandlingService.handleGmailError(
error,
'partial-message-list-fetch',
messageChannel,
workspaceId,
);
return;
}
const messagesAddedFiltered = messagesAdded.filter(
(messageId) => !messageIdsToFilter.includes(messageId),
);
await this.cacheStorage.setAdd(
`messages-to-import:${workspaceId}:gmail:${messageChannel.id}`,
messagesAdded,
messagesAddedFiltered,
);
this.logger.log(
`Added ${messagesAdded.length} messages to import for workspace ${workspaceId} and account ${connectedAccount.id}`,
`Added ${messagesAddedFiltered.length} messages to import for workspace ${workspaceId} and account ${connectedAccount.id}`,
);
await this.messageChannelMessageAssociationRepository.deleteByMessageExternalIdsAndMessageChannelId(

View File

@ -8,6 +8,7 @@ import { BlocklistWorkspaceEntity } from 'src/modules/connected-account/standard
import { ConnectedAccountWorkspaceEntity } from 'src/modules/connected-account/standard-objects/connected-account.workspace-entity';
import { FetchMessagesByBatchesModule } from 'src/modules/messaging/services/fetch-messages-by-batches/fetch-messages-by-batches.module';
import { GmailErrorHandlingModule } from 'src/modules/messaging/services/gmail-error-handling/gmail-error-handling.module';
import { GmailFetchMessageIdsToExcludeService } from 'src/modules/messaging/services/gmail-partial-message-list-fetch/gmail-fetch-messages-ids-to-exclude.service';
import { GmailGetHistoryService } from 'src/modules/messaging/services/gmail-partial-message-list-fetch/gmail-get-history.service';
import { GmailPartialMessageListFetchV2Service } from 'src/modules/messaging/services/gmail-partial-message-list-fetch/gmail-partial-message-list-fetch-v2.service';
import { GmailPartialMessageListFetchService } from 'src/modules/messaging/services/gmail-partial-message-list-fetch/gmail-partial-message-list-fetch.service';
@ -35,6 +36,7 @@ import { MessageChannelWorkspaceEntity } from 'src/modules/messaging/standard-ob
GmailPartialMessageListFetchService,
GmailPartialMessageListFetchV2Service,
GmailGetHistoryService,
GmailFetchMessageIdsToExcludeService,
],
exports: [
GmailPartialMessageListFetchService,

View File

@ -0,0 +1,56 @@
import { GmailMessage } from 'src/modules/messaging/types/gmail-message';
import { isEmailBlocklisted } from 'src/modules/calendar-messaging-participant/utils/is-email-blocklisted.util';
// Todo: refactor this into several utils
/**
 * Runs the messages through the three import filters, in order:
 * non-personal senders/recipients, `.ics` attachments, then the blocklist.
 *
 * @param messages - Messages fetched from Gmail.
 * @param blocklist - Blocklisted handles to filter participants against.
 * @returns The messages that passed all three filters.
 */
export const filterEmails = (messages: GmailMessage[], blocklist: string[]) => {
  const personalMessages = filterOutNonPersonalEmails(messages);
  const messagesWithoutIcs = filterOutIcsAttachments(personalMessages);

  return filterOutBlocklistedMessages(messagesWithoutIcs, blocklist);
};
/**
 * Keeps only the messages in which no participant handle is blocklisted.
 * Messages without a participant list are kept.
 *
 * @param messages - Messages to screen.
 * @param blocklist - Blocklisted handles, forwarded to `isEmailBlocklisted`.
 * @returns The messages with no blocklisted participant.
 */
const filterOutBlocklistedMessages = (
  messages: GmailMessage[],
  blocklist: string[],
) =>
  messages.filter(
    ({ participants }) =>
      !participants ||
      !participants.some(({ handle }) =>
        isEmailBlocklisted(handle, blocklist),
      ),
  );
/**
 * Removes messages that carry a calendar (`.ics`) attachment.
 *
 * The extension check ignores case, so `INVITE.ICS` is treated like
 * `invite.ics`. Messages without an attachment list are kept.
 *
 * @param messages - Messages to screen.
 * @returns The messages none of whose attachments is an `.ics` file.
 */
const filterOutIcsAttachments = (messages: GmailMessage[]) => {
  return messages.filter((message) => {
    if (!message.attachments) {
      return true;
    }

    return message.attachments.every(
      (attachment) => !attachment.filename.toLowerCase().endsWith('.ics'),
    );
  });
};
/**
 * Tells whether an email address looks like a person's address rather than
 * an automated or shared mailbox.
 *
 * An address is rejected when it contains a "no reply" marker anywhere, or
 * starts with one of the listed role prefixes (`info@`, `support@`, …).
 * Matching is case-insensitive so `NoReply@…` or `Info@…` are rejected too.
 *
 * @param email - Address to check.
 * @returns `true` when the address does not match the non-personal pattern.
 */
const isPersonEmail = (email: string): boolean => {
  const nonPersonalPattern =
    /noreply|no-reply|do_not_reply|no\.reply|^(info@|contact@|hello@|support@|feedback@|service@|help@|invites@|invite@|welcome@|alerts@|team@)/i;

  return !nonPersonalPattern.test(email);
};
/**
 * Keeps only the messages in which every participant handle passes
 * `isPersonEmail`. Messages without a participant list are kept.
 *
 * @param messages - Messages to screen.
 * @returns The messages with no non-personal participant.
 */
const filterOutNonPersonalEmails = (messages: GmailMessage[]) =>
  messages.filter(
    ({ participants }) =>
      !participants ||
      !participants.some(({ handle }) => !isPersonEmail(handle)),
  );

View File

@ -0,0 +1,27 @@
import { computeGmailCategoryExcludeSearchFilter } from 'src/modules/messaging/utils/compute-gmail-category-excude-search-filter';
// Specs for the helper that turns a list of categories into a Gmail "-category:" search filter.
describe('computeGmailCategoryExcludeSearchFilter', () => {
  it('should return correct exclude search filter with empty category array', () => {
    expect(computeGmailCategoryExcludeSearchFilter([])).toBe('');
  });

  it('should return correct exclude search filter with one category', () => {
    const categories = ['CATEGORY1'];

    expect(computeGmailCategoryExcludeSearchFilter(categories)).toBe(
      '-category:CATEGORY1',
    );
  });

  it('should return correct exclude search filter with multiple categories', () => {
    const categories = ['CATEGORY1', 'CATEGORY2', 'CATEGORY3'];
    const expectedFilter =
      '-category:CATEGORY1 -category:CATEGORY2 -category:CATEGORY3';

    expect(computeGmailCategoryExcludeSearchFilter(categories)).toBe(
      expectedFilter,
    );
  });
});

View File

@ -0,0 +1,9 @@
import { computeGmailCategoryLabelId } from 'src/modules/messaging/utils/compute-gmail-category-label-id';
// Spec for the helper that derives a Gmail category label id from a category name.
describe('computeGmailCategoryLabelId', () => {
  it('should return correct category label id', () => {
    const expectedLabelId = 'CATEGORY_CATEGORY1';

    expect(computeGmailCategoryLabelId('CATEGORY1')).toBe(expectedLabelId);
  });
});

View File

@ -1,58 +0,0 @@
import {
excludedCategoriesAndFileTypesString,
gmailSearchFilterEmailAdresses,
gmailSearchFilterExcludeEmailAdresses,
gmailSearchFilterIncludeOnlyEmailAdresses,
gmailSearchFilterNonPersonalEmails,
} from 'src/modules/messaging/utils/gmail-search-filter.util';
// Specs for the exclude-only Gmail search filter.
describe('gmailSearchFilterExcludeEmailAdresses', () => {
  it('should return correct search filter for excluding emails', () => {
    const expectedFilter = `(in:inbox from:-(${gmailSearchFilterNonPersonalEmails}|hello@twenty.com|hey@twenty.com)|(in:sent to:-(${gmailSearchFilterNonPersonalEmails}|hello@twenty.com|hey@twenty.com)) ${excludedCategoriesAndFileTypesString}`;

    expect(
      gmailSearchFilterExcludeEmailAdresses([
        'hello@twenty.com',
        'hey@twenty.com',
      ]),
    ).toBe(expectedFilter);
  });

  it('should return correct search filter for excluding emails when no emails are provided', () => {
    const expectedFilter = `from:-(${gmailSearchFilterNonPersonalEmails}) ${excludedCategoriesAndFileTypesString}`;

    expect(gmailSearchFilterExcludeEmailAdresses()).toBe(expectedFilter);
  });
});
// Specs for the include-only Gmail search filter.
describe('gmailSearchFilterIncludeOnlyEmailAdresses', () => {
  it('should return correct search filter for including emails', () => {
    const expectedFilter = `(in:inbox from:(hello@twenty.com|hey@twenty.com)|(in:sent to:(hello@twenty.com|hey@twenty.com)) ${excludedCategoriesAndFileTypesString}`;

    expect(
      gmailSearchFilterIncludeOnlyEmailAdresses([
        'hello@twenty.com',
        'hey@twenty.com',
      ]),
    ).toBe(expectedFilter);
  });

  it('should return undefined when no emails are provided', () => {
    expect(gmailSearchFilterIncludeOnlyEmailAdresses()).toBe(undefined);
  });
});
// Spec for the combined include/exclude Gmail search filter.
describe('gmailSearchFilterEmailAdresses', () => {
  it('should return correct search filter for including emails and excluding emails', () => {
    const expectedFilter = `(in:inbox from:((hello@twenty.com|hey@twenty.com) -(${gmailSearchFilterNonPersonalEmails}|noreply@twenty.com|no-reply@twenty.com))|(in:sent to:((hello@twenty.com|hey@twenty.com) -(${gmailSearchFilterNonPersonalEmails}|noreply@twenty.com|no-reply@twenty.com)) ${excludedCategoriesAndFileTypesString}`;

    expect(
      gmailSearchFilterEmailAdresses(
        ['hello@twenty.com', 'hey@twenty.com'],
        ['noreply@twenty.com', 'no-reply@twenty.com'],
      ),
    ).toBe(expectedFilter);
  });
});

View File

@ -0,0 +1,3 @@
/**
 * Builds a Gmail search query that excludes the given categories.
 *
 * @param excludedCategories - Category names to exclude.
 * @returns One `-category:<name>` term per category, separated by single
 * spaces; an empty string when the list is empty.
 */
export const computeGmailCategoryExcludeSearchFilter = (
  excludedCategories: string[],
) => {
  const exclusionTerms: string[] = [];

  for (const categoryName of excludedCategories) {
    exclusionTerms.push(`-category:${categoryName}`);
  }

  return exclusionTerms.join(' ');
};

View File

@ -0,0 +1,2 @@
/**
 * Derives a Gmail category label id from a category name.
 *
 * @param category - Category name, in any letter case.
 * @returns `CATEGORY_` followed by the upper-cased category name.
 */
export const computeGmailCategoryLabelId = (category: string) => {
  const upperCasedCategory = category.toUpperCase();

  return `CATEGORY_${upperCasedCategory}`;
};

View File

@ -1,59 +0,0 @@
/** `|`-separated address patterns treated as non-personal in the Gmail search filters. */
export const gmailSearchFilterNonPersonalEmails =
  '*noreply@|*no-reply@|*do_not_reply@|*no.reply@|*info@|*contact@|*hello@|*support@|*feedback@|*service@|*help@';

/** Gmail categories excluded by the search filters. */
export const excludedCategories = ['promotions', 'social', 'forums'];

/** Attachment file types excluded by the search filters. */
export const excludedFileTypes = ['.ics'];

/** Space-separated `-category:` and `-filename:` terms built from the two lists above. */
export const excludedCategoriesAndFileTypesString = [
  ...excludedCategories.map((category) => `-category:${category}`),
  ...excludedFileTypes.map((fileType) => `-filename:${fileType}`),
].join(' ');
/**
 * Builds a Gmail search query that excludes non-personal addresses and,
 * when provided, the given addresses, plus the excluded categories and
 * file types.
 *
 * NOTE(review): in the query built for a non-empty list the parentheses look
 * unbalanced (the leading `(in:inbox` group is never closed). The string is
 * reproduced unchanged here — confirm against the Gmail search syntax.
 *
 * @param emails - Extra addresses to exclude; optional.
 * @returns The Gmail search query string.
 */
export const gmailSearchFilterExcludeEmailAdresses = (
  emails?: string[],
): string => {
  if (!emails?.length) {
    return `from:-(${gmailSearchFilterNonPersonalEmails}) ${excludedCategoriesAndFileTypesString}`;
  }

  const excludedAddresses = [
    gmailSearchFilterNonPersonalEmails,
    ...emails,
  ].join('|');

  return `(in:inbox from:-(${excludedAddresses})|(in:sent to:-(${excludedAddresses})) ${excludedCategoriesAndFileTypesString}`;
};
/**
 * Builds a Gmail search query restricted to the given addresses (as sender
 * for inbox messages, as recipient for sent messages), plus the excluded
 * categories and file types.
 *
 * @param emails - Addresses to include; optional.
 * @returns The Gmail search query, or `undefined` when no address is given.
 */
export const gmailSearchFilterIncludeOnlyEmailAdresses = (
  emails?: string[],
): string | undefined => {
  if (!emails?.length) {
    return undefined;
  }

  const includedAddresses = emails.join('|');

  return `(in:inbox from:(${includedAddresses})|(in:sent to:(${includedAddresses})) ${excludedCategoriesAndFileTypesString}`;
};
/**
 * Builds the Gmail search query from optional include and exclude lists.
 *
 * - No included addresses: delegates to the exclude-only filter.
 * - Included but no excluded addresses: delegates to the include-only filter.
 * - Both: combines the included addresses with the exclusion of the
 *   non-personal patterns and the excluded addresses.
 *
 * @param includedEmails - Addresses to restrict the search to; optional.
 * @param excludedEmails - Addresses to exclude; optional.
 * @returns The Gmail search query, or `undefined` when the include-only
 * filter returns `undefined`.
 */
export const gmailSearchFilterEmailAdresses = (
  includedEmails?: string[] | undefined,
  excludedEmails?: string[] | undefined,
): string | undefined => {
  if (!includedEmails?.length) {
    return gmailSearchFilterExcludeEmailAdresses(excludedEmails);
  }

  if (!excludedEmails?.length) {
    return gmailSearchFilterIncludeOnlyEmailAdresses(includedEmails);
  }

  const includedAddresses = includedEmails.join('|');
  const excludedAddresses = [
    gmailSearchFilterNonPersonalEmails,
    ...excludedEmails,
  ].join('|');

  return `(in:inbox from:((${includedAddresses}) -(${excludedAddresses}))|(in:sent to:((${includedAddresses}) -(${excludedAddresses})) ${excludedCategoriesAndFileTypesString}`;
};