fix(messaging): fix reply-quotation stripping that emptied email bodies (#21118)
Some synced messages were stored with empty bodies, others with the entire reply thread re-quoted: planer was stripping entirely quoted forwards down to nothing and not trimming inline reply history at all. Switched plaintext quote stripping to `email-reply-parser`, falling back to the full text when it strips everything so forwards don't end up blank. Kept planer for the HTML path, and normalized body whitespace. --------- Co-authored-by: prastoin <paul@twenty.com>
This commit is contained in:
@@ -17,7 +17,7 @@ const jestConfig = {
|
||||
testEnvironment: 'node',
|
||||
setupFilesAfterEnv: ['./setupTests.ts'],
|
||||
transformIgnorePatterns: [
|
||||
'/node_modules/(?!(file-type|@file-type|strtok3|token-types|@borewit|@tokenizer|uint8array-extras|read-next-line|digest-fetch|md5|js-sha256|js-sha512|base-64|charenc|crypt)/)',
|
||||
'/node_modules/(?!(file-type|@file-type|strtok3|token-types|@borewit|@tokenizer|uint8array-extras|read-next-line|digest-fetch|md5|js-sha256|js-sha512|base-64|charenc|crypt|email-reply-parser)/)',
|
||||
],
|
||||
testRegex: '.*\\.spec\\.ts$',
|
||||
transform: {
|
||||
|
||||
@@ -97,6 +97,7 @@
|
||||
"digest-fetch": "^3.1.1",
|
||||
"dompurify": "3.3.3",
|
||||
"dotenv": "16.4.5",
|
||||
"email-reply-parser": "^2.3.5",
|
||||
"express": "4.22.1",
|
||||
"express-session": "^1.18.2",
|
||||
"file-type": "^21.3.1",
|
||||
|
||||
+33
@@ -99,6 +99,39 @@ describe('parseAndFormatGmailMessage', () => {
|
||||
expect(result?.direction).toBe(MessageDirection.OUTGOING);
|
||||
});
|
||||
|
||||
it('should keep the body of an entirely-quoted forwarded message instead of emptying it', () => {
|
||||
// Regression: planer stripped the whole forwarded body, persisting text=''.
|
||||
const forwardedBody =
|
||||
'> quoted line one\n> quoted line two\n> quoted line three';
|
||||
|
||||
const result = parseAndFormatGmailMessage(
|
||||
buildMessage(
|
||||
[
|
||||
{ name: 'From', value: 'sender@example.com' },
|
||||
{ name: 'To', value: 'me@example.com' },
|
||||
{ name: 'Message-ID', value: '<abc@example.com>' },
|
||||
],
|
||||
{
|
||||
payload: {
|
||||
headers: [
|
||||
{ name: 'From', value: 'sender@example.com' },
|
||||
{ name: 'To', value: 'me@example.com' },
|
||||
{ name: 'Message-ID', value: '<abc@example.com>' },
|
||||
],
|
||||
mimeType: 'text/plain',
|
||||
body: {
|
||||
data: Buffer.from(forwardedBody).toString('base64'),
|
||||
size: forwardedBody.length,
|
||||
},
|
||||
},
|
||||
},
|
||||
),
|
||||
connectedAccount,
|
||||
);
|
||||
|
||||
expect(result?.text).toBe(forwardedBody);
|
||||
});
|
||||
|
||||
it('should return null when required headers (`From`, `Message-ID`) are missing', () => {
|
||||
const result = parseAndFormatGmailMessage(
|
||||
buildMessage([{ name: 'To', value: 'alice@example.com' }]),
|
||||
|
||||
+4
-8
@@ -1,5 +1,4 @@
|
||||
import { type gmail_v1 as gmailV1 } from 'googleapis';
|
||||
import planer from 'planer';
|
||||
import { MessageParticipantRole } from 'twenty-shared/types';
|
||||
import { isNonEmptyString } from '@sniptt/guards';
|
||||
import { isDefined, isNonEmptyArray } from 'twenty-shared/utils';
|
||||
@@ -8,8 +7,8 @@ import { type ConnectedAccountEntity } from 'src/engine/metadata-modules/connect
|
||||
import { computeMessageDirection } from 'src/modules/messaging/message-import-manager/drivers/gmail/utils/compute-message-direction.util';
|
||||
import { parseGmailMessage } from 'src/modules/messaging/message-import-manager/drivers/gmail/utils/parse-gmail-message.util';
|
||||
import { type MessageWithParticipants } from 'src/modules/messaging/message-import-manager/types/message';
|
||||
import { extractMessageBodyText } from 'src/modules/messaging/message-import-manager/utils/extract-message-body-text.util';
|
||||
import { formatAddressObjectAsParticipants } from 'src/modules/messaging/message-import-manager/utils/format-address-object-as-participants.util';
|
||||
import { sanitizeString } from 'src/modules/messaging/message-import-manager/utils/sanitize-string.util';
|
||||
|
||||
export const parseAndFormatGmailMessage = (
|
||||
message: gmailV1.Schema$Message,
|
||||
@@ -25,7 +24,8 @@ export const parseAndFormatGmailMessage = (
|
||||
cc,
|
||||
bcc,
|
||||
headerMessageId,
|
||||
text,
|
||||
body,
|
||||
isHtml,
|
||||
attachments,
|
||||
deliveredTo,
|
||||
labelIds,
|
||||
@@ -59,10 +59,6 @@ export const parseAndFormatGmailMessage = (
|
||||
return null;
|
||||
}
|
||||
|
||||
const textWithoutReplyQuotations = text
|
||||
? planer.extractFrom(text, 'text/plain')
|
||||
: '';
|
||||
|
||||
return {
|
||||
externalId: id,
|
||||
headerMessageId,
|
||||
@@ -71,7 +67,7 @@ export const parseAndFormatGmailMessage = (
|
||||
receivedAt: new Date(parseInt(internalDate)),
|
||||
direction: computeMessageDirection(from.address || '', connectedAccount),
|
||||
participants,
|
||||
text: sanitizeString(textWithoutReplyQuotations),
|
||||
text: extractMessageBodyText(isHtml ? { html: body } : { text: body }),
|
||||
attachments,
|
||||
messageFolderExternalIds: labelIds,
|
||||
labelIds,
|
||||
|
||||
+3
-6
@@ -5,7 +5,6 @@ import { type gmail_v1 } from 'googleapis';
|
||||
import { getAttachmentData } from 'src/modules/messaging/message-import-manager/drivers/gmail/utils/get-attachment-data.util';
|
||||
import { getBodyData } from 'src/modules/messaging/message-import-manager/drivers/gmail/utils/get-body-data.util';
|
||||
import { getPropertyFromHeaders } from 'src/modules/messaging/message-import-manager/drivers/gmail/utils/get-property-from-headers.util';
|
||||
import { createHtmlToTextConverter } from 'src/modules/messaging/message-import-manager/utils/create-html-to-text-converter.util';
|
||||
import { safeParseEmailAddressAddress } from 'src/modules/messaging/message-import-manager/utils/safe-parse-email-address-address.util';
|
||||
import { safeParseEmailAddresses } from 'src/modules/messaging/message-import-manager/utils/safe-parse-email-addresses.util';
|
||||
|
||||
@@ -31,10 +30,7 @@ export const parseGmailMessage = (message: gmail_v1.Schema$Message) => {
|
||||
const decodedBody = bodyResult
|
||||
? Buffer.from(bodyResult.data, 'base64').toString()
|
||||
: '';
|
||||
|
||||
const text = bodyResult?.isHtml
|
||||
? createHtmlToTextConverter()(decodedBody)
|
||||
: decodedBody;
|
||||
const isHtml = bodyResult?.isHtml ?? false;
|
||||
|
||||
const attachments = getAttachmentData(message);
|
||||
|
||||
@@ -52,7 +48,8 @@ export const parseGmailMessage = (message: gmail_v1.Schema$Message) => {
|
||||
to: rawTo ? safeParseEmailAddresses(rawTo) : [],
|
||||
cc: rawCc ? safeParseEmailAddresses(rawCc) : [],
|
||||
bcc: rawBcc ? safeParseEmailAddresses(rawBcc) : [],
|
||||
text,
|
||||
body: decodedBody,
|
||||
isHtml,
|
||||
attachments,
|
||||
labelIds,
|
||||
};
|
||||
|
||||
-2
@@ -19,7 +19,6 @@ import { ImapGetMessageListService } from 'src/modules/messaging/message-import-
|
||||
import { ImapGetMessagesService } from 'src/modules/messaging/message-import-manager/drivers/imap/services/imap-get-messages.service';
|
||||
import { ImapMessageListFetchErrorHandler } from 'src/modules/messaging/message-import-manager/drivers/imap/services/imap-message-list-fetch-error-handler.service';
|
||||
import { ImapMessageParserService } from 'src/modules/messaging/message-import-manager/drivers/imap/services/imap-message-parser.service';
|
||||
import { ImapMessageTextExtractorService } from 'src/modules/messaging/message-import-manager/drivers/imap/services/imap-message-text-extractor.service';
|
||||
import { ImapMessagesImportErrorHandler } from 'src/modules/messaging/message-import-manager/drivers/imap/services/imap-messages-import-error-handler.service';
|
||||
import { ImapSyncService } from 'src/modules/messaging/message-import-manager/drivers/imap/services/imap-sync.service';
|
||||
import { MessageParticipantManagerModule } from 'src/modules/messaging/message-participant-manager/message-participant-manager.module';
|
||||
@@ -47,7 +46,6 @@ import { MessageParticipantManagerModule } from 'src/modules/messaging/message-p
|
||||
ImapMessageParserService,
|
||||
ImapFindDraftsFolderService,
|
||||
ImapFindSentFolderService,
|
||||
ImapMessageTextExtractorService,
|
||||
],
|
||||
exports: [
|
||||
ImapGetMessagesService,
|
||||
|
||||
+5
-5
@@ -7,11 +7,11 @@ import { type ConnectedAccountEntity } from 'src/engine/metadata-modules/connect
|
||||
import { computeMessageDirection } from 'src/modules/messaging/message-import-manager/drivers/gmail/utils/compute-message-direction.util';
|
||||
import { ImapClientProvider } from 'src/modules/messaging/message-import-manager/drivers/imap/providers/imap-client.provider';
|
||||
import { ImapMessageParserService } from 'src/modules/messaging/message-import-manager/drivers/imap/services/imap-message-parser.service';
|
||||
import { ImapMessageTextExtractorService } from 'src/modules/messaging/message-import-manager/drivers/imap/services/imap-message-text-extractor.service';
|
||||
import { ImapMessagesImportErrorHandler } from 'src/modules/messaging/message-import-manager/drivers/imap/services/imap-messages-import-error-handler.service';
|
||||
import { parseMessageId } from 'src/modules/messaging/message-import-manager/drivers/imap/utils/parse-message-id.util';
|
||||
import { type MessageWithParticipants } from 'src/modules/messaging/message-import-manager/types/message';
|
||||
import { extractAddressesFromParsedEmail } from 'src/modules/messaging/message-import-manager/utils/extract-addresses-from-parsed-email.util';
|
||||
import { extractMessageBodyText } from 'src/modules/messaging/message-import-manager/utils/extract-message-body-text.util';
|
||||
import { extractParticipantsFromParsedEmail } from 'src/modules/messaging/message-import-manager/utils/extract-participants-from-parsed-email.util';
|
||||
import { extractThreadIdFromParsedEmail } from 'src/modules/messaging/message-import-manager/utils/extract-thread-id-from-parsed-email.util';
|
||||
import { sanitizeString } from 'src/modules/messaging/message-import-manager/utils/sanitize-string.util';
|
||||
@@ -28,7 +28,6 @@ export class ImapGetMessagesService {
|
||||
constructor(
|
||||
private readonly imapClientProvider: ImapClientProvider,
|
||||
private readonly messageParser: ImapMessageParserService,
|
||||
private readonly textExtractor: ImapMessageTextExtractorService,
|
||||
private readonly errorHandler: ImapMessagesImportErrorHandler,
|
||||
) {}
|
||||
|
||||
@@ -167,9 +166,10 @@ export class ImapGetMessagesService {
|
||||
const fromAddresses = extractAddressesFromParsedEmail(parsed.from);
|
||||
const senderAddress = fromAddresses[0]?.address ?? '';
|
||||
|
||||
const text = sanitizeString(
|
||||
this.textExtractor.extractTextWithoutReplyQuotations(parsed),
|
||||
);
|
||||
const text = extractMessageBodyText({
|
||||
text: parsed.text,
|
||||
html: parsed.html,
|
||||
});
|
||||
|
||||
return {
|
||||
externalId: `${folderPath}:${uid}`,
|
||||
|
||||
-26
@@ -1,26 +0,0 @@
|
||||
import { Injectable } from '@nestjs/common';
|
||||
|
||||
import * as planer from 'planer';
|
||||
import { safeDecodeURIComponent } from 'twenty-shared/utils';
|
||||
import { type Email as ParsedEmail } from 'postal-mime';
|
||||
|
||||
import { createHtmlToTextConverter } from 'src/modules/messaging/message-import-manager/utils/create-html-to-text-converter.util';
|
||||
|
||||
@Injectable()
|
||||
export class ImapMessageTextExtractorService {
|
||||
private readonly convertHtmlToText = createHtmlToTextConverter();
|
||||
|
||||
extractTextWithoutReplyQuotations(parsed: ParsedEmail): string {
|
||||
if (parsed.text) {
|
||||
const extractedText = planer.extractFrom(parsed.text, 'text/plain');
|
||||
|
||||
return safeDecodeURIComponent(extractedText);
|
||||
}
|
||||
|
||||
if (parsed.html) {
|
||||
return safeDecodeURIComponent(this.convertHtmlToText(parsed.html));
|
||||
}
|
||||
|
||||
return '';
|
||||
}
|
||||
}
|
||||
+2
-2
@@ -152,7 +152,7 @@ describe('Microsoft get messages service', () => {
|
||||
});
|
||||
});
|
||||
|
||||
it('Should set empty text for html responses', () => {
|
||||
it('Should convert html responses to text', () => {
|
||||
const batchResponses: MicrosoftGraphBatchResponse[] =
|
||||
microsoftGraphBatchWithHtmlMessagesResponse;
|
||||
const connectedAccount = {
|
||||
@@ -176,7 +176,7 @@ describe('Microsoft get messages service', () => {
|
||||
externalId: responseExample.body.id,
|
||||
subject: responseExample.body.subject,
|
||||
receivedAt: new Date(responseExample.body.receivedDateTime),
|
||||
text: '',
|
||||
text: 'test 4',
|
||||
headerMessageId: responseExample.body.internetMessageId,
|
||||
messageThreadExternalId: responseExample.body.conversationId,
|
||||
direction: 'OUTGOING',
|
||||
|
||||
+8
-2
@@ -10,6 +10,7 @@ import { computeMessageDirection } from 'src/modules/messaging/message-import-ma
|
||||
import { MicrosoftImportDriverException } from 'src/modules/messaging/message-import-manager/drivers/microsoft/exceptions/microsoft-import-driver.exception';
|
||||
import { type MicrosoftGraphBatchResponse } from 'src/modules/messaging/message-import-manager/drivers/microsoft/services/microsoft-get-messages.interface';
|
||||
import { type MessageWithParticipants } from 'src/modules/messaging/message-import-manager/types/message';
|
||||
import { extractMessageBodyText } from 'src/modules/messaging/message-import-manager/utils/extract-message-body-text.util';
|
||||
import { formatAddressObjectAsParticipants } from 'src/modules/messaging/message-import-manager/utils/format-address-object-as-participants.util';
|
||||
import { safeParseEmailAddress } from 'src/modules/messaging/message-import-manager/utils/safe-parse-email-address.util';
|
||||
|
||||
@@ -130,12 +131,17 @@ export class MicrosoftGetMessagesService {
|
||||
: []),
|
||||
];
|
||||
|
||||
const text = extractMessageBodyText(
|
||||
response.body?.contentType === 'text'
|
||||
? { text: response.body?.content }
|
||||
: { html: response.body?.content },
|
||||
);
|
||||
|
||||
return {
|
||||
externalId: response.id,
|
||||
subject: response.subject || '',
|
||||
receivedAt: new Date(response.receivedDateTime),
|
||||
text:
|
||||
response.body?.contentType === 'text' ? response.body?.content : '',
|
||||
text,
|
||||
headerMessageId: response.internetMessageId,
|
||||
messageThreadExternalId: response.conversationId,
|
||||
direction: response.from
|
||||
|
||||
+9
@@ -22,6 +22,15 @@ describe('createHtmlToTextConverter', () => {
|
||||
expect(convertHtmlToText('')).toBe('');
|
||||
});
|
||||
|
||||
it('should keep the body when planer strips an entirely-quoted message to empty', () => {
|
||||
// Regression: forwarded/fully-quoted html was stripped to empty by planer.
|
||||
const result = convertHtmlToText(
|
||||
'<div class="gmail_quote"><p>Only quoted content here</p></div>',
|
||||
);
|
||||
|
||||
expect(result).toBe('Only quoted content here');
|
||||
});
|
||||
|
||||
it('should sanitize malicious HTML', () => {
|
||||
const result = convertHtmlToText(
|
||||
'<p>Hello</p><script>alert("xss")</script>',
|
||||
|
||||
+49
-19
@@ -1,14 +1,8 @@
|
||||
import { type Email as ParsedMail } from 'postal-mime';
|
||||
|
||||
import { ImapMessageTextExtractorService } from 'src/modules/messaging/message-import-manager/drivers/imap/services/imap-message-text-extractor.service';
|
||||
|
||||
describe('ImapMessageTextExtractorService', () => {
|
||||
let service: ImapMessageTextExtractorService;
|
||||
|
||||
beforeEach(() => {
|
||||
service = new ImapMessageTextExtractorService();
|
||||
});
|
||||
import { extractMessageBodyText } from 'src/modules/messaging/message-import-manager/utils/extract-message-body-text.util';
|
||||
|
||||
describe('extractMessageBodyText', () => {
|
||||
it('should extract text from plain text emails with lot of reply quotations', () => {
|
||||
const parsed: ParsedMail = {
|
||||
text: `Hi John,
|
||||
@@ -101,21 +95,24 @@ Developer Support
|
||||
headerLines: [],
|
||||
};
|
||||
|
||||
const result = service.extractTextWithoutReplyQuotations(parsed);
|
||||
const result = extractMessageBodyText({
|
||||
text: parsed.text,
|
||||
html: parsed.html,
|
||||
});
|
||||
|
||||
expect(result).toBe(`Hi John,
|
||||
|
||||
Thank you for contacting Developer Support, this is Erica again. I hope you are having a good day.
|
||||
Thank you for contacting Developer Support, this is Erica again. I hope you are having a good day.
|
||||
|
||||
I understand that you are unable to contact finance. Despite your account being expired, you should still be able to contact our finance team.
|
||||
I understand that you are unable to contact finance. Despite your account being expired, you should still be able to contact our finance team.
|
||||
|
||||
Follow the link below the link for contacting our finance team.
|
||||
Follow the link below the link for contacting our finance team.
|
||||
|
||||
https://idmsa.apple.com/IDMSWebAuth/signin.html?path=/contact/finance/
|
||||
|
||||
Best Regards,
|
||||
|
||||
Erica
|
||||
Erica
|
||||
Developer Support`);
|
||||
});
|
||||
|
||||
@@ -136,7 +133,10 @@ Developer Support`);
|
||||
headerLines: [],
|
||||
};
|
||||
|
||||
const result = service.extractTextWithoutReplyQuotations(parsed);
|
||||
const result = extractMessageBodyText({
|
||||
text: parsed.text,
|
||||
html: parsed.html,
|
||||
});
|
||||
|
||||
expect(result).toBe('just a follow up');
|
||||
});
|
||||
@@ -158,7 +158,10 @@ Developer Support`);
|
||||
headerLines: [],
|
||||
};
|
||||
|
||||
const result = service.extractTextWithoutReplyQuotations(parsed);
|
||||
const result = extractMessageBodyText({
|
||||
text: parsed.text,
|
||||
html: parsed.html,
|
||||
});
|
||||
|
||||
expect(result).toBe('just a follow up');
|
||||
});
|
||||
@@ -170,7 +173,10 @@ Developer Support`);
|
||||
headerLines: [],
|
||||
};
|
||||
|
||||
const result = service.extractTextWithoutReplyQuotations(parsed);
|
||||
const result = extractMessageBodyText({
|
||||
text: parsed.text,
|
||||
html: parsed.html,
|
||||
});
|
||||
|
||||
expect(result).toBe('');
|
||||
});
|
||||
@@ -298,10 +304,13 @@ Developer Support`);
|
||||
</style></head><body><div id="inbox-html-wrapper"><div id="isPasted" fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;">Hi Sarah,</div><div fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;"><br fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;"></div><div fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;">I wanted to quickly follow up regarding the Q3 marketing campaign results. </div><div fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;">We’ve seen a 14% increase in engagement compared to last quarter, but conversions are still slightly below target. </div><div fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;"><br fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;"></div><div fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;">Let’s schedule a short call early next week to discuss adjustments before the Q4 push. 
</div><div fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;">Would Monday 10 AM work for you?</div><div fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;"><br fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;"></div><div fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;">Best regards, </div><div fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;">John</div><div fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;"><br fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;"></div><img class="flm-open" width="0" height="0" style="border: 0px; width: 0px; height: 0px; max-width: 100vw;" data-open-tracking-src="{{track-read-receipt}}"></div></body></html>`,
|
||||
};
|
||||
|
||||
const result = service.extractTextWithoutReplyQuotations(parsed);
|
||||
const result = extractMessageBodyText({
|
||||
text: parsed.text,
|
||||
html: parsed.html,
|
||||
});
|
||||
|
||||
expect(result).toEqual(
|
||||
`Hi Sarah,\n\nI wanted to quickly follow up regarding the Q3 marketing campaign results. \nWe’ve seen a 14% increase in engagement compared to last quarter, but conversions are still slightly below target. \n\nLet’s schedule a short call early next week to discuss adjustments before the Q4 push. \nWould Monday 10 AM work for you?\n\nBest regards, \nJohn`,
|
||||
`Hi Sarah,\n\nI wanted to quickly follow up regarding the Q3 marketing campaign results.\nWe’ve seen a 14% increase in engagement compared to last quarter, but conversions are still slightly below target.\n\nLet’s schedule a short call early next week to discuss adjustments before the Q4 push.\nWould Monday 10 AM work for you?\n\nBest regards,\nJohn`,
|
||||
);
|
||||
});
|
||||
|
||||
@@ -314,8 +323,29 @@ Developer Support`);
|
||||
headerLines: [],
|
||||
};
|
||||
|
||||
const result = service.extractTextWithoutReplyQuotations(parsed);
|
||||
const result = extractMessageBodyText({
|
||||
text: parsed.text,
|
||||
html: parsed.html,
|
||||
});
|
||||
|
||||
expect(result).toBe('Plain text content');
|
||||
});
|
||||
|
||||
it('should preserve percent sequences instead of URI-decoding the body', () => {
|
||||
const parsed: ParsedMail = {
|
||||
text: 'See https://example.com/path%2Fto%2Ffile and a 100%20 budget cut',
|
||||
attachments: [],
|
||||
headers: [],
|
||||
headerLines: [],
|
||||
};
|
||||
|
||||
const result = extractMessageBodyText({
|
||||
text: parsed.text,
|
||||
html: parsed.html,
|
||||
});
|
||||
|
||||
expect(result).toBe(
|
||||
'See https://example.com/path%2Fto%2Ffile and a 100%20 budget cut',
|
||||
);
|
||||
});
|
||||
});
|
||||
+32
@@ -0,0 +1,32 @@
|
||||
import { extractTextWithoutReplyQuotations } from 'src/modules/messaging/message-import-manager/utils/extract-text-without-reply-quotations.util';
|
||||
|
||||
describe('extractTextWithoutReplyQuotations', () => {
|
||||
it('should keep the new reply and drop the quoted history', () => {
|
||||
const result = extractTextWithoutReplyQuotations(
|
||||
'New reply here.\n\nOn Mon, someone wrote:\n> old line',
|
||||
);
|
||||
|
||||
expect(result).toContain('New reply here.');
|
||||
expect(result).not.toContain('old line');
|
||||
});
|
||||
|
||||
it('should drop a nested "On <date>, <name> wrote:" thread without quote markers', () => {
|
||||
const result = extractTextWithoutReplyQuotations(
|
||||
'Latest answer.\n\nOn 24 Jun 2024, at 15:14, Omar M <omar@x.com> wrote:\nprevious question\nsecond previous line',
|
||||
);
|
||||
|
||||
expect(result).toContain('Latest answer.');
|
||||
expect(result).not.toContain('previous question');
|
||||
});
|
||||
|
||||
it('should keep the full body when the message is entirely quoted (forward) and would otherwise be emptied', () => {
|
||||
// Regression: forwarded emails are entirely quotation-like, so the parser
|
||||
// returned empty and the message body was lost.
|
||||
const forwardedBody =
|
||||
'> quoted line one\n> quoted line two\n> quoted line three';
|
||||
|
||||
expect(extractTextWithoutReplyQuotations(forwardedBody)).toBe(
|
||||
forwardedBody,
|
||||
);
|
||||
});
|
||||
});
|
||||
+29
@@ -0,0 +1,29 @@
|
||||
import { normalizeMessageText } from 'src/modules/messaging/message-import-manager/utils/normalize-message-text.util';
|
||||
|
||||
describe('normalizeMessageText', () => {
|
||||
it('should convert CRLF and bare CR to LF', () => {
|
||||
expect(normalizeMessageText('line one\r\nline two\rline three')).toBe(
|
||||
'line one\nline two\nline three',
|
||||
);
|
||||
});
|
||||
|
||||
it('should replace non-breaking spaces with regular spaces', () => {
|
||||
expect(normalizeMessageText('Hello\u00A0world')).toBe('Hello world');
|
||||
});
|
||||
|
||||
it('should strip trailing whitespace on each line', () => {
|
||||
expect(normalizeMessageText('hello \nworld\t')).toBe('hello\nworld');
|
||||
});
|
||||
|
||||
it('should collapse runs of three or more blank lines to one blank line', () => {
|
||||
expect(normalizeMessageText('top\n\n\n\n\nbottom')).toBe('top\n\nbottom');
|
||||
});
|
||||
|
||||
it('should trim leading and trailing whitespace overall', () => {
|
||||
expect(normalizeMessageText('\r\n\r\n\r\nHello\r\n\r\n')).toBe('Hello');
|
||||
});
|
||||
|
||||
it('should leave already-clean text unchanged', () => {
|
||||
expect(normalizeMessageText('Hello\n\nworld')).toBe('Hello\n\nworld');
|
||||
});
|
||||
});
|
||||
+15
-8
@@ -1,7 +1,15 @@
|
||||
import createDOMPurify from 'dompurify';
|
||||
import { convert } from 'html-to-text';
|
||||
import { convert, HtmlToTextOptions } from 'html-to-text';
|
||||
import { JSDOM } from 'jsdom';
|
||||
import * as planer from 'planer';
|
||||
import { isNonEmptyString } from '@sniptt/guards';
|
||||
|
||||
import { normalizeMessageText } from 'src/modules/messaging/message-import-manager/utils/normalize-message-text.util';
|
||||
|
||||
const CONVERT_OPTIONS = {
|
||||
wordwrap: false,
|
||||
preserveNewlines: true,
|
||||
} satisfies HtmlToTextOptions;
|
||||
|
||||
export const createHtmlToTextConverter = (): ((html: string) => string) => {
|
||||
const jsdom = new JSDOM('');
|
||||
@@ -15,13 +23,12 @@ export const createHtmlToTextConverter = (): ((html: string) => string) => {
|
||||
jsdom.window.document,
|
||||
);
|
||||
|
||||
const text = convert(cleanedHtml, {
|
||||
wordwrap: false,
|
||||
preserveNewlines: true,
|
||||
}).trim();
|
||||
const text = normalizeMessageText(convert(cleanedHtml, CONVERT_OPTIONS));
|
||||
|
||||
const output = text.replace(/\u00A0/g, ' ').replace(/\n{3,}/g, '\n\n');
|
||||
|
||||
return output;
|
||||
// planer can strip an entirely-quoted (e.g. forwarded) body to nothing;
|
||||
// fall back to the un-stripped sanitized html so the body is not lost.
|
||||
return isNonEmptyString(text)
|
||||
? text
|
||||
: normalizeMessageText(convert(sanitizedHtml, CONVERT_OPTIONS));
|
||||
};
|
||||
};
|
||||
|
||||
+26
@@ -0,0 +1,26 @@
|
||||
import { isNonEmptyString } from '@sniptt/guards';
|
||||
|
||||
import { createHtmlToTextConverter } from 'src/modules/messaging/message-import-manager/utils/create-html-to-text-converter.util';
|
||||
import { extractTextWithoutReplyQuotations } from 'src/modules/messaging/message-import-manager/utils/extract-text-without-reply-quotations.util';
|
||||
import { normalizeMessageText } from 'src/modules/messaging/message-import-manager/utils/normalize-message-text.util';
|
||||
import { sanitizeString } from 'src/modules/messaging/message-import-manager/utils/sanitize-string.util';
|
||||
|
||||
/**
 * Produces the stored text body of a message from its plain-text and/or HTML
 * representation.
 *
 * The plain-text part is used whenever it is a non-empty string; otherwise a
 * non-empty HTML part is converted to text. When neither is available the
 * pipeline runs on an empty string.
 *
 * The selected body is then passed, in order, through reply-quotation
 * stripping, `sanitizeString` and `normalizeMessageText`.
 *
 * @param text - Plain-text body, if any.
 * @param html - HTML body, if any; only consulted when `text` is empty.
 * @returns The processed body text.
 */
export const extractMessageBodyText = ({
  text,
  html,
}: {
  text?: string | null;
  html?: string | null;
}): string => {
  let rawBody = '';

  if (isNonEmptyString(text)) {
    // Plain text wins over HTML when both parts are present.
    rawBody = text;
  } else if (isNonEmptyString(html)) {
    rawBody = createHtmlToTextConverter()(html);
  }

  return normalizeMessageText(
    sanitizeString(extractTextWithoutReplyQuotations(rawBody)),
  );
};
|
||||
+15
@@ -0,0 +1,15 @@
|
||||
import { isNonEmptyString } from '@sniptt/guards';
|
||||
import EmailReplyParser from 'email-reply-parser';
|
||||
|
||||
/**
 * Removes quoted reply history from a plain-text email body.
 *
 * The text is split into fragments by `EmailReplyParser`; fragments flagged
 * as quoted are dropped and the remaining ones are joined with a newline.
 *
 * If nothing but whitespace is left after stripping (the whole body was
 * considered quoted), the original `text` is returned unchanged so the
 * message body is not lost.
 *
 * @param text - Plain-text email body.
 * @returns The body without quoted fragments, or `text` itself when
 *   stripping would leave it empty.
 */
export const extractTextWithoutReplyQuotations = (text: string): string => {
  const unquotedParts: string[] = [];

  for (const fragment of new EmailReplyParser().read(text).getFragments()) {
    if (fragment.isQuoted()) {
      continue;
    }

    unquotedParts.push(fragment.getContent());
  }

  const strippedText = unquotedParts.join('\n');

  // Everything was treated as quotation: keep the full body instead.
  if (!isNonEmptyString(strippedText.trim())) {
    return text;
  }

  return strippedText;
};
|
||||
+7
@@ -0,0 +1,7 @@
|
||||
/**
 * Normalizes the whitespace of a message body.
 *
 * Steps, applied in order:
 * 1. CRLF and bare CR line endings are converted to LF.
 * 2. Non-breaking spaces (U+00A0) are replaced with regular spaces.
 * 3. Trailing whitespace is removed from every line.
 * 4. Runs of three or more newlines are collapsed to two (one blank line).
 * 5. Leading and trailing whitespace of the whole text is trimmed.
 *
 * @param text - Raw message text.
 * @returns The whitespace-normalized text.
 */
export const normalizeMessageText = (text: string): string =>
  text
    .replace(/\r\n?/g, '\n')
    // Written as an escape on purpose: a literal non-breaking space in the
    // pattern is invisible and is easily turned into a plain space by editors
    // or formatters, which would silently make this step a no-op.
    .replace(/\u00A0/g, ' ')
    // `[^\S\n]` is "any whitespace except a newline", so line breaks survive.
    .replace(/[^\S\n]+$/gm, '')
    .replace(/\n{3,}/g, '\n\n')
    .trim();
|
||||
@@ -33291,6 +33291,18 @@ __metadata:
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"email-reply-parser@npm:^2.3.5":
|
||||
version: 2.3.5
|
||||
resolution: "email-reply-parser@npm:2.3.5"
|
||||
peerDependencies:
|
||||
re2: 1.22.1
|
||||
peerDependenciesMeta:
|
||||
re2:
|
||||
optional: true
|
||||
checksum: 10c0/1ab770b3d90cfb917dc347001db92b01916b5ee47ca7029eae39c6588458cd93c5ebe26c354c47b1052f5b7d6966e196eb893d729bf7b7c92ce546b9e6df9430
|
||||
languageName: node
|
||||
linkType: hard
|
||||
|
||||
"emittery@npm:^0.13.1":
|
||||
version: 0.13.1
|
||||
resolution: "emittery@npm:0.13.1"
|
||||
@@ -55863,6 +55875,7 @@ __metadata:
|
||||
digest-fetch: "npm:^3.1.1"
|
||||
dompurify: "npm:3.3.3"
|
||||
dotenv: "npm:16.4.5"
|
||||
email-reply-parser: "npm:^2.3.5"
|
||||
express: "npm:4.22.1"
|
||||
express-session: "npm:^1.18.2"
|
||||
file-type: "npm:^21.3.1"
|
||||
|
||||
Reference in New Issue
Block a user