fix(messaging): fix reply-quotation stripping that emptied email bodies (#21118)

Some synced messages were stored with empty bodies, and others with the
entire reply thread re-quoted. Planer was stripping entirely quoted
forwards down to nothing and was not trimming inline reply history at all.

Switched plaintext quote stripping to `email-reply-parser`, falling back
to the full text when it strips everything so forwards don't end up
blank. Kept planer for the HTML path, and normalized body whitespace.

---------

Co-authored-by: prastoin <paul@twenty.com>
This commit is contained in:
neo773
2026-06-04 18:01:53 +05:30
committed by GitHub
parent 5a55021e26
commit 437eed0862
19 changed files with 252 additions and 79 deletions
+1 -1
View File
@@ -17,7 +17,7 @@ const jestConfig = {
testEnvironment: 'node',
setupFilesAfterEnv: ['./setupTests.ts'],
transformIgnorePatterns: [
'/node_modules/(?!(file-type|@file-type|strtok3|token-types|@borewit|@tokenizer|uint8array-extras|read-next-line|digest-fetch|md5|js-sha256|js-sha512|base-64|charenc|crypt)/)',
'/node_modules/(?!(file-type|@file-type|strtok3|token-types|@borewit|@tokenizer|uint8array-extras|read-next-line|digest-fetch|md5|js-sha256|js-sha512|base-64|charenc|crypt|email-reply-parser)/)',
],
testRegex: '.*\\.spec\\.ts$',
transform: {
+1
View File
@@ -97,6 +97,7 @@
"digest-fetch": "^3.1.1",
"dompurify": "3.3.3",
"dotenv": "16.4.5",
"email-reply-parser": "^2.3.5",
"express": "4.22.1",
"express-session": "^1.18.2",
"file-type": "^21.3.1",
@@ -99,6 +99,39 @@ describe('parseAndFormatGmailMessage', () => {
expect(result?.direction).toBe(MessageDirection.OUTGOING);
});
it('should keep the body of an entirely-quoted forwarded message instead of emptying it', () => {
  // Regression guard: a body made only of quoted lines used to be stripped
  // entirely by planer, so the message was persisted with text=''.
  const forwardedBody =
    '> quoted line one\n> quoted line two\n> quoted line three';

  // Built through a factory so the top-level and payload header lists are
  // distinct objects, exactly as two separate literals would be.
  const buildHeaders = () => [
    { name: 'From', value: 'sender@example.com' },
    { name: 'To', value: 'me@example.com' },
    { name: 'Message-ID', value: '<abc@example.com>' },
  ];

  const plainTextPayload = {
    headers: buildHeaders(),
    mimeType: 'text/plain',
    body: {
      data: Buffer.from(forwardedBody).toString('base64'),
      size: forwardedBody.length,
    },
  };

  const message = buildMessage(buildHeaders(), { payload: plainTextPayload });

  const result = parseAndFormatGmailMessage(message, connectedAccount);

  expect(result?.text).toBe(forwardedBody);
});
it('should return null when required headers (`From`, `Message-ID`) are missing', () => {
const result = parseAndFormatGmailMessage(
buildMessage([{ name: 'To', value: 'alice@example.com' }]),
@@ -1,5 +1,4 @@
import { type gmail_v1 as gmailV1 } from 'googleapis';
import planer from 'planer';
import { MessageParticipantRole } from 'twenty-shared/types';
import { isNonEmptyString } from '@sniptt/guards';
import { isDefined, isNonEmptyArray } from 'twenty-shared/utils';
@@ -8,8 +7,8 @@ import { type ConnectedAccountEntity } from 'src/engine/metadata-modules/connect
import { computeMessageDirection } from 'src/modules/messaging/message-import-manager/drivers/gmail/utils/compute-message-direction.util';
import { parseGmailMessage } from 'src/modules/messaging/message-import-manager/drivers/gmail/utils/parse-gmail-message.util';
import { type MessageWithParticipants } from 'src/modules/messaging/message-import-manager/types/message';
import { extractMessageBodyText } from 'src/modules/messaging/message-import-manager/utils/extract-message-body-text.util';
import { formatAddressObjectAsParticipants } from 'src/modules/messaging/message-import-manager/utils/format-address-object-as-participants.util';
import { sanitizeString } from 'src/modules/messaging/message-import-manager/utils/sanitize-string.util';
export const parseAndFormatGmailMessage = (
message: gmailV1.Schema$Message,
@@ -25,7 +24,8 @@ export const parseAndFormatGmailMessage = (
cc,
bcc,
headerMessageId,
text,
body,
isHtml,
attachments,
deliveredTo,
labelIds,
@@ -59,10 +59,6 @@ export const parseAndFormatGmailMessage = (
return null;
}
const textWithoutReplyQuotations = text
? planer.extractFrom(text, 'text/plain')
: '';
return {
externalId: id,
headerMessageId,
@@ -71,7 +67,7 @@ export const parseAndFormatGmailMessage = (
receivedAt: new Date(parseInt(internalDate)),
direction: computeMessageDirection(from.address || '', connectedAccount),
participants,
text: sanitizeString(textWithoutReplyQuotations),
text: extractMessageBodyText(isHtml ? { html: body } : { text: body }),
attachments,
messageFolderExternalIds: labelIds,
labelIds,
@@ -5,7 +5,6 @@ import { type gmail_v1 } from 'googleapis';
import { getAttachmentData } from 'src/modules/messaging/message-import-manager/drivers/gmail/utils/get-attachment-data.util';
import { getBodyData } from 'src/modules/messaging/message-import-manager/drivers/gmail/utils/get-body-data.util';
import { getPropertyFromHeaders } from 'src/modules/messaging/message-import-manager/drivers/gmail/utils/get-property-from-headers.util';
import { createHtmlToTextConverter } from 'src/modules/messaging/message-import-manager/utils/create-html-to-text-converter.util';
import { safeParseEmailAddressAddress } from 'src/modules/messaging/message-import-manager/utils/safe-parse-email-address-address.util';
import { safeParseEmailAddresses } from 'src/modules/messaging/message-import-manager/utils/safe-parse-email-addresses.util';
@@ -31,10 +30,7 @@ export const parseGmailMessage = (message: gmail_v1.Schema$Message) => {
const decodedBody = bodyResult
? Buffer.from(bodyResult.data, 'base64').toString()
: '';
const text = bodyResult?.isHtml
? createHtmlToTextConverter()(decodedBody)
: decodedBody;
const isHtml = bodyResult?.isHtml ?? false;
const attachments = getAttachmentData(message);
@@ -52,7 +48,8 @@ export const parseGmailMessage = (message: gmail_v1.Schema$Message) => {
to: rawTo ? safeParseEmailAddresses(rawTo) : [],
cc: rawCc ? safeParseEmailAddresses(rawCc) : [],
bcc: rawBcc ? safeParseEmailAddresses(rawBcc) : [],
text,
body: decodedBody,
isHtml,
attachments,
labelIds,
};
@@ -19,7 +19,6 @@ import { ImapGetMessageListService } from 'src/modules/messaging/message-import-
import { ImapGetMessagesService } from 'src/modules/messaging/message-import-manager/drivers/imap/services/imap-get-messages.service';
import { ImapMessageListFetchErrorHandler } from 'src/modules/messaging/message-import-manager/drivers/imap/services/imap-message-list-fetch-error-handler.service';
import { ImapMessageParserService } from 'src/modules/messaging/message-import-manager/drivers/imap/services/imap-message-parser.service';
import { ImapMessageTextExtractorService } from 'src/modules/messaging/message-import-manager/drivers/imap/services/imap-message-text-extractor.service';
import { ImapMessagesImportErrorHandler } from 'src/modules/messaging/message-import-manager/drivers/imap/services/imap-messages-import-error-handler.service';
import { ImapSyncService } from 'src/modules/messaging/message-import-manager/drivers/imap/services/imap-sync.service';
import { MessageParticipantManagerModule } from 'src/modules/messaging/message-participant-manager/message-participant-manager.module';
@@ -47,7 +46,6 @@ import { MessageParticipantManagerModule } from 'src/modules/messaging/message-p
ImapMessageParserService,
ImapFindDraftsFolderService,
ImapFindSentFolderService,
ImapMessageTextExtractorService,
],
exports: [
ImapGetMessagesService,
@@ -7,11 +7,11 @@ import { type ConnectedAccountEntity } from 'src/engine/metadata-modules/connect
import { computeMessageDirection } from 'src/modules/messaging/message-import-manager/drivers/gmail/utils/compute-message-direction.util';
import { ImapClientProvider } from 'src/modules/messaging/message-import-manager/drivers/imap/providers/imap-client.provider';
import { ImapMessageParserService } from 'src/modules/messaging/message-import-manager/drivers/imap/services/imap-message-parser.service';
import { ImapMessageTextExtractorService } from 'src/modules/messaging/message-import-manager/drivers/imap/services/imap-message-text-extractor.service';
import { ImapMessagesImportErrorHandler } from 'src/modules/messaging/message-import-manager/drivers/imap/services/imap-messages-import-error-handler.service';
import { parseMessageId } from 'src/modules/messaging/message-import-manager/drivers/imap/utils/parse-message-id.util';
import { type MessageWithParticipants } from 'src/modules/messaging/message-import-manager/types/message';
import { extractAddressesFromParsedEmail } from 'src/modules/messaging/message-import-manager/utils/extract-addresses-from-parsed-email.util';
import { extractMessageBodyText } from 'src/modules/messaging/message-import-manager/utils/extract-message-body-text.util';
import { extractParticipantsFromParsedEmail } from 'src/modules/messaging/message-import-manager/utils/extract-participants-from-parsed-email.util';
import { extractThreadIdFromParsedEmail } from 'src/modules/messaging/message-import-manager/utils/extract-thread-id-from-parsed-email.util';
import { sanitizeString } from 'src/modules/messaging/message-import-manager/utils/sanitize-string.util';
@@ -28,7 +28,6 @@ export class ImapGetMessagesService {
constructor(
private readonly imapClientProvider: ImapClientProvider,
private readonly messageParser: ImapMessageParserService,
private readonly textExtractor: ImapMessageTextExtractorService,
private readonly errorHandler: ImapMessagesImportErrorHandler,
) {}
@@ -167,9 +166,10 @@ export class ImapGetMessagesService {
const fromAddresses = extractAddressesFromParsedEmail(parsed.from);
const senderAddress = fromAddresses[0]?.address ?? '';
const text = sanitizeString(
this.textExtractor.extractTextWithoutReplyQuotations(parsed),
);
const text = extractMessageBodyText({
text: parsed.text,
html: parsed.html,
});
return {
externalId: `${folderPath}:${uid}`,
@@ -1,26 +0,0 @@
import { Injectable } from '@nestjs/common';
import * as planer from 'planer';
import { safeDecodeURIComponent } from 'twenty-shared/utils';
import { type Email as ParsedEmail } from 'postal-mime';
import { createHtmlToTextConverter } from 'src/modules/messaging/message-import-manager/utils/create-html-to-text-converter.util';
@Injectable()
export class ImapMessageTextExtractorService {
private readonly convertHtmlToText = createHtmlToTextConverter();
extractTextWithoutReplyQuotations(parsed: ParsedEmail): string {
if (parsed.text) {
const extractedText = planer.extractFrom(parsed.text, 'text/plain');
return safeDecodeURIComponent(extractedText);
}
if (parsed.html) {
return safeDecodeURIComponent(this.convertHtmlToText(parsed.html));
}
return '';
}
}
@@ -152,7 +152,7 @@ describe('Microsoft get messages service', () => {
});
});
it('Should set empty text for html responses', () => {
it('Should convert html responses to text', () => {
const batchResponses: MicrosoftGraphBatchResponse[] =
microsoftGraphBatchWithHtmlMessagesResponse;
const connectedAccount = {
@@ -176,7 +176,7 @@ describe('Microsoft get messages service', () => {
externalId: responseExample.body.id,
subject: responseExample.body.subject,
receivedAt: new Date(responseExample.body.receivedDateTime),
text: '',
text: 'test 4',
headerMessageId: responseExample.body.internetMessageId,
messageThreadExternalId: responseExample.body.conversationId,
direction: 'OUTGOING',
@@ -10,6 +10,7 @@ import { computeMessageDirection } from 'src/modules/messaging/message-import-ma
import { MicrosoftImportDriverException } from 'src/modules/messaging/message-import-manager/drivers/microsoft/exceptions/microsoft-import-driver.exception';
import { type MicrosoftGraphBatchResponse } from 'src/modules/messaging/message-import-manager/drivers/microsoft/services/microsoft-get-messages.interface';
import { type MessageWithParticipants } from 'src/modules/messaging/message-import-manager/types/message';
import { extractMessageBodyText } from 'src/modules/messaging/message-import-manager/utils/extract-message-body-text.util';
import { formatAddressObjectAsParticipants } from 'src/modules/messaging/message-import-manager/utils/format-address-object-as-participants.util';
import { safeParseEmailAddress } from 'src/modules/messaging/message-import-manager/utils/safe-parse-email-address.util';
@@ -130,12 +131,17 @@ export class MicrosoftGetMessagesService {
: []),
];
const text = extractMessageBodyText(
response.body?.contentType === 'text'
? { text: response.body?.content }
: { html: response.body?.content },
);
return {
externalId: response.id,
subject: response.subject || '',
receivedAt: new Date(response.receivedDateTime),
text:
response.body?.contentType === 'text' ? response.body?.content : '',
text,
headerMessageId: response.internetMessageId,
messageThreadExternalId: response.conversationId,
direction: response.from
@@ -22,6 +22,15 @@ describe('createHtmlToTextConverter', () => {
expect(convertHtmlToText('')).toBe('');
});
it('should keep the body when planer strips an entirely-quoted message to empty', () => {
// Regression: forwarded/fully-quoted html was stripped to empty by planer.
const result = convertHtmlToText(
'<div class="gmail_quote"><p>Only quoted content here</p></div>',
);
expect(result).toBe('Only quoted content here');
});
it('should sanitize malicious HTML', () => {
const result = convertHtmlToText(
'<p>Hello</p><script>alert("xss")</script>',
@@ -1,14 +1,8 @@
import { type Email as ParsedMail } from 'postal-mime';
import { ImapMessageTextExtractorService } from 'src/modules/messaging/message-import-manager/drivers/imap/services/imap-message-text-extractor.service';
describe('ImapMessageTextExtractorService', () => {
let service: ImapMessageTextExtractorService;
beforeEach(() => {
service = new ImapMessageTextExtractorService();
});
import { extractMessageBodyText } from 'src/modules/messaging/message-import-manager/utils/extract-message-body-text.util';
describe('extractMessageBodyText', () => {
it('should extract text from plain text emails with lot of reply quotations', () => {
const parsed: ParsedMail = {
text: `Hi John,
@@ -101,21 +95,24 @@ Developer Support
headerLines: [],
};
const result = service.extractTextWithoutReplyQuotations(parsed);
const result = extractMessageBodyText({
text: parsed.text,
html: parsed.html,
});
expect(result).toBe(`Hi John,
Thank you for contacting Developer Support, this is Erica again. I hope you are having a good day.
Thank you for contacting Developer Support, this is Erica again. I hope you are having a good day.
I understand that you are unable to contact finance. Despite your account being expired, you should still be able to contact our finance team.
I understand that you are unable to contact finance. Despite your account being expired, you should still be able to contact our finance team.
Follow the link below the link for contacting our finance team.
Follow the link below the link for contacting our finance team.
https://idmsa.apple.com/IDMSWebAuth/signin.html?path=/contact/finance/
Best Regards,
Erica
Erica
Developer Support`);
});
@@ -136,7 +133,10 @@ Developer Support`);
headerLines: [],
};
const result = service.extractTextWithoutReplyQuotations(parsed);
const result = extractMessageBodyText({
text: parsed.text,
html: parsed.html,
});
expect(result).toBe('just a follow up');
});
@@ -158,7 +158,10 @@ Developer Support`);
headerLines: [],
};
const result = service.extractTextWithoutReplyQuotations(parsed);
const result = extractMessageBodyText({
text: parsed.text,
html: parsed.html,
});
expect(result).toBe('just a follow up');
});
@@ -170,7 +173,10 @@ Developer Support`);
headerLines: [],
};
const result = service.extractTextWithoutReplyQuotations(parsed);
const result = extractMessageBodyText({
text: parsed.text,
html: parsed.html,
});
expect(result).toBe('');
});
@@ -298,10 +304,13 @@ Developer Support`);
</style></head><body><div id="inbox-html-wrapper"><div id="isPasted" fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;">Hi Sarah,</div><div fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;"><br fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;"></div><div fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;">I wanted to quickly follow up regarding the Q3 marketing campaign results. &nbsp;</div><div fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;">Weve seen a 14% increase in engagement compared to last quarter, but conversions are still slightly below target. &nbsp;</div><div fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;"><br fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;"></div><div fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;">Lets schedule a short call early next week to discuss adjustments before the Q4 push. 
&nbsp;</div><div fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;">Would Monday 10 AM work for you?</div><div fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;"><br fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;"></div><div fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;">Best regards, &nbsp;</div><div fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;">John</div><div fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;"><br fr-original-style="" style="user-select: inherit; scrollbar-color: var(--scrollbar-active-color) #0000; box-sizing: border-box;"></div><img class="flm-open" width="0" height="0" style="border: 0px; width: 0px; height: 0px; max-width: 100vw;" data-open-tracking-src="{{track-read-receipt}}"></div></body></html>`,
};
const result = service.extractTextWithoutReplyQuotations(parsed);
const result = extractMessageBodyText({
text: parsed.text,
html: parsed.html,
});
expect(result).toEqual(
`Hi Sarah,\n\nI wanted to quickly follow up regarding the Q3 marketing campaign results. \nWeve seen a 14% increase in engagement compared to last quarter, but conversions are still slightly below target. \n\nLets schedule a short call early next week to discuss adjustments before the Q4 push. \nWould Monday 10 AM work for you?\n\nBest regards, \nJohn`,
`Hi Sarah,\n\nI wanted to quickly follow up regarding the Q3 marketing campaign results.\nWeve seen a 14% increase in engagement compared to last quarter, but conversions are still slightly below target.\n\nLets schedule a short call early next week to discuss adjustments before the Q4 push.\nWould Monday 10 AM work for you?\n\nBest regards,\nJohn`,
);
});
@@ -314,8 +323,29 @@ Developer Support`);
headerLines: [],
};
const result = service.extractTextWithoutReplyQuotations(parsed);
const result = extractMessageBodyText({
text: parsed.text,
html: parsed.html,
});
expect(result).toBe('Plain text content');
});
it('should preserve percent sequences instead of URI-decoding the body', () => {
const parsed: ParsedMail = {
text: 'See https://example.com/path%2Fto%2Ffile and a 100%20 budget cut',
attachments: [],
headers: [],
headerLines: [],
};
const result = extractMessageBodyText({
text: parsed.text,
html: parsed.html,
});
expect(result).toBe(
'See https://example.com/path%2Fto%2Ffile and a 100%20 budget cut',
);
});
});
@@ -0,0 +1,32 @@
import { extractTextWithoutReplyQuotations } from 'src/modules/messaging/message-import-manager/utils/extract-text-without-reply-quotations.util';
describe('extractTextWithoutReplyQuotations', () => {
  it('should keep the new reply and drop the quoted history', () => {
    const replyWithQuotedHistory =
      'New reply here.\n\nOn Mon, someone wrote:\n> old line';

    const stripped = extractTextWithoutReplyQuotations(replyWithQuotedHistory);

    expect(stripped).toContain('New reply here.');
    expect(stripped).not.toContain('old line');
  });

  it('should drop a nested "On <date>, <name> wrote:" thread without quote markers', () => {
    // The previous message follows the attribution line without any '>' prefix.
    const replyWithUnmarkedHistory =
      'Latest answer.\n\nOn 24 Jun 2024, at 15:14, Omar M <omar@x.com> wrote:\nprevious question\nsecond previous line';

    const stripped = extractTextWithoutReplyQuotations(
      replyWithUnmarkedHistory,
    );

    expect(stripped).toContain('Latest answer.');
    expect(stripped).not.toContain('previous question');
  });

  it('should keep the full body when the message is entirely quoted (forward) and would otherwise be emptied', () => {
    // Regression guard: every line of a forward looks like a quotation, so
    // stripping used to leave nothing and the message body was lost.
    const forwardedBody =
      '> quoted line one\n> quoted line two\n> quoted line three';

    const stripped = extractTextWithoutReplyQuotations(forwardedBody);

    expect(stripped).toBe(forwardedBody);
  });
});
@@ -0,0 +1,29 @@
import { normalizeMessageText } from 'src/modules/messaging/message-import-manager/utils/normalize-message-text.util';
describe('normalizeMessageText', () => {
  // One row per normalization rule: test title, raw input, expected output.
  const normalizationCases: { title: string; input: string; expected: string }[] =
    [
      {
        title: 'should convert CRLF and bare CR to LF',
        input: 'line one\r\nline two\rline three',
        expected: 'line one\nline two\nline three',
      },
      {
        title: 'should replace non-breaking spaces with regular spaces',
        input: 'Hello\u00A0world',
        expected: 'Hello world',
      },
      {
        title: 'should strip trailing whitespace on each line',
        input: 'hello \nworld\t',
        expected: 'hello\nworld',
      },
      {
        title:
          'should collapse runs of three or more blank lines to one blank line',
        input: 'top\n\n\n\n\nbottom',
        expected: 'top\n\nbottom',
      },
      {
        title: 'should trim leading and trailing whitespace overall',
        input: '\r\n\r\n\r\nHello\r\n\r\n',
        expected: 'Hello',
      },
      {
        title: 'should leave already-clean text unchanged',
        input: 'Hello\n\nworld',
        expected: 'Hello\n\nworld',
      },
    ];

  for (const { title, input, expected } of normalizationCases) {
    it(title, () => {
      expect(normalizeMessageText(input)).toBe(expected);
    });
  }
});
@@ -1,7 +1,15 @@
import createDOMPurify from 'dompurify';
import { convert } from 'html-to-text';
import { convert, HtmlToTextOptions } from 'html-to-text';
import { JSDOM } from 'jsdom';
import * as planer from 'planer';
import { isNonEmptyString } from '@sniptt/guards';
import { normalizeMessageText } from 'src/modules/messaging/message-import-manager/utils/normalize-message-text.util';
// Options shared by every html-to-text `convert` call in this module, so the
// primary conversion and the un-stripped fallback are configured identically.
// NOTE(review): per the html-to-text docs, `wordwrap: false` turns off hard
// line wrapping and `preserveNewlines: true` keeps newlines found in text
// nodes - confirm against the installed version.
const CONVERT_OPTIONS = {
wordwrap: false,
preserveNewlines: true,
} satisfies HtmlToTextOptions;
export const createHtmlToTextConverter = (): ((html: string) => string) => {
const jsdom = new JSDOM('');
@@ -15,13 +23,12 @@ export const createHtmlToTextConverter = (): ((html: string) => string) => {
jsdom.window.document,
);
const text = convert(cleanedHtml, {
wordwrap: false,
preserveNewlines: true,
}).trim();
const text = normalizeMessageText(convert(cleanedHtml, CONVERT_OPTIONS));
const output = text.replace(/\u00A0/g, ' ').replace(/\n{3,}/g, '\n\n');
return output;
// planer can strip an entirely-quoted (e.g. forwarded) body to nothing;
// fall back to the un-stripped sanitized html so the body is not lost.
return isNonEmptyString(text)
? text
: normalizeMessageText(convert(sanitizedHtml, CONVERT_OPTIONS));
};
};
@@ -0,0 +1,26 @@
import { isNonEmptyString } from '@sniptt/guards';
import { createHtmlToTextConverter } from 'src/modules/messaging/message-import-manager/utils/create-html-to-text-converter.util';
import { extractTextWithoutReplyQuotations } from 'src/modules/messaging/message-import-manager/utils/extract-text-without-reply-quotations.util';
import { normalizeMessageText } from 'src/modules/messaging/message-import-manager/utils/normalize-message-text.util';
import { sanitizeString } from 'src/modules/messaging/message-import-manager/utils/sanitize-string.util';
// Converter shared by every call. Building one instantiates a JSDOM window,
// so it is created lazily on the first html body and then reused.
let sharedHtmlToTextConverter: ((html: string) => string) | undefined;

// Returns the shared html-to-text converter, creating it on first use.
const getHtmlToTextConverter = (): ((html: string) => string) => {
  if (sharedHtmlToTextConverter === undefined) {
    sharedHtmlToTextConverter = createHtmlToTextConverter();
  }

  return sharedHtmlToTextConverter;
};

/**
 * Produces the text stored for an imported message body.
 *
 * The plain-text part is preferred; the html part is converted to text only
 * when no usable plain-text part exists. The chosen text then has its reply
 * quotations stripped, is sanitized, and has its whitespace normalized.
 *
 * @param text - Plain-text body part, if any.
 * @param html - Html body part, if any.
 * @returns The cleaned body text, or `''` when neither part has content.
 */
export const extractMessageBodyText = ({
  text,
  html,
}: {
  text?: string | null;
  html?: string | null;
}): string => {
  let candidate = '';

  // A whitespace-only text part carries no content, so it must not shadow a
  // populated html part (otherwise the stored body ends up empty).
  if (isNonEmptyString(text) && text.trim() !== '') {
    candidate = text;
  } else if (isNonEmptyString(html)) {
    candidate = getHtmlToTextConverter()(html);
  }

  const textWithoutReplyQuotations =
    extractTextWithoutReplyQuotations(candidate);

  const sanitizedText = sanitizeString(textWithoutReplyQuotations);

  return normalizeMessageText(sanitizedText);
};
@@ -0,0 +1,15 @@
import { isNonEmptyString } from '@sniptt/guards';
import EmailReplyParser from 'email-reply-parser';
/**
 * Removes quoted reply history from a plain-text message body.
 *
 * The text is split into fragments by `email-reply-parser`; fragments flagged
 * as quoted are dropped and the rest are joined with newlines. When nothing
 * but whitespace would remain, the input is returned untouched so the body
 * is not lost.
 *
 * @param text - Plain-text message body.
 * @returns The body without quoted fragments, or `text` itself when stripping
 *   would leave it blank.
 */
export const extractTextWithoutReplyQuotations = (text: string): string => {
  const fragments = new EmailReplyParser().read(text).getFragments();
  const keptContents: string[] = [];

  for (const fragment of fragments) {
    if (fragment.isQuoted()) {
      continue;
    }

    keptContents.push(fragment.getContent());
  }

  const unquotedText = keptContents.join('\n');

  if (!isNonEmptyString(unquotedText.trim())) {
    return text;
  }

  return unquotedText;
};
@@ -0,0 +1,7 @@
/**
 * Normalizes the whitespace of a message body.
 *
 * @param text - Raw message text.
 * @returns The text with LF-only line endings, non-breaking spaces turned into
 *   regular spaces, no trailing whitespace on any line, at most one blank line
 *   in a row, and no leading or trailing whitespace overall.
 */
export const normalizeMessageText = (text: string): string =>
  text
    // Unify CRLF and bare CR line endings to LF.
    .replace(/\r\n?/g, '\n')
    // Written as an escape on purpose: a literal non-breaking space in the
    // pattern is indistinguishable from a regular space when reading the code.
    .replace(/\u00A0/g, ' ')
    // Drop trailing whitespace (anything but the newline itself) on each line.
    .replace(/[^\S\n]+$/gm, '')
    // Collapse three or more consecutive newlines into a single blank line.
    .replace(/\n{3,}/g, '\n\n')
    .trim();
+13
View File
@@ -33291,6 +33291,18 @@ __metadata:
languageName: node
linkType: hard
"email-reply-parser@npm:^2.3.5":
version: 2.3.5
resolution: "email-reply-parser@npm:2.3.5"
peerDependencies:
re2: 1.22.1
peerDependenciesMeta:
re2:
optional: true
checksum: 10c0/1ab770b3d90cfb917dc347001db92b01916b5ee47ca7029eae39c6588458cd93c5ebe26c354c47b1052f5b7d6966e196eb893d729bf7b7c92ce546b9e6df9430
languageName: node
linkType: hard
"emittery@npm:^0.13.1":
version: 0.13.1
resolution: "emittery@npm:0.13.1"
@@ -55863,6 +55875,7 @@ __metadata:
digest-fetch: "npm:^3.1.1"
dompurify: "npm:3.3.3"
dotenv: "npm:16.4.5"
email-reply-parser: "npm:^2.3.5"
express: "npm:4.22.1"
express-session: "npm:^1.18.2"
file-type: "npm:^21.3.1"