From 854524967e889ddbf2fb54c2c2705c713d8738ef Mon Sep 17 00:00:00 2001 From: Nate Ijams Date: Thu, 25 Sep 2025 11:46:35 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=A8=20Removed=20extra=20new=20lines=20?= =?UTF-8?q?and=20improved=20header=20formatting=20in=20plain=20text.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit see https://forum.ghost.org/t/unnecessary-and-excessive-newlines-in-plain-text-part-of-newsletters/60267/ see https://www.w3.org/WAI/WCAG22/Techniques/text/T3 - Previously relied on the presence of \n in the source HTML. - Now sets reasonable new lines based on HTML elements. - Formats headers using WCAG 2.2 Techniques suggestions. - Tests added for all changes. --- .../lib/html-to-plaintext.js | 10 ++++++- .../test/html-to-plaintext.test.js | 30 +++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/packages/html-to-plaintext/lib/html-to-plaintext.js b/packages/html-to-plaintext/lib/html-to-plaintext.js index 76f6d96b4..6013e6689 100644 --- a/packages/html-to-plaintext/lib/html-to-plaintext.js +++ b/packages/html-to-plaintext/lib/html-to-plaintext.js @@ -61,11 +61,19 @@ const loadConverters = () => { }); const emailSettings = mergeSettings({ + preserveNewlines: false, selectors: [ // equiv hideLinkHrefIfSameAsText: true {selector: 'a', options: {hideLinkHrefIfSameAsText: true}}, // Don't include html .preheader in email - {selector: '.preheader', format: 'skip'} + {selector: '.preheader', format: 'skip'}, + {selector: 'p', options: {leadingLineBreaks: 2, trailingLineBreaks: 1}}, + {selector: 'h1', options: {leadingLineBreaks: 3, trailingLineBreaks: 1}}, + {selector: 'h2', options: {leadingLineBreaks: 3, trailingLineBreaks: 1}}, + {selector: 'h3', options: {leadingLineBreaks: 3, trailingLineBreaks: 1}}, + {selector: 'h4', options: {leadingLineBreaks: 3, trailingLineBreaks: 1}}, + {selector: 'h5', options: {leadingLineBreaks: 3, trailingLineBreaks: 1}}, + {selector: 'h6', options: {uppercase: 
false, leadingLineBreaks: 3, trailingLineBreaks: 1}} ] }); diff --git a/packages/html-to-plaintext/test/html-to-plaintext.test.js b/packages/html-to-plaintext/test/html-to-plaintext.test.js index 7a5c30fd3..7d08e80ec 100644 --- a/packages/html-to-plaintext/test/html-to-plaintext.test.js +++ b/packages/html-to-plaintext/test/html-to-plaintext.test.js @@ -88,6 +88,36 @@ describe('Html to Plaintext', function () { }); }); + describe('New lines and format headers', function () { + it('Strips excessive new lines and formats headers', function () { + const html = '

Some ordinary text

\n\n\n\n

Should not be way far apart from earlier text.

'; + const expected = 'Some ordinary text\n\nShould not be way far apart from earlier text.'; + const {email} = getEmailandExcert(html); + assert.equal(email, expected); + }); + + it('Check header formatting', function () { + const html = '

Header One

\n

What should I even write about?

And more

With Header Two

What about code?

And Header Three

Good bye

'; + const expected = 'Header One\n\nWhat should I even write about?\n\nAnd more\n\n\nWith Header Two\n\nWhat about code?\n\n\nAnd Header Three\n\nGood bye'; + const {email} = getEmailandExcert(html); + assert.equal(email, expected); + }); + + it('Empty headers return nothing', function () { + const html = '

'; + const expected = ''; + const {email} = getEmailandExcert(html); + assert.equal(email, expected); + }); + + it('Non-text header contents don’t appear', function () { + const html = '

Helloworld

'; + const expected = 'Helloworld'; + const {email} = getEmailandExcert(html); + assert.equal(email, expected); + }); + }); + describe('commentSnippet converter', function () { function testConverter({input, expected}) { return () => {