Refactored html-to-plaintext to have two functions

refs: https://github.com/TryGhost/Team/issues/1609

- Split html-to-plaintext into a function for excerpts and emails
- Updated all usages so they use the correct function
- There's currently no difference between the two
This commit is contained in:
Hannah Wolfe 2022-05-13 14:42:26 +01:00
parent f73a84abac
commit 338dc3ae6c
8 changed files with 95 additions and 31 deletions

View File

@ -15,7 +15,7 @@ const forPost = (attrs, frame) => {
if (paywallIndex !== -1) {
attrs.html = attrs.html.slice(0, paywallIndex);
attrs.plaintext = htmlToPlaintext(attrs.html);
attrs.plaintext = htmlToPlaintext.excerpt(attrs.html);
if (!attrs.custom_excerpt && attrs.excerpt) {
attrs.excerpt = attrs.plaintext.substring(0, 500);

View File

@ -46,7 +46,7 @@ module.exports = createIrreversibleMigration(async (knex) => {
};
if (html !== post.html || !post.plaintext) {
const plaintext = htmlToPlaintext(html);
const plaintext = htmlToPlaintext.excerpt(html);
if (plaintext !== post.plaintext) {
updatedAttrs.plaintext = plaintext;

View File

@ -65,7 +65,7 @@ module.exports = createTransactionalMigration(
continue;
}
const plaintext = htmlToPlaintext(html);
const plaintext = htmlToPlaintext.excerpt(html);
await knex('posts')
.transacting(trx)

View File

@ -50,7 +50,7 @@ module.exports = createIrreversibleMigration(async (knex) => {
};
if (html !== post.html || !post.plaintext) {
const plaintext = htmlToPlaintext(html);
const plaintext = htmlToPlaintext.excerpt(html);
if (plaintext !== post.plaintext) {
updatedAttrs.plaintext = plaintext;

View File

@ -626,7 +626,7 @@ Post = ghostBookshelf.Model.extend({
if (this.get('html') === null) {
plaintext = null;
} else {
plaintext = htmlToPlaintext(this.get('html'));
plaintext = htmlToPlaintext.excerpt(this.get('html'));
}
// CASE: html is e.g. <p></p>

View File

@ -248,7 +248,7 @@ const serialize = async (postModel, newsletter, options = {isBrowserPreview: fal
`).remove();
post.html = _cheerio('body').html();
post.plaintext = htmlToPlaintext(post.html);
post.plaintext = htmlToPlaintext.email(post.html);
// Outlook will render feature images at full-size breaking the layout.
// Content images fix this by rendering max 600px images - do the same for feature image here
@ -321,7 +321,7 @@ function renderEmailForSegment(email, memberSegment) {
});
result.html = formatHtmlForEmail($.html());
result.plaintext = htmlToPlaintext(result.html);
result.plaintext = htmlToPlaintext.email(result.html);
return result;
}

View File

@ -1,28 +1,52 @@
module.exports = function htmlToPlaintext(html) {
const {convert} = require('html-to-text');
const baseSettings = {
wordwrap: false,
preserveNewlines: true,
return convert(html, {
wordwrap: false,
preserveNewlines: true,
// equiv returnDomByDefault: true,
baseElements: {returnDomByDefault: true},
selectors: [
// Ignore images, equiv ignoreImage: true
{selector: 'img', format: 'skip'},
// equiv returnDomByDefault: true,
baseElements: {returnDomByDefault: true},
selectors: [
// Ignore images, equiv ignoreImage: true
{selector: 'img', format: 'skip'} ,
// disable uppercase headings, equiv uppercaseHeadings: false
{selector: 'h1', options: {uppercase: false}},
{selector: 'h2', options: {uppercase: false}},
{selector: 'h3', options: {uppercase: false}},
{selector: 'h4', options: {uppercase: false}},
{selector: 'h5', options: {uppercase: false}},
{selector: 'h6', options: {uppercase: false}},
{selector: 'table', options: {uppercaseHeaderCells: false}},
// equiv hideLinkHrefIfSameAsText: true
{selector: 'a', options: {hideLinkHrefIfSameAsText: true}},
// disable uppercase headings, equiv uppercaseHeadings: false
{selector: 'h1', options: {uppercase: false}},
{selector: 'h2', options: {uppercase: false}},
{selector: 'h3', options: {uppercase: false}},
{selector: 'h4', options: {uppercase: false}},
{selector: 'h5', options: {uppercase: false}},
{selector: 'h6', options: {uppercase: false}},
{selector: 'table', options: {uppercaseHeaderCells: false}},
// Backwards compatibility with html-to-text 5.1.1
{selector: 'div', format: 'inline'}
]
});
// equiv hideLinkHrefIfSameAsText: true
{selector: 'a', options: {hideLinkHrefIfSameAsText: true}},
// Backwards compatibility with html-to-text 5.1.1
{selector: 'div', format: 'inline'}
]
};
// Compiled converter functions, cached at module level once they have been built.
let excerptConverter;
let emailConverter;

/**
 * Builds the excerpt and email converters if either of them is still missing.
 * Both are currently compiled from the same baseSettings object.
 */
const loadConverters = () => {
    if (!excerptConverter || !emailConverter) {
        // html-to-text is required here, inside the function, rather than at module top level
        const htmlToText = require('html-to-text');

        excerptConverter = htmlToText.compile(baseSettings);
        emailConverter = htmlToText.compile(baseSettings);
    }
};

/**
 * Converts HTML to plaintext using the excerpt converter.
 *
 * @param {string} html - HTML to convert
 * @returns the output of the compiled excerpt converter
 */
module.exports.excerpt = (html) => {
    loadConverters();

    const plaintext = excerptConverter(html);
    return plaintext;
};

/**
 * Converts HTML to plaintext using the email converter.
 *
 * @param {string} html - HTML to convert
 * @returns the output of the compiled email converter
 */
module.exports.email = (html) => {
    loadConverters();

    const plaintext = emailConverter(html);
    return plaintext;
};

View File

@ -0,0 +1,40 @@
const assert = require('assert');
const htmlToPlaintext = require('../../../core/shared/html-to-plaintext');
/**
 * Tests for the html-to-plaintext helpers.
 *
 * Every case runs the same HTML through both the excerpt and the email
 * converter and pins the expected plaintext for each. The two expectations
 * are identical in every case below.
 */
describe('Html to Plaintext', function () {
    /**
     * Converts the given HTML with both converters so a test can assert on
     * the two results side by side.
     *
     * @param {string} input - HTML to convert
     * @returns {{email: *, excerpt: *}} the output of each converter
     */
    function getEmailAndExcerpt(input) {
        const excerpt = htmlToPlaintext.excerpt(input);
        const email = htmlToPlaintext.email(input);

        return {email, excerpt};
    }

    describe('excerpt vs email behaviour', function () {
        it('example case with img & link', function () {
            const input = '<p>Some thing <a href="https://google.com">Google</a> once told me.</p><img src="https://hotlink.com" alt="An important image"><p>And <strong>another</strong> thing.</p>';

            const {excerpt, email} = getEmailAndExcerpt(input);

            // strictEqual rather than the legacy loose-equality assert.equal,
            // so a non-string return value cannot pass through coercion
            assert.strictEqual(excerpt, 'Some thing Google [https://google.com] once told me.\n\nAnd another thing.');
            assert.strictEqual(email, 'Some thing Google [https://google.com] once told me.\n\nAnd another thing.');
        });

        it('example case with figure + figcaption', function () {
            // NOTE(review): the input has a closing </figure> with no opening tag - presumably a trimmed real-world snippet; confirm
            const input = '<figcaption>A snippet from a post template</figcaption></figure><p>See? Not that scary! But still completely optional. </p>';

            const {excerpt, email} = getEmailAndExcerpt(input);

            assert.strictEqual(excerpt, 'A snippet from a post template\n\nSee? Not that scary! But still completely optional.');
            assert.strictEqual(email, 'A snippet from a post template\n\nSee? Not that scary! But still completely optional.');
        });

        it('example case with figure + figcaption inside a link', function () {
            const input = '<a href="https://mysite.com"><figcaption>A snippet from a post template</figcaption></figure></a><p>See? Not that scary! But still completely optional. </p>';

            const {excerpt, email} = getEmailAndExcerpt(input);

            assert.strictEqual(excerpt, 'A snippet from a post template [https://mysite.com]\n\nSee? Not that scary! But still completely optional.');
            assert.strictEqual(email, 'A snippet from a post template [https://mysite.com]\n\nSee? Not that scary! But still completely optional.');
        });
    });
});