Added handling for parsing errors with user-submitted HTML

fix https://linear.app/tryghost/issue/SLO-87/cannot-read-properties-of-undefined-reading-createimpl-an-unexpected
refs https://github.com/jsdom/jsdom/issues/3709

- in the event we are given some HTML to parse, and that fails, we
  currently return a HTTP 500 because it's unhandled
- the instance we saw was due to `<constructor>` crashing jsdom, we've
  opened an issue for that
- in terms of handling the error gracefully, we can surround the code
  in a try-catch and return a more suitable error. I've gone for a
  ValidationError for now - you could debate whether a different one is
  more appropriate
- also added Sentry error capturing so we're not blind to these,
  ultimately we should make sure the parser can handle all
  user-submitted data
This commit is contained in:
Daniel Lockyer 2024-05-07 17:07:41 +02:00 committed by Daniel Lockyer
parent 40ee2043e0
commit 2659e5aa40
4 changed files with 131 additions and 5 deletions

View File

@ -1,12 +1,20 @@
const _ = require('lodash');
const debug = require('@tryghost/debug')('api:endpoints:utils:serializers:input:pages');
const mobiledoc = require('../../../../../lib/mobiledoc');
const {ValidationError} = require('@tryghost/errors');
const tpl = require('@tryghost/tpl');
const url = require('./utils/url');
const slugFilterOrder = require('./utils/slug-filter-order');
const localUtils = require('../../index');
const mobiledoc = require('../../../../../lib/mobiledoc');
const postsMetaSchema = require('../../../../../data/schema').tables.posts_meta;
const clean = require('./utils/clean');
const lexical = require('../../../../../lib/lexical');
const sentry = require('../../../../../../shared/sentry');
const messages = {
failedHtmlToMobiledoc: 'Failed to convert HTML to Mobiledoc',
failedHtmlToLexical: 'Failed to convert HTML to Lexical'
};
function removeSourceFormats(frame) {
if (frame.options.formats?.includes('mobiledoc') || frame.options.formats?.includes('lexical')) {
@ -136,7 +144,17 @@ module.exports = {
if (process.env.CI) {
console.time('htmlToMobiledocConverter (page)'); // eslint-disable-line no-console
}
frame.data.pages[0].mobiledoc = JSON.stringify(mobiledoc.htmlToMobiledocConverter(html));
try {
frame.data.pages[0].mobiledoc = JSON.stringify(mobiledoc.htmlToMobiledocConverter(html));
} catch (err) {
sentry.captureException(err);
throw new ValidationError({
message: tpl(messages.failedHtmlToMobiledoc),
err
});
}
if (process.env.CI) {
console.timeEnd('htmlToMobiledocConverter (page)'); // eslint-disable-line no-console
}
@ -146,7 +164,17 @@ module.exports = {
if (process.env.CI) {
console.time('htmlToLexicalConverter (page)'); // eslint-disable-line no-console
}
frame.data.pages[0].lexical = JSON.stringify(lexical.htmlToLexicalConverter(html));
try {
frame.data.pages[0].lexical = JSON.stringify(lexical.htmlToLexicalConverter(html));
} catch (err) {
sentry.captureException(err);
throw new ValidationError({
message: tpl(messages.failedHtmlToLexical),
err
});
}
if (process.env.CI) {
console.timeEnd('htmlToLexicalConverter (page)'); // eslint-disable-line no-console
}

View File

@ -1,5 +1,7 @@
const _ = require('lodash');
const debug = require('@tryghost/debug')('api:endpoints:utils:serializers:input:posts');
const {ValidationError} = require('@tryghost/errors');
const tpl = require('@tryghost/tpl');
const url = require('./utils/url');
const slugFilterOrder = require('./utils/slug-filter-order');
const localUtils = require('../../index');
@ -7,6 +9,12 @@ const mobiledoc = require('../../../../../lib/mobiledoc');
const postsMetaSchema = require('../../../../../data/schema').tables.posts_meta;
const clean = require('./utils/clean');
const lexical = require('../../../../../lib/lexical');
const sentry = require('../../../../../../shared/sentry');
const messages = {
failedHtmlToMobiledoc: 'Failed to convert HTML to Mobiledoc',
failedHtmlToLexical: 'Failed to convert HTML to Lexical'
};
function removeSourceFormats(frame) {
if (frame.options.formats?.includes('mobiledoc') || frame.options.formats?.includes('lexical')) {
@ -170,7 +178,17 @@ module.exports = {
if (process.env.CI) {
console.time('htmlToMobiledocConverter (post)'); // eslint-disable-line no-console
}
frame.data.posts[0].mobiledoc = JSON.stringify(mobiledoc.htmlToMobiledocConverter(html));
try {
frame.data.posts[0].mobiledoc = JSON.stringify(mobiledoc.htmlToMobiledocConverter(html));
} catch (err) {
sentry.captureException(err);
throw new ValidationError({
message: tpl(messages.failedHtmlToMobiledoc),
err
});
}
if (process.env.CI) {
console.timeEnd('htmlToMobiledocConverter (post)'); // eslint-disable-line no-console
}
@ -180,7 +198,17 @@ module.exports = {
if (process.env.CI) {
console.time('htmlToLexicalConverter (post)'); // eslint-disable-line no-console
}
frame.data.posts[0].lexical = JSON.stringify(lexical.htmlToLexicalConverter(html));
try {
frame.data.posts[0].lexical = JSON.stringify(lexical.htmlToLexicalConverter(html));
} catch (err) {
sentry.captureException(err);
throw new ValidationError({
message: tpl(messages.failedHtmlToLexical),
err
});
}
if (process.env.CI) {
console.timeEnd('htmlToLexicalConverter (post)'); // eslint-disable-line no-console
}

View File

@ -1,7 +1,14 @@
const should = require('should');
const sinon = require('sinon');
const serializers = require('../../../../../../../core/server/api/endpoints/utils/serializers');
const mobiledocLib = require('@tryghost/html-to-mobiledoc');
describe('Unit: endpoints/utils/serializers/input/pages', function () {
afterEach(function () {
sinon.restore();
});
describe('browse', function () {
it('default', function () {
const apiConfig = {};
@ -179,6 +186,34 @@ describe('Unit: endpoints/utils/serializers/input/pages', function () {
frame.data.pages[0].tags.should.eql([{slug: 'slug1', name: 'hey'}, {slug: 'slug2'}]);
});
it('throws error if HTML conversion fails', function () {
// JSDOM require is sometimes very slow on CI causing random timeouts
this.timeout(4000);
const frame = {
options: {
source: 'html'
},
data: {
posts: [
{
id: 'id1',
html: '<bananarama>'
}
]
}
};
sinon.stub(mobiledocLib, 'toMobiledoc').throws(new Error('Some error'));
try {
serializers.input.posts.edit({}, frame);
should.fail('Error expected');
} catch (err) {
err.message.should.eql('Failed to convert HTML to Mobiledoc');
}
});
describe('Ensure relations format', function () {
it('relations is array of objects', function () {
const apiConfig = {};

View File

@ -1,7 +1,14 @@
const should = require('should');
const sinon = require('sinon');
const serializers = require('../../../../../../../core/server/api/endpoints/utils/serializers');
const mobiledocLib = require('@tryghost/html-to-mobiledoc');
describe('Unit: endpoints/utils/serializers/input/posts', function () {
afterEach(function () {
sinon.restore();
});
describe('browse', function () {
it('default', function () {
const apiConfig = {};
@ -288,6 +295,34 @@ describe('Unit: endpoints/utils/serializers/input/posts', function () {
let postData = frame.data.posts[0];
postData.lexical.should.equal('{"root":{"children":[{"children":[{"detail":0,"format":0,"mode":"normal","style":"","text":"this is great feature","type":"extended-text","version":1}],"direction":null,"format":"","indent":0,"type":"paragraph","version":1},{"type":"html","version":1,"html":"<div class=\\"custom\\">My Custom HTML</div>"},{"children":[{"detail":0,"format":0,"mode":"normal","style":"","text":"custom html preserved!","type":"extended-text","version":1}],"direction":null,"format":"","indent":0,"type":"paragraph","version":1}],"direction":null,"format":"","indent":0,"type":"root","version":1}}');
});
it('throws error when HTML conversion fails', function () {
// JSDOM require is sometimes very slow on CI causing random timeouts
this.timeout(4000);
const frame = {
options: {
source: 'html'
},
data: {
posts: [
{
id: 'id1',
html: '<bananarama>'
}
]
}
};
sinon.stub(mobiledocLib, 'toMobiledoc').throws(new Error('Some error'));
try {
serializers.input.posts.edit({}, frame);
should.fail('Error expected');
} catch (err) {
err.message.should.eql('Failed to convert HTML to Mobiledoc');
}
});
});
it('tags relation is stripped of unknown properties', function () {