Ghost/core/server/api/canary/oembed.js
Daniel Lockyer a851cdfc7b Handled bad URLs in oembed bookmark API
fixes #11636

- malformed URLs passed to oembed API would cause `got` or `metascraper`
  to throw an error and this would result in a 500 error from Ghost
- this commit catches the errors and returns a reasonable response
2020-03-02 14:24:26 +00:00

162 lines
4.3 KiB
JavaScript

const common = require('../../lib/common');
const {extract, hasProvider} = require('oembed-parser');
const Promise = require('bluebird');
const request = require('../../lib/request');
const cheerio = require('cheerio');
async function fetchBookmarkData(url, html) {
const metascraper = require('metascraper')([
require('metascraper-url')(),
require('metascraper-title')(),
require('metascraper-description')(),
require('metascraper-author')(),
require('metascraper-publisher')(),
require('metascraper-image')(),
require('metascraper-logo-favicon')(),
require('metascraper-logo')()
]);
let scraperResponse;
try {
if (!html) {
const response = await request(url, {
headers: {
'user-agent': 'Ghost(https://github.com/TryGhost/Ghost)'
}
});
html = response.body;
}
scraperResponse = await metascraper({html, url});
} catch (e) {
return Promise.reject();
}
const metadata = Object.assign({}, scraperResponse, {
thumbnail: scraperResponse.image,
icon: scraperResponse.logo
});
// We want to use standard naming for image and logo
delete metadata.image;
delete metadata.logo;
if (metadata.title && metadata.description) {
return Promise.resolve({
type: 'bookmark',
url,
metadata
});
}
return Promise.resolve();
}
const findUrlWithProvider = (url) => {
let provider;
// build up a list of URL variations to test against because the oembed
// providers list is not always up to date with scheme or www vs non-www
let baseUrl = url.replace(/^\/\/|^https?:\/\/(?:www\.)?/, '');
let testUrls = [
`http://${baseUrl}`,
`https://${baseUrl}`,
`http://www.${baseUrl}`,
`https://www.${baseUrl}`
];
for (let testUrl of testUrls) {
provider = hasProvider(testUrl);
if (provider) {
url = testUrl;
break;
}
}
return {url, provider};
};
const getOembedUrlFromHTML = (html) => {
return cheerio('link[type="application/json+oembed"]', html).attr('href');
};
function unknownProvider(url) {
return Promise.reject(new common.errors.ValidationError({
message: common.i18n.t('errors.api.oembed.unknownProvider'),
context: url
}));
}
function knownProvider(url) {
return extract(url).catch((err) => {
return Promise.reject(new common.errors.InternalServerError({
message: err.message
}));
});
}
function fetchOembedData(url) {
let provider;
({url, provider} = findUrlWithProvider(url));
if (provider) {
return knownProvider(url);
}
return request(url, {
method: 'GET',
timeout: 2 * 1000,
followRedirect: true,
headers: {
'user-agent': 'Ghost(https://github.com/TryGhost/Ghost)'
}
}).then((response) => {
if (response.url !== url) {
({url, provider} = findUrlWithProvider(response.url));
}
if (provider) {
return knownProvider(url);
}
const oembedUrl = getOembedUrlFromHTML(response.body);
if (oembedUrl) {
return request(oembedUrl, {
method: 'GET',
json: true
}).then((response) => {
return response.body;
}).catch(() => {});
}
});
}
module.exports = {
docName: 'oembed',
read: {
permissions: false,
data: [
'url',
'type'
],
options: [],
query({data}) {
let {url, type} = data;
if (type === 'bookmark') {
return fetchBookmarkData(url)
.catch(() => unknownProvider(url));
}
return fetchOembedData(url).then((response) => {
if (!response && !type) {
return fetchBookmarkData(url);
}
return response;
}).then((response) => {
if (!response) {
return unknownProvider(url);
}
return response;
}).catch(() => {
return unknownProvider(url);
});
}
}
};