Ghost/core/server/services/oembed.js
Naz 04e7c9fca5 Refactored oembed service to async/await syntax
no issue

- The method was very hard to read, with unintuitive catches in multiple places and lots of conditional logic. There is still more to reshuffle here, but that is left for next time. At least the data flow within the method is now clear.
2021-08-23 10:53:44 +04:00

295 lines
9.6 KiB
JavaScript

const Promise = require('bluebird');
const errors = require('@tryghost/errors');
const {extract, hasProvider} = require('oembed-parser');
const cheerio = require('cheerio');
const _ = require('lodash');
const {CookieJar} = require('tough-cookie');
/**
 * Looks for a known oembed provider for the given URL.
 *
 * The provider list is not always up to date with scheme or www vs non-www,
 * so the scheme (and a leading `www.` after http/https) is stripped and four
 * variations are tried in order: http, https, http + www, https + www.
 *
 * @param {string} url
 * @returns {{url: string, provider: *}} the first matching variation with its
 * provider, otherwise the original URL with the last (falsy) lookup result
 */
const findUrlWithProvider = (url) => {
    const strippedUrl = url.replace(/^\/\/|^https?:\/\/(?:www\.)?/, '');
    const prefixes = ['http://', 'https://', 'http://www.', 'https://www.'];

    let provider;

    for (const prefix of prefixes) {
        const candidate = `${prefix}${strippedUrl}`;
        provider = hasProvider(candidate);

        // stop at the first variation the provider list recognises
        if (provider) {
            return {url: candidate, provider};
        }
    }

    // nothing matched, hand back the URL exactly as it was given
    return {url, provider};
};
/**
* @typedef {Object} Ii18n
* @prop {(key: string) => string} t
*/
/**
* @typedef {Object} IConfig
* @prop {(key: string) => string} get
*/
/**
* @typedef {(url: string, config: Object) => Promise} IExternalRequest
*/
class OEmbed {
/**
*
* @param {Object} dependencies
* @param {Ii18n} dependencies.i18n
* @param {IConfig} dependencies.config
* @param {IExternalRequest} dependencies.externalRequest
*/
constructor({config, externalRequest, i18n}) {
this.config = config;
this.externalRequest = externalRequest;
this.i18n = i18n;
}
unknownProvider(url) {
return Promise.reject(new errors.ValidationError({
message: this.i18n.t('errors.api.oembed.unknownProvider'),
context: url
}));
}
knownProvider(url) {
return extract(url).catch((err) => {
return Promise.reject(new errors.InternalServerError({
message: err.message
}));
});
}
errorHandler(url) {
return (err) => {
// allow specific validation errors through for better error messages
if (errors.utils.isIgnitionError(err) && err.errorType === 'ValidationError') {
return Promise.reject(err);
}
// default to unknown provider to avoid leaking any app specifics
return this.unknownProvider(url);
};
}
async fetchBookmarkData(url) {
const metascraper = require('metascraper')([
require('metascraper-url')(),
require('metascraper-title')(),
require('metascraper-description')(),
require('metascraper-author')(),
require('metascraper-publisher')(),
require('metascraper-image')(),
require('metascraper-logo-favicon')(),
require('metascraper-logo')()
]);
let scraperResponse;
try {
const cookieJar = new CookieJar();
const response = await this.externalRequest(url, {cookieJar});
const html = response.body;
scraperResponse = await metascraper({html, url});
} catch (err) {
return Promise.reject(err);
}
const metadata = Object.assign({}, scraperResponse, {
thumbnail: scraperResponse.image,
icon: scraperResponse.logo
});
// We want to use standard naming for image and logo
delete metadata.image;
delete metadata.logo;
if (metadata.title) {
return Promise.resolve({
type: 'bookmark',
url,
metadata
});
}
return Promise.reject(new errors.ValidationError({
message: this.i18n.t('errors.api.oembed.insufficientMetadata'),
context: url
}));
}
isIpOrLocalhost(url) {
try {
const IPV4_REGEX = /^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/;
const IPV6_REGEX = /:/; // fqdns will not have colons
const HTTP_REGEX = /^https?:/i;
const siteUrl = new URL(this.config.get('url'));
const {protocol, hostname, host} = new URL(url);
// allow requests to Ghost's own url through
if (siteUrl.host === host) {
return false;
}
if (!HTTP_REGEX.test(protocol) || hostname === 'localhost' || IPV4_REGEX.test(hostname) || IPV6_REGEX.test(hostname)) {
return true;
}
return false;
} catch (e) {
return true;
}
}
/**
* @param {string} _url
* @param {string} [cardType]
*
* @returns {Promise<Object>}
*/
fetchOembedData(_url, cardType) {
// parse the url then validate the protocol and host to make sure it's
// http(s) and not an IP address or localhost to avoid potential access to
// internal network endpoints
if (this.isIpOrLocalhost(_url)) {
return this.unknownProvider();
}
// check against known oembed list
let {url, provider} = findUrlWithProvider(_url);
if (provider) {
return this.knownProvider(url);
}
// url not in oembed list so fetch it in case it's a redirect or has a
// <link rel="alternate" type="application/json+oembed"> element
const cookieJar = new CookieJar();
return this.externalRequest(url, {
method: 'GET',
timeout: 2 * 1000,
followRedirect: true,
cookieJar
}).then((pageResponse) => {
// url changed after fetch, see if we were redirected to a known oembed
if (pageResponse.url !== url) {
({url, provider} = findUrlWithProvider(pageResponse.url));
if (provider) {
return this.knownProvider(url);
}
}
// check for <link rel="alternate" type="application/json+oembed"> element
let oembedUrl;
try {
oembedUrl = cheerio('link[type="application/json+oembed"]', pageResponse.body).attr('href');
} catch (e) {
return this.unknownProvider(url);
}
if (oembedUrl) {
// make sure the linked url is not an ip address or localhost
if (this.isIpOrLocalhost(oembedUrl)) {
return this.unknownProvider(oembedUrl);
}
// for standard WP oembed's we want to insert a bookmark card rather than their blockquote+script
// which breaks in the editor and most Ghost themes. Only fallback if card type was not explicitly chosen
if (!cardType && oembedUrl.match(/wp-json\/oembed/)) {
return;
}
// fetch oembed response from embedded rel="alternate" url
return this.externalRequest(oembedUrl, {
method: 'GET',
json: true,
timeout: 2 * 1000,
followRedirect: true,
cookieJar
}).then((oembedResponse) => {
// validate the fetched json against the oembed spec to avoid
// leaking non-oembed responses
const body = oembedResponse.body;
const hasRequiredFields = body.type && body.version;
const hasValidType = ['photo', 'video', 'link', 'rich'].includes(body.type);
if (hasRequiredFields && hasValidType) {
// extract known oembed fields from the response to limit leaking of unrecognised data
const knownFields = [
'type',
'version',
'html',
'url',
'title',
'width',
'height',
'author_name',
'author_url',
'provider_name',
'provider_url',
'thumbnail_url',
'thumbnail_width',
'thumbnail_height'
];
const oembed = _.pick(body, knownFields);
// ensure we have required data for certain types
if (oembed.type === 'photo' && !oembed.url) {
return;
}
if ((oembed.type === 'video' || oembed.type === 'rich') && (!oembed.html || !oembed.width || !oembed.height)) {
return;
}
// return the extracted object, don't pass through the response body
return oembed;
}
}).catch(() => {});
}
});
}
/**
* @param {string} url - oembed URL
* @param {string} type - card type
*
* @returns {Promise<Object>}
*/
async fetchOembedDataFromUrl(url, type) {
let data;
try {
if (type === 'bookmark') {
return this.fetchBookmarkData(url);
}
data = await this.fetchOembedData(url);
if (!data && !type) {
data = await this.fetchBookmarkData(url);
}
if (!data) {
data = await this.unknownProvider(url);
}
return data;
} catch (e) {
return this.errorHandler(url);
}
}
}
module.exports = OEmbed;