2020-05-22 21:22:20 +03:00
|
|
|
const errors = require('@tryghost/errors');
|
2018-11-02 11:32:26 +03:00
|
|
|
const {extract, hasProvider} = require('oembed-parser');
|
|
|
|
const Promise = require('bluebird');
|
|
|
|
const cheerio = require('cheerio');
|
2020-04-07 13:05:48 +03:00
|
|
|
const _ = require('lodash');
|
2020-06-15 20:38:43 +03:00
|
|
|
const config = require('../../../shared/config');
|
2021-05-03 19:29:44 +03:00
|
|
|
const i18n = require('../../../shared/i18n');
|
2020-06-15 20:38:43 +03:00
|
|
|
const externalRequest = require('../../lib/request-external');
|
2018-11-02 11:32:26 +03:00
|
|
|
|
|
|
|
/**
 * Looks for a known oembed provider for the given URL, trying scheme and
 * www/non-www variations of it.
 *
 * @param {string} url - URL to look up
 * @returns {{url: string, provider: *}} the first variation that has a known
 *   provider together with the result of `hasProvider` for it; when nothing
 *   matches, the original `url` and the last (falsy) `hasProvider` result
 */
const findUrlWithProvider = (url) => {
    let provider;

    // build up a list of URL variations to test against because the oembed
    // providers list is not always up to date with scheme or www vs non-www
    //
    // the scheme is optional (so protocol-relative `//www.host` URLs also get
    // their `www.` stripped) and matched case-insensitively (so `HTTPS://`
    // does not end up embedded inside the generated variations)
    const baseUrl = url.replace(/^(?:https?:)?\/\/(?:www\.)?/i, '');
    const testUrls = [
        `http://${baseUrl}`,
        `https://${baseUrl}`,
        `http://www.${baseUrl}`,
        `https://www.${baseUrl}`
    ];

    for (const testUrl of testUrls) {
        provider = hasProvider(testUrl);
        if (provider) {
            // hand back the variation that matched so it is the one that gets extracted
            url = testUrl;
            break;
        }
    }

    return {url, provider};
};
|
|
|
|
|
2020-04-07 13:05:48 +03:00
|
|
|
/**
 * Builds the "unknown provider" validation error for a URL and returns it as
 * a rejected promise.
 *
 * @param {string} [url] - URL that could not be embedded, stored as the error context
 * @returns {Promise} promise rejected with an `errors.ValidationError`
 */
function unknownProvider(url) {
    const validationError = new errors.ValidationError({
        message: i18n.t('errors.api.oembed.unknownProvider'),
        context: url
    });

    return Promise.reject(validationError);
}
|
2018-11-02 11:32:26 +03:00
|
|
|
|
2020-04-07 13:05:48 +03:00
|
|
|
/**
 * Extracts oembed data for a URL that matched a known provider.
 *
 * @param {string} url - URL that has a known oembed provider
 * @returns {Promise} result of `extract`; a failure is re-raised as an
 *   `errors.InternalServerError` carrying the original error's message
 */
function knownProvider(url) {
    const extractOptions = {maxwidth: 1280};

    const rethrowAsInternalError = (err) => {
        throw new errors.InternalServerError({
            message: err.message
        });
    };

    return extract(url, extractOptions).catch(rethrowAsInternalError);
}
|
2018-11-02 11:32:26 +03:00
|
|
|
|
2020-04-07 13:05:48 +03:00
|
|
|
/**
 * Guards against requests to non-public endpoints.
 *
 * @param {string} url - URL that is about to be requested
 * @returns {boolean} `false` when the URL shares its host with the configured
 *   site URL or is an http(s) URL with an ordinary hostname; `true` when the
 *   protocol is not http(s), the hostname is an IP address or a localhost
 *   name, or the URL (or the configured site URL) cannot be parsed
 */
function isIpOrLocalhost(url) {
    try {
        const IPV4_REGEX = /^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/;
        const IPV6_REGEX = /:/; // fqdns will not have colons
        const HTTP_REGEX = /^https?:/i;

        const siteUrl = new URL(config.get('url'));
        const {protocol, hostname, host} = new URL(url);

        // allow requests to Ghost's own url through
        if (siteUrl.host === host) {
            return false;
        }

        if (!HTTP_REGEX.test(protocol)) {
            return true;
        }

        // a fully-qualified hostname may carry a trailing root dot
        // ("localhost."), which would otherwise slip past the exact-match
        // checks below
        const bareHostname = hostname.replace(/\.+$/, '').toLowerCase();

        // "localhost" and any name underneath it are treated as local
        const isLocalhost = bareHostname === 'localhost' || bareHostname.endsWith('.localhost');

        return isLocalhost || IPV4_REGEX.test(bareHostname) || IPV6_REGEX.test(bareHostname);
    } catch (e) {
        // anything that can't be parsed is treated as unsafe
        return true;
    }
}
|
|
|
|
|
|
|
|
// oembed types accepted from a discovered endpoint
const VALID_OEMBED_TYPES = ['photo', 'video', 'link', 'rich'];

// fields copied out of a discovered oembed response; anything else is dropped
// to limit leaking of unrecognised data
const KNOWN_OEMBED_FIELDS = [
    'type',
    'version',
    'html',
    'url',
    'title',
    'width',
    'height',
    'author_name',
    'author_url',
    'provider_name',
    'provider_url',
    'thumbnail_url',
    'thumbnail_width',
    'thumbnail_height'
];

/**
 * Validates a fetched JSON body against the oembed spec to avoid leaking
 * non-oembed responses.
 *
 * @param {Object} body - parsed JSON body of the oembed response
 * @returns {Object|undefined} object holding only the known oembed fields, or
 *   `undefined` when the body is not a usable oembed response
 */
function extractValidOembed(body) {
    const hasRequiredFields = body.type && body.version;
    const hasValidType = VALID_OEMBED_TYPES.includes(body.type);

    if (!hasRequiredFields || !hasValidType) {
        return;
    }

    const oembed = _.pick(body, KNOWN_OEMBED_FIELDS);

    // ensure we have required data for certain types
    if (oembed.type === 'photo' && !oembed.url) {
        return;
    }
    if ((oembed.type === 'video' || oembed.type === 'rich') && (!oembed.html || !oembed.width || !oembed.height)) {
        return;
    }

    // return the extracted object, don't pass through the response body
    return oembed;
}

/**
 * Fetches and validates the oembed response from an embedded rel="alternate" url.
 *
 * @param {string} oembedUrl - href taken from the page's oembed <link> element
 * @returns {Promise<Object|undefined>} validated oembed data; resolves with
 *   `undefined` when the request fails or the response is not valid oembed
 */
function fetchOembedFromLink(oembedUrl) {
    return externalRequest(oembedUrl, {
        method: 'GET',
        json: true,
        timeout: 2 * 1000,
        followRedirect: true
    }).then((oembedResponse) => {
        return extractValidOembed(oembedResponse.body);
    }).catch(() => {
        // deliberate best-effort: a failed or malformed response resolves
        // with undefined rather than rejecting
    });
}

/**
 * Resolves oembed data for a URL, first through the known providers list and
 * then through a <link type="application/json+oembed"> element on the fetched page.
 *
 * @param {string} _url - URL to embed
 * @returns {Promise<Object|undefined>} oembed data; resolves with `undefined`
 *   when no oembed data could be found, rejects with the `unknownProvider`
 *   error when a URL fails the `isIpOrLocalhost` check or the page body can't
 *   be queried for a link element
 */
function fetchOembedData(_url) {
    // parse the url then validate the protocol and host to make sure it's
    // http(s) and not an IP address or localhost to avoid potential access to
    // internal network endpoints
    if (isIpOrLocalhost(_url)) {
        // pass the url through so the error context is populated like at
        // every other unknownProvider call site
        return unknownProvider(_url);
    }

    // check against known oembed list
    let {url, provider} = findUrlWithProvider(_url);
    if (provider) {
        return knownProvider(url);
    }

    // url not in oembed list so fetch it in case it's a redirect or has a
    // <link rel="alternate" type="application/json+oembed"> element
    return externalRequest(url, {
        method: 'GET',
        timeout: 2 * 1000,
        followRedirect: true
    }).then((pageResponse) => {
        // url changed after fetch, see if we were redirected to a known oembed
        if (pageResponse.url !== url) {
            ({url, provider} = findUrlWithProvider(pageResponse.url));
            if (provider) {
                return knownProvider(url);
            }
        }

        // check for <link rel="alternate" type="application/json+oembed"> element
        let oembedUrl;
        try {
            oembedUrl = cheerio('link[type="application/json+oembed"]', pageResponse.body).attr('href');
        } catch (e) {
            return unknownProvider(url);
        }

        if (!oembedUrl) {
            return;
        }

        // make sure the linked url is not an ip address or localhost
        if (isIpOrLocalhost(oembedUrl)) {
            return unknownProvider(oembedUrl);
        }

        return fetchOembedFromLink(oembedUrl);
    });
}
|
2018-11-02 11:32:26 +03:00
|
|
|
|
2020-04-07 13:05:48 +03:00
|
|
|
module.exports = {
|
|
|
|
docName: 'oembed',
|
|
|
|
|
|
|
|
read: {
|
|
|
|
permissions: false,
|
|
|
|
data: [
|
|
|
|
'url'
|
|
|
|
],
|
|
|
|
options: [],
|
|
|
|
query({data: {url}}) {
|
|
|
|
return fetchOembedData(url).then((response) => {
|
|
|
|
return response || unknownProvider(url);
|
2018-11-02 11:32:26 +03:00
|
|
|
}).catch(() => {
|
2020-04-07 13:05:48 +03:00
|
|
|
return unknownProvider(url);
|
2018-11-02 11:32:26 +03:00
|
|
|
});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|