Oembed meta tag fallback for unknown providers (#9827)

closes #9786

- Make GET request when url has no provider match
  - The HEAD request was made in order to send less data over the wire when
checking for redirects for urls that do not have an oembed provider
match. We are now going to look for provider meta tags within the
response of the request - rather than making a HEAD followed by a GET if
no redirect is found, this condenses that to a single request.

- Try to get OEmbed data from tag if no provider
  - Here we parse the HTML response of the resource and look for a link tag
that will give us the oembed resource url which we can use to fetch the
embed html
This commit is contained in:
Fabien O'Carroll 2018-08-27 22:02:03 +08:00 committed by Kevin Ansfield
parent 2376c614b3
commit 8ccf27340b
2 changed files with 50 additions and 5 deletions

View File

@ -2,6 +2,7 @@ const common = require('../lib/common');
const {extract, hasProvider} = require('oembed-parser');
const Promise = require('bluebird');
const request = require('../lib/request');
const cheerio = require('cheerio');
const findUrlWithProvider = function findUrlWithProvider(url) {
let provider;
@ -27,6 +28,10 @@ const findUrlWithProvider = function findUrlWithProvider(url) {
return {url, provider};
};
/**
 * Look up the oembed discovery link in an HTML document.
 *
 * Queries the markup for `<link>` elements whose `type` attribute is
 * `application/json+oembed` and reads the `href` attribute from the match.
 *
 * @param {string} html - markup to search (callers pass a response body)
 * @returns {string|undefined} the `href` value; callers treat a falsy
 *     result as "no oembed link found"
 */
const getOembedUrlFromHTML = function getOembedUrlFromHTML(html) {
    const oembedLink = cheerio('link[type="application/json+oembed"]', html);

    return oembedLink.attr('href');
};
let oembed = {
read(options) {
let {url} = options;
@ -60,7 +65,7 @@ let oembed = {
// see if the URL is a redirect to cater for shortened urls
return request(url, {
method: 'HEAD',
method: 'GET',
timeout: 2 * 1000,
followRedirect: true
}).then((response) => {
@ -69,7 +74,18 @@ let oembed = {
return provider ? knownProvider(url) : unknownProvider();
}
return unknownProvider();
const oembedUrl = getOembedUrlFromHTML(response.body);
if (!oembedUrl) {
return unknownProvider();
}
return request(oembedUrl, {
method: 'GET',
json: true
}).then((response) => {
return response.body;
});
}).catch(() => {
return unknownProvider();
});

View File

@ -41,14 +41,14 @@ describe('API: oembed', function () {
it('follows redirects to get base url', function (done) {
let redirectMock = nock('https://youtu.be')
.intercept('/yHohwmrxrto', 'HEAD')
.intercept('/yHohwmrxrto', 'GET')
.reply(302, undefined, {
// eslint-disable-next-line
'Location': 'https://www.youtube.com/watch?v=yHohwmrxrto&feature=youtu.be'
});
let videoMock = nock('https://www.youtube.com')
.intercept('/watch', 'HEAD')
.intercept('/watch', 'GET')
.query({v: 'yHohwmrxrto', feature: 'youtu.be'})
.reply(200);
@ -83,7 +83,7 @@ describe('API: oembed', function () {
it('returns error for unsupported provider', function (done) {
nock('http://example.com')
.intercept('/unknown', 'HEAD')
.intercept('/unknown', 'GET')
.reply(200);
OembedAPI.read({url: 'http://example.com/unknown'})
@ -95,6 +95,35 @@ describe('API: oembed', function () {
});
});
// Covers the fallback path: the page URL is on a host with no known oembed
// provider, but its HTML advertises an oembed endpoint through a <link> tag.
it('returns match for unsupported provider but with oembed link tag', function (done) {
// Mock the page itself: a GET returns HTML whose <head> holds a link tag of
// type application/json+oembed pointing at https://host.tld/oembed.
nock('https://host.tld')
.intercept('/page', 'GET')
.reply(200, `
<html>
<head>
<link rel="alternate" type="application/json+oembed"
href="https://host.tld/oembed" title="Oh embed"/>
</head>
</html>
`);
// Mock the advertised oembed endpoint; the handle is kept so the test can
// verify the endpoint was actually requested.
const requestMock = nock('https://host.tld')
.intercept('/oembed', 'GET')
// NOTE(review): query(true) is presumably nock's "accept any query string" matcher - confirm against nock docs
.query(true)
.reply(200, {
html: 'test'
});
OembedAPI.read({url: 'https://host.tld/page'})
.then((results) => {
// The oembed endpoint must have been hit, and its JSON body must be
// what read() resolves with.
requestMock.isDone().should.be.true;
should.exist(results);
should.exist(results.html);
results.html.should.eql('test');
done();
// Pass any rejection or failed assertion to mocha as the test error.
}).catch(done);
});
it('returns error for fetch failure', function (done) {
let requestMock = nock('https://www.youtube.com')
.get('/oembed')