From a9759736d6c9e4ac7ec2df1620a84205c40c9c0a Mon Sep 17 00:00:00 2001 From: Hannah Wolfe Date: Tue, 16 Jun 2020 11:42:32 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Fixed=20private=20blogging=20exp?= =?UTF-8?q?osing=20404=20and=20robots=20(#11922)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - There were various cases where it was possible to trigger a private site to display a 404 instead of redirecting to /private/ - Private mode was also not always displaying the correct robots.txt - This PR includes tests for all cases in test/frontend-acceptance/default_routes_spec.js & where possible the unit tests have also been updated for completeness - Fixing the 404 issues required - Better handling of paths using req.path instead of req.url in filterPrivateRoutes - Additional error handling, to cover the case that a tag/author RSS feed does not exist - Fixing the robots.txt required the order of middleware to be changed, so that private blogging gets a chance to render first - NOTE private blogging is the only app with a setupMiddleware function so nothing else is affected --- core/frontend/apps/private-blogging/index.js | 4 ++ .../apps/private-blogging/lib/middleware.js | 39 ++++++++++++-- core/server/web/site/app.js | 21 +++++--- .../default_routes_spec.js | 52 +++++++++++++++++++ .../apps/private-blogging/middleware_spec.js | 46 ++++++++++++++++ 5 files changed, 150 insertions(+), 12 deletions(-) diff --git a/core/frontend/apps/private-blogging/index.js b/core/frontend/apps/private-blogging/index.js index 5e1ea94c57..114948dc4b 100644 --- a/core/frontend/apps/private-blogging/index.js +++ b/core/frontend/apps/private-blogging/index.js @@ -42,5 +42,9 @@ module.exports = { setupMiddleware: function setupMiddleware(siteApp) { siteApp.use(middleware.checkIsPrivate); siteApp.use(middleware.filterPrivateRoutes); + }, + + setupErrorHandling: function setupErrorHandling(siteApp) { + siteApp.use(middleware.handle404); } }; diff --git a/core/frontend/apps/private-blogging/lib/middleware.js b/core/frontend/apps/private-blogging/lib/middleware.js index adde63c974..09a4df313c 100644 --- a/core/frontend/apps/private-blogging/lib/middleware.js +++ b/core/frontend/apps/private-blogging/lib/middleware.js @@ -50,11 +50,18 @@ const privateBlogging = { }, filterPrivateRoutes: function filterPrivateRoutes(req, res, next) { - if (!res.isPrivateBlog || req.url.lastIndexOf(privateRoute, 0) === 0) { + // If this site is not in private mode, skip + if (!res.isPrivateBlog) { return next(); } - if (req.url.lastIndexOf('/robots.txt', 0) === 0) { + // CASE: this is the /private/ page, continue (allow this to be rendered) + if (req.path === `${privateRoute}`) { + return next(); + } + + // CASE: this is the robots.txt file, serve a special private version + if (req.path === '/robots.txt') { return fs.readFile(path.resolve(__dirname, '../', 'robots.txt'), function readFile(err, buf) { if (err) { return next(err); @@ -71,9 +78,9 @@ const privateBlogging = { } // CASE: Allow private RSS feed urls. - // Any url which contains the hash and the postfix /rss is allowed to access a private rss feed without - // a session. As soon as a path matches, we rewrite the url. Even Express uses rewriting when using `app.use()`. - if (req.url.indexOf(settingsCache.get('public_hash') + '/rss') !== -1) { + // If the path matches the private rss feed URL we rewrite the url. Even Express uses rewriting when using `app.use()`. + let isPrivateRSS = new RegExp(`/${settingsCache.get('public_hash')}/rss(/)?$`); + if (isPrivateRSS.test(req.path)) { req.url = req.url.replace(settingsCache.get('public_hash') + '/', ''); return next(); } @@ -101,6 +108,7 @@ const privateBlogging = { } else { let redirectUrl = urlUtils.urlFor({relativeUrl: privateRoute}); redirectUrl += '?r=' + encodeURIComponent(req.url); + return res.redirect(redirectUrl); } }, @@ -147,6 +155,27 @@ const privateBlogging = { }; return next(); } + }, + + /** + * We should never render a 404 error for private sites, as these can leak information + */ + handle404: function handle404(err, req, res, next) { + // CASE: not a private site, skip to next handler + if (!res.isPrivateBlog) { + return next(err); + } + + // CASE: not a private 404, something else went wrong, show an error + if (err.statusCode !== 404) { + return next(err); + } + + // CASE: 404 - redirect this page back to /private/ if the user isn't verified + return privateBlogging.authenticatePrivateSession(req, res, function onSessionVerified() { + // CASE: User is logged in, render an error + return next(err); + }); } }; diff --git a/core/server/web/site/app.js b/core/server/web/site/app.js index 6362d0981b..43da6f1558 100644 --- a/core/server/web/site/app.js +++ b/core/server/web/site/app.js @@ -135,13 +135,6 @@ module.exports = function setupSiteApp(options = {}) { siteApp.use(themeMiddleware); debug('Themes done'); - // Theme static assets/files - siteApp.use(mw.staticTheme()); - debug('Static content done'); - - // Serve robots.txt if not found in theme - siteApp.use(mw.servePublicFile('robots.txt', 'text/plain', constants.ONE_HOUR_S)); - // setup middleware for internal apps // @TODO: refactor this to be a proper app middleware hook for internal apps config.get('apps:internal').forEach((appName) => { @@ -152,6 +145,13 @@ module.exports = function setupSiteApp(options = {}) { } }); + // Theme static assets/files + siteApp.use(mw.staticTheme()); + debug('Static content done'); + + // Serve robots.txt if not found in theme + siteApp.use(mw.servePublicFile('robots.txt', 'text/plain', constants.ONE_HOUR_S)); + // site map - this should probably be refactored to be an internal app sitemapHandler(siteApp); debug('Internal apps done'); @@ -183,6 +183,13 @@ module.exports = function setupSiteApp(options = {}) { // ### Error handlers siteApp.use(shared.middlewares.errorHandler.pageNotFound); + config.get('apps:internal').forEach((appName) => { + const app = require(path.join(config.get('paths').internalAppPath, appName)); + + if (Object.prototype.hasOwnProperty.call(app, 'setupErrorHandling')) { + app.setupErrorHandling(siteApp); + } + }); siteApp.use(shared.middlewares.errorHandler.handleThemeResponse); debug('Site setup end'); diff --git a/test/frontend-acceptance/default_routes_spec.js b/test/frontend-acceptance/default_routes_spec.js index 6eae9713ef..2976eb6fb0 100644 --- a/test/frontend-acceptance/default_routes_spec.js +++ b/test/frontend-acceptance/default_routes_spec.js @@ -419,6 +419,19 @@ describe('Default Frontend routing', function () { .end(doEnd(done)); }); + it('/private/?r=%2Fwelcome%2F should not redirect', function (done) { + request.get('/private/?r=%2Fwelcome%2F') + .expect(200) + .end(doEnd(done)); + }); + + it('should redirect, NOT 404 for private route with extra path', function (done) { + request.get('/private/welcome/') + .expect('Location', '/private/?r=%2Fprivate%2Fwelcome%2F') + .expect(302) + .end(doEnd(done)); + }); + it('should still serve private RSS feed', function (done) { request.get(`/${settingsCache.get('public_hash')}/rss/`) .expect(200) @@ -429,5 +442,44 @@ describe('Default Frontend routing', function () { doEnd(done)(err, res); }); }); + + it('should still serve private tag RSS feed', function (done) { + request.get(`/tag/getting-started/${settingsCache.get('public_hash')}/rss/`) + .expect(200) + .expect('Cache-Control', testUtils.cacheRules.private) + .expect('Content-Type', 'text/xml; charset=utf-8') + .end(function (err, res) { + res.text.should.match(//); + doEnd(done)(err, res); + }); + }); + + it('should redirect, NOT 404 for private tag RSS feed with extra path', function (done) { + request.get(`/tag/getting-started/${settingsCache.get('public_hash')}/rss/hack/`) + .expect('Location', `/private/?r=%2Ftag%2Fgetting-started%2F${settingsCache.get('public_hash')}%2Frss%2Fhack%2F`) + .expect(302) + .end(doEnd(done)); + }); + + // NOTE: this case is covered by extra error handling, and cannot be unit tested + it('should redirect, NOT 404 for unknown private RSS feed', function (done) { + // NOTE: the redirect will be to /hack/rss because we strip the hash from the URL before trying to serve RSS + // This isn't ideal, but it's better to expose this internal logic than it is a 404 page + request.get(`/hack/${settingsCache.get('public_hash')}/rss/`) + .expect('Location', '/private/?r=%2Fhack%2Frss%2F') + .expect(302) + .end(doEnd(done)); + }); + + // NOTE: this test extends the unit test, checking that there is no other robots.txt middleware overriding private blogging + it('should serve private robots.txt', function (done) { + request.get('/robots.txt') + .expect('Cache-Control', 'public, max-age=3600000') + .expect(200) + .end(function (err, res) { + res.text.should.match('User-agent: *\nDisallow: /'); + doEnd(done)(err, res); + }); + }); }); }); diff --git a/test/unit/apps/private-blogging/middleware_spec.js b/test/unit/apps/private-blogging/middleware_spec.js index f978119ab1..be2c61f3d0 100644 --- a/test/unit/apps/private-blogging/middleware_spec.js +++ b/test/unit/apps/private-blogging/middleware_spec.js @@ -69,6 +69,12 @@ describe('Private Blogging', function () { res.redirect.called.should.be.true(); res.redirect.calledWith('http://127.0.0.1:2369/').should.be.true(); }); + + it('handle404 should still 404', function () { + privateBlogging.handle404(new errors.NotFoundError(), req, res, next); + next.called.should.be.true(); + (next.firstCall.args[0] instanceof errors.NotFoundError).should.eql(true); + }); }); describe('Private Mode Enabled', function () { @@ -95,6 +101,22 @@ describe('Private Blogging', function () { }); describe('Logged Out Behaviour', function () { + it('authenticatePrivateSession should redirect', function () { + req.path = req.url = '/welcome/'; + privateBlogging.authenticatePrivateSession(req, res, next); + next.called.should.be.false(); + res.redirect.called.should.be.true(); + res.redirect.calledWith('/private/?r=%2Fwelcome%2F').should.be.true(); + }); + + it('handle404 should redirect', function () { + req.path = req.url = '/welcome/'; + privateBlogging.handle404(new errors.NotFoundError(), req, res, next); + next.called.should.be.false(); + res.redirect.called.should.be.true(); + res.redirect.calledWith('/private/?r=%2Fwelcome%2F').should.be.true(); + }); + describe('Site privacy managed by filterPrivateRoutes', function () { it('should call next for the /private/ route', function () { req.path = req.url = '/private/'; @@ -102,6 +124,14 @@ describe('Private Blogging', function () { next.called.should.be.true(); }); + it('should redirect to /private/ for private route with extra path', function () { + req.path = req.url = '/private/welcome/'; + + privateBlogging.filterPrivateRoutes(req, res, next); + res.redirect.calledOnce.should.be.true(); + res.redirect.calledWith('/private/?r=%2Fprivate%2Fwelcome%2F').should.be.true(); + }); + it('should redirect to /private/ for sitemap', function () { req.path = req.url = '/sitemap.xml'; @@ -152,6 +182,8 @@ describe('Private Blogging', function () { }); it('should render custom robots.txt', function () { + // Note this test doesn't cover the full site behaviour, + // another robots.txt can be incorrectly served if middleware is out of order req.url = req.path = '/robots.txt'; res.writeHead = sinon.spy(); res.end = sinon.spy(); @@ -180,6 +212,14 @@ describe('Private Blogging', function () { next.called.should.be.true(); req.url.should.eql('/tag/getting-started/rss/'); }); + + it('should redirect to /private/ for private rss with extra path', function () { + req.url = req.originalUrl = req.path = '/777aaa/rss/hackme/'; + + privateBlogging.filterPrivateRoutes(req, res, next); + res.redirect.calledOnce.should.be.true(); + res.redirect.calledWith('/private/?r=%2F777aaa%2Frss%2Fhackme%2F').should.be.true(); + }); }); describe('/private/ route', function () { @@ -264,6 +304,12 @@ describe('Private Blogging', function () { next.called.should.be.true(); }); + it('handle404 should still 404', function () { + privateBlogging.handle404(new errors.NotFoundError(), req, res, next); + next.called.should.be.true(); + (next.firstCall.args[0] instanceof errors.NotFoundError).should.eql(true); + }); + describe('Site privacy managed by filterPrivateRoutes', function () { it('should 404 for standard public /rss/ requests', function () { req.url = req.path = '/rss/';