Ghost/core/server/services/url/utils.js
Katharina Irrgang b392d1925a
Dynamic Routing Beta (#9596)
refs #9601

### Dynamic Routing

This is the beta version of dynamic routing. 

- we had a initial implementation of "channels" available in the codebase
- we have removed and moved this implementation 
- there is now a centralised place for dynamic routing - server/services/routing
- each routing component is represented by a router type e.g. collections, routes, static pages, taxonomies, rss, preview of posts
- keep as much as possible logic of routing helpers, middlewares and controllers
- ensure test coverage
- connect all the things together
  - yaml file + validation
  - routing + routers
  - url service
  - sitemaps
  - url access
- deeper implementation of yaml validations
  - e.g. hard require slashes
- ensure routing hierarchy/order
  - e.g. you enable the subscriber app
  - you have a custom static page, which lives under the same slug /subscribe
  - static pages are stronger than apps
  - e.g. the first collection owns the post it has filtered
  - a post cannot live in two collections
- ensure apps are still working and hook into the routers layer (or better said: and register in the routing service)
- put as much as possible comments to the code base for better understanding
- ensure a clean debug log
- ensure we can unmount routes
  - e.g. you have a collection permalink of /:slug/ represented by {globals.permalink}
  - and you change the permalink in the admin to dated permalink
  - the express route get's refreshed from /:slug/ to /:year/:month/:day/:slug/
  - unmount without server restart, yey
- ensure we are backwards compatible
  - e.g. render home.hbs for collection index if collection route is /
  - ensure you can access your configured permalink from the settings table with {globals.permalink}

### Render 503 if url service did not finish

- return 503 if the url service has not finished generating the resource urls

### Rewrite sitemaps

- we have rewritten the sitemaps "service", because the url generator does no longer happen on runtime
- we generate all urls on bootstrap
- the sitemaps service will consume created resource and router urls
- these urls will be shown on the xml pages
- we listen on url events
- we listen on router events
- we no longer have to fetch the resources, which is nice
  - the urlservice pre-fetches resources and emits their urls
- the urlservice is the only component who knows which urls are valid
- i made some ES6 adaptions
- we keep the caching logic -> only regenerate xml if there is a change
- updated tests
- checked test coverage (100%)

### Re-work usage of Url utility

- replace all usages of `urlService.utils.urlFor` by `urlService.getByResourceId`
  - only for resources e.g. post, author, tag
- this is important, because with dynamic routing we no longer create static urls based on the settings permalink on runtime
- adapt url utility
- adapt tests
2018-06-05 19:02:20 +02:00

443 lines
14 KiB
JavaScript

// Contains all path information to be used throughout the codebase.
// Assumes that config.url is set, and is valid
const moment = require('moment-timezone'),
_ = require('lodash'),
url = require('url'),
cheerio = require('cheerio'),
config = require('../../config'),
settingsCache = require('../settings/cache'),
// @TODO: unify this with the path in server/app.js
API_PATH = '/ghost/api/v0.1/',
STATIC_IMAGE_URL_PREFIX = 'content/images';
/**
* Returns the base URL of the blog as set in the config.
*
* Secure:
* If the request is secure, we want to force returning the blog url as https.
* Imagine Ghost runs with http, but nginx allows SSL connections.
*
* @param {boolean} secure
* @return {string} URL returns the url as defined in config, but always with a trailing `/`
*/
function getBlogUrl(secure) {
var blogUrl;
if (secure) {
blogUrl = config.get('url').replace('http://', 'https://');
} else {
blogUrl = config.get('url');
}
if (!blogUrl.match(/\/$/)) {
blogUrl += '/';
}
return blogUrl;
}
/**
* Returns a subdirectory URL, if defined so in the config.
* @return {string} URL a subdirectory if configured.
*/
function getSubdir() {
// Parse local path location
var localPath = url.parse(config.get('url')).path,
subdir;
// Remove trailing slash
if (localPath !== '/') {
localPath = localPath.replace(/\/$/, '');
}
subdir = localPath === '/' ? '' : localPath;
return subdir;
}
function deduplicateSubDir(url) {
var subDir = getSubdir(),
subDirRegex;
if (!subDir) {
return url;
}
subDir = subDir.replace(/^\/|\/+$/, '');
// we can have subdirs that match TLDs so we need to restrict matches to
// duplicates that start with a / or the beginning of the url
subDirRegex = new RegExp('(^|\/)' + subDir + '\/' + subDir + '\/');
return url.replace(subDirRegex, '$1' + subDir + '/');
}
function getProtectedSlugs() {
var subDir = getSubdir();
if (!_.isEmpty(subDir)) {
return config.get('slugs').protected.concat([subDir.split('/').pop()]);
} else {
return config.get('slugs').protected;
}
}
/** urlJoin
* Returns a URL/path for internal use in Ghost.
* @param {string} arguments takes arguments and concats those to a valid path/URL.
* @return {string} URL concatinated URL/path of arguments.
*/
function urlJoin() {
var args = Array.prototype.slice.call(arguments),
prefixDoubleSlash = false,
url;
// Remove empty item at the beginning
if (args[0] === '') {
args.shift();
}
// Handle schemeless protocols
if (args[0].indexOf('//') === 0) {
prefixDoubleSlash = true;
}
// join the elements using a slash
url = args.join('/');
// Fix multiple slashes
url = url.replace(/(^|[^:])\/\/+/g, '$1/');
// Put the double slash back at the beginning if this was a schemeless protocol
if (prefixDoubleSlash) {
url = url.replace(/^\//, '//');
}
url = deduplicateSubDir(url);
return url;
}
/**
* admin:url is optional
*/
function getAdminUrl() {
var adminUrl = config.get('admin:url'),
subDir = getSubdir();
if (!adminUrl) {
return;
}
if (!adminUrl.match(/\/$/)) {
adminUrl += '/';
}
adminUrl = urlJoin(adminUrl, subDir, '/');
adminUrl = deduplicateSubDir(adminUrl);
return adminUrl;
}
// ## createUrl
// Simple url creation from a given path
// Ensures that our urls contain the subdirectory if there is one
// And are correctly formatted as either relative or absolute
// Usage:
// createUrl('/', true) -> http://my-ghost-blog.com/
// E.g. /blog/ subdir
// createUrl('/welcome-to-ghost/') -> /blog/welcome-to-ghost/
// Parameters:
// - urlPath - string which must start and end with a slash
// - absolute (optional, default:false) - boolean whether or not the url should be absolute
// - secure (optional, default:false) - boolean whether or not to force SSL
// Returns:
// - a URL which always ends with a slash
function createUrl(urlPath, absolute, secure, trailingSlash) {
urlPath = urlPath || '/';
absolute = absolute || false;
var base;
// create base of url, always ends without a slash
if (absolute) {
base = getBlogUrl(secure);
} else {
base = getSubdir();
}
if (trailingSlash) {
if (!urlPath.match(/\/$/)) {
urlPath += '/';
}
}
return urlJoin(base, urlPath);
}
/**
* creates the url path for a post based on blog timezone and permalink pattern
*/
function replacePermalink(permalink, resource) {
let output = permalink,
primaryTagFallback = 'all',
publishedAtMoment = moment.tz(resource.published_at || Date.now(), settingsCache.get('active_timezone')),
permalinkLookUp = {
year: function () {
return publishedAtMoment.format('YYYY');
},
month: function () {
return publishedAtMoment.format('MM');
},
day: function () {
return publishedAtMoment.format('DD');
},
author: function () {
return resource.primary_author.slug;
},
primary_author: function () {
return resource.primary_author ? resource.primary_author.slug : primaryTagFallback;
},
primary_tag: function () {
return resource.primary_tag ? resource.primary_tag.slug : primaryTagFallback;
},
slug: function () {
return resource.slug;
},
id: function () {
return resource.id;
}
};
// replace tags like :slug or :year with actual values
output = output.replace(/(:[a-z_]+)/g, function (match) {
if (_.has(permalinkLookUp, match.substr(1))) {
return permalinkLookUp[match.substr(1)]();
}
});
return output;
}
// ## urlFor
// Synchronous url creation for a given context
// Can generate a url for a named path and given path.
// Determines what sort of context it has been given, and delegates to the correct generation method,
// Finally passing to createUrl, to ensure any subdirectory is honoured, and the url is absolute if needed
// Usage:
// urlFor('home', true) -> http://my-ghost-blog.com/
// E.g. /blog/ subdir
// urlFor({relativeUrl: '/my-static-page/'}) -> /blog/my-static-page/
// Parameters:
// - context - a string, or json object describing the context for which you need a url
// - data (optional) - a json object containing data needed to generate a url
// - absolute (optional, default:false) - boolean whether or not the url should be absolute
// This is probably not the right place for this, but it's the best place for now
// @TODO: rewrite, very hard to read, create private functions!
function urlFor(context, data, absolute) {
var urlPath = '/',
secure, imagePathRe,
knownObjects = ['image', 'nav'], baseUrl,
hostname,
// this will become really big
knownPaths = {
home: '/',
api: API_PATH,
sitemap_xsl: '/sitemap.xsl'
};
// Make data properly optional
if (_.isBoolean(data)) {
absolute = data;
data = null;
}
// Can pass 'secure' flag in either context or data arg
secure = (context && context.secure) || (data && data.secure);
if (_.isObject(context) && context.relativeUrl) {
urlPath = context.relativeUrl;
} else if (_.isString(context) && _.indexOf(knownObjects, context) !== -1) {
if (context === 'image' && data.image) {
urlPath = data.image;
imagePathRe = new RegExp('^' + getSubdir() + '/' + STATIC_IMAGE_URL_PREFIX);
absolute = imagePathRe.test(data.image) ? absolute : false;
if (absolute) {
// Remove the sub-directory from the URL because ghostConfig will add it back.
urlPath = urlPath.replace(new RegExp('^' + getSubdir()), '');
baseUrl = getBlogUrl(secure).replace(/\/$/, '');
urlPath = baseUrl + urlPath;
}
return urlPath;
} else if (context === 'nav' && data.nav) {
urlPath = data.nav.url;
secure = data.nav.secure || secure;
baseUrl = getBlogUrl(secure);
hostname = baseUrl.split('//')[1];
// If the hostname is present in the url
if (urlPath.indexOf(hostname) > -1
// do no not apply, if there is a subdomain, or a mailto link
&& !urlPath.split(hostname)[0].match(/\.|mailto:/)
// do not apply, if there is a port after the hostname
&& urlPath.split(hostname)[1].substring(0, 1) !== ':') {
// make link relative to account for possible mismatch in http/https etc, force absolute
urlPath = urlPath.split(hostname)[1];
urlPath = urlJoin('/', urlPath);
absolute = true;
}
}
} else if (context === 'home' && absolute) {
urlPath = getBlogUrl(secure);
// CASE: there are cases where urlFor('home') needs to be returned without trailing
// slash e. g. the `{{@blog.url}}` helper. See https://github.com/TryGhost/Ghost/issues/8569
if (data && data.trailingSlash === false) {
urlPath = urlPath.replace(/\/$/, '');
}
} else if (context === 'admin') {
urlPath = getAdminUrl() || getBlogUrl();
if (absolute) {
urlPath += 'ghost/';
} else {
urlPath = '/ghost/';
}
} else if (context === 'api') {
urlPath = getAdminUrl() || getBlogUrl();
// CASE: with or without protocol? If your blog url (or admin url) is configured to http, it's still possible that e.g. nginx allows both https+http.
// So it depends how you serve your blog. The main focus here is to avoid cors problems.
// @TODO: rename cors
if (data && data.cors) {
if (!urlPath.match(/^https:/)) {
urlPath = urlPath.replace(/^.*?:\/\//g, '//');
}
}
if (absolute) {
urlPath = urlPath.replace(/\/$/, '') + API_PATH;
} else {
urlPath = API_PATH;
}
} else if (_.isString(context) && _.indexOf(_.keys(knownPaths), context) !== -1) {
// trying to create a url for a named path
urlPath = knownPaths[context];
}
// This url already has a protocol so is likely an external url to be returned
// or it is an alternative scheme, protocol-less, or an anchor-only path
if (urlPath && (urlPath.indexOf('://') !== -1 || urlPath.match(/^(\/\/|#|[a-zA-Z0-9\-]+:)/))) {
return urlPath;
}
return createUrl(urlPath, absolute, secure);
}
function isSSL(urlToParse) {
var protocol = url.parse(urlToParse).protocol;
return protocol === 'https:';
}
function redirect301(res, redirectUrl) {
res.set({'Cache-Control': 'public, max-age=' + config.get('caching:301:maxAge')});
return res.redirect(301, redirectUrl);
}
function redirectToAdmin(status, res, adminPath) {
var redirectUrl = urlJoin(urlFor('admin'), adminPath, '/');
if (status === 301) {
return redirect301(res, redirectUrl);
}
return res.redirect(redirectUrl);
}
/**
* Make absolute URLs
* @param {string} html
* @param {string} siteUrl (blog URL)
* @param {string} itemUrl (URL of current context)
* @returns {object} htmlContent
* @description Takes html, blog url and item url and converts relative url into
* absolute urls. Returns an object. The html string can be accessed by calling `html()` on
* the variable that takes the result of this function
*/
function makeAbsoluteUrls(html, siteUrl, itemUrl) {
var htmlContent = cheerio.load(html, {decodeEntities: false});
// convert relative resource urls to absolute
['href', 'src'].forEach(function forEach(attributeName) {
htmlContent('[' + attributeName + ']').each(function each(ix, el) {
var baseUrl,
attributeValue,
parsed;
el = htmlContent(el);
attributeValue = el.attr(attributeName);
// if URL is absolute move on to the next element
try {
parsed = url.parse(attributeValue);
if (parsed.protocol) {
return;
}
// Do not convert protocol relative URLs
if (attributeValue.lastIndexOf('//', 0) === 0) {
return;
}
} catch (e) {
return;
}
// CASE: don't convert internal links
if (attributeValue[0] === '#') {
return;
}
// compose an absolute URL
// if the relative URL begins with a '/' use the blog URL (including sub-directory)
// as the base URL, otherwise use the post's URL.
baseUrl = attributeValue[0] === '/' ? siteUrl : itemUrl;
attributeValue = urlJoin(baseUrl, attributeValue);
el.attr(attributeName, attributeValue);
});
});
return htmlContent;
}
function absoluteToRelative(urlToModify) {
const urlObj = url.parse(urlToModify);
return urlObj.pathname;
}
function deduplicateDoubleSlashes(url) {
return url.replace(/\/\//g, '/');
}
module.exports.absoluteToRelative = absoluteToRelative;
module.exports.makeAbsoluteUrls = makeAbsoluteUrls;
module.exports.getProtectedSlugs = getProtectedSlugs;
module.exports.getSubdir = getSubdir;
module.exports.urlJoin = urlJoin;
module.exports.urlFor = urlFor;
module.exports.isSSL = isSSL;
module.exports.replacePermalink = replacePermalink;
module.exports.redirectToAdmin = redirectToAdmin;
module.exports.redirect301 = redirect301;
module.exports.createUrl = createUrl;
module.exports.deduplicateDoubleSlashes = deduplicateDoubleSlashes;
/**
* If you request **any** image in Ghost, it get's served via
* http://your-blog.com/content/images/2017/01/02/author.png
*
* /content/images/ is a static prefix for serving images!
*
* But internally the image is located for example in your custom content path:
* my-content/another-dir/images/2017/01/02/author.png
*/
module.exports.STATIC_IMAGE_URL_PREFIX = STATIC_IMAGE_URL_PREFIX;