mirror of
https://github.com/TryGhost/Ghost.git
synced 2024-12-19 08:31:43 +03:00
93ea9a2976
refs https://github.com/TryGhost/Toolbox/issues/524 - Fetching media from a remote server is an expensive network operation. Because the same image is likely to be reused across different posts or in multiple places within the content, we can avoid redundant fetches by adding caching to the remote media fetch method.
286 lines
9.1 KiB
JavaScript
286 lines
9.1 KiB
JavaScript
const mime = require('mime-types');
|
|
const request = require('@tryghost/request');
|
|
const errors = require('@tryghost/errors');
|
|
const logging = require('@tryghost/logging');
|
|
const path = require('path');
|
|
|
|
/**
 * Downloads media referenced from a set of external domains, saves it through
 * the configured storage adapters and rewrites the references on posts,
 * posts meta, tags and users so they point at the locally stored copies.
 */
class ExternalMediaInliner {
    /** @type {object} */
    #PostModel;

    /** @type {object} */
    #PostMetaModel;

    /** @type {object} */
    #TagModel;

    /** @type {object} */
    #UserModel;

    /**
     *
     * @param {Object} deps
     * @param {Object} deps.PostModel - Post model
     * @param {Object} deps.PostMetaModel - PostMeta model
     * @param {Object} deps.TagModel - Tag model
     * @param {Object} deps.UserModel - User model
     * @param {(extension) => import('ghost-storage-base')} deps.getMediaStorage - getMediaStorage
     */
    constructor(deps) {
        this.#PostModel = deps.PostModel;
        this.#PostMetaModel = deps.PostMetaModel;
        this.#TagModel = deps.TagModel;
        this.#UserModel = deps.UserModel;
        this.getMediaStorage = deps.getMediaStorage;
    }

    /**
     * Downloads a remote media file. Failures are logged and reported as `null`
     * so that a single broken URL does not abort the whole inlining run.
     *
     * @param {string} requestURL - url of remote media
     * @returns {Promise<Object|null>} the response, or null when the download failed
     */
    async #getRemoteMedia(requestURL) {
        // @NOTE: this is the most expensive operation in the whole inlining process
        //        we should consider caching the results to improve performance
        try {
            return await request(requestURL, {
                followRedirect: true,
                // NOTE(review): presumably keeps the body as a raw Buffer - confirm against @tryghost/request
                encoding: null
            });
        } catch (error) {
            // NOTE: add special case for 404s
            logging.error(`Error downloading remote media: ${requestURL}`);
            logging.error(new errors.DataImportError({
                err: error
            }));

            return null;
        }
    }

    /**
     * Derives the file name, extension and contents of a downloaded media file.
     *
     * @param {string} requestURL - url the media was downloaded from
     * @param {Object} response - response from request
     * @returns {{fileBuffer: any, filename: string, extension: string}}
     */
    #extractFileDataFromResponse(requestURL, response) {
        const contentType = response.headers['content-type'];

        // Drop the fragment and the query string BEFORE taking the last path
        // segment, otherwise a "/" inside the query (e.g. "?ref=a/b") would be
        // mistaken for a path separator and produce a bogus file name.
        const [withoutFragment] = requestURL.split('#');
        const [withoutQuery] = withoutFragment.split('?');
        const filename = withoutQuery.split('/').pop();

        // Prefer the extension advertised by the content type, fall back to
        // whatever follows the last dot of the file name.
        const extension = mime.extension(contentType) || filename.split('.').pop();

        return {
            fileBuffer: response.body,
            filename,
            extension: `.${extension}`
        };
    }

    /**
     * Saves a downloaded media file using the storage adapter registered for
     * its extension.
     *
     * @param {Object} media - media to store locally
     * @returns {Promise<string|null>} - path to stored media, null when no adapter handles the extension
     */
    async #storeMediaLocally(media) {
        const storage = this.getMediaStorage(media.extension);

        if (!storage) {
            logging.warn(`No storage adapter found for file extension: ${media.extension}`);
            return null;
        }

        // @NOTE: this is extremely convoluted and should live on a
        //        storage adapter level
        const targetDir = storage.getTargetDir(storage.storagePath);
        const uniqueFileName = await storage.getUniqueFileName({
            name: media.filename
        }, targetDir);
        const targetPath = path.relative(storage.storagePath, uniqueFileName);

        return await storage.saveRaw(media.fileBuffer, targetPath);
    }

    /**
     * Runs the whole download -> extract -> store pipeline for a single URL.
     *
     * @param {string} src - url of remote media
     * @returns {Promise<string|null>} `__GHOST_URL__`-prefixed local path, or null when any step failed
     */
    async #inlineMedia(src) {
        const response = await this.#getRemoteMedia(src);

        if (!response) {
            return null;
        }

        const media = this.#extractFileDataFromResponse(src, response);
        const filePath = await this.#storeMediaLocally(media);

        return filePath ? `__GHOST_URL__${filePath}` : null;
    }

    /**
     * Inlines every `"src":"<url>"` occurrence in a serialized mobiledoc whose
     * url starts with one of the given domains.
     *
     * @param {string|null|undefined} mobiledoc - serialized mobiledoc
     * @param {string[]} domains - domains to inline media from
     * @returns {Promise<string|null|undefined>} mobiledoc with inlined sources (returned untouched when empty)
     */
    async #inlineMobiledoc(mobiledoc, domains) {
        // Nothing to scan; returning early keeps the caller's other fields
        // (e.g. feature_image) from being skipped because of a TypeError.
        if (!mobiledoc) {
            return mobiledoc;
        }

        for (const domain of domains) {
            // Domains are literal prefixes (same as in #inlineFields), so regex
            // metacharacters such as "." must not act as wildcards.
            const escapedDomain = domain.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
            const regex = new RegExp(`"src":"(${escapedDomain}.*?)"`, 'igm');

            // De-duplicate so a url used several times is fetched and stored once.
            const uniqueSources = new Set(
                Array.from(mobiledoc.matchAll(regex), ([, src]) => src)
            );
            const inlinedBySource = new Map();

            for (const src of uniqueSources) {
                const inlinedSrc = await this.#inlineMedia(src);

                if (inlinedSrc) {
                    inlinedBySource.set(src, inlinedSrc);
                    logging.info(`Inlined media: ${src} -> ${inlinedSrc}`);
                }
            }

            if (inlinedBySource.size > 0) {
                // Rewrite exactly the attributes that were matched (all of them),
                // leaving failed downloads and unrelated text untouched.
                mobiledoc = mobiledoc.replace(regex, (match, src) => {
                    const inlinedSrc = inlinedBySource.get(src);

                    if (inlinedSrc === undefined) {
                        return match;
                    }

                    // match is `"src":"<src>"`: keep the key part, swap the value
                    const keyPart = match.slice(0, match.length - src.length - 1);
                    return `${keyPart}${inlinedSrc}"`;
                });
            }
        }

        return mobiledoc;
    }

    /**
     * Inlines the media referenced by the given fields of a single model.
     *
     * @param {Object} resourceModel - one of PostModel, TagModel, UserModel instances
     * @param {String[]} fields - fields to inline
     * @param {String[]} domains - domains to inline media from
     * @returns {Promise<Object>} - updated fields map with local media paths
     */
    async #inlineFields(resourceModel, fields, domains) {
        const updatedFields = {};

        for (const field of fields) {
            const src = resourceModel.get(field);

            // A single matching domain is enough; checking with `some` avoids
            // downloading the same file again when domains overlap.
            if (!src || !domains.some(domain => src.startsWith(domain))) {
                continue;
            }

            const inlinedSrc = await this.#inlineMedia(src);

            if (inlinedSrc) {
                updatedFields[field] = inlinedSrc;
                logging.info(`Added media to inline: ${src} -> ${inlinedSrc}`);
            }
        }

        return updatedFields;
    }

    /**
     * Inlines plain url fields for every resource of a model and persists the
     * changes. Errors are logged per resource so one failure does not stop the rest.
     *
     * @param {Object[]} resources - array of model instances
     * @param {Object} model - resource model
     * @param {string[]} fields - fields to inline
     * @param {string[]} domains - domains to inline media from
     */
    async #inlineSimpleFields(resources, model, fields, domains) {
        logging.info(`Starting inlining external media for ${resources?.length} ${model.tableName}`);

        for (const resource of resources) {
            try {
                const updatedFields = await this.#inlineFields(resource, fields, domains);

                if (Object.keys(updatedFields).length > 0) {
                    await model.edit(updatedFields, {
                        id: resource.id,
                        context: {
                            internal: true
                        }
                    });
                }
            } catch (err) {
                logging.error(`Error inlining media for ${model.tableName}: ${resource.id}`);
                logging.error(new errors.DataImportError({
                    err
                }));
            }
        }
    }

    /**
     * Inlines external media for posts (mobiledoc + feature image), posts meta,
     * tags and users.
     *
     * @param {string[]} domains domains to inline media from
     */
    async inline(domains) {
        const {data: posts} = await this.#PostModel.findPage({
            limit: 'all',
            status: 'all'
        });
        const postsInliningFields = [
            'feature_image'
        ];

        logging.info(`Starting inlining external media for posts: ${posts?.length}`);

        for (const post of posts) {
            try {
                const originalMobiledoc = post.get('mobiledoc');
                const inlinedMobiledoc = await this.#inlineMobiledoc(originalMobiledoc, domains);
                const updatedFields = await this.#inlineFields(post, postsInliningFields, domains);

                if (inlinedMobiledoc !== originalMobiledoc) {
                    updatedFields.mobiledoc = inlinedMobiledoc;
                }

                if (Object.keys(updatedFields).length > 0) {
                    await this.#PostModel.edit(updatedFields, {
                        id: post.id,
                        context: {
                            internal: true
                        }
                    });
                }
            } catch (err) {
                logging.error(`Error inlining media for post: ${post.id}`);
                logging.error(new errors.DataImportError({
                    err
                }));
            }
        }

        const {data: postsMetas} = await this.#PostMetaModel.findPage({
            limit: 'all'
        });
        const postsMetaInliningFields = [
            'og_image',
            'twitter_image'
        ];

        await this.#inlineSimpleFields(postsMetas, this.#PostMetaModel, postsMetaInliningFields, domains);

        const {data: tags} = await this.#TagModel.findPage({
            limit: 'all'
        });
        const tagInliningFields = [
            'feature_image',
            'og_image',
            'twitter_image'
        ];

        await this.#inlineSimpleFields(tags, this.#TagModel, tagInliningFields, domains);

        const {data: users} = await this.#UserModel.findPage({
            limit: 'all'
        });
        const userInliningFields = [
            'profile_image',
            'cover_image'
        ];

        await this.#inlineSimpleFields(users, this.#UserModel, userInliningFields, domains);

        logging.info('Finished inlining external media for posts, tags, and users');
    }
}
|
|
|
|
// The ExternalMediaInliner class is the sole export of this module.
module.exports = ExternalMediaInliner;
|