2023-03-03 14:08:18 +03:00
|
|
|
const mime = require('mime-types');
|
|
|
|
const request = require('@tryghost/request');
|
|
|
|
const errors = require('@tryghost/errors');
|
|
|
|
const logging = require('@tryghost/logging');
|
2023-03-07 13:41:30 +03:00
|
|
|
const path = require('path');
|
2023-03-03 14:08:18 +03:00
|
|
|
|
2023-03-03 11:15:44 +03:00
|
|
|
/**
 * Downloads media hosted on a set of external domains, stores it through the
 * configured storage adapters and rewrites the references found in posts
 * (mobiledoc + feature image), posts meta, tags and users so that they point
 * at the locally stored copies.
 */
class ExternalMediaInliner {
    /** @type {object} */
    #PostModel;

    /** @type {object} */
    #PostMetaModel;

    /** @type {object} */
    #TagModel;

    /** @type {object} */
    #UserModel;

    /**
     *
     * @param {Object} deps
     * @param {Object} deps.PostModel - Post model
     * @param {Object} deps.PostMetaModel - PostMeta model
     * @param {Object} deps.TagModel - Tag model
     * @param {Object} deps.UserModel - User model
     * @param {(extension) => import('ghost-storage-base')} deps.getMediaStorage - getMediaStorage
     */
    constructor(deps) {
        this.#PostModel = deps.PostModel;
        this.#PostMetaModel = deps.PostMetaModel;
        this.#TagModel = deps.TagModel;
        this.#UserModel = deps.UserModel;
        this.getMediaStorage = deps.getMediaStorage;
    }

    /**
     * Downloads a remote media file. Failures are logged and reported as
     * `null` so that one broken URL does not abort the whole inlining run.
     *
     * @param {string} requestURL - url of remote media
     * @returns {Promise<Object|null>} response, or null when the download failed
     */
    async #getRemoteMedia(requestURL) {
        // @NOTE: this is the most expensive operation in the whole inlining process
        //        we should consider caching the results to improve performance
        try {
            return await request(requestURL, {
                followRedirect: true,
                // NOTE(review): presumably makes the client return the raw body
                // (a Buffer) instead of a decoded string - confirm against the
                // request library
                encoding: null
            });
        } catch (error) {
            // NOTE: add special case for 404s
            logging.error(`Error downloading remote media: ${requestURL}`);
            logging.error(new errors.DataImportError({
                err: error
            }));

            return null;
        }
    }

    /**
     * Derives the file name and extension of a downloaded media file.
     *
     * @param {string} requestURL - url the media was downloaded from
     * @param {Object} response - response from request
     * @returns {{fileBuffer: any, filename: string, extension: string}}
     *          `extension` includes the leading dot, or is an empty string
     *          when no extension could be determined
     */
    #extractFileDataFromResponse(requestURL, response) {
        const contentType = response.headers?.['content-type'];

        // Drop the fragment and the query string BEFORE picking the last path
        // segment: both may contain slashes (e.g. `?redirect=/a/b.jpg`), which
        // would otherwise be mistaken for the file name.
        const [urlWithoutFragment] = requestURL.split('#');
        const [urlWithoutQuery] = urlWithoutFragment.split('?');
        const filename = urlWithoutQuery.split('/').pop();

        // Prefer the extension advertised by the content type; fall back to the
        // extension in the file name only if the name actually has one.
        const extension = mime.extension(contentType) || path.extname(filename).slice(1);

        return {
            fileBuffer: response.body,
            filename: filename,
            extension: extension ? `.${extension}` : ''
        };
    }

    /**
     * Stores a downloaded media file using the storage adapter that handles
     * its extension.
     *
     * @param {Object} media - media to store locally
     * @returns {Promise<string|null>} path to stored media, or null when no
     *          storage adapter is available for the media extension
     */
    async #storeMediaLocally(media) {
        const storage = this.getMediaStorage(media.extension);

        if (!storage) {
            logging.warn(`No storage adapter found for file extension: ${media.extension}`);
            return null;
        }

        // @NOTE: this is extremely convoluted and should live on a
        //        storage adapter level
        const targetDir = storage.getTargetDir(storage.storagePath);
        const uniqueFileName = await storage.getUniqueFileName({
            name: media.filename
        }, targetDir);
        // saveRaw is handed a path relative to the storage root
        const targetPath = path.relative(storage.storagePath, uniqueFileName);

        return await storage.saveRaw(media.fileBuffer, targetPath);
    }

    /**
     * Downloads a single remote media file and stores it locally.
     *
     * @param {string} src - url of remote media
     * @returns {Promise<string|null>} `__GHOST_URL__`-prefixed path of the
     *          stored copy, or null when the download or the storage failed
     */
    async #inlineMedia(src) {
        const response = await this.#getRemoteMedia(src);

        if (!response) {
            return null;
        }

        const media = this.#extractFileDataFromResponse(src, response);
        const filePath = await this.#storeMediaLocally(media);

        return filePath ? `__GHOST_URL__${filePath}` : null;
    }

    /**
     * Replaces every media reference pointing at one of the domains inside a
     * mobiledoc string with the path of its locally stored copy.
     *
     * @param {string|null} mobiledoc - serialized mobiledoc, may be empty
     * @param {string[]} domains - domains to inline media from
     * @returns {Promise<string|null>} updated mobiledoc; the input is returned
     *          untouched when it is empty
     */
    async #inlineMobiledoc(mobiledoc, domains) {
        // Nothing to scan; bailing out here (instead of throwing on
        // `matchAll`) lets the caller carry on with the other fields.
        if (!mobiledoc) {
            return mobiledoc;
        }

        // NOTE: the src could end with a quote, an apostrophe or a backslash
        //       (the `\\` in the pattern matches one literal backslash).
        //       backslashes are added to mobiledoc as an escape character
        const srcTerminationSymbols = `"|'|\\\\`;

        // Remember the outcome per src so that media referenced several times
        // is downloaded and stored only once (null = inlining failed).
        const inlinedSrcBySrc = new Map();
        let inlinedMobiledoc = mobiledoc;

        for (const domain of domains) {
            // The domain is a literal prefix, not a pattern: escape it so that
            // e.g. the dots of a host name do not match arbitrary characters.
            const escapedDomain = domain.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
            const regex = new RegExp(`(${escapedDomain}.*?)(${srcTerminationSymbols})`, 'igm');
            // The iterator keeps scanning the string it was created from, so
            // the replacements made below do not disturb the iteration.
            const matches = inlinedMobiledoc.matchAll(regex);

            for (const [, src] of matches) {
                if (!inlinedSrcBySrc.has(src)) {
                    inlinedSrcBySrc.set(src, await this.#inlineMedia(src));
                }

                const inlinedSrc = inlinedSrcBySrc.get(src);

                if (!inlinedSrc) {
                    continue;
                }

                // One match, one replacement. A replacer function is used so
                // that `$` sequences in the new path are inserted verbatim.
                inlinedMobiledoc = inlinedMobiledoc.replace(src, () => inlinedSrc);
                logging.info(`Inlined media: ${src} -> ${inlinedSrc}`);
            }
        }

        return inlinedMobiledoc;
    }

    /**
     * Inlines the media referenced by plain url fields of a model instance.
     *
     * @param {Object} resourceModel - one of PostModel, TagModel, UserModel instances
     * @param {String[]} fields - fields to inline
     * @param {String[]} domains - domains to inline media from
     * @returns {Promise<Object>} - updated fields map with local media paths
     */
    async #inlineFields(resourceModel, fields, domains) {
        const updatedFields = {};

        for (const field of fields) {
            const src = resourceModel.get(field);

            if (!src) {
                continue;
            }

            // A field holds a single url, so it is downloaded at most once no
            // matter how many of the domains it happens to match.
            const isHostedOnDomain = domains.some(domain => src.startsWith(domain));

            if (!isHostedOnDomain) {
                continue;
            }

            const inlinedSrc = await this.#inlineMedia(src);

            if (inlinedSrc) {
                updatedFields[field] = inlinedSrc;
                logging.info(`Added media to inline: ${src} -> ${inlinedSrc}`);
            }
        }

        return updatedFields;
    }

    /**
     * Inlines plain url fields for every resource and persists the changes.
     * A failure on one resource is logged and does not stop the others.
     *
     * @param {Object[]} resources - array of model instances
     * @param {Object} model - resource model
     * @param {string[]} fields - fields to inline
     * @param {string[]} domains - domains to inline media from
     * @returns {Promise<void>}
     */
    async #inlineSimpleFields(resources, model, fields, domains) {
        logging.info(`Starting inlining external media for ${resources?.length} ${model.tableName}`);

        // Treat a missing result set as empty, matching the optional access
        // in the log line above.
        for (const resource of resources ?? []) {
            try {
                const updatedFields = await this.#inlineFields(resource, fields, domains);

                if (Object.keys(updatedFields).length > 0) {
                    await model.edit(updatedFields, {
                        id: resource.id,
                        context: {
                            internal: true
                        }
                    });
                }
            } catch (err) {
                logging.error(`Error inlining media for ${model.tableName}: ${resource.id}`);
                logging.error(new errors.DataImportError({
                    err
                }));
            }
        }
    }

    /**
     * Inlines external media for posts, posts meta, tags and users.
     *
     * @param {string[]} domains domains to inline media from
     * @returns {Promise<void>}
     */
    async inline(domains) {
        const {data: posts} = await this.#PostModel.findPage({
            limit: 'all',
            status: 'all'
        });
        const postsInliningFields = [
            'feature_image'
        ];

        logging.info(`Starting inlining external media for posts: ${posts?.length}`);

        for (const post of posts) {
            try {
                const mobiledoc = post.get('mobiledoc');
                const inlinedMobiledoc = await this.#inlineMobiledoc(mobiledoc, domains);
                const updatedFields = await this.#inlineFields(post, postsInliningFields, domains);

                if (inlinedMobiledoc !== mobiledoc) {
                    updatedFields.mobiledoc = inlinedMobiledoc;
                }

                if (Object.keys(updatedFields).length > 0) {
                    await this.#PostModel.edit(updatedFields, {
                        id: post.id,
                        context: {
                            internal: true
                        }
                    });
                }
            } catch (err) {
                logging.error(`Error inlining media for post: ${post.id}`);
                logging.error(new errors.DataImportError({
                    err
                }));
            }
        }

        const {data: postsMetas} = await this.#PostMetaModel.findPage({
            limit: 'all'
        });
        const postsMetaInliningFields = [
            'og_image',
            'twitter_image'
        ];

        await this.#inlineSimpleFields(postsMetas, this.#PostMetaModel, postsMetaInliningFields, domains);

        const {data: tags} = await this.#TagModel.findPage({
            limit: 'all'
        });
        const tagInliningFields = [
            'feature_image',
            'og_image',
            'twitter_image'
        ];

        await this.#inlineSimpleFields(tags, this.#TagModel, tagInliningFields, domains);

        const {data: users} = await this.#UserModel.findPage({
            limit: 'all'
        });
        const userInliningFields = [
            'profile_image',
            'cover_image'
        ];

        await this.#inlineSimpleFields(users, this.#UserModel, userInliningFields, domains);

        logging.info('Finished inlining external media for posts, tags, and users');
    }
}
|
|
|
|
|
|
|
|
// Expose the inliner class as this module's export.
module.exports = ExternalMediaInliner;
|