Optimized collection repository queries (#18194)

refs https://github.com/TryGhost/Arch/issues/86

- Creating bookshelf models for each collection_post relation created a
massive overhead. On a dataset with 500k collections_posts records the
timing was roughly 7s comparing to 810ms after the optimization.
- Optimized memory and performance of collections fetching by querying post
ids only by default
This commit is contained in:
naz 2023-09-19 16:02:14 +08:00 committed by GitHub
parent bd013ed18c
commit 9cd542dc59
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 60 additions and 12 deletions

View File

@ -29,7 +29,11 @@ module.exports = {
}, },
permissions: true, permissions: true,
query(frame) { query(frame) {
return collectionsService.api.getAll(frame.options); return collectionsService.api.getAll({
filter: frame.options.filter,
limit: frame.options.limit,
page: frame.options.page
});
} }
}, },

View File

@ -28,12 +28,14 @@ module.exports = class BookshelfCollectionsRepository {
async getById(id, options = {}) { async getById(id, options = {}) {
const model = await this.#model.findOne({id}, { const model = await this.#model.findOne({id}, {
require: false, require: false,
withRelated: ['collectionPosts'],
transacting: options.transaction transacting: options.transaction
}); });
if (!model) { if (!model) {
return null; return null;
} }
model.collectionPostIds = await this.#fetchCollectionPostIds(model.id, options);
return this.#modelToCollection(model); return this.#modelToCollection(model);
} }
@ -44,28 +46,63 @@ module.exports = class BookshelfCollectionsRepository {
async getBySlug(slug, options = {}) { async getBySlug(slug, options = {}) {
const model = await this.#model.findOne({slug}, { const model = await this.#model.findOne({slug}, {
require: false, require: false,
withRelated: ['collectionPosts'],
transacting: options.transaction transacting: options.transaction
}); });
if (!model) { if (!model) {
return null; return null;
} }
model.collectionPostIds = await this.#fetchCollectionPostIds(model.id, options);
return this.#modelToCollection(model); return this.#modelToCollection(model);
} }
/**
* NOTE: we are only fetching post_id column here to save memory on
* instances with a large amount of posts
*
* The method could be further optimized to fetch posts for
* multiple collections at a time.
*
* @param {string} collectionId collection to fetch post ids for
* @param {Object} options bookshelf options
*
* @returns {Promise<Array<{post_id: string}>>}
*/
async #fetchCollectionPostIds(collectionId, options = {}) {
const toSelect = options.columns || ['post_id'];
const query = this.#relationModel.query();
if (options.transaction) {
query.transacting(options.transaction);
}
return await query
.select(toSelect)
.where('collection_id', collectionId);
}
/** /**
* @param {object} [options] * @param {object} [options]
* @param {string} [options.filter] * @param {string} [options.filter]
* @param {string} [options.order] * @param {string} [options.order]
* @param {string[]} [options.withRelated]
* @param {import('knex').Transaction} [options.transaction] * @param {import('knex').Transaction} [options.transaction]
*/ */
async getAll(options = {}) { async getAll(options = {}) {
const models = await this.#model.findAll({ const models = await this.#model.findAll({
...options, ...options,
transacting: options.transaction, transacting: options.transaction
withRelated: ['collectionPosts']
}); });
for (const model of models) {
// NOTE: Ideally collection posts would be fetching as a part of findAll query.
// Because bookshelf introduced a massive processing and memory overhead
// we are fetching collection post ids separately using raw knex query
model.collectionPostIds = await this.#fetchCollectionPostIds(model.id, options);
}
return (await Promise.all(models.map(model => this.#modelToCollection(model)))).filter(entity => !!entity); return (await Promise.all(models.map(model => this.#modelToCollection(model)))).filter(entity => !!entity);
} }
@ -78,6 +115,10 @@ module.exports = class BookshelfCollectionsRepository {
} }
try { try {
// NOTE: collectionPosts are not a part of serialized model
// and are fetched separately to save memory
const posts = model.collectionPostIds;
return await Collection.create({ return await Collection.create({
id: json.id, id: json.id,
slug: json.slug, slug: json.slug,
@ -86,7 +127,7 @@ module.exports = class BookshelfCollectionsRepository {
filter: filter, filter: filter,
type: json.type, type: json.type,
featureImage: json.feature_image, featureImage: json.feature_image,
posts: json.collectionPosts.map(collectionPost => collectionPost.post_id), posts: posts.map(collectionPost => collectionPost.post_id),
createdAt: json.created_at, createdAt: json.created_at,
updatedAt: json.updated_at updatedAt: json.updated_at
}); });
@ -135,8 +176,7 @@ module.exports = class BookshelfCollectionsRepository {
{id: data.id}, {id: data.id},
{ {
require: false, require: false,
transacting: options.transaction, transacting: options.transaction
withRelated: ['collectionPosts']
} }
); );
@ -175,7 +215,11 @@ module.exports = class BookshelfCollectionsRepository {
if (collection.type === 'manual') { if (collection.type === 'manual') {
await this.#relationModel.query().delete().where('collection_id', collection.id).transacting(options.transaction); await this.#relationModel.query().delete().where('collection_id', collection.id).transacting(options.transaction);
} else { } else {
const existingRelations = existing.toJSON().collectionPosts; const collectionPostsOptions = {
transaction: options.transaction,
columns: ['id', 'post_id']
};
const existingRelations = await this.#fetchCollectionPostIds(collection.id, collectionPostsOptions);
for (const existingRelation of existingRelations) { for (const existingRelation of existingRelations) {
const found = collectionPostsRelations.find((thing) => { const found = collectionPostsRelations.find((thing) => {

View File

@ -117,7 +117,7 @@ describe('Collections API', function () {
}); });
}, this.skip.bind(this)); }, this.skip.bind(this));
const collectionRelatedQueries = queries.filter(query => query.sql.includes('collection')); const collectionRelatedQueries = queries.filter(query => query.sql.includes('collection'));
assert(collectionRelatedQueries.length === 2); assert(collectionRelatedQueries.length === 3);
}); });
it('Can browse Collections and include the posts count', async function () { it('Can browse Collections and include the posts count', async function () {
@ -572,7 +572,7 @@ describe('Collections API', function () {
}, this.skip.bind(this)); }, this.skip.bind(this));
const collectionRelatedQueries = queries.filter(query => query.sql.includes('collection')); const collectionRelatedQueries = queries.filter(query => query.sql.includes('collection'));
assert.equal(collectionRelatedQueries.length, 8); assert.equal(collectionRelatedQueries.length, 13);
} }
await agent await agent
@ -604,7 +604,7 @@ describe('Collections API', function () {
}, this.skip.bind(this)); }, this.skip.bind(this));
const collectionRelatedQueries = queries.filter(query => query.sql.includes('collection')); const collectionRelatedQueries = queries.filter(query => query.sql.includes('collection'));
assert.equal(collectionRelatedQueries.length, 14); assert.equal(collectionRelatedQueries.length, 18);
} }
await agent await agent