From 60fb2e31390ac131681f680ddbceb28b01b0a553 Mon Sep 17 00:00:00 2001 From: Simon Backx Date: Tue, 12 Dec 2023 12:50:55 +0100 Subject: [PATCH] Added quantities and seed option to the data generator (#19330) ref PROD-243 --- ghost/core/core/cli/generate-data.js | 26 ++++++++- ghost/data-generator/lib/DataGenerator.js | 64 ++++++++++++++++++++--- package.json | 1 + 3 files changed, 82 insertions(+), 9 deletions(-) diff --git a/ghost/core/core/cli/generate-data.js b/ghost/core/core/cli/generate-data.js index d6ec116c55..a62884d7bd 100644 --- a/ghost/core/core/cli/generate-data.js +++ b/ghost/core/core/cli/generate-data.js @@ -1,3 +1,4 @@ +/* eslint-disable ghost/filenames/match-exported-class */ const Command = require('./command'); const DataGenerator = require('@tryghost/data-generator'); const config = require('../shared/config'); @@ -6,11 +7,12 @@ module.exports = class DataGeneratorCommand extends Command { setup() { this.help('Generates random data to populate the database for development & testing'); this.argument('--base-data-pack', {type: 'string', defaultValue: '', desc: 'Base data pack file location, imported instead of random content'}); - this.argument('--quantity', {type: 'number', desc: 'When importing a single table, the quantity to import'}); this.argument('--clear-database', {type: 'boolean', defaultValue: false, desc: 'Clear all entries in the database before importing'}); this.argument('--tables', {type: 'string', desc: 'Only import the specified list of tables, where quantities can be specified by appending a colon followed by the quantity for each table. Example: --tables=members:1000,posts,tags,members_login_events'}); + this.argument('--quantities', {type: 'string', desc: 'Allows you to specify different default quantities for specific tables without affecting the tables that are generated. 
Example: --quantities=members:1000'}); this.argument('--with-default', {type: 'boolean', defaultValue: false, desc: 'Include default tables as well as those specified (simply override quantities)'}); this.argument('--print-dependencies', {type: 'boolean', defaultValue: false, desc: 'Prints the dependency tree for the data generator and exits'}); + this.argument('--seed', {type: 'number', defaultValue: '', desc: 'Use a seed to reliably generate the same data on multiple runs (timestamps will still change a little bit to remain up to date)'}); } initializeContext(context) { @@ -37,6 +39,24 @@ module.exports = class DataGeneratorCommand extends Command { quantity: parseInt(table.split(':')[1]) || undefined })); + /** + * @type {Record} + */ + const quantities = {}; + + if (argv.quantities) { + for (const quantity of argv.quantities.split(',')) { + const [table, amount] = quantity.split(':'); + + if (amount === undefined || !isFinite(parseInt(amount))) { + this.fatal(`Missing quantity for table ${table}`); + return; + } + + quantities[table] = parseInt(amount); + } + } + const dataGenerator = new DataGenerator({ baseDataPack: argv['base-data-pack'], knex, @@ -53,7 +73,9 @@ module.exports = class DataGeneratorCommand extends Command { clearDatabase: argv['clear-database'], tables, withDefault: argv['with-default'], - printDependencies: argv['print-dependencies'] + printDependencies: argv['print-dependencies'], + quantities, + seed: argv.seed || undefined }); try { await dataGenerator.importData(); diff --git a/ghost/data-generator/lib/DataGenerator.js b/ghost/data-generator/lib/DataGenerator.js index fcf387baea..29d00dcb74 100644 --- a/ghost/data-generator/lib/DataGenerator.js +++ b/ghost/data-generator/lib/DataGenerator.js @@ -3,6 +3,10 @@ const fs = require('fs/promises'); const JsonImporter = require('./utils/JsonImporter'); const {getProcessRoot} = require('@tryghost/root-utils'); const topologicalSort = require('./utils/topological-sort'); +const {faker} = 
require('@faker-js/faker'); +const {faker: americanFaker} = require('@faker-js/faker/locale/en_US'); +const crypto = require('crypto'); +const {Buffer} = require('node:buffer'); const importers = require('./importers').reduce((acc, val) => { acc[val.table] = val; @@ -11,6 +15,12 @@ const importers = require('./importers').reduce((acc, val) => { const schema = require('../../core/core/server/data/schema').tables; class DataGenerator { + /** + * + * @param {object} options + * @param {Record} [options.quantities] Pass in custom amounts for specific tables + * @param {number} [options.seed] If you pass the same seed, the same data will be generated if you used the same options too and if the data generation code remained the same. + */ constructor({ knex, tables, @@ -19,7 +29,9 @@ class DataGenerator { baseUrl, logger, printDependencies, - withDefault + withDefault, + seed, + quantities = {} }) { this.knex = knex; this.tableList = tables || []; @@ -30,6 +42,8 @@ class DataGenerator { this.logger = logger; this.withDefault = withDefault; this.printDependencies = printDependencies; + this.seed = seed; + this.quantities = quantities; } sortTableList() { @@ -176,13 +190,49 @@ class DataGenerator { await this.importBasePack(transaction); } + // Set quantities for tables for (const table of this.tableList) { - this.logger.info('Importing content for table', table.name); - // Add all common options to every importer, whether they use them or not - const tableImporter = new table.importer(this.knex, transaction, { - baseUrl: this.baseUrl - }); - await tableImporter.import(table.quantity ?? 
undefined); + if (this.quantities[table.name] !== undefined) { + table.quantity = this.quantities[table.name]; + } + } + + const cryptoRandomBytes = crypto.randomBytes; + + if (this.seed) { + // The probability distributions library uses crypto.randomBytes, which we can't seed, so we need to override it + crypto.randomBytes = (size) => { + const buffer = Buffer.alloc(size); + for (let i = 0; i < size; i++) { + buffer[i] = Math.floor(faker.datatype.number({min: 0, max: 255})); + } + return buffer; + }; + } + + try { + for (const table of this.tableList) { + if (this.seed) { + // We reset the seed for every table, so the chosen tables don't affect the data and changes in one importer don't affect the others + faker.seed(this.seed); + americanFaker.seed(this.seed); + } + + // Add all common options to every importer, whether they use them or not + const tableImporter = new table.importer(this.knex, transaction, { + baseUrl: this.baseUrl + }); + + const amount = table.quantity ?? tableImporter.defaultQuantity; + this.logger.info('Importing content for table', table.name, amount ? `(${amount} records)` : ''); + + await tableImporter.import(table.quantity ?? 
undefined); + } + } finally { + if (this.seed) { + // Revert crypto.randomBytes to the original function + crypto.randomBytes = cryptoRandomBytes; + } } // Finalise all tables - uses new table importer objects to avoid keeping all data in memory diff --git a/package.json b/package.json index ac23d5290a..1adca8371a 100644 --- a/package.json +++ b/package.json @@ -30,6 +30,7 @@ "fix": "yarn cache clean && rm -rf node_modules && yarn", "knex-migrator": "yarn workspace ghost run knex-migrator", "setup": "yarn && git submodule update --init && NODE_ENV=development node .github/scripts/setup.js", + "reset:data": "cd ghost/core && node index.js generate-data --clear-database --quantities members:100,posts:1 --seed 123", "docker:reset": "docker-compose -f .github/scripts/docker-compose.yml down -v && docker-compose -f .github/scripts/docker-compose.yml up -d --wait", "lint": "nx run-many -t lint", "test": "nx run-many -t test",