Added quantities and seed option to the data generator (#19330)

ref PROD-243
This commit is contained in:
Simon Backx 2023-12-12 12:50:55 +01:00 committed by GitHub
parent 2b85980302
commit 60fb2e3139
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 82 additions and 9 deletions

View File

@ -1,3 +1,4 @@
/* eslint-disable ghost/filenames/match-exported-class */
const Command = require('./command');
const DataGenerator = require('@tryghost/data-generator');
const config = require('../shared/config');
@ -6,11 +7,12 @@ module.exports = class DataGeneratorCommand extends Command {
/**
 * Registers the help text and the command line arguments accepted by this command.
 *
 * Each `this.argument(...)` call declares one flag together with its type,
 * an optional default value and the description shown to the user.
 */
setup() {
this.help('Generates random data to populate the database for development & testing');
this.argument('--base-data-pack', {type: 'string', defaultValue: '', desc: 'Base data pack file location, imported instead of random content'});
// NOTE(review): overlaps with the per-table `:quantity` syntax of --tables and with --quantities below — confirm this flag is still wanted
this.argument('--quantity', {type: 'number', desc: 'When importing a single table, the quantity to import'});
this.argument('--clear-database', {type: 'boolean', defaultValue: false, desc: 'Clear all entries in the database before importing'});
this.argument('--tables', {type: 'string', desc: 'Only import the specified list of tables, where quantities can be specified by appending a colon followed by the quantity for each table. Example: --tables=members:1000,posts,tags,members_login_events'});
this.argument('--quantities', {type: 'string', desc: 'Allows you to specify different default quantities for specific tables without affecting the tables that are generated. Example: --quantities=members:1000'});
this.argument('--with-default', {type: 'boolean', defaultValue: false, desc: 'Include default tables as well as those specified (simply override quantities)'});
this.argument('--print-dependencies', {type: 'boolean', defaultValue: false, desc: 'Prints the dependency tree for the data generator and exits'});
// NOTE(review): declared as type 'number' but the default value is an empty string — confirm this is intended
this.argument('--seed', {type: 'number', defaultValue: '', desc: 'Use a seed to reliably generate the same data on multiple runs (timestamps will still change a little bit to remain up to date)'});
}
initializeContext(context) {
@ -37,6 +39,24 @@ module.exports = class DataGeneratorCommand extends Command {
quantity: parseInt(table.split(':')[1]) || undefined
}));
/**
* @type {Record<string, number>}
*/
const quantities = {};
if (argv.quantities) {
for (const quantity of argv.quantities.split(',')) {
const [table, amount] = quantity.split(':');
if (amount === undefined || !isFinite(parseInt(amount))) {
this.fatal(`Missing quantity for table ${table}`);
return;
}
quantities[table] = parseInt(amount);
}
}
const dataGenerator = new DataGenerator({
baseDataPack: argv['base-data-pack'],
knex,
@ -53,7 +73,9 @@ module.exports = class DataGeneratorCommand extends Command {
clearDatabase: argv['clear-database'],
tables,
withDefault: argv['with-default'],
printDependencies: argv['print-dependencies']
printDependencies: argv['print-dependencies'],
quantities,
seed: argv.seed || undefined
});
try {
await dataGenerator.importData();

View File

@ -3,6 +3,10 @@ const fs = require('fs/promises');
const JsonImporter = require('./utils/JsonImporter');
const {getProcessRoot} = require('@tryghost/root-utils');
const topologicalSort = require('./utils/topological-sort');
const {faker} = require('@faker-js/faker');
const {faker: americanFaker} = require('@faker-js/faker/locale/en_US');
const crypto = require('crypto');
const {Buffer} = require('node:buffer');
const importers = require('./importers').reduce((acc, val) => {
acc[val.table] = val;
@ -11,6 +15,12 @@ const importers = require('./importers').reduce((acc, val) => {
const schema = require('../../core/core/server/data/schema').tables;
class DataGenerator {
/**
*
* @param {object} options
* @param {Record<string,number>} [options.quantities] Pass in custom amounts for specific tables
* @param {number} [options.seed] Seed for the random generators. Passing the same seed generates the same data, provided the same options are used and the data generation code has not changed.
*/
constructor({
knex,
tables,
@ -19,7 +29,9 @@ class DataGenerator {
baseUrl,
logger,
printDependencies,
withDefault
withDefault,
seed,
quantities = {}
}) {
this.knex = knex;
this.tableList = tables || [];
@ -30,6 +42,8 @@ class DataGenerator {
this.logger = logger;
this.withDefault = withDefault;
this.printDependencies = printDependencies;
this.seed = seed;
this.quantities = quantities;
}
sortTableList() {
@ -176,13 +190,49 @@ class DataGenerator {
await this.importBasePack(transaction);
}
// Set quantities for tables
for (const table of this.tableList) {
this.logger.info('Importing content for table', table.name);
// Add all common options to every importer, whether they use them or not
const tableImporter = new table.importer(this.knex, transaction, {
baseUrl: this.baseUrl
});
await tableImporter.import(table.quantity ?? undefined);
if (this.quantities[table.name] !== undefined) {
table.quantity = this.quantities[table.name];
}
}
// Keep a reference to the real implementation so it can be restored after the import
const cryptoRandomBytes = crypto.randomBytes;
// A seed of 0 is a valid seed, so a plain truthiness check is not enough
const useSeed = Boolean(this.seed) || this.seed === 0;
if (useSeed) {
    // The probability distributions library uses crypto.randomBytes, which we can't seed, so we need to override it
    crypto.randomBytes = (size) => {
        const buffer = Buffer.alloc(size);
        for (let i = 0; i < size; i++) {
            buffer[i] = Math.floor(faker.datatype.number({min: 0, max: 255}));
        }
        return buffer;
    };
}
try {
    for (const table of this.tableList) {
        if (useSeed) {
            // We reset the seed for every table, so the chosen tables don't affect the data and changes in one importer don't affect the others
            faker.seed(this.seed);
            americanFaker.seed(this.seed);
        }
        // Add all common options to every importer, whether they use them or not
        const tableImporter = new table.importer(this.knex, transaction, {
            baseUrl: this.baseUrl
        });
        // Only used for logging: the explicit quantity if there is one, otherwise the importer's default
        const amount = table.quantity ?? tableImporter.defaultQuantity;
        this.logger.info('Importing content for table', table.name, amount ? `(${amount} records)` : '');
        await tableImporter.import(table.quantity ?? undefined);
    }
} finally {
    if (useSeed) {
        // Revert crypto.randomBytes to the original function, even when an importer threw
        crypto.randomBytes = cryptoRandomBytes;
    }
}
// Finalise all tables - uses new table importer objects to avoid keeping all data in memory

View File

@ -30,6 +30,7 @@
"fix": "yarn cache clean && rm -rf node_modules && yarn",
"knex-migrator": "yarn workspace ghost run knex-migrator",
"setup": "yarn && git submodule update --init && NODE_ENV=development node .github/scripts/setup.js",
"reset:data": "cd ghost/core && node index.js generate-data --clear-database --quantities members:100,posts:1 --seed 123",
"docker:reset": "docker-compose -f .github/scripts/docker-compose.yml down -v && docker-compose -f .github/scripts/docker-compose.yml up -d --wait",
"lint": "nx run-many -t lint",
"test": "nx run-many -t test",