Added metric for time to create a database connection (#21696)

ref
https://linear.app/ghost/issue/ENG-1783/add-time-to-create-connection-metric

- Since we've added the "time to acquire" metric to get visibility into
contention in the connection pool, we've seen some anomalies where it
takes a surprisingly long time to acquire a connection (~60ms) when not
under load. Hypothesis is that these anomalies occur when there aren't
any open connections, so Ghost has to establish a new connection with
the DB, and that's the part that's actually taking most of that time.
This new metric should help confirm/deny that hypothesis.
- This will also be an interesting metric to keep an eye on and/or alert
on — if Ghost can't create new connections with its database
performantly, it's not going to perform very well.
This commit is contained in:
Chris Raible 2024-11-22 00:26:04 -08:00 committed by GitHub
parent 2ff82c7ac0
commit d89c7d5f25
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 58 additions and 0 deletions

View File

@ -34,6 +34,7 @@ export class PrometheusClient {
public gateway: client.Pushgateway<client.RegistryContentType> | undefined; // public for testing public gateway: client.Pushgateway<client.RegistryContentType> | undefined; // public for testing
public queries: Map<string, () => void> = new Map(); public queries: Map<string, () => void> = new Map();
public acquires: Map<number, () => void> = new Map(); public acquires: Map<number, () => void> = new Map();
public creates: Map<number, () => void> = new Map();
private config: PrometheusClientConfig; private config: PrometheusClientConfig;
private prefix; private prefix;
@ -339,6 +340,15 @@ export class PrometheusClient {
pruneAgedBuckets: false pruneAgedBuckets: false
}); });
const createDurationSummary = this.registerSummary({
name: `db_connection_create_duration_seconds`,
help: 'Summary of the duration of creating a connection in seconds',
percentiles: [0.5, 0.9, 0.99],
maxAgeSeconds: 60,
ageBuckets: 6,
pruneAgedBuckets: false
});
knexInstance.on('query', (query) => { knexInstance.on('query', (query) => {
// Add the query to the map // Add the query to the map
this.queries.set(query.__knexQueryUid, queryDurationSummary.startTimer()); this.queries.set(query.__knexQueryUid, queryDurationSummary.startTimer());
@ -349,6 +359,20 @@ export class PrometheusClient {
this.queries.delete(query.__knexQueryUid); this.queries.delete(query.__knexQueryUid);
}); });
knexInstance.client.pool.on('createRequest', (eventId: number) => {
this.creates.set(eventId, createDurationSummary.startTimer());
});
knexInstance.client.pool.on('createSuccess', (eventId: number) => {
this.creates.get(eventId)?.();
this.creates.delete(eventId);
});
knexInstance.client.pool.on('createFail', (eventId: number) => {
this.creates.get(eventId)?.();
this.creates.delete(eventId);
});
knexInstance.client.pool.on('acquireRequest', (eventId: number) => { knexInstance.client.pool.on('acquireRequest', (eventId: number) => {
this.acquires.set(eventId, acquireDurationSummary.startTimer()); this.acquires.set(eventId, acquireDurationSummary.startTimer());
}); });

View File

@ -308,6 +308,22 @@ describe('Prometheus Client', function () {
clock.restore(); clock.restore();
} }
function simulateCreate(duration: number) {
const clock = sinon.useFakeTimers();
poolEventEmitter.emit('createRequest');
clock.tick(duration);
poolEventEmitter.emit('createSuccess');
clock.restore();
}
function simulateCreateFail(duration: number) {
const clock = sinon.useFakeTimers();
poolEventEmitter.emit('createRequest');
clock.tick(duration);
poolEventEmitter.emit('createFail');
clock.restore();
}
beforeEach(function () { beforeEach(function () {
knexEventEmitter = new EventEmitter(); knexEventEmitter = new EventEmitter();
poolEventEmitter = new EventEmitter(); poolEventEmitter = new EventEmitter();
@ -439,6 +455,24 @@ describe('Prometheus Client', function () {
const metricValues = await instance.getMetricValues('db_connection_acquire_duration_seconds'); const metricValues = await instance.getMetricValues('db_connection_acquire_duration_seconds');
assert.equal(metricValues?.[0].value, 0.5); assert.equal(metricValues?.[0].value, 0.5);
}); });
it('should collect the db connection create duration metrics when a connection is created', async function () {
instance = new PrometheusClient();
instance.init();
instance.instrumentKnex(knexMock);
simulateCreate(500);
const metricValues = await instance.getMetricValues('db_connection_create_duration_seconds');
assert.equal(metricValues?.[0].value, 0.5);
});
it('should collect the db connection create duration metrics when a connection fails to be created', async function () {
instance = new PrometheusClient();
instance.init();
instance.instrumentKnex(knexMock);
simulateCreateFail(500);
const metricValues = await instance.getMetricValues('db_connection_create_duration_seconds');
assert.equal(metricValues?.[0].value, 0.5);
});
}); });
describe('Custom Metrics', function () { describe('Custom Metrics', function () {