From d89c7d5f2547fdd29ac16d31105fa83156a30d31 Mon Sep 17 00:00:00 2001 From: Chris Raible Date: Fri, 22 Nov 2024 00:26:04 -0800 Subject: [PATCH] Added metric for time to create a database connection (#21696) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ref https://linear.app/ghost/issue/ENG-1783/add-time-to-create-connection-metric - Since we've added the "time to acquire" metric to get visibility into contention in the connection pool, we've seen some anomalies where it takes a surprisingly long time to acquire a connection (~60ms) when not under load. Hypothesis is that these anomalies occur when there aren't any open connections, so Ghost has to establish a new connection with the DB, and that's the part that's actually taking most of that time. This new metric should help confirm/deny that hypothesis. - This will also be an interesting metric to keep an eye on and/or alert on — if Ghost can't create new connections with its database performantly, it's not going to perform very well. --- .../src/PrometheusClient.ts | 24 +++++++++++++ .../test/prometheus-client.test.ts | 34 +++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/ghost/prometheus-metrics/src/PrometheusClient.ts b/ghost/prometheus-metrics/src/PrometheusClient.ts index f658296f9c..57f83a31fd 100644 --- a/ghost/prometheus-metrics/src/PrometheusClient.ts +++ b/ghost/prometheus-metrics/src/PrometheusClient.ts @@ -34,6 +34,7 @@ export class PrometheusClient { public gateway: client.Pushgateway | undefined; // public for testing public queries: Map void> = new Map(); public acquires: Map void> = new Map(); + public creates: Map void> = new Map(); private config: PrometheusClientConfig; private prefix; @@ -339,6 +340,15 @@ export class PrometheusClient { pruneAgedBuckets: false }); + const createDurationSummary = this.registerSummary({ + name: `db_connection_create_duration_seconds`, + help: 'Summary of the duration of creating a connection in seconds', + percentiles: [0.5, 0.9, 0.99], + maxAgeSeconds: 60, + ageBuckets: 6, + pruneAgedBuckets: false + }); + knexInstance.on('query', (query) => { // Add the query to the map this.queries.set(query.__knexQueryUid, queryDurationSummary.startTimer()); @@ -349,6 +359,20 @@ export class PrometheusClient { this.queries.delete(query.__knexQueryUid); }); + knexInstance.client.pool.on('createRequest', (eventId: number) => { + this.creates.set(eventId, createDurationSummary.startTimer()); + }); + + knexInstance.client.pool.on('createSuccess', (eventId: number) => { + this.creates.get(eventId)?.(); + this.creates.delete(eventId); + }); + + knexInstance.client.pool.on('createFail', (eventId: number) => { + this.creates.get(eventId)?.(); + this.creates.delete(eventId); + }); + knexInstance.client.pool.on('acquireRequest', (eventId: number) => { this.acquires.set(eventId, acquireDurationSummary.startTimer()); }); diff --git a/ghost/prometheus-metrics/test/prometheus-client.test.ts b/ghost/prometheus-metrics/test/prometheus-client.test.ts index 57b244edbc..808a5e639c 100644 --- a/ghost/prometheus-metrics/test/prometheus-client.test.ts +++ b/ghost/prometheus-metrics/test/prometheus-client.test.ts @@ -308,6 +308,22 @@ describe('Prometheus Client', function () { clock.restore(); } + function simulateCreate(duration: number) { + const clock = sinon.useFakeTimers(); + poolEventEmitter.emit('createRequest'); + clock.tick(duration); + poolEventEmitter.emit('createSuccess'); + clock.restore(); + } + + function simulateCreateFail(duration: number) { + const clock = sinon.useFakeTimers(); + poolEventEmitter.emit('createRequest'); + clock.tick(duration); + poolEventEmitter.emit('createFail'); + clock.restore(); + } + beforeEach(function () { knexEventEmitter = new EventEmitter(); poolEventEmitter = new EventEmitter(); @@ -439,6 +455,24 @@ describe('Prometheus Client', function () { const metricValues = await instance.getMetricValues('db_connection_acquire_duration_seconds'); assert.equal(metricValues?.[0].value, 0.5); }); + + it('should collect the db connection create duration metrics when a connection is created', async function () { + instance = new PrometheusClient(); + instance.init(); + instance.instrumentKnex(knexMock); + simulateCreate(500); + const metricValues = await instance.getMetricValues('db_connection_create_duration_seconds'); + assert.equal(metricValues?.[0].value, 0.5); + }); + + it('should collect the db connection create duration metrics when a connection fails to be created', async function () { + instance = new PrometheusClient(); + instance.init(); + instance.instrumentKnex(knexMock); + simulateCreateFail(500); + const metricValues = await instance.getMetricValues('db_connection_create_duration_seconds'); + assert.equal(metricValues?.[0].value, 0.5); + }); }); describe('Custom Metrics', function () {