diff --git a/.github/helm/affine/charts/graphql/templates/monitoring.yaml b/.github/helm/affine/charts/graphql/templates/monitoring.yaml deleted file mode 100644 index 5b7da3aa7c..0000000000 --- a/.github/helm/affine/charts/graphql/templates/monitoring.yaml +++ /dev/null @@ -1,13 +0,0 @@ -{{- if .Values.global.gke.enabled -}} -apiVersion: monitoring.googleapis.com/v1 -kind: PodMonitoring -metadata: - name: "{{ .Chart.Name }}-monitoring" -spec: - selector: - matchLabels: - app.kubernetes.io/name: "{{ include "graphql.name" . }}" - endpoints: - - port: {{ .Values.service.port }} - interval: 30s -{{- end }} diff --git a/.github/helm/affine/charts/sync/templates/monitoring.yaml b/.github/helm/affine/charts/sync/templates/monitoring.yaml deleted file mode 100644 index b7a03e6401..0000000000 --- a/.github/helm/affine/charts/sync/templates/monitoring.yaml +++ /dev/null @@ -1,13 +0,0 @@ -{{- if .Values.global.gke.enabled -}} -apiVersion: monitoring.googleapis.com/v1 -kind: PodMonitoring -metadata: - name: "{{ .Chart.Name }}-monitoring" -spec: - selector: - matchLabels: - app.kubernetes.io/name: "{{ include "sync.name" . }}" - endpoints: - - port: {{ .Values.service.port }} - interval: 30s -{{- end }} diff --git a/packages/backend/server/package.json b/packages/backend/server/package.json index c85508e259..d893852232 100644 --- a/packages/backend/server/package.json +++ b/packages/backend/server/package.json @@ -39,6 +39,9 @@ "@node-rs/jsonwebtoken": "^0.2.3", "@opentelemetry/api": "^1.7.0", "@opentelemetry/core": "^1.18.1", + "@opentelemetry/exporter-prometheus": "^0.45.1", + "@opentelemetry/exporter-zipkin": "^1.18.1", + "@opentelemetry/host-metrics": "^0.33.2", "@opentelemetry/instrumentation": "^0.45.1", "@opentelemetry/instrumentation-graphql": "^0.36.0", "@opentelemetry/instrumentation-http": "^0.45.1", diff --git a/packages/backend/server/src/app.ts b/packages/backend/server/src/app.ts index 8441983272..cab18af060 100644 --- a/packages/backend/server/src/app.ts +++ b/packages/backend/server/src/app.ts @@ -3,7 +3,6 @@ import { Module } from '@nestjs/common'; import { AppController } from './app.controller'; import { CacheModule } from './cache'; import { ConfigModule } from './config'; -import { MetricsModule } from './metrics'; import { BusinessModules } from './modules'; import { AuthModule } from './modules/auth'; import { PrismaModule } from './prisma'; @@ -16,7 +15,6 @@ const BasicModules = [ ConfigModule.forRoot(), CacheModule, StorageModule.forRoot(), - MetricsModule, SessionModule, RateLimiterModule, AuthModule, diff --git a/packages/backend/server/src/graphql.module.ts b/packages/backend/server/src/graphql.module.ts index 04bc1034dd..e9c86ffa35 100644 --- a/packages/backend/server/src/graphql.module.ts +++ b/packages/backend/server/src/graphql.module.ts @@ -8,14 +8,13 @@ import { fileURLToPath } from 'url'; import { Config } from './config'; import { GQLLoggerPlugin } from './graphql/logger-plugin'; -import { Metrics } from './metrics/metrics'; @Global() @Module({ imports: [ GraphQLModule.forRootAsync({ driver: ApolloDriver, - useFactory: (config: Config, metrics: Metrics) => { + useFactory: (config: Config) => { return { ...config.graphql, path: `${config.path}/graphql`, @@ -31,10 +30,10 @@ import { Metrics } from './metrics/metrics'; req, res, }), - plugins: [new GQLLoggerPlugin(metrics)], + plugins: [new GQLLoggerPlugin()], }; }, - inject: [Config, Metrics], + inject: [Config], }), ], }) diff --git a/packages/backend/server/src/graphql/logger-plugin.ts b/packages/backend/server/src/graphql/logger-plugin.ts index ee7c6670a3..67c399d667 100644 --- a/packages/backend/server/src/graphql/logger-plugin.ts +++ b/packages/backend/server/src/graphql/logger-plugin.ts @@ -7,40 +7,39 @@ import { Plugin } from '@nestjs/apollo'; import { Logger } from '@nestjs/common'; import { Response } from 'express'; -import { Metrics } from '../metrics/metrics'; +import { metrics } from '../metrics/metrics'; import { ReqContext } from '../types'; @Plugin() export class GQLLoggerPlugin implements ApolloServerPlugin { protected logger = new Logger(GQLLoggerPlugin.name); - constructor(private readonly metrics: Metrics) {} - requestDidStart( reqContext: GraphQLRequestContext ): Promise>> { const res = reqContext.contextValue.req.res as Response; const operation = reqContext.request.operationName; - this.metrics.gqlRequest(1, { operation }); - const timer = this.metrics.gqlTimer({ operation }); + metrics().gqlRequest.add(1, { operation }); + const start = Date.now(); return Promise.resolve({ willSendResponse: () => { - const costInMilliseconds = timer() * 1000; + const costInMilliseconds = Date.now() - start; res.setHeader( 'Server-Timing', `gql;dur=${costInMilliseconds};desc="GraphQL"` ); + metrics().gqlTimer.record(costInMilliseconds, { operation }); return Promise.resolve(); }, didEncounterErrors: () => { - this.metrics.gqlError(1, { operation }); - const costInMilliseconds = timer() * 1000; + const costInMilliseconds = Date.now() - start; res.setHeader( 'Server-Timing', `gql;dur=${costInMilliseconds};desc="GraphQL ${operation}"` ); + metrics().gqlTimer.record(costInMilliseconds, { operation }); return Promise.resolve(); }, }); diff --git a/packages/backend/server/src/index.ts b/packages/backend/server/src/index.ts index a124df389a..1f42851a94 100644 --- a/packages/backend/server/src/index.ts +++ b/packages/backend/server/src/index.ts @@ -1,22 +1,9 @@ /// -import { MetricExporter } from '@google-cloud/opentelemetry-cloud-monitoring-exporter'; -import { TraceExporter } from '@google-cloud/opentelemetry-cloud-trace-exporter'; +import { start as startAutoMetrics } from './metrics'; +startAutoMetrics(); + import { NestFactory } from '@nestjs/core'; import type { NestExpressApplication } from '@nestjs/platform-express'; -import { - CompositePropagator, - W3CBaggagePropagator, - W3CTraceContextPropagator, -} from '@opentelemetry/core'; -import gql from '@opentelemetry/instrumentation-graphql'; -import { HttpInstrumentation } from '@opentelemetry/instrumentation-http'; -import ioredis from '@opentelemetry/instrumentation-ioredis'; -import { NestInstrumentation } from '@opentelemetry/instrumentation-nestjs-core'; -import socketIO from '@opentelemetry/instrumentation-socket.io'; -import { PeriodicExportingMetricReader } from '@opentelemetry/sdk-metrics'; -import { NodeSDK } from '@opentelemetry/sdk-node'; -import { BatchSpanProcessor } from '@opentelemetry/sdk-trace-node'; -import { PrismaInstrumentation } from '@prisma/instrumentation'; import cookieParser from 'cookie-parser'; import { static as staticMiddleware } from 'express'; import graphqlUploadExpress from 'graphql-upload/graphqlUploadExpress.mjs'; @@ -28,35 +15,6 @@ import { serverTimingAndCache } from './middleware/timing'; import { RedisIoAdapter } from './modules/sync/redis-adapter'; const { NODE_ENV, AFFINE_ENV } = process.env; - -if (NODE_ENV === 'production') { - const traceExporter = new TraceExporter(); - const tracing = new NodeSDK({ - traceExporter, - metricReader: new PeriodicExportingMetricReader({ - exporter: new MetricExporter(), - }), - spanProcessor: new BatchSpanProcessor(traceExporter), - textMapPropagator: new CompositePropagator({ - propagators: [ - new W3CBaggagePropagator(), - new W3CTraceContextPropagator(), - ], - }), - instrumentations: [ - new NestInstrumentation(), - new ioredis.IORedisInstrumentation(), - new socketIO.SocketIoInstrumentation({ traceReserved: true }), - new gql.GraphQLInstrumentation({ mergeItems: true }), - new HttpInstrumentation(), - new PrismaInstrumentation(), - ], - serviceName: 'affine-cloud', - }); - - tracing.start(); -} - const app = await NestFactory.create(AppModule, { cors: true, rawBody: true, diff --git a/packages/backend/server/src/metrics/controller.ts b/packages/backend/server/src/metrics/controller.ts deleted file mode 100644 index c130cf9005..0000000000 --- a/packages/backend/server/src/metrics/controller.ts +++ /dev/null @@ -1,18 +0,0 @@ -import { Controller, Get, Res } from '@nestjs/common'; -import type { Response } from 'express'; -import { register } from 'prom-client'; - -import { PrismaService } from '../prisma'; - -@Controller() -export class MetricsController { - constructor(private readonly prisma: PrismaService) {} - - @Get('/metrics') - async index(@Res() res: Response): Promise { - res.header('Content-Type', register.contentType); - const prismaMetrics = await this.prisma.$metrics.prometheus(); - const appMetrics = await register.metrics(); - res.send(appMetrics + prismaMetrics); - } -} diff --git a/packages/backend/server/src/metrics/index.ts b/packages/backend/server/src/metrics/index.ts index a4f375e25d..655c717c3e 100644 --- a/packages/backend/server/src/metrics/index.ts +++ b/packages/backend/server/src/metrics/index.ts @@ -1,13 +1,3 @@ -import { Global, Module } from '@nestjs/common'; - -import { MetricsController } from '../metrics/controller'; -import { Metrics } from './metrics'; - -@Global() -@Module({ - providers: [Metrics], - exports: [Metrics], - controllers: [MetricsController], -}) -export class MetricsModule {} -export { Metrics }; +export * from './metrics'; +export { start } from './opentelemetry'; +export * from './utils'; diff --git a/packages/backend/server/src/metrics/metrics.ts b/packages/backend/server/src/metrics/metrics.ts index 5df022c80e..b91223cf97 100644 --- a/packages/backend/server/src/metrics/metrics.ts +++ b/packages/backend/server/src/metrics/metrics.ts @@ -1,31 +1,76 @@ -import { Injectable, OnModuleDestroy } from '@nestjs/common'; -import { register } from 'prom-client'; +import opentelemetry, { Attributes, Observable } from '@opentelemetry/api'; -import { metricsCreator } from './utils'; +interface AsyncMetric { + ob: Observable; + get value(): any; + get attrs(): Attributes | undefined; +} -@Injectable() -export class Metrics implements OnModuleDestroy { - onModuleDestroy(): void { - register.clear(); +let _metrics: ReturnType | undefined = undefined; + +export function getMeter(name = 'business') { + return opentelemetry.metrics.getMeter(name); +} + +function createBusinessMetrics() { + const meter = getMeter(); + const asyncMetrics: AsyncMetric[] = []; + + function createGauge(name: string) { + let value: any; + let attrs: Attributes | undefined; + const ob = meter.createObservableGauge(name); + asyncMetrics.push({ + ob, + get value() { + return value; + }, + get attrs() { + return attrs; + }, + }); + + return (newValue: any, newAttrs?: Attributes) => { + value = newValue; + attrs = newAttrs; + }; } - socketIOEventCounter = metricsCreator.counter('socket_io_counter', ['event']); - socketIOEventTimer = metricsCreator.timer('socket_io_timer', ['event']); - socketIOConnectionGauge = metricsCreator.gauge( - 'socket_io_connection_counter' + const metrics = { + socketIOConnectionGauge: createGauge('socket_io_connection'), + + gqlRequest: meter.createCounter('gql_request'), + gqlError: meter.createCounter('gql_error'), + gqlTimer: meter.createHistogram('gql_timer'), + + jwstCodecMerge: meter.createCounter('jwst_codec_merge'), + jwstCodecDidnotMatch: meter.createCounter('jwst_codec_didnot_match'), + jwstCodecFail: meter.createCounter('jwst_codec_fail'), + + authCounter: meter.createCounter('auth'), + authFailCounter: meter.createCounter('auth_fail'), + + docHistoryCounter: meter.createCounter('doc_history_created'), + docRecoverCounter: meter.createCounter('doc_history_recovered'), + }; + + meter.addBatchObservableCallback( + result => { + asyncMetrics.forEach(metric => { + result.observe(metric.ob, metric.value, metric.attrs); + }); + }, + asyncMetrics.map(({ ob }) => ob) ); - gqlRequest = metricsCreator.counter('gql_request', ['operation']); - gqlError = metricsCreator.counter('gql_error', ['operation']); - gqlTimer = metricsCreator.timer('gql_timer', ['operation']); - - jwstCodecMerge = metricsCreator.counter('jwst_codec_merge'); - jwstCodecDidnotMatch = metricsCreator.counter('jwst_codec_didnot_match'); - jwstCodecFail = metricsCreator.counter('jwst_codec_fail'); - - authCounter = metricsCreator.counter('auth'); - authFailCounter = metricsCreator.counter('auth_fail', ['reason']); - - docHistoryCounter = metricsCreator.counter('doc_history_created'); - docRecoverCounter = metricsCreator.counter('doc_history_recovered'); + return metrics; } + +export function registerBusinessMetrics() { + if (!_metrics) { + _metrics = createBusinessMetrics(); + } + + return _metrics; +} +export const metrics = registerBusinessMetrics; diff --git a/packages/backend/server/src/metrics/opentelemetry.ts b/packages/backend/server/src/metrics/opentelemetry.ts new file mode 100644 index 0000000000..77c6d55b3c --- /dev/null +++ b/packages/backend/server/src/metrics/opentelemetry.ts @@ -0,0 +1,127 @@ +import { MetricExporter } from '@google-cloud/opentelemetry-cloud-monitoring-exporter'; +import { TraceExporter } from '@google-cloud/opentelemetry-cloud-trace-exporter'; +import { + CompositePropagator, + W3CBaggagePropagator, + W3CTraceContextPropagator, +} from '@opentelemetry/core'; +import { PrometheusExporter } from '@opentelemetry/exporter-prometheus'; +import { ZipkinExporter } from '@opentelemetry/exporter-zipkin'; +import { HostMetrics } from '@opentelemetry/host-metrics'; +import { Instrumentation } from '@opentelemetry/instrumentation'; +import { GraphQLInstrumentation } from '@opentelemetry/instrumentation-graphql'; +import { HttpInstrumentation } from '@opentelemetry/instrumentation-http'; +import { IORedisInstrumentation } from '@opentelemetry/instrumentation-ioredis'; +import { NestInstrumentation } from '@opentelemetry/instrumentation-nestjs-core'; +import { SocketIoInstrumentation } from '@opentelemetry/instrumentation-socket.io'; +import { + ConsoleMetricExporter, + MetricReader, + PeriodicExportingMetricReader, +} from '@opentelemetry/sdk-metrics'; +import { NodeSDK } from '@opentelemetry/sdk-node'; +import { + BatchSpanProcessor, + ConsoleSpanExporter, + SpanExporter, +} from '@opentelemetry/sdk-trace-node'; +import { PrismaInstrumentation } from '@prisma/instrumentation'; + +import { registerBusinessMetrics } from './metrics'; + +abstract class OpentelemetryFactor { + abstract getMetricReader(): MetricReader; + abstract getSpanExporter(): SpanExporter; + + getInstractions(): Instrumentation[] { + return [ + new NestInstrumentation(), + new IORedisInstrumentation(), + new SocketIoInstrumentation({ traceReserved: true }), + new GraphQLInstrumentation({ mergeItems: true }), + new HttpInstrumentation(), + new PrismaInstrumentation(), + ]; + } + + create() { + const traceExporter = this.getSpanExporter(); + return new NodeSDK({ + traceExporter, + metricReader: this.getMetricReader(), + spanProcessor: new BatchSpanProcessor(traceExporter), + textMapPropagator: new CompositePropagator({ + propagators: [ + new W3CBaggagePropagator(), + new W3CTraceContextPropagator(), + ], + }), + instrumentations: this.getInstractions(), + serviceName: 'affine-cloud', + }); + } +} + +class GCloudOpentelemetryFactor extends OpentelemetryFactor { + override getMetricReader(): MetricReader { + return new PeriodicExportingMetricReader({ + exportIntervalMillis: 30000, + exportTimeoutMillis: 60000, + exporter: new MetricExporter(), + }); + } + + override getSpanExporter(): SpanExporter { + return new TraceExporter(); + } +} + +class LocalOpentelemetryFactor extends OpentelemetryFactor { + override getMetricReader(): MetricReader { + return new PrometheusExporter(); + } + + override getSpanExporter(): SpanExporter { + return new ZipkinExporter(); + } +} + +class DebugOpentelemetryFactor extends OpentelemetryFactor { + override getMetricReader(): MetricReader { + return new PeriodicExportingMetricReader({ + exporter: new ConsoleMetricExporter(), + }); + } + + override getSpanExporter(): SpanExporter { + return new ConsoleSpanExporter(); + } +} + +function createSDK() { + let factor: OpentelemetryFactor | null = null; + if (process.env.NODE_ENV === 'production') { + factor = new GCloudOpentelemetryFactor(); + } else if (process.env.DEBUG_METRICS) { + factor = new DebugOpentelemetryFactor(); + } else { + factor = new LocalOpentelemetryFactor(); + } + + return factor?.create(); +} + +function registerCustomMetrics() { + const host = new HostMetrics({ name: 'instance-host-metrics' }); + host.start(); +} + +export function start() { + const sdk = createSDK(); + + if (sdk) { + sdk.start(); + registerCustomMetrics(); + registerBusinessMetrics(); + } +} diff --git a/packages/backend/server/src/metrics/utils.ts b/packages/backend/server/src/metrics/utils.ts index a4ee1ab86a..1c4fa313ff 100644 --- a/packages/backend/server/src/metrics/utils.ts +++ b/packages/backend/server/src/metrics/utils.ts @@ -1,99 +1,11 @@ -import { Counter, Gauge, register, Summary } from 'prom-client'; +import { Attributes } from '@opentelemetry/api'; -function getOr(name: string, or: () => T): T { - return (register.getSingleMetric(name) as T) || or(); -} - -type LabelValues = Partial>; -type MetricsCreator = ( - value: number, - labels: LabelValues -) => void; -type TimerMetricsCreator = ( - labels: LabelValues -) => () => number; - -export const metricsCreatorGenerator = () => { - const counterCreator = ( - name: string, - labelNames?: T[] - ): MetricsCreator => { - const counter = getOr( - name, - () => - new Counter({ - name, - help: name, - ...(labelNames ? { labelNames } : {}), - }) - ); - - return (value: number, labels: LabelValues) => { - counter.inc(labels, value); - }; - }; - - const gaugeCreator = ( - name: string, - labelNames?: T[] - ): MetricsCreator => { - const gauge = getOr( - name, - () => - new Gauge({ - name, - help: name, - ...(labelNames ? { labelNames } : {}), - }) - ); - - return (value: number, labels: LabelValues) => { - gauge.set(labels, value); - }; - }; - - const timerCreator = ( - name: string, - labelNames?: T[] - ): TimerMetricsCreator => { - const summary = getOr( - name, - () => - new Summary({ - name, - help: name, - ...(labelNames ? { labelNames } : {}), - }) - ); - - return (labels: LabelValues) => { - const now = process.hrtime(); - - return () => { - const delta = process.hrtime(now); - const value = delta[0] + delta[1] / 1e9; - - summary.observe(labels, value); - return value; - }; - }; - }; - - return { - counter: counterCreator, - gauge: gaugeCreator, - timer: timerCreator, - }; -}; - -export const metricsCreator = metricsCreatorGenerator(); +import { getMeter } from './metrics'; export const CallTimer = ( name: string, - labels: Record = {} + attrs?: Attributes ): MethodDecorator => { - const timer = metricsCreator.timer(name, Object.keys(labels)); - // @ts-expect-error allow return ( _target, @@ -106,19 +18,27 @@ export const CallTimer = ( } desc.value = function (...args: any[]) { - const endTimer = timer(labels); + const timer = getMeter().createHistogram(name, { + description: `function call time costs of ${name}`, + }); + const start = Date.now(); + + const end = () => { + timer.record(Date.now() - start, attrs); + }; + let result: any; try { result = originalMethod.apply(this, args); } catch (e) { - endTimer(); + end(); throw e; } if (result instanceof Promise) { - return result.finally(endTimer); + return result.finally(end); } else { - endTimer(); + end(); return result; } }; @@ -129,10 +49,8 @@ export const CallTimer = ( export const CallCounter = ( name: string, - labels: Record = {} + attrs?: Attributes ): MethodDecorator => { - const count = metricsCreator.counter(name, Object.keys(labels)); - // @ts-expect-error allow return ( _target, @@ -145,7 +63,11 @@ export const CallCounter = ( } desc.value = function (...args: any[]) { - count(1, labels); + const count = getMeter().createCounter(name, { + description: `function call counter of ${name}`, + }); + + count.add(1, attrs); return originalMethod.apply(this, args); }; diff --git a/packages/backend/server/src/modules/auth/next-auth.controller.ts b/packages/backend/server/src/modules/auth/next-auth.controller.ts index 1df2205e5b..f8d8faa043 100644 --- a/packages/backend/server/src/modules/auth/next-auth.controller.ts +++ b/packages/backend/server/src/modules/auth/next-auth.controller.ts @@ -23,7 +23,7 @@ import type { AuthAction, CookieOption, NextAuthOptions } from 'next-auth'; import { AuthHandler } from 'next-auth/core'; import { Config } from '../../config'; -import { Metrics } from '../../metrics/metrics'; +import { metrics } from '../../metrics'; import { PrismaService } from '../../prisma/service'; import { SessionService } from '../../session'; import { AuthThrottlerGuard, Throttle } from '../../throttler'; @@ -46,7 +46,6 @@ export class NextAuthController { private readonly authService: AuthService, @Inject(NextAuthOptionsProvide) private readonly nextAuthOptions: NextAuthOptions, - private readonly metrics: Metrics, private readonly session: SessionService ) { // eslint-disable-next-line @typescript-eslint/no-non-null-assertion @@ -90,7 +89,7 @@ export class NextAuthController { res.redirect(`/signin${query}`); return; } - this.metrics.authCounter(1, {}); + metrics().authCounter.add(1); const [action, providerId] = req.url // start with request url .slice(BASE_URL.length) // make relative to baseUrl .replace(/\?.*/, '') // remove query part, use only path part @@ -127,7 +126,7 @@ export class NextAuthController { const options = this.nextAuthOptions; if (req.method === 'POST' && action === 'session') { if (typeof req.body !== 'object' || typeof req.body.data !== 'object') { - this.metrics.authFailCounter(1, { reason: 'invalid_session_data' }); + metrics().authFailCounter.add(1, { reason: 'invalid_session_data' }); throw new BadRequestException(`Invalid new session data`); } const user = await this.updateSession(req, req.body.data); @@ -210,7 +209,7 @@ export class NextAuthController { if (redirect?.endsWith('api/auth/error?error=AccessDenied')) { this.logger.log(`Early access redirect headers: ${req.headers}`); - this.metrics.authFailCounter(1, { + metrics().authFailCounter.add(1, { reason: 'no_early_access_permission', }); if ( diff --git a/packages/backend/server/src/modules/doc/history.ts b/packages/backend/server/src/modules/doc/history.ts index 61ae73e55d..ddb869d18e 100644 --- a/packages/backend/server/src/modules/doc/history.ts +++ b/packages/backend/server/src/modules/doc/history.ts @@ -6,7 +6,7 @@ import { Cron, CronExpression } from '@nestjs/schedule'; import type { Snapshot } from '@prisma/client'; import { Config } from '../../config'; -import { Metrics } from '../../metrics'; +import { metrics } from '../../metrics'; import { PrismaService } from '../../prisma'; import { SubscriptionStatus } from '../payment/service'; import { Permission } from '../workspaces/types'; @@ -16,8 +16,7 @@ export class DocHistoryManager { private readonly logger = new Logger(DocHistoryManager.name); constructor( private readonly config: Config, - private readonly db: PrismaService, - private readonly metrics: Metrics + private readonly db: PrismaService ) {} @OnEvent('doc:manager:snapshot:beforeUpdate') @@ -69,7 +68,7 @@ export class DocHistoryManager { // safe to ignore // only happens when duplicated history record created in multi processes }); - this.metrics.docHistoryCounter(1, {}); + metrics().docHistoryCounter.add(1, {}); this.logger.log( `History created for ${snapshot.id} in workspace ${snapshot.workspaceId}.` ); @@ -183,7 +182,7 @@ export class DocHistoryManager { // which is not the solution in CRDT. // let user revert in client and update the data in sync system // `await this.db.snapshot.update();` - this.metrics.docRecoverCounter(1, {}); + metrics().docRecoverCounter.add(1, {}); return history.timestamp; } diff --git a/packages/backend/server/src/modules/doc/manager.ts b/packages/backend/server/src/modules/doc/manager.ts index 154546da33..e531f6057e 100644 --- a/packages/backend/server/src/modules/doc/manager.ts +++ b/packages/backend/server/src/modules/doc/manager.ts @@ -19,7 +19,7 @@ import { import { Cache } from '../../cache'; import { Config } from '../../config'; -import { Metrics } from '../../metrics/metrics'; +import { metrics } from '../../metrics/metrics'; import { PrismaService } from '../../prisma'; import { mergeUpdatesInApplyWay as jwstMergeUpdates } from '../../storage'; @@ -70,7 +70,6 @@ export class DocManager implements OnModuleInit, OnModuleDestroy { private readonly automation: boolean, private readonly db: PrismaService, private readonly config: Config, - private readonly metrics: Metrics, private readonly cache: Cache, private readonly event: EventEmitter2 ) {} @@ -126,13 +125,13 @@ export class DocManager implements OnModuleInit, OnModuleDestroy { this.config.doc.manager.experimentalMergeWithJwstCodec && updates.length < 100 /* avoid overloading */ ) { - this.metrics.jwstCodecMerge(1, {}); + metrics().jwstCodecMerge.add(1); const yjsResult = Buffer.from(encodeStateAsUpdate(doc)); let log = false; try { const jwstResult = jwstMergeUpdates(updates); if (!compare(yjsResult, jwstResult)) { - this.metrics.jwstCodecDidnotMatch(1, {}); + metrics().jwstCodecDidnotMatch.add(1); this.logger.warn( `jwst codec result doesn't match yjs codec result for: ${guid}` ); @@ -143,7 +142,7 @@ export class DocManager implements OnModuleInit, OnModuleDestroy { } } } catch (e) { - this.metrics.jwstCodecFail(1, {}); + metrics().jwstCodecFail.add(1); this.logger.warn(`jwst apply update failed for ${guid}: ${e}`); log = true; } finally { diff --git a/packages/backend/server/src/modules/sync/events/events.gateway.ts b/packages/backend/server/src/modules/sync/events/events.gateway.ts index 6471c91afe..2921411860 100644 --- a/packages/backend/server/src/modules/sync/events/events.gateway.ts +++ b/packages/backend/server/src/modules/sync/events/events.gateway.ts @@ -11,8 +11,8 @@ import { import { Server, Socket } from 'socket.io'; import { encodeStateAsUpdate, encodeStateVector } from 'yjs'; -import { Metrics } from '../../../metrics/metrics'; -import { CallCounter, CallTimer } from '../../../metrics/utils'; +import { metrics } from '../../../metrics'; +import { CallTimer } from '../../../metrics/utils'; import { DocID } from '../../../utils/doc'; import { Auth, CurrentUser } from '../../auth'; import { DocManager } from '../../doc'; @@ -68,8 +68,7 @@ export const GatewayErrorWrapper = (): MethodDecorator => { const SubscribeMessage = (event: string) => applyDecorators( GatewayErrorWrapper(), - CallCounter('socket_io_counter', { event }), - CallTimer('socket_io_timer', { event }), + CallTimer('socket_io_event_duration', { event }), RawSubscribeMessage(event) ); @@ -97,7 +96,6 @@ export class EventsGateway implements OnGatewayConnection, OnGatewayDisconnect { constructor( private readonly docManager: DocManager, - private readonly metric: Metrics, private readonly permissions: PermissionService ) {} @@ -106,12 +104,12 @@ export class EventsGateway implements OnGatewayConnection, OnGatewayDisconnect { handleConnection() { this.connectionCount++; - this.metric.socketIOConnectionGauge(this.connectionCount, {}); + metrics().socketIOConnectionGauge(this.connectionCount); } handleDisconnect() { this.connectionCount--; - this.metric.socketIOConnectionGauge(this.connectionCount, {}); + metrics().socketIOConnectionGauge(this.connectionCount); } @Auth() diff --git a/packages/backend/server/src/modules/workspaces/controller.ts b/packages/backend/server/src/modules/workspaces/controller.ts index 6af8867d5a..2d20272f94 100644 --- a/packages/backend/server/src/modules/workspaces/controller.ts +++ b/packages/backend/server/src/modules/workspaces/controller.ts @@ -4,14 +4,13 @@ import { ForbiddenException, Get, Inject, - Logger, NotFoundException, Param, Res, } from '@nestjs/common'; import type { Response } from 'express'; -import format from 'pretty-time'; +import { CallTimer } from '../../metrics'; import { PrismaService } from '../../prisma'; import { StorageProvide } from '../../storage'; import { DocID } from '../../utils/doc'; @@ -23,8 +22,6 @@ import { Permission } from './types'; @Controller('/api/workspaces') export class WorkspacesController { - private readonly logger = new Logger('WorkspacesController'); - constructor( @Inject(StorageProvide) private readonly storage: Storage, private readonly permission: PermissionService, @@ -37,6 +34,7 @@ export class WorkspacesController { // // NOTE: because graphql can't represent a File, so we have to use REST API to get blob @Get('/:id/blobs/:name') + @CallTimer('doc_controller', { method: 'get_blob' }) async blob( @Param('id') workspaceId: string, @Param('name') name: string, @@ -61,13 +59,13 @@ export class WorkspacesController { @Get('/:id/docs/:guid') @Auth() @Publicable() + @CallTimer('doc_controller', { method: 'get_doc' }) async doc( @CurrentUser() user: UserType | undefined, @Param('id') ws: string, @Param('guid') guid: string, @Res() res: Response ) { - const start = process.hrtime(); const docId = new DocID(guid, ws); if ( // if a user has the permission @@ -104,11 +102,11 @@ export class WorkspacesController { res.setHeader('content-type', 'application/octet-stream'); res.send(update); - this.logger.debug(`workspaces doc api: ${format(process.hrtime(start))}`); } @Get('/:id/docs/:guid/histories/:timestamp') @Auth() + @CallTimer('doc_controller', { method: 'get_history' }) async history( @CurrentUser() user: UserType, @Param('id') ws: string, diff --git a/packages/backend/server/tests/auth.spec.ts b/packages/backend/server/tests/auth.spec.ts index fa06c852a6..c60f46d8af 100644 --- a/packages/backend/server/tests/auth.spec.ts +++ b/packages/backend/server/tests/auth.spec.ts @@ -5,7 +5,6 @@ import test from 'ava'; import { ConfigModule } from '../src/config'; import { GqlModule } from '../src/graphql.module'; -import { MetricsModule } from '../src/metrics'; import { AuthModule } from '../src/modules/auth'; import { AuthResolver } from '../src/modules/auth/resolver'; import { AuthService } from '../src/modules/auth/service'; @@ -40,7 +39,6 @@ test.beforeEach(async () => { PrismaModule, GqlModule, AuthModule, - MetricsModule, RateLimiterModule, ], }).compile(); diff --git a/packages/backend/server/tests/doc.spec.ts b/packages/backend/server/tests/doc.spec.ts index 9a08909691..83147b96fe 100644 --- a/packages/backend/server/tests/doc.spec.ts +++ b/packages/backend/server/tests/doc.spec.ts @@ -10,7 +10,6 @@ import { Doc as YDoc, encodeStateAsUpdate } from 'yjs'; import { CacheModule } from '../src/cache'; import { Config, ConfigModule } from '../src/config'; -import { MetricsModule } from '../src/metrics'; import { DocManager, DocModule } from '../src/modules/doc'; import { PrismaModule, PrismaService } from '../src/prisma'; import { flushDB } from './utils'; @@ -19,7 +18,6 @@ const createModule = () => { return Test.createTestingModule({ imports: [ PrismaModule, - MetricsModule, CacheModule, EventEmitterModule.forRoot(), ConfigModule.forRoot(), diff --git a/packages/backend/server/tests/history.spec.ts b/packages/backend/server/tests/history.spec.ts index 5555206a3b..103a7d231a 100644 --- a/packages/backend/server/tests/history.spec.ts +++ b/packages/backend/server/tests/history.spec.ts @@ -6,7 +6,6 @@ import test from 'ava'; import * as Sinon from 'sinon'; import { ConfigModule } from '../src/config'; -import { MetricsModule } from '../src/metrics'; import { DocHistoryManager } from '../src/modules/doc'; import { PrismaModule, PrismaService } from '../src/prisma'; import { flushDB } from './utils'; @@ -20,12 +19,7 @@ let db: PrismaService; test.beforeEach(async () => { await flushDB(); m = await Test.createTestingModule({ - imports: [ - PrismaModule, - MetricsModule, - ScheduleModule.forRoot(), - ConfigModule.forRoot(), - ], + imports: [PrismaModule, ScheduleModule.forRoot(), ConfigModule.forRoot()], providers: [DocHistoryManager], }).compile(); diff --git a/packages/backend/server/tests/mailer.e2e.ts b/packages/backend/server/tests/mailer.e2e.ts index f069fcb21b..10b2193252 100644 --- a/packages/backend/server/tests/mailer.e2e.ts +++ b/packages/backend/server/tests/mailer.e2e.ts @@ -12,7 +12,6 @@ import ava, { type TestFn } from 'ava'; import { ConfigModule } from '../src/config'; import { GqlModule } from '../src/graphql.module'; -import { MetricsModule } from '../src/metrics'; import { AuthModule } from '../src/modules/auth'; import { AuthService } from '../src/modules/auth/service'; import { PrismaModule } from '../src/prisma'; @@ -44,7 +43,6 @@ test.beforeEach(async t => { PrismaModule, GqlModule, AuthModule, - MetricsModule, RateLimiterModule, ], }).compile(); diff --git a/packages/backend/server/tests/prometheus-metrics.spec.ts b/packages/backend/server/tests/prometheus-metrics.spec.ts deleted file mode 100644 index af0d6dd4a5..0000000000 --- a/packages/backend/server/tests/prometheus-metrics.spec.ts +++ /dev/null @@ -1,78 +0,0 @@ -import { Test, TestingModule } from '@nestjs/testing'; -import test from 'ava'; -import { register } from 'prom-client'; - -import { MetricsModule } from '../src/metrics'; -import { Metrics } from '../src/metrics/metrics'; -import { PrismaModule } from '../src/prisma'; - -let metrics: Metrics; -let module: TestingModule; - -test.beforeEach(async () => { - module = await Test.createTestingModule({ - imports: [MetricsModule, PrismaModule], - }).compile(); - - metrics = module.get(Metrics); -}); - -test.afterEach.always(async () => { - await module.close(); -}); - -test('should be able to increment counter', async t => { - metrics.socketIOEventCounter(1, { event: 'client-handshake' }); - const socketIOCounterMetric = register.getSingleMetric('socket_io_counter'); - t.truthy(socketIOCounterMetric); - - t.truthy( - JSON.stringify((await socketIOCounterMetric!.get()).values) === - '[{"value":1,"labels":{"event":"client-handshake"}}]' - ); - t.pass(); -}); - -test('should be able to timer', async t => { - let minimum: number; - { - const endTimer = metrics.socketIOEventTimer({ event: 'client-handshake' }); - const a = performance.now(); - await new Promise(resolve => setTimeout(resolve, 50)); - const b = performance.now(); - minimum = b - a; - endTimer(); - } - - let maximum: number; - { - const a = performance.now(); - const endTimer = metrics.socketIOEventTimer({ event: 'client-handshake' }); - await new Promise(resolve => setTimeout(resolve, 100)); - endTimer(); - const b = performance.now(); - maximum = b - a; - } - - const socketIOTimerMetric = register.getSingleMetric('socket_io_timer'); - t.truthy(socketIOTimerMetric); - - const observations = (await socketIOTimerMetric!.get()).values; - - for (const observation of observations) { - if ( - observation.labels.event === 'client-handshake' && - 'quantile' in observation.labels - ) { - t.truthy( - observation.value >= minimum / 1000, - 'observation.value should be greater than minimum' - ); - t.truthy( - observation.value <= maximum / 1000, - 'observation.value should be less than maximum' - ); - } - } - t.pass(); -}); diff --git a/yarn.lock b/yarn.lock index 24af8189b4..7ecfc155b2 100644 --- a/yarn.lock +++ b/yarn.lock @@ -726,6 +726,9 @@ __metadata: "@node-rs/jsonwebtoken": "npm:^0.2.3" "@opentelemetry/api": "npm:^1.7.0" "@opentelemetry/core": "npm:^1.18.1" + "@opentelemetry/exporter-prometheus": "npm:^0.45.1" + "@opentelemetry/exporter-zipkin": "npm:^1.18.1" + "@opentelemetry/host-metrics": "npm:^0.33.2" "@opentelemetry/instrumentation": "npm:^0.45.1" "@opentelemetry/instrumentation-graphql": "npm:^0.36.0" "@opentelemetry/instrumentation-http": "npm:^0.45.1" @@ -8739,6 +8742,19 @@ __metadata: languageName: node linkType: hard +"@opentelemetry/exporter-prometheus@npm:^0.45.1": + version: 0.45.1 + resolution: "@opentelemetry/exporter-prometheus@npm:0.45.1" + dependencies: + "@opentelemetry/core": "npm:1.18.1" + "@opentelemetry/resources": "npm:1.18.1" + "@opentelemetry/sdk-metrics": "npm:1.18.1" + peerDependencies: + "@opentelemetry/api": ^1.3.0 + checksum: 0d9999eca90b407de1e496e00bc9d991c5bb2ccf069c53a0de9054457170426efdce833e958c53273b22957dcbfa064a5559f8c9358114dfa48c873799d4fc27 + languageName: node + linkType: hard + "@opentelemetry/exporter-trace-otlp-grpc@npm:0.45.1": version: 0.45.1 resolution: "@opentelemetry/exporter-trace-otlp-grpc@npm:0.45.1" @@ -8786,7 +8802,7 @@ __metadata: languageName: node linkType: hard -"@opentelemetry/exporter-zipkin@npm:1.18.1": +"@opentelemetry/exporter-zipkin@npm:1.18.1, @opentelemetry/exporter-zipkin@npm:^1.18.1": version: 1.18.1 resolution: "@opentelemetry/exporter-zipkin@npm:1.18.1" dependencies: @@ -8800,6 +8816,18 @@ __metadata: languageName: node linkType: hard +"@opentelemetry/host-metrics@npm:^0.33.2": + version: 0.33.2 + resolution: "@opentelemetry/host-metrics@npm:0.33.2" + dependencies: + "@opentelemetry/sdk-metrics": "npm:^1.8.0" + systeminformation: "npm:^5.0.0" + peerDependencies: + "@opentelemetry/api": ^1.3.0 + checksum: 35140ecb2c4e97ab49873411a9e81a23238cf392112212f936f238091a6572c0802d6364ccea428cb7f0595cb28f4cbafbe6fa5c23510ebb1243cf7f398871e5 + languageName: node + linkType: hard + "@opentelemetry/instrumentation-graphql@npm:^0.36.0": version: 0.36.0 resolution: "@opentelemetry/instrumentation-graphql@npm:0.36.0" @@ -9013,7 +9041,7 @@ __metadata: languageName: node linkType: hard -"@opentelemetry/sdk-metrics@npm:1.18.1, @opentelemetry/sdk-metrics@npm:^1.18.1": +"@opentelemetry/sdk-metrics@npm:1.18.1, @opentelemetry/sdk-metrics@npm:^1.18.1, @opentelemetry/sdk-metrics@npm:^1.8.0": version: 1.18.1 resolution: "@opentelemetry/sdk-metrics@npm:1.18.1" dependencies: @@ -33454,6 +33482,16 @@ __metadata: languageName: node linkType: hard +"systeminformation@npm:^5.0.0": + version: 5.21.17 + resolution: "systeminformation@npm:5.21.17" + bin: + systeminformation: lib/cli.js + checksum: bc910730829f1b232a1ca824bb35dfb211489430a1a612fe9e0d27fd3cb73c0793a00cbff7512c782cac2f2dfd2416a7e698b410f6dd5b4f66cbf5ae458c9cab + conditions: (os=darwin | os=linux | os=win32 | os=freebsd | os=openbsd | os=netbsd | os=sunos | os=android) + languageName: node + linkType: hard + "tabbable@npm:^6.0.1": version: 6.2.0 resolution: "tabbable@npm:6.2.0"