docs: restructure observability, security & caching sections
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/9554
Co-authored-by: Rikin Kachhia <54616969+rikinsk@users.noreply.github.com>
GitOrigin-RevId: d14c65920049a0cc39ddc9488af34fc3d6b67467
Parent: 3b16bcb3ab
Commit: b53f1d7406
@ -57,7 +57,7 @@ X-Hasura-Role: admin
|
||||
| analyze_query_variables | false | boolean | Enables logging of the values of the query variables provided for each request. Default is `false`. |
|
||||
|
||||
Please see the corresponding
|
||||
[feature documentation for the usage of these configurations](/observability/operations.mdx#capture-query-variables).
|
||||
[feature documentation for the usage of these configurations](/observability/cloud-monitoring/operations.mdx#capture-query-variables).
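For illustration, enabling query-variable analysis might look like the call below; this is a minimal sketch that assumes the companion `set_metrics_config` operation documented on this page, and the project URL and admin secret are placeholders.

```bash
# A sketch: turn on query-variable analysis via the metadata API
# (project URL and admin secret are placeholders)
curl 'https://my-project.hasura.app/v1/metadata' \
  -H 'Content-Type: application/json' \
  -H 'X-Hasura-Role: admin' \
  -H 'x-hasura-admin-secret: <admin-secret>' \
  -d '{
    "type": "set_metrics_config",
    "args": {
      "analyze_query_variables": true
    }
  }'
```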
|
||||
|
||||
## remove_metrics_config {#metadata-remove-metrics-config}
|
||||
|
||||
@ -77,7 +77,7 @@ X-Hasura-Role: admin
|
||||
|
||||
<ProductBadge self />
|
||||
|
||||
The OpenTelemetry configuration enables export of [distributed traces](/observability/tracing.mdx) to an
|
||||
The OpenTelemetry configuration enables export of [distributed traces](/observability/cloud-monitoring/tracing.mdx) to an
|
||||
[OpenTelemetry](https://opentelemetry.io/) compliant APM receiver.
|
||||
|
||||
## set_opentelemetry_config {#metadata-set-opentelemetry-config}
|
||||
|
@ -708,7 +708,7 @@ needs to have the public key.
|
||||
|
||||
By providing a set of JWT secrets to GraphQL Engine on Cloud or Enterprise versions, you can set it up to authenticate
|
||||
using various JWT issuers. This configuration option allows for greater flexibility in authentication. Read more here:
|
||||
[Multiple JWT Secrets](/security/multiple-jwt-secrets.mdx).
|
||||
[Multiple JWT Secrets](/auth/authentication/multiple-jwt-secrets.mdx).
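As a minimal sketch, two issuers could be configured at once through the plural JWT secrets environment variable (the issuer URLs and key below are illustrative; see the linked page for the exact specification):

```bash
# A sketch: two JWT issuers configured together (all values are illustrative)
HASURA_GRAPHQL_JWT_SECRETS='[
  {"jwk_url": "https://issuer-one.example.com/.well-known/jwks.json", "issuer": "https://issuer-one.example.com/"},
  {"type": "HS256", "key": "a-very-long-256-bit-shared-secret-value", "issuer": "https://issuer-two.example.com/"}
]'
```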
|
||||
|
||||
## Security considerations
|
||||
|
||||
|
@ -12,7 +12,7 @@ keywords:
|
||||
- multiple
|
||||
- admin
|
||||
- secrets
|
||||
sidebar_position: 4
|
||||
sidebar_position: 50
|
||||
sidebar_label: Multiple Admin Secrets
|
||||
sidebar_class_name: cloud-and-enterprise-icon
|
||||
---
|
@ -11,7 +11,7 @@ keywords:
|
||||
- multiple
|
||||
- JWT
|
||||
- secrets
|
||||
sidebar_position: 5
|
||||
sidebar_position: 60
|
||||
sidebar_label: Multiple JWT Secrets
|
||||
sidebar_class_name: cloud-and-enterprise-icon
|
||||
---
|
@ -95,4 +95,4 @@ In the above example, this configuration restricts the number of accessible rows
|
||||
`{"id":{"_eq":"X-Hasura-User-Id"}}`) to 1.
|
||||
|
||||
Setting row fetch limits is useful for preventing abuse of your API especially if it is exposed to the public. You
|
||||
can [also configure other limits](/deployment/best-practices/security.mdx#limit-the-api).
|
||||
can [also configure other limits](/security/security-best-practices.mdx#limit-the-api).
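For reference, a select permission that applies the filter above and caps the result set at one row could be created with a metadata call along these lines (a sketch; the source, table, role, and column names are illustrative):

```bash
# A sketch: a select permission filtering by the session user and limiting rows to 1
curl 'https://my-project.hasura.app/v1/metadata' \
  -H 'Content-Type: application/json' \
  -H 'x-hasura-admin-secret: <admin-secret>' \
  -d '{
    "type": "pg_create_select_permission",
    "args": {
      "source": "default",
      "table": "users",
      "role": "user",
      "permission": {
        "columns": ["id", "name"],
        "filter": {"id": {"_eq": "X-Hasura-User-Id"}},
        "limit": 1
      }
    }
  }'
```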
|
@ -23,7 +23,8 @@ Schemas.
|
||||
Cached responses are stored for a period of time in an LRU (least-recently used) cache, and removed from the cache as per
|
||||
a user-specified TTL (time-to-live) which defaults to 60 seconds.
|
||||
|
||||
For self-hosted Enterprise Edition, refer to the [enable caching](/enterprise/caching.mdx) documentation to configure
|
||||
For self-hosted Enterprise Edition, refer to the [enable caching](/caching/enterprise-caching.mdx) documentation
|
||||
to configure
|
||||
various parameters.
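For reference, a query opting into caching with a two-minute TTL looks roughly like this (the endpoint, secret, and fields are illustrative):

```bash
# A sketch: request a cached response with a 120-second TTL using the @cached directive
curl 'https://my-project.hasura.app/v1/graphql' \
  -H 'Content-Type: application/json' \
  -H 'x-hasura-admin-secret: <admin-secret>' \
  -d '{"query": "query ProductsCached @cached(ttl: 120) { products { id name } }"}'
```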
|
||||
|
||||
## Getting started
|
||||
|
@ -6,7 +6,8 @@ keywords:
|
||||
- metrics
|
||||
- prometheus
|
||||
- grafana
|
||||
sidebar_position: 5
|
||||
sidebar_position: 6
|
||||
sidebar_class_name: enterprise-icon
|
||||
---
|
||||
|
||||
import Thumbnail from '@site/src/components/Thumbnail';
|
||||
@ -24,14 +25,14 @@ This can help towards monitoring and further optimization of the cache utilizati
|
||||
|
||||
## Exposed metrics
|
||||
|
||||
The graphql engine exposes the `hasura_cache_request_count` Prometheus metric.
|
||||
It represents a `counter` and is incremented every time a request with `@cached` directive is served.
|
||||
The GraphQL Engine exposes the [hasura_cache_request_count](/observability/enterprise-edition/prometheus/metrics.mdx/#hasura-cache-request-count)
|
||||
Prometheus metric. It represents a `counter` and is incremented every time a request with the `@cached` directive is served.
|
||||
|
||||
|
||||
It has one label `status`, which can have values of either `hit` or `miss`.
|
||||
|
||||
| status | description |
|
||||
|-----------|-------------------|
|
||||
|--------|-----------------------------------------------------|
|
||||
| `hit` | request served from the cache |
|
||||
| `miss` | request served from the source (not found in cache) |
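Once the metric is scraped, a cache hit ratio can be derived from the `status` label. For example, the following sketch queries the hit ratio over the last five minutes through the standard Prometheus HTTP API (the server address is illustrative):

```bash
# A sketch: cache hit ratio over the last 5 minutes
curl -G 'http://localhost:9090/api/v1/query' \
  --data-urlencode 'query=sum(rate(hasura_cache_request_count{status="hit"}[5m])) / sum(rate(hasura_cache_request_count[5m]))'
```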
|
||||
|
||||
|
@ -1,8 +1,9 @@
|
||||
---
|
||||
sidebar_label: Enable caching
|
||||
sidebar_label: Enable caching in EE
|
||||
sidebar_position: 5
|
||||
description: Hasura Enterprise Edition caching
|
||||
description: Caching in Hasura Enterprise Edition
|
||||
title: 'Enterprise Edition: Enable GraphQL caching'
|
||||
sidebar_class_name: enterprise-icon
|
||||
keywords:
|
||||
- hasura
|
||||
- docs
|
||||
@ -15,7 +16,7 @@ keywords:
|
||||
import HeadingIcon from '@site/src/components/HeadingIcon';
|
||||
import ProductBadge from '@site/src/components/ProductBadge';
|
||||
|
||||
# Enable GraphQL Caching
|
||||
# Enable Caching in Hasura Enterprise Edition
|
||||
|
||||
<ProductBadge standard pro ee self />
|
||||
|
@ -10,7 +10,6 @@ keywords:
|
||||
- caching
|
||||
sidebar_position: 1
|
||||
hide_table_of_contents: true
|
||||
sidebar_class_name: cloud-and-enterprise-icon
|
||||
---
|
||||
|
||||
import HeadingIcon from '@site/src/components/HeadingIcon';
|
||||
@ -26,7 +25,7 @@ import Caching from '@site/static/icons/features/caching.svg';
|
||||
<div className="overview-text">
|
||||
<p>
|
||||
Hasura Cloud and Enterprise Editions provide a caching layer that can be used to cache the response of a GraphQL
|
||||
query. This can help reduce the number of requests to your datasources and improve the performance of your
|
||||
query. This can help reduce the number of requests to your data sources and improve the performance of your
|
||||
application.
|
||||
</p>
|
||||
<p>
|
||||
|
@ -41,6 +41,7 @@ Schema Registry is available on Hasura Cloud from `v2.26.0-cloud.1` and above.
|
||||
## How it works
|
||||
|
||||
Whenever there is any operation on the Hasura Engine that could change the GraphQL schema, Hasura sends an event to the
|
||||
|
||||
Schema Registry along with the GraphQL schemas for all defined roles. Operations which could change the GraphQL schema
|
||||
include:
|
||||
|
||||
@ -84,6 +85,7 @@ The changes between subsequent schemas are computed using the open source
|
||||
Breaking changes are typically the changes that could potentially break your GraphQL operations (queries, mutations or
|
||||
subscriptions) at the GraphQL operation validation layer.
|
||||
|
||||
|
||||
For example, if a field `name` is removed from a GraphQL object type `user`, that counts as a breaking change as it
|
||||
could potentially fail an existing GraphQL operation that queries the `name` field in the `user` type.
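As a concrete sketch (the type and field names are illustrative), an operation like the following would fail validation after such a change because it still selects `name`:

```bash
# A sketch: this operation breaks if the `name` field is removed from the `user` type
curl 'https://my-project.hasura.app/v1/graphql' \
  -H 'Content-Type: application/json' \
  -d '{"query": "query GetUsers { user { id name } }"}'
```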
|
||||
|
||||
|
@ -16,7 +16,7 @@ sidebar_position: 105
|
||||
Best practices benefit every organization across many different facets. This is
|
||||
particularly true of enterprise software, and Hasura is no different. The guides below are broken down by category.
|
||||
|
||||
- [Database Observability](/deployment/best-practices/db-observability.mdx)
|
||||
- [Metadata](/deployment/best-practices/metadata.mdx)
|
||||
- [Observability](/deployment/best-practices/observability.mdx)
|
||||
- [Security](/deployment/best-practices/security.mdx)
|
||||
- [Database Observability](/observability/db-observability.mdx)
|
||||
- [Metadata](/migrations-metadata-seeds/metadata-best-practices.mdx)
|
||||
- [Observability](/observability/observability-best-practices.mdx)
|
||||
- [Security](/security/security-best-practices.mdx)
|
||||
|
@ -113,7 +113,7 @@ When you use [webhook or JWT mode for authentication](/auth/authentication/index
|
||||
|
||||
### Admin Secrets
|
||||
|
||||
A list of valid admin [secret keys](/security/multiple-admin-secrets.mdx) any one of which can be used to access the
|
||||
A list of valid admin [secret keys](/auth/authentication/multiple-admin-secrets.mdx) any one of which can be used to access the
|
||||
Hasura instance.
|
||||
|
||||
| | |
|
||||
@ -367,7 +367,7 @@ Enable the Hasura Console (served by the server on `/` and `/console`).
|
||||
|
||||
### Enable High-cardinality Labels for Metrics
|
||||
|
||||
Enable high-cardinality labels for [Prometheus Metrics](/observability/prometheus/metrics.mdx). Enabling this setting
|
||||
Enable high-cardinality labels for [Prometheus Metrics](/observability/enterprise-edition/prometheus/metrics.mdx). Enabling this setting
|
||||
will add more labels to some of the metrics (e.g. `operation_name` label for Graphql subscription metrics).
|
||||
|
||||
| | |
|
||||
@ -381,7 +381,7 @@ will add more labels to some of the metrics (e.g. `operation_name` label for Gra
|
||||
|
||||
### Enable Log Compression
|
||||
|
||||
Enable sending compressed logs to [metrics server](/observability/prometheus/metrics.mdx).
|
||||
Enable sending compressed logs to [metrics server](/observability/enterprise-edition/prometheus/metrics.mdx).
|
||||
|
||||
| | |
|
||||
| ------------------- | ------------------------------------------ |
|
||||
@ -578,7 +578,7 @@ Having an [admin secret](#admin-secret-key) set is mandatory for setting this va
|
||||
|
||||
### JWT Secrets
|
||||
|
||||
List of [JWT secrets](/security/multiple-jwt-secrets.mdx) to authenticate with different JWT issuers.
|
||||
List of [JWT secrets](/auth/authentication/multiple-jwt-secrets.mdx) to authenticate with different JWT issuers.
|
||||
|
||||
| | |
|
||||
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
@ -624,7 +624,7 @@ This variable sets the level for [Hasura's logs](/deployment/logging.mdx#logging
|
||||
|
||||
### Max Cache Size
|
||||
|
||||
The [maximum cache size](/enterprise/caching.mdx), measured in MB, for queries.
|
||||
The [maximum cache size](/caching/enterprise-caching.mdx), measured in MB, for queries.
|
||||
|
||||
| | |
|
||||
| ------------------- | ------------------------------------------------------------------------------------------------- |
|
||||
@ -804,7 +804,7 @@ The maximum number of query plans that can be cached, where `0` disables the cac
|
||||
|
||||
### Rate-Limit Redis TLS Hostname
|
||||
|
||||
The hostname to use for SNI when connecting to a rate-limiting [Redis instance over TLS](/enterprise/caching.mdx).
|
||||
The hostname to use for SNI when connecting to a rate-limiting [Redis instance over TLS](/caching/enterprise-caching.mdx).
|
||||
|
||||
| | |
|
||||
| ------------------- | ---------------------------------------------- |
|
||||
@ -816,7 +816,7 @@ The hostname to use for SNI when connecting to a rate-limiting [Redis instance o
|
||||
|
||||
### Rate-Limit Redis URL
|
||||
|
||||
The [Redis URL](/enterprise/caching.mdx) to use for rate limiting.
|
||||
The [Redis URL](/caching/enterprise-caching.mdx) to use for rate limiting.
|
||||
|
||||
| | |
|
||||
| ------------------- | ---------------------------------------- |
|
||||
@ -829,7 +829,7 @@ The [Redis URL](/enterprise/caching.mdx) to use for rate limiting.
|
||||
|
||||
### Rate-Limit Redis Use TLS
|
||||
|
||||
Whether to use TLS to connect to a caching [Redis instance](/enterprise/caching.mdx).
|
||||
Whether to use TLS to connect to a caching [Redis instance](/caching/enterprise-caching.mdx).
|
||||
|
||||
| | |
|
||||
| ------------------- | -------------------------------------------- |
|
||||
@ -860,7 +860,7 @@ The [URL for a read replica](/databases/database-config/read-replicas.mdx#adding
|
||||
|
||||
### Redis TLS Hostname
|
||||
|
||||
The hostname to use for SNI when connecting to a [caching Redis instance over TLS](/enterprise/caching.mdx).
|
||||
The hostname to use for SNI when connecting to a [caching Redis instance over TLS](/caching/enterprise-caching.mdx).
|
||||
|
||||
| | |
|
||||
| ------------------- | ----------------------------------- |
|
||||
@ -873,7 +873,7 @@ The hostname to use for SNI when connecting to a [caching Redis instance over TL
|
||||
### Redis TLS Shared CA Store Path
|
||||
|
||||
The path to a shared CA store to use to connect to both (caching and rate-limiting)
|
||||
[Redis URLs over TLS](/enterprise/caching.mdx).
|
||||
[Redis URLs over TLS](/caching/enterprise-caching.mdx).
|
||||
|
||||
| | |
|
||||
| ------------------- | ------------------------------------------------------------ |
|
||||
@ -885,8 +885,8 @@ The path to a shared CA store to use to connect to both (caching and rate-limiti
|
||||
|
||||
### Redis URL
|
||||
|
||||
The Redis URL to use for [query caching](/enterprise/caching.mdx) and
|
||||
[Webhook Auth Caching](/auth/authentication/webhook.mdx#webhook-auth-caching).
|
||||
The Redis URL to use for [query caching](/caching/enterprise-caching.mdx) and [Webhook Auth
|
||||
Caching](/auth/authentication/webhook.mdx#webhook-auth-caching).
|
||||
|
||||
| | |
|
||||
| ------------------- | ---------------------------------------- |
|
||||
|
@ -74,7 +74,7 @@ The Enterprise Edition log-types that can be enabled/disabled are:
|
||||
| `api-limit-log` | Logs errors in [API limit](/security/api-limits.mdx) | `error` |
|
||||
| `livequery-poller-log` | Logs information for active subscriptions (poller-id, generated sql, polling time, subscriber count, subscription kind, etc.) | `info` |
|
||||
| `response-caching-log` | Logs response information and errors from [query caching](/caching/overview.mdx) | `info`, `error` and `debug` |
|
||||
| `tracing-log` | Logs information about [tracing spans](/observability/tracing.mdx) | `info` |
|
||||
| `tracing-log` | Logs information about [tracing spans](/observability/cloud-monitoring/tracing.mdx) | `info` |
|
||||
| `metrics` | Logs tenant metrics information | `info` |
|
||||
| `health-check-log` | Logs source Health Check events which includes health status of a data source | `info` and `warn` |
|
||||
|
||||
@ -924,4 +924,4 @@ The `subscription_options` field is an object with the following properties:
|
||||
You can integrate the logs emitted by the Hasura Engine with external monitoring tools for better visibility as per your
|
||||
convenience.
|
||||
|
||||
For some examples, see [Guides: Integrating with monitoring frameworks](/observability/integrations/index.mdx)
|
||||
For some examples, see [Guides: Integrating with monitoring frameworks](/observability/cloud/index.mdx)
|
||||
|
@ -141,7 +141,7 @@ Observability tools help us track issues, alert us to errors, and allow us to mo
|
||||
is critical in production. There are many open-source and commercial services. However, you may have to combine many
|
||||
tools because of the architectural complexity. For more information, check out our
|
||||
[observability section](/observability/overview.mdx) and our
|
||||
[observability best practices](/deployment/best-practices/observability.mdx).
|
||||
[observability best practices](/observability/observability-best-practices.mdx).
|
||||
|
||||
## Software architecture and best practices
|
||||
|
||||
|
@ -27,6 +27,6 @@ access to your GraphQL endpoint and the Hasura Console:
|
||||
If you're looking at adding access control rules for your data to your GraphQL API then head to
|
||||
[Authentication / access control](/auth/overview.mdx). You can also find more information about
|
||||
[Hasura security in general here](/security/overview.mdx) and best practices
|
||||
[here](/deployment/best-practices/security.mdx).
|
||||
[here](/security/security-best-practices.mdx).
|
||||
|
||||
:::
|
||||
|
@ -33,4 +33,4 @@ When you are ready to move Hasura to production, check out our
|
||||
continuous availability.
|
||||
- We recommend running Hasura with at least 4 CPU cores and a minimum of 8 GB RAM in production. Please configure autoscaling
|
||||
based on CPU usage.
|
||||
- [Enable and consume metrics](/observability/prometheus/index.mdx).
|
||||
- [Enable and consume metrics](/observability/enterprise-edition/prometheus/index.mdx).
|
||||
|
@ -97,7 +97,7 @@ import Enterprise from '@site/static/icons/features/enterprise.svg';
|
||||
</div>
|
||||
</VersionedLink>
|
||||
<h2 style={{ gridColumn: `1 / -1`, marginTop: `1.2rem`, marginBottom: `.3rem`, justifySelf: `start`, fontSize: `1.8rem` }}>Performance</h2>
|
||||
<VersionedLink to="/enterprise/caching/">
|
||||
<VersionedLink to="/caching/enterprise-caching/">
|
||||
<div className="card">
|
||||
<h3>Caching</h3>
|
||||
<p>Learn how to configure caching in Hasura Enterprise Edition to improve the performance of your GraphQL API.</p>
|
||||
@ -137,13 +137,13 @@ import Enterprise from '@site/static/icons/features/enterprise.svg';
|
||||
<p>Prevent unauthorized access to your GraphQL API by disabling GraphQL introspection in Hasura Enterprise.</p>
|
||||
</div>
|
||||
</VersionedLink>
|
||||
<VersionedLink to="/security/multiple-admin-secrets/">
|
||||
<VersionedLink to="/auth/authentication/multiple-admin-secrets/">
|
||||
<div className="card">
|
||||
<h3>Multiple Admin Secrets</h3>
|
||||
<p>Configure multiple admin secrets in Hasura Enterprise Edition.</p>
|
||||
</div>
|
||||
</VersionedLink>
|
||||
<VersionedLink to="/security/multiple-jwt-secrets/">
|
||||
<VersionedLink to="/auth/authentication/multiple-jwt-secrets/">
|
||||
<div className="card">
|
||||
<h3>Multiple JWT Secrets</h3>
|
||||
<p>Configure multiple JWT secrets in Hasura Enterprise Edition to support multiple authentication providers.</p>
|
||||
@ -165,13 +165,13 @@ import Enterprise from '@site/static/icons/features/enterprise.svg';
|
||||
</p>
|
||||
</div>
|
||||
</VersionedLink>
|
||||
<VersionedLink to="/observability/prometheus/index/">
|
||||
<VersionedLink to="/observability/enterprise-edition/prometheus/index/">
|
||||
<div className="card">
|
||||
<h3>Metrics via Prometheus</h3>
|
||||
<p>Learn how to configure Prometheus in Hasura Enterprise Edition to monitor your GraphQL API.</p>
|
||||
</div>
|
||||
</VersionedLink>
|
||||
<VersionedLink to="/enterprise/opentelemetry/">
|
||||
<VersionedLink to="/observability/enterprise-edition/opentelemetry/">
|
||||
<div className="card">
|
||||
<h3>Traces via OpenTelemetry</h3>
|
||||
<p>Learn how to configure OpenTelemetry in Hasura Enterprise Edition.</p>
|
||||
|
@ -50,145 +50,19 @@ events queue to the webhook.
|
||||
|
||||
<ProductBadge self />
|
||||
|
||||
Hasura exposes a set of [Prometheus metrics](/observability/prometheus/metrics.mdx) that can be used to monitor the
|
||||
Event Trigger system and help diagnose performance issues.
|
||||
Hasura EE exposes a set of [Prometheus metrics](/observability/enterprise-edition/prometheus/metrics.mdx/#hasura-event-triggers-metrics)
|
||||
that can be used to monitor the Event Trigger system and help diagnose performance issues.
|
||||
|
||||
### Event fetch time per batch
|
||||
The following metrics can be used to monitor the performance of the Hasura Event Triggers system:
|
||||
|
||||
Hasura fetches the events in batches (by default 100) from the Hasura Event tables in the database. This metric
|
||||
represents the time taken to fetch a batch of events from the database.
|
||||
|
||||
A higher value indicates slower polling of events from the database; you should consider looking into the performance
|
||||
of your database.
|
||||
|
||||
| | |
|
||||
| ------ | ------------------------------------------------------------------------------------------ |
|
||||
| Name | `hasura_event_fetch_time_per_batch_seconds` |
|
||||
| Type | Histogram<br /><br />Buckets: 0.0001, 0.0003, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10 |
|
||||
| Labels | none |
|
||||
|
||||
### Event invocations total
|
||||
|
||||
This metric represents the number of HTTP requests that have been made to the webhook server for delivering events.
|
||||
|
||||
| | |
|
||||
| ------ | ---------------------------------------------------------- |
|
||||
| Name | `hasura_event_invocations_total` |
|
||||
| Type | Counter |
|
||||
| Labels | `status`: success \| failed, `trigger_name`, `source_name` |
|
||||
|
||||
### Event processed total
|
||||
|
||||
Total number of events processed. Represents the Event Trigger egress.
|
||||
|
||||
| | |
|
||||
| ------ | ---------------------------------------------------------- |
|
||||
| Name | `hasura_event_processed_total` |
|
||||
| Type | Counter |
|
||||
| Labels | `status`: success \| failed, `trigger_name`, `source_name` |
|
||||
|
||||
### Event processing time
|
||||
|
||||
The time taken for an event to be processed.
|
||||
|
||||
| | |
|
||||
| ------ | --------------------------------------------------------------------- |
|
||||
| Name | `hasura_event_processing_time_seconds` |
|
||||
| Type | Histogram<br /><br />Buckets: 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100 |
|
||||
| Labels | `trigger_name`, `source_name` |
|
||||
|
||||
The processing of an event involves the following steps:
|
||||
|
||||
1. Hasura Engine fetching the event from Hasura Event tables in the database and adding it to the Hasura Events queue
|
||||
2. An HTTP worker picking up the event from the Hasura Events queue
|
||||
3. An HTTP worker delivering the event to the webhook
|
||||
|
||||
:::info Event delivery failure
|
||||
|
||||
Note: if the delivery of the event fails, it is retried based on its `next_retry_at`
|
||||
configuration.
|
||||
|
||||
:::
|
||||
|
||||
This metric represents the time taken for an event to be delivered since it was created (if the first attempt) or retried
|
||||
(after the first attempt). **This metric can be considered as the end-to-end processing time for an event.**
|
||||
|
||||
For example, say an event was created at `2021-01-01 10:00:30` and it has a `next_retry_at` configuration which says that if the
|
||||
event delivery fails, the event should be retried after 30 seconds.
|
||||
|
||||
At `2021-01-01 10:01:30`: the event was fetched from the Hasura Event tables, picked up by the HTTP worker, and the
|
||||
delivery was attempted. The delivery failed and the `next_retry_at` of `2021-01-01 10:02:00` was set for the event.
|
||||
|
||||
Now at `2021-01-01 10:02:00`: the event was fetched again from the Hasura Event tables, picked up by the HTTP worker,
|
||||
and the delivery was attempted at `2021-01-01 10:03:30`. This time, the delivery was successful.
|
||||
|
||||
The processing time for the second delivery try would be:
|
||||
|
||||
Processing Time = event delivery time - event next retried time
|
||||
|
||||
Processing Time = `2021-01-01 10:03:30` - `2021-01-01 10:02:00` = `90 seconds`
|
||||
|
||||
### Event queue time
|
||||
|
||||
Hasura fetches the events from the Hasura Event tables in the database and adds them to the Hasura Events queue. The event
|
||||
queue time represents the time taken for an event to be picked up by the HTTP worker after it has been added to the
|
||||
"Events Queue".
|
||||
|
||||
A higher value of this metric implies slow event processing. In this case, you can consider increasing the
|
||||
[HTTP pool size](/deployment/graphql-engine-flags/reference.mdx/#events-http-pool-size) or optimizing the webhook
|
||||
server.
|
||||
|
||||
| | |
|
||||
| ------ | --------------------------------------------------------------------- |
|
||||
| Name | `hasura_event_queue_time_seconds` |
|
||||
| Type | Histogram<br /><br />Buckets: 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100 |
|
||||
| Labels | `trigger_name`, `source_name` |
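As a sketch, the pool size is raised through its environment variable (the value shown is illustrative; see the flags reference linked above for the default):

```bash
# A sketch: increase the Event Trigger HTTP worker pool size
HASURA_GRAPHQL_EVENTS_HTTP_POOL_SIZE=200
```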
|
||||
|
||||
### Event Triggers HTTP Workers
|
||||
|
||||
Current number of active Event Trigger HTTP workers. Compare this number to the
|
||||
[HTTP pool size](/deployment/graphql-engine-flags/reference.mdx/#events-http-pool-size). Consider increasing it if the
|
||||
metric is near the current configured value.
|
||||
|
||||
| | |
|
||||
| ------ | ----------------------------------- |
|
||||
| Name | `hasura_event_trigger_http_workers` |
|
||||
| Type | Gauge |
|
||||
| Labels | none |
|
||||
|
||||
### Event webhook processing time
|
||||
|
||||
The time from when an HTTP worker picks up an event for delivery to when it sends the event payload to the webhook.
|
||||
|
||||
A higher processing time indicates a slow webhook; you should try to optimize the event webhook.
|
||||
|
||||
| | |
|
||||
| ------ | ------------------------------------------------------------ |
|
||||
| Name | `hasura_event_webhook_processing_time_seconds` |
|
||||
| Type | Histogram<br /><br />Buckets: 0.01, 0.03, 0.1, 0.3, 1, 3, 10 |
|
||||
| Labels | `trigger_name`, `source_name` |
|
||||
|
||||
### Events fetched per batch
|
||||
|
||||
Number of events fetched from the Hasura Event tables in the database per batch. This number should be equal to or less
|
||||
than the [events fetch batch size](/deployment/graphql-engine-flags/reference.mdx/#events-fetch-batch-size).
|
||||
|
||||
| | |
|
||||
| ------ | --------------------------------- |
|
||||
| Name | `hasura_events_fetched_per_batch` |
|
||||
| Type | Gauge |
|
||||
| Labels | none |
|
||||
|
||||
Since polling the database to continuously check if there are any pending events is an expensive operation, Hasura only
|
||||
polls the database if there are any pending events. This metric can be used to understand if there are any pending
|
||||
events in the Hasura Event Tables.
|
||||
|
||||
:::info Dependent on pending events
|
||||
|
||||
Note that Hasura only fetches events from the Hasura Event tables if there are any pending events. If there are no
|
||||
pending events, this metric will be 0.
|
||||
|
||||
:::
|
||||
- [`hasura_event_fetch_time_per_batch_seconds`](/observability/enterprise-edition/prometheus/metrics.mdx/#event-fetch-time-per-batch)
|
||||
- [`hasura_event_invocations_total`](/observability/enterprise-edition/prometheus/metrics.mdx/#event-invocations-total)
|
||||
- [`hasura_event_processed_total`](/observability/enterprise-edition/prometheus/metrics.mdx/#event-processed-total)
|
||||
- [`hasura_event_processing_time_seconds`](/observability/enterprise-edition/prometheus/metrics.mdx/#event-processing-time)
|
||||
- [`hasura_event_queue_time_seconds`](/observability/enterprise-edition/prometheus/metrics.mdx/#event-queue-time)
|
||||
- [`hasura_event_trigger_http_workers`](/observability/enterprise-edition/prometheus/metrics.mdx/#event-triggers-http-workers)
|
||||
- [`hasura_event_webhook_processing_time_seconds`](/observability/enterprise-edition/prometheus/metrics.mdx/#event-webhook-processing-time)
|
||||
- [`hasura_events_fetched_per_batch`](/observability/enterprise-edition/prometheus/metrics.mdx/#events-fetched-per-batch)
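As an illustration of how these metrics can be consumed, the following sketch queries the per-trigger rate of failed deliveries over the last five minutes through the Prometheus HTTP API (the server address is illustrative):

```bash
# A sketch: failed event-delivery rate per trigger over the last 5 minutes
curl -G 'http://localhost:9090/api/v1/query' \
  --data-urlencode 'query=sum by (trigger_name) (rate(hasura_event_processed_total{status="failed"}[5m]))'
```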
|
||||
|
||||
## Golden signals for Hasura Event Triggers
|
||||
|
@ -122,11 +122,11 @@ The following environment variables can be utilized to configure different value
|
||||
the CLI:
|
||||
|
||||
| Environment variable | Config file key | Description |
|
||||
| ------------------------------------------------ | --------------------------------- |-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
|--------------------------------------------------|-----------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `HASURA_GRAPHQL_VERSION` | `version` | Config version to be used. |
|
||||
| `HASURA_GRAPHQL_ENDPOINT` | `endpoint` | http(s) endpoint for Hasura GraphQL Engine. |
|
||||
| `HASURA_GRAPHQL_ADMIN_SECRET` | `admin_secret` | Admin secret for Hasura GraphQL Engine. |
|
||||
| `HASURA_GRAPHQL_ADMIN_SECRETS` | `admin_secrets` | [Admin secrets](/security/multiple-admin-secrets.mdx) for Hasura GraphQL Engine _(Cloud/Enterprise Edition only)_. eg: `HASURA_GRAPHQL_ADMIN_SECRETS='["foo", "bar", "baz"]'` |
|
||||
| `HASURA_GRAPHQL_ADMIN_SECRETS` | `admin_secrets` | [Admin secrets](/auth/authentication/multiple-admin-secrets.mdx) for Hasura GraphQL Engine _(Cloud/Enterprise Edition only)_. eg: `HASURA_GRAPHQL_ADMIN_SECRETS='["foo", "bar", "baz"]'` |
|
||||
| `HASURA_GRAPHQL_ACCESS_KEY` | `access_key` | Access key for Hasura GraphQL Engine. Note: Deprecated. Use admin secret instead. |
|
||||
| `HASURA_GRAPHQL_INSECURE_SKIP_TLS_VERIFY` | `insecure_skip_tls_verify` | Skip verifying SSL certificate for the Hasura endpoint. Useful if you have a self-signed certificate and don't have access to the CA cert. |
|
||||
| `HASURA_GRAPHQL_CERTIFICATE_AUTHORITY` | `certificate_authority` | Path to the CA certificate for validating the self-signed certificate for the Hasura endpoint. |
|
||||
|
@ -89,7 +89,7 @@ to your project now should be executed by the version control process. If you ha
|
||||
Console for any reason, please follow the `metadata export` command above and update your version control with the
|
||||
latest Metadata. Otherwise, you may lose the manual changes in your project the next time GitHub integration runs.
|
||||
|
||||
You can learn more in our [Metadata Best Practices guide](/deployment/best-practices/metadata.mdx).
|
||||
You can learn more in our [Metadata Best Practices guide](/migrations-metadata-seeds/metadata-best-practices.mdx). A sketch of the export command follows this note.
|
||||
|
||||
:::
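A minimal sketch of that export step with the CLI (the endpoint and admin secret are placeholders):

```bash
# A sketch: export the current Metadata so version control stays in sync
hasura metadata export \
  --endpoint 'https://my-project.hasura.app' \
  --admin-secret '<admin-secret>'
```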
|
||||
|
||||
@ -163,7 +163,7 @@ You need to ensure the your Cloud project has been appropriately set up to execu
|
||||
added to your Cloud project as well.
|
||||
- Ensure that you have connected the required database(s) with the right name and connection params as you have in the
|
||||
Metadata to the Cloud project.
|
||||
- Follow the steps in our [Metadata Best Practices](/deployment/best-practices/metadata.mdx) guide to ensure that your
|
||||
- Follow the steps in our [Metadata Best Practices](/migrations-metadata-seeds/metadata-best-practices.mdx) guide to ensure that your
|
||||
Metadata is in the right format and structure.
|
||||
|
||||
## Troubleshooting failures {#github-integration-troubleshooting}
|
||||
|
@ -108,7 +108,7 @@ import Cloud from '@site/static/icons/features/cloud.svg';
|
||||
<VersionedLink to="/hasura-cloud/plans/">API request duration (up to 60 seconds)</VersionedLink>
|
||||
</li>
|
||||
<li>
|
||||
<VersionedLink to="/observability/websockets/">Concurrent websocket connections (up to 10)</VersionedLink>
|
||||
<VersionedLink to="/observability/cloud-monitoring/websockets/">Concurrent websocket connections (up to 10)</VersionedLink>
|
||||
</li>
|
||||
<li>
|
||||
<VersionedLink to="/api-reference/restified/">RESTified endpoints</VersionedLink>
|
||||
@ -193,7 +193,7 @@ import Cloud from '@site/static/icons/features/cloud.svg';
|
||||
<VersionedLink to="/hasura-cloud/plans/">API request duration (up to 120 seconds)</VersionedLink>
|
||||
</li>
|
||||
<li>
|
||||
<VersionedLink to="/observability/websockets/">Concurrent websocket connections (up to 100)</VersionedLink>
|
||||
<VersionedLink to="/observability/cloud-monitoring/websockets/">Concurrent websocket connections (up to 100)</VersionedLink>
|
||||
</li>
|
||||
<li>
|
||||
<VersionedLink to="/caching/overview/">Query caching (up to 100 MB)</VersionedLink>
|
||||
@ -228,16 +228,16 @@ import Cloud from '@site/static/icons/features/cloud.svg';
|
||||
<VersionedLink to="/security/disable-graphql-introspection/">Disable GraphQL introspection</VersionedLink>
|
||||
</li>
|
||||
<li>
|
||||
<VersionedLink to="/security/multiple-jwt-secrets/">Multiple JWT secrets</VersionedLink>
|
||||
<VersionedLink to="/auth/authentication/multiple-jwt-secrets/">Multiple JWT secrets</VersionedLink>
|
||||
</li>
|
||||
<li>
|
||||
<VersionedLink to="/security/multiple-admin-secrets/">Multiple admin keys</VersionedLink>
|
||||
<VersionedLink to="/auth/authentication/multiple-admin-secrets/">Multiple admin keys</VersionedLink>
|
||||
</li>
|
||||
<li>
|
||||
<VersionedLink to="/observability/overview/">Metrics, logs, and traces</VersionedLink>
|
||||
</li>
|
||||
<li>
|
||||
<VersionedLink to="/observability/integrations/index/">Observability integration</VersionedLink>
|
||||
<VersionedLink to="/observability/cloud/index/">Observability integration</VersionedLink>
|
||||
</li>
|
||||
<li>
|
||||
<VersionedLink to="/api-reference/metadata-api/observability/#logs-and-metrics-configuration">
|
||||
@ -282,7 +282,7 @@ import Cloud from '@site/static/icons/features/cloud.svg';
|
||||
<VersionedLink to="/hasura-cloud/plans/">API request duration (no limit)</VersionedLink>
|
||||
</li>
|
||||
<li>
|
||||
<VersionedLink to="/observability/websockets/">Concurrent websocket connections (no limit)</VersionedLink>
|
||||
<VersionedLink to="/observability/cloud-monitoring/websockets/">Concurrent websocket connections (no limit)</VersionedLink>
|
||||
</li>
|
||||
<li>
|
||||
<VersionedLink to="/caching/overview/">Query caching (no limit)</VersionedLink>
|
||||
|
@ -4,7 +4,7 @@ keywords:
|
||||
- hasura
|
||||
- docs
|
||||
- best practices
|
||||
sidebar_label: Metadata
|
||||
sidebar_label: Metadata Best Practices
|
||||
---
|
||||
|
||||
# Metadata Best Practices
|
@ -1,5 +1,5 @@
|
||||
{
|
||||
"label": "Observability",
|
||||
"position": 63,
|
||||
"className": "cloud-icon"
|
||||
"className": "cloud-and-enterprise-icon"
|
||||
}
|
5
docs/docs/observability/cloud-monitoring/_category_.json
Normal file
@ -0,0 +1,5 @@
|
||||
{
|
||||
"label": "Built-in Monitoring",
|
||||
"position": 4,
|
||||
"className": "cloud-icon"
|
||||
}
|
@ -1,14 +1,16 @@
|
||||
---
|
||||
description: Hasura Observability is a set of tools that help you monitor and debug your GraphQL API
|
||||
title: How It Works
|
||||
title: Monitoring on Hasura Cloud
|
||||
keywords:
|
||||
- hasura
|
||||
- docs
|
||||
- observability
|
||||
sidebar_position: 2
|
||||
- monitoring
|
||||
sidebar_position: 1
|
||||
slug: index
|
||||
---
|
||||
|
||||
# Observability in Hasura Cloud
|
||||
# Built-in Monitoring in Hasura Cloud
|
||||
|
||||
Observability is a critical aspect of any application, and Hasura Cloud provides developers with a powerful set of tools
|
||||
to monitor and debug their applications. In this document, we'll explore the observability features available in Hasura
|
||||
@ -16,42 +18,42 @@ Cloud and how they can help you build better applications.
|
||||
|
||||
## Error Reporting
|
||||
|
||||
Hasura Cloud provides detailed [error reporting](/observability/errors.mdx) for GraphQL queries and mutations. Whenever
|
||||
Hasura Cloud provides detailed [error reporting](errors.mdx) for GraphQL queries and mutations. Whenever
|
||||
an error occurs, Hasura Cloud captures the error message, query, and other relevant information, allowing you to quickly
|
||||
identify and fix the issue. This feature is particularly useful when debugging complex GraphQL queries and mutations.
|
||||
|
||||
## Usage Summaries
|
||||
|
||||
Hasura Cloud provides [usage summaries](/observability/usage.mdx) for your GraphQL operations, allowing you to monitor
|
||||
Hasura Cloud provides [usage summaries](usage.mdx) for your GraphQL operations, allowing you to monitor
|
||||
the performance of your application and identify any performance bottlenecks. The usage summaries can be filtered by
|
||||
time range, operation type, and other parameters, making it easy to pinpoint performance issues.
|
||||
|
||||
## GraphQL Operations
|
||||
|
||||
Hasura Cloud provides detailed metrics for your [GraphQL operations](/observability/operations.mdx), including query
|
||||
Hasura Cloud provides detailed metrics for your [GraphQL operations](operations.mdx), including query
|
||||
latency, request count, and error rate. This information can be used to monitor the performance of your application and
|
||||
identify any issues that may be impacting your users.
|
||||
|
||||
## Websockets
|
||||
|
||||
Hasura Cloud supports [WebSockets](/observability/websockets.mdx), allowing you to build real-time applications that can
|
||||
Hasura Cloud supports [WebSockets](websockets.mdx), allowing you to build real-time applications that can
|
||||
push data updates to the client in real-time without having to continuously poll the server. Hasura Cloud provides
|
||||
detailed metrics for your WebSocket connections, including connection count, message count, and error rate.
|
||||
|
||||
## Subscription Workers
|
||||
|
||||
Hasura Cloud provides [subscription workers](/observability/subscription-workers.mdx) that can be used to process
|
||||
Hasura Cloud provides [subscription workers](subscription-workers.mdx) that can be used to process
|
||||
subscriptions and deliver real-time updates to your clients. The subscription workers are fully managed and can be
|
||||
scaled up or down based on your application's needs.
|
||||
|
||||
## Distributed Tracing
|
||||
|
||||
Hasura Cloud provides [distributed tracing](/observability/tracing.mdx) capabilities, allowing you to trace requests
|
||||
Hasura Cloud provides [distributed tracing](/observability/cloud-monitoring/tracing.mdx) capabilities, allowing you to trace requests
|
||||
across multiple services and identify any performance bottlenecks. The tracing information can be used to optimize your
|
||||
application's performance and ensure that it is running smoothly.
|
||||
|
||||
## Query Tags
|
||||
|
||||
Hasura Cloud provides [query tags](/observability/query-tags.mdx), which can be used to tag your GraphQL queries and
|
||||
Hasura Cloud provides [query tags](query-tags.mdx), which can be used to tag your GraphQL queries and
|
||||
mutations with metadata. This metadata can be used to filter and group your usage summaries and metrics, making it easy
|
||||
to identify trends and patterns in your application's usage.
|
5
docs/docs/observability/cloud/_category_.json
Normal file
@ -0,0 +1,5 @@
|
||||
{
|
||||
"label": "Hasura Cloud integrations",
|
||||
"position": 5,
|
||||
"className": "cloud-icon"
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
sidebar_position: 3
|
||||
sidebar_position: 6
|
||||
description: Azure monitor Integration on Hasura Cloud
|
||||
title: 'Cloud: Azure Monitor Integration'
|
||||
keywords:
|
||||
@ -17,7 +17,7 @@ import Thumbnail from '@site/src/components/Thumbnail';
|
||||
import HeadingIcon from '@site/src/components/HeadingIcon';
|
||||
import ProductBadge from '@site/src/components/ProductBadge';
|
||||
|
||||
# Azure Monitor Integration
|
||||
# Azure Monitor Integration on Hasura Cloud
|
||||
|
||||
<ProductBadge standard pro ee />
|
||||
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
sidebar_label: Datadog
|
||||
sidebar_position: 2
|
||||
sidebar_position: 4
|
||||
description: Datadog Integration on Hasura Cloud
|
||||
title: 'Cloud: Datadog Integration'
|
||||
keywords:
|
||||
@ -17,7 +17,7 @@ import Thumbnail from '@site/src/components/Thumbnail';
|
||||
import HeadingIcon from '@site/src/components/HeadingIcon';
|
||||
import ProductBadge from '@site/src/components/ProductBadge';
|
||||
|
||||
# Datadog Integration
|
||||
# Datadog Integration on Hasura Cloud
|
||||
|
||||
<ProductBadge standard pro ee />
|
||||
|
@ -1,8 +1,7 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
slug: index
|
||||
title: 'Cloud: Integrations with external services'
|
||||
description: Configure integrations with Hasura Cloud
|
||||
title: 'Cloud: Observability integrations with external services'
|
||||
description: Configure observability integrations with Hasura Cloud
|
||||
keywords:
|
||||
- hasura
|
||||
- docs
|
||||
@ -13,12 +12,14 @@ keywords:
|
||||
- observability
|
||||
- monitoring
|
||||
- monitoring framework
|
||||
sidebar_class_name: cloud-icon
|
||||
slug: index
|
||||
---
|
||||
|
||||
import HeadingIcon from '@site/src/components/HeadingIcon';
|
||||
import ProductBadge from '@site/src/components/ProductBadge';
|
||||
|
||||
# Integrations with External Services
|
||||
# Hasura Cloud observability integrations with external services
|
||||
|
||||
<ProductBadge standard pro ee />
|
||||
|
||||
@ -27,6 +28,17 @@ import ProductBadge from '@site/src/components/ProductBadge';
|
||||
To be able to effectively monitor, diagnose and troubleshoot your application stack in production, Hasura Cloud will
|
||||
export metrics, logs and traces to observability tools / APM vendors.
|
||||
|
||||
## Supported integrations
|
||||
|
||||
Check out the following guides on how to export telemetry data from Hasura Cloud to the observability tool of your
|
||||
choice:
|
||||
|
||||
- [Datadog](datadog.mdx)
|
||||
- [New Relic](newrelic.mdx)
|
||||
- [Azure monitor](azure-monitor.mdx)
|
||||
- [Prometheus](prometheus.mdx)
|
||||
- [OpenTelemetry](opentelemetry.mdx)
|
||||
|
||||
## Log types
|
||||
|
||||
Hasura Cloud combines various
|
||||
@ -128,7 +140,7 @@ Hasura Cloud APM integrations export the following metrics:
|
||||
## Traces
|
||||
|
||||
Hasura Cloud APM integrations export the same trace logs as Hasura GraphQL Engine. You can find more information about
|
||||
tracing [here](/observability/tracing.mdx).
|
||||
tracing [here](/observability/cloud-monitoring/tracing.mdx).
|
||||
|
||||
:::info Sampling
|
||||
|
||||
@ -213,14 +225,3 @@ Hasura samples all trace logs and exports only 5% to your configured APM provide
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
## Supported integrations
|
||||
|
||||
Check out the following guides on how to export telemetry data from Hasura Cloud to the observability tool of your
|
||||
choice:
|
||||
|
||||
- [Datadog](/observability/integrations/datadog.mdx)
|
||||
- [New Relic](/observability/integrations/newrelic.mdx)
|
||||
- [Azure monitor](/observability/integrations/azure-monitor.mdx)
|
||||
- [Prometheus](/observability/prometheus/cloud-integration.mdx)
|
||||
- [OpenTelemetry](/observability/integrations/opentelemetry.mdx)
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
sidebar_position: 2
|
||||
sidebar_position: 5
|
||||
description: New Relic Integration on Hasura Cloud
|
||||
title: 'Cloud: New Relic Integration'
|
||||
keywords:
|
||||
@ -18,7 +18,7 @@ import Thumbnail from '@site/src/components/Thumbnail';
|
||||
import HeadingIcon from '@site/src/components/HeadingIcon';
|
||||
import ProductBadge from '@site/src/components/ProductBadge';
|
||||
|
||||
# New Relic Integration
|
||||
# New Relic Integration on Hasura Cloud
|
||||
|
||||
<ProductBadge standard pro ee />
|
||||
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
sidebar_label: OpenTelemetry
|
||||
sidebar_position: 5
|
||||
sidebar_position: 3
|
||||
description: OpenTelemetry Integration on Hasura Cloud
|
||||
title: 'Cloud: OpenTelemetry Integration'
|
||||
keywords:
|
||||
@ -18,7 +18,7 @@ import Thumbnail from '@site/src/components/Thumbnail';
|
||||
import HeadingIcon from '@site/src/components/HeadingIcon';
|
||||
import ProductBadge from '@site/src/components/ProductBadge';
|
||||
|
||||
# OpenTelemetry Integration
|
||||
# Export Traces to OpenTelemetry Compliant Receiver from Hasura Cloud
|
||||
|
||||
<ProductBadge standard pro ee />
|
||||
|
||||
@ -29,7 +29,7 @@ receiver. This can be configured on the Integrations tab on the project's settin
|
||||
|
||||
:::info Note
|
||||
|
||||
For Hasura Cloud projects, the OpenTelemetry Integration is only available on the `Standard` (pay-as-you-go) tier and
|
||||
For Hasura Cloud projects, the OpenTelemetry Integration is only available on the `Professional` tier and
|
||||
above.
|
||||
|
||||
:::
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
sidebar_label: Integrate with Hasura Cloud
|
||||
sidebar_position: 3
|
||||
sidebar_label: Prometheus
|
||||
sidebar_position: 2
|
||||
description: Prometheus Integration on Hasura Cloud
|
||||
title: 'Cloud: Prometheus Integration'
|
||||
keywords:
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
title: 'Best Practices: Database observability'
|
||||
title: 'Database observability'
|
||||
description: Database observability best practices
|
||||
keywords:
|
||||
- hasura
|
||||
@ -8,6 +8,7 @@ keywords:
|
||||
- observability
|
||||
- database observability
|
||||
sidebar_label: Database observability
|
||||
sidebar_position: 6
|
||||
---
|
||||
|
||||
# Database Observability
|
||||
@ -45,8 +46,8 @@ tables.
|
||||
In this example, we will use Postgres and Datadog:
|
||||
|
||||
Log into a `psql` session as a user who has
|
||||
[CREATEROLE privileges](https://www.postgresql.org/docs/current/static/app-createuser.html), create a `datadog` user and
|
||||
password, and grant it read access to `pg_stat_database`.
|
||||
[CREATEROLE privileges](https://www.postgresql.org/docs/current/static/app-createuser.html), create a `datadog` user and
|
||||
password, and grant it read access to `pg_stat_database`.
|
||||
|
||||
```
|
||||
create user datadog with password '<PASSWORD>';
|
||||
@ -134,7 +135,7 @@ import Thumbnail from '@site/src/components/Thumbnail';
|
||||
/>
|
||||
|
||||
- [Sign up](https://www.datadoghq.com) for Datadog.
|
||||
- [Enable Datadog integration for your Hasura Project](/observability/integrations/datadog.mdx).
|
||||
- [Enable Datadog integration for your Hasura Project](/observability/cloud/datadog.mdx).
|
||||
- Set up Tags.
|
||||
- [Enable Hasura Integration for Datadog](https://app.datadoghq.com/integrations).
|
||||
- Create a New Dashboard and import the JSON file by following the instructions
|
@ -0,0 +1,5 @@
|
||||
{
|
||||
"label": "Hasura EE integrations",
|
||||
"position": 5,
|
||||
"className": "enterprise-icon"
|
||||
}
|
38
docs/docs/observability/enterprise-edition/index.mdx
Normal file
@ -0,0 +1,38 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
title: 'EE: Observability integrations with external services'
|
||||
description: Configure observability integrations with Hasura EE
|
||||
keywords:
|
||||
- hasura
|
||||
- docs
|
||||
- cloud
|
||||
- integrations
|
||||
- exporter
|
||||
- integration
|
||||
- observability
|
||||
- monitoring
|
||||
- monitoring framework
|
||||
sidebar_class_name: ee-icon
|
||||
slug: index
|
||||
---
|
||||
|
||||
import HeadingIcon from '@site/src/components/HeadingIcon';
|
||||
import ProductBadge from '@site/src/components/ProductBadge';
|
||||
|
||||
# Hasura EE observability integrations with external services
|
||||
|
||||
<ProductBadge standard pro ee />
|
||||
|
||||
## Overview
|
||||
|
||||
To be able to effectively monitor, diagnose and troubleshoot your application stack in production, Hasura Enterprise
|
||||
Edition will export metrics, logs and traces to observability tools / APM vendors.
|
||||
|
||||
## Supported integrations
|
||||
|
||||
Check out the following guides on how to export telemetry data from Hasura Enterprise Edition to the observability tool of your
|
||||
choice:
|
||||
|
||||
- [Prometheus](prometheus/index.mdx)
|
||||
- [OpenTelemetry](opentelemetry.mdx)
|
||||
|
@ -1,8 +1,8 @@
|
||||
---
|
||||
sidebar_label: Traces via OpenTelemetry
|
||||
sidebar_label: OpenTelemetry
|
||||
description: Traces via OpenTelemetry for Hasura Enterprise Edition
|
||||
title: 'Traces via OpenTelemetry'
|
||||
sidebar_class_name: beta-tag
|
||||
sidebar_class_name: beta-icon
|
||||
keywords:
|
||||
- hasura
|
||||
- docs
|
||||
@ -18,7 +18,7 @@ import TabItem from '@theme/TabItem';
|
||||
import Thumbnail from '@site/src/components/Thumbnail';
|
||||
import ProductBadge from '@site/src/components/ProductBadge';
|
||||
|
||||
# Export Traces to OpenTelemetry Compliant Receiver
|
||||
# Export Traces to OpenTelemetry Compliant Receiver from Hasura EE
|
||||
|
||||
<div className="badge-container">
|
||||
<ProductBadge self />
|
||||
@ -27,7 +27,7 @@ import ProductBadge from '@site/src/components/ProductBadge';
|
||||
|
||||
## Introduction
|
||||
|
||||
[Distributed traces](/observability/tracing.mdx) track and map journeys of user requests across various services or
|
||||
[Distributed traces](/observability/cloud-monitoring/tracing.mdx) track and map journeys of user requests across various services or
|
||||
components which can then be analyzed via observability tools.
|
||||
|
||||
Traces are typically used to diagnose or debug which part of your application could potentially be responsible for a
|
@ -0,0 +1,4 @@
|
||||
{
|
||||
"label": "Prometheus",
|
||||
"position": 2
|
||||
}
|
@ -1,7 +1,7 @@
|
||||
---
|
||||
slug: index
|
||||
title: 'EE: Integrations with Prometheus'
|
||||
description: Configure integrations with Hasura Enterprise Edition
|
||||
description: Prometheus Integration on Hasura EE
|
||||
keywords:
|
||||
- hasura
|
||||
- docs
|
||||
@ -19,20 +19,16 @@ keywords:
|
||||
import HeadingIcon from '@site/src/components/HeadingIcon';
|
||||
import ProductBadge from '@site/src/components/ProductBadge';
|
||||
|
||||
# Integrate Prometheus with Hasura Enterprise Edition
|
||||
|
||||
<ProductBadge self pro ee />
|
||||
# Prometheus Integration with Hasura EE
|
||||
|
||||
## Overview
|
||||
|
||||
In this section, you'll find information on how to integrate [Prometheus](https://prometheus.io/) with Hasura Enterprise
|
||||
Edition:
|
||||
In this section, you'll find information on how to integrate [Prometheus](https://prometheus.io/) with Hasura
|
||||
Enterprise Edition:
|
||||
|
||||
- [Available metrics](/observability/prometheus/metrics.mdx): Learn about metrics available to monitor the health,
|
||||
performance and reliability of the Hasura GraphQL Engine.
|
||||
- [Integrate with Hasura Cloud](/observability/prometheus/cloud-integration.mdx): Configure Prometheus integration with
|
||||
Hasura Enterprise Edition.
|
||||
- [Integrate Prometheus with Hasura EE and build a Grafana Dashboard](/observability/prometheus/grafana-dashboard.mdx):
|
||||
- [Integrate Prometheus with Hasura EE and build a Grafana Dashboard](integrate-prometheus-grafana.mdx):
|
||||
Configure Prometheus integration with Hasura Enterprise Edition.
|
||||
- [Pre-built dashboards](/observability/prometheus/pre-built-dashboards.mdx): Learn about pre-built dashboards available
|
||||
- [Pre-built dashboards](pre-built-dashboards.mdx): Learn about pre-built dashboards available
|
||||
for Hasura Enterprise Edition.
|
||||
- [Available metrics](metrics.mdx): Learn about metrics available to monitor the health,
|
||||
performance and reliability of the Hasura GraphQL Engine.
|
@ -1,7 +1,7 @@
|
||||
---
|
||||
sidebar_position: 2
|
||||
sidebar_label: Integrate with Hasura EE
|
||||
title: 'Integrate Prometheus with Hasura EE'
|
||||
sidebar_label: Integrate and build a Grafana Dashboard
|
||||
title: 'Integrate Prometheus with Hasura EE and build a Grafana Dashboard'
|
||||
description: Install Prometheus server and Grafana to create a basic observability dashboard for Hasura.
|
||||
keywords:
|
||||
- hasura
|
||||
@ -15,6 +15,7 @@ keywords:
|
||||
- grafana
|
||||
- monitoring
|
||||
- monitoring framework
|
||||
toc_max_heading_level: 4
|
||||
---
|
||||
|
||||
import HeadingIcon from '@site/src/components/HeadingIcon';
|
||||
@ -35,11 +36,47 @@ approaches depending on your use case:
|
||||
- **Containerized**: If you are running Prometheus and Grafana in a containerized environment, follow the
|
||||
[containerized installation](#containerized-installation) instructions.
|
||||
|
||||
## Self-hosted installation
|
||||
## Step 1: Enable metrics endpoint
|
||||
|
||||
### Install and configure Prometheus
|
||||
By default, the Prometheus metrics endpoint is disabled. To enable Prometheus metrics, configure the environment variable
|
||||
below:
|
||||
|
||||
#### Step 1. Set up the environment
|
||||
```bash
|
||||
HASURA_GRAPHQL_ENABLED_APIS=metadata,graphql,config,metrics
|
||||
```
|
||||
|
||||
Secure the Prometheus metrics endpoint with a secret:
|
||||
|
||||
```bash
|
||||
HASURA_GRAPHQL_METRICS_SECRET=<secret>
|
||||
```
|
||||
|
||||
You can then verify that the endpoint responds when the secret is supplied:

```bash
|
||||
curl 'http://127.0.0.1:8080/v1/metrics' -H 'Authorization: Bearer <secret>'
|
||||
```
|
||||
|
||||
:::info Configure a secret
|
||||
|
||||
The metrics endpoint should be configured with a secret to prevent misuse and should not be exposed over the internet.
|
||||
|
||||
:::
|
||||
|
||||
:::tip High-cardinality Labels
|
||||
|
||||
Starting in `v2.26.0`, Hasura GraphQL Engine exposes metrics with high-cardinality labels by default.
|
||||
|
||||
You can disable
|
||||
[high-cardinality labels for metrics](/deployment/graphql-engine-flags/reference.mdx#enable-high-cardinality-labels-for-metrics)
|
||||
if you are experiencing high memory usage, which can be due to a large number of labels in the metrics (typically more
|
||||
than 10000).
|
||||
|
||||
:::
|
||||
|
||||
## Option 1: Self-hosted installation
|
||||
|
||||
### Step 2: Install and configure Prometheus
|
||||
|
||||
#### Step 2.1: Set up the environment
|
||||
|
||||
You will need to create a Prometheus user and group, and a directory for Prometheus to store its data. You will also
|
||||
need to create a directory for Prometheus to store its configuration files.
|
||||
@ -54,7 +91,7 @@ sudo mkdir /var/lib/prometheus
|
||||
for i in rules rules.d files_sd; do sudo mkdir -p /etc/prometheus/${i}; done
|
||||
```
|
||||
|
||||
#### Step 2. Install Prometheus
|
||||
#### Step 2.2: Install Prometheus
|
||||
|
||||
The following set of commands will help you download and install Prometheus:
|
||||
|
||||
@ -75,7 +112,7 @@ You can check to see if Prometheus is installed correctly by running the followi
|
||||
prometheus --version
|
||||
```
|
||||
|
||||
#### Step 3. Connect Prometheus to Hasura
|
||||
#### Step 2.3: Connect Prometheus to Hasura
|
||||
|
||||
To connect Prometheus to Hasura, you will need to create a configuration file for Prometheus. The following commands
|
||||
will help you do this:
|
||||
@ -124,7 +161,7 @@ scrape_configs:
|
||||
- targets: ['hasura_deployment_url:8080']
|
||||
```
|
||||
|
||||
#### Step 4. Set firewall rules
|
||||
#### Step 2.4: Set firewall rules
|
||||
|
||||
If you are using a firewall, you will need to set the following rules:
|
||||
|
||||
@ -132,7 +169,7 @@ If you are using a firewall, you will need to set the following rules:
|
||||
sudo ufw allow 9090/tcp
|
||||
```
|
||||
|
||||
#### Step 5. Set up a password for Prometheus web access
|
||||
#### Step 2.5: Set up a password for Prometheus web access
|
||||
|
||||
To set up a password for Prometheus web access, you will need to create a hashed password. First, we'll create the YAML
|
||||
file which will store the password. Inside `/etc/prometheus/`, run the following:
|
||||
@ -177,7 +214,7 @@ To check yourself, use `promtool` to check the configuration file:
|
||||
promtool check web-config /etc/prometheus/web.yml
|
||||
```
|
||||
|
||||
#### Step 6. Restart Prometheus
|
||||
#### Step 2.6: Restart Prometheus
|
||||
|
||||
To restart Prometheus, run the following command:
|
||||
|
||||
@ -201,9 +238,9 @@ go_gc_duration_seconds{quantile="0.25"} 0
|
||||
# etc...
|
||||
```
|
||||
|
||||
### Install and configure Grafana
|
||||
### Step 3: Install and configure Grafana
|
||||
|
||||
#### Step 7. Install Grafana
|
||||
#### Step 3.1: Install Grafana
|
||||
|
||||
You can install Grafana by running the following commands:
|
||||
|
||||
@ -224,7 +261,7 @@ After logging in, you will be prompted to change the default password. Set your
|
||||
|
||||
:::
|
||||
|
||||
#### Step 8. Create a Prometheus data source
|
||||
#### Step 3.2: Create a Prometheus data source
|
||||
|
||||
In Grafana, from the settings icon on the sidebar, open the `Configuration` menu and select `Data Sources`. Then, click
|
||||
on `Add data source` and select `Prometheus` as the type.
|
||||
@ -238,7 +275,7 @@ everything is working correctly, you should see a green `Data source is working`
|
||||
width="1000px"
|
||||
/>
|
||||
|
||||
#### Step 9. Create a Prometheus graph
|
||||
#### Step 3.3: Create a Prometheus graph
|
||||
|
||||
Click the graph title and select `Edit`. Then, select the `Metrics` tab and select your Prometheus data source. Then,
|
||||
enter any Prometheus expression into the `Query` field while using the `Metric` field to look up metrics via autocomplete.
|
||||
@ -263,11 +300,11 @@ status labels of a returned query result, separated by a dash, you could use the
|
||||
|
||||
:::
|
||||
|
||||
## Containerized installation
|
||||
## Option 2: Containerized installation
|
||||
|
||||
### Install and configure Prometheus and Grafana
|
||||
### Step 2: Install and configure Prometheus and Grafana
|
||||
|
||||
#### Step 1. Prepare the Prometheus configuration file
|
||||
#### Step 2.1: Prepare the Prometheus configuration file
|
||||
|
||||
Create a file named `prometheus.yml` on your host with the following information:
|
||||
|
||||
@ -302,7 +339,7 @@ scrape_configs:
|
||||
- targets: ['ip_address_of_hasura_installation:8080']
|
||||
```
|
||||
|
||||
#### Step 2. Pull the Prometheus and Grafana Docker containers
|
||||
#### Step 2.2: Pull the Prometheus and Grafana Docker containers
|
||||
|
||||
For Prometheus, run the following command:
|
||||
|
||||
@ -316,9 +353,9 @@ Then, for Grafana, run the following:
|
||||
docker run -d -p 3000:3000 grafana/grafana-enterprise
|
||||
```
|
||||
|
||||
### Configure Grafana
|
||||
### Step 3: Configure Grafana
|
||||
|
||||
#### Step 3. Adding a Prometheus as a data source in Grafana
|
||||
#### Step 3.1: Add Prometheus as a data source in Grafana
|
||||
|
||||
In Grafana, from the settings icon on the sidebar, open the `Configuration` menu and select `Data Sources`. Then, click
|
||||
on `Add data source` and select `Prometheus` as the type.
|
||||
@ -332,7 +369,7 @@ everything is working correctly, you should see a green `Alerting supported` mes
|
||||
width="1000px"
|
||||
/>
|
||||
|
||||
#### Step 5. Add Hasura metrics to the dashboard
|
||||
#### Step 3.2: Add Hasura metrics to the dashboard
|
||||
|
||||
Click on the `Add Panel` icon in the top-right corner of the Grafana dashboard. Then, select `Add New Panel` or
|
||||
`Add New Row`.
|
@ -0,0 +1,412 @@
|
||||
---
|
||||
sidebar_label: Available Metrics
|
||||
description: Metrics via Prometheus for Hasura Enterprise Edition
|
||||
title: 'Enterprise Edition: Metrics via Prometheus'
|
||||
keywords:
|
||||
- hasura
|
||||
- docs
|
||||
- enterprise
|
||||
sidebar_position: 4
|
||||
toc_max_heading_level: 4
|
||||
---
|
||||
|
||||
import ProductBadge from '@site/src/components/ProductBadge';
|
||||
|
||||
# Metrics exported via Prometheus
|
||||
|
||||
<ProductBadge self />
|
||||
|
||||
## Metrics exported
|
||||
|
||||
The following metrics are exported by Hasura GraphQL Engine:
|
||||
|
||||
### GraphQL request metrics
|
||||
|
||||
#### Hasura GraphQL execution time seconds
|
||||
|
||||
Execution time of successful GraphQL requests (excluding subscriptions). If more requests are falling in the higher
|
||||
buckets, you should consider [tuning the performance](/deployment/performance-tuning.mdx).
|
||||
|
||||
| | |
|
||||
| ------ | ------------------------------------------------------------ |
|
||||
| Name | `hasura_graphql_execution_time_seconds` |
|
||||
| Type | Histogram<br /><br />Buckets: 0.01, 0.03, 0.1, 0.3, 1, 3, 10 |
|
||||
| Labels | `operation_type`: query \| mutation |
|
||||
|
||||
:::info GraphQL request execution time
|
||||
|
||||
- Uses wall-clock time, so it includes time spent waiting on I/O.
|
||||
- Includes authorization, parsing, validation, planning, and execution (calls to databases, Remote Schemas).
|
||||
|
||||
:::
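
For illustration, this histogram can back a latency alert. The following is a minimal Prometheus alerting-rule sketch (the rule name and the 1-second threshold are assumptions, not Hasura recommendations); it uses the standard `_bucket` series that Prometheus histograms expose:

```yaml
# hasura-graphql-latency.rules.yml -- illustrative sketch only
groups:
  - name: hasura-graphql-latency
    rules:
      - alert: HasuraGraphQLP95LatencyHigh # hypothetical alert name
        # p95 execution time per operation type over the last 5 minutes
        expr: |
          histogram_quantile(
            0.95,
            sum by (le, operation_type) (rate(hasura_graphql_execution_time_seconds_bucket[5m]))
          ) > 1
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "p95 GraphQL execution time above 1s for {{ $labels.operation_type }}"
```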
|
||||
|
||||
#### Hasura GraphQL requests total
|
||||
|
||||
Number of GraphQL requests received, representing the GraphQL query/mutation traffic on the server.
|
||||
|
||||
| | |
|
||||
| ------ | -------------------------------------------------------------- |
|
||||
| Name | `hasura_graphql_requests_total` |
|
||||
| Type | Counter |
|
||||
| Labels | `operation_type`: query \| mutation \| subscription \| unknown |
|
||||
|
||||
The `unknown` operation type will be returned for queries that fail authorization, parsing, or certain validations. The
|
||||
`response_status` label will be `success` for successful requests and `failed` for failed requests.
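
As a sketch, the failure ratio can be tracked with a Prometheus recording rule like the one below (the rule name is hypothetical; the `response_status` label is the one described above):

```yaml
# Illustrative recording rule: fraction of failed GraphQL requests over 5 minutes.
groups:
  - name: hasura-graphql-errors
    rules:
      - record: hasura:graphql_error_ratio:5m # hypothetical rule name
        expr: |
          sum(rate(hasura_graphql_requests_total{response_status="failed"}[5m]))
            /
          sum(rate(hasura_graphql_requests_total[5m]))
```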
|
||||
|
||||
|
||||
### Hasura Event Triggers metrics
|
||||
|
||||
See more details on Event trigger observability [here](/event-triggers/observability-and-performance.mdx).
|
||||
|
||||
#### Event fetch time per batch
|
||||
|
||||
Hasura fetches the events in batches (by default 100) from the Hasura Event tables in the database. This metric
|
||||
represents the time taken to fetch a batch of events from the database.
|
||||
|
||||
A higher value of this metric indicates slower polling of events from the database; you should consider looking into the
performance of your database.
|
||||
|
||||
| | |
|
||||
| ------ | ------------------------------------------------------------------------------------------ |
|
||||
| Name | `hasura_event_fetch_time_per_batch_seconds` |
|
||||
| Type | Histogram<br /><br />Buckets: 0.0001, 0.0003, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10 |
|
||||
| Labels | none |
|
||||
|
||||
#### Event invocations total
|
||||
|
||||
This metric represents the number of HTTP requests that have been made to the webhook server for delivering events.
|
||||
|
||||
| | |
|
||||
| ------ | ---------------------------------------------------------- |
|
||||
| Name | `hasura_event_invocations_total` |
|
||||
| Type | Counter |
|
||||
| Labels | `status`: success \| failed, `trigger_name`, `source_name` |
|
||||
|
||||
#### Event processed total
|
||||
|
||||
Total number of events processed. Represents the Event Trigger egress.
|
||||
|
||||
| | |
|
||||
| ------ | ---------------------------------------------------------- |
|
||||
| Name | `hasura_event_processed_total` |
|
||||
| Type | Counter |
|
||||
| Labels | `status`: success \| failed, `trigger_name`, `source_name` |
|
||||
|
||||
#### Event processing time
|
||||
|
||||
Time taken for an event to be processed.
|
||||
|
||||
| | |
|
||||
| ------ | --------------------------------------------------------------------- |
|
||||
| Name | `hasura_event_processing_time_seconds` |
|
||||
| Type | Histogram<br /><br />Buckets: 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100 |
|
||||
| Labels | `trigger_name`, `source_name` |
|
||||
|
||||
The processing of an event involves the following steps:
|
||||
|
||||
1. Hasura Engine fetching the event from Hasura Event tables in the database and adding it to the Hasura Events queue
|
||||
2. An HTTP worker picking up the event from the Hasura Events queue
|
||||
3. An HTTP worker delivering the event to the webhook
|
||||
|
||||
:::info Event delivery failure
|
||||
|
||||
Note: if the delivery of an event fails, the delivery is retried based on its `next_retry_at` configuration.
|
||||
|
||||
:::
|
||||
|
||||
This metric represents the time taken for an event to be delivered since it was created (for the first attempt) or since it
was scheduled for retry (for subsequent attempts). **This metric can be considered the end-to-end processing time for an event.**
|
||||
|
||||
For example, say an event was created at `2021-01-01 10:00:30` and it has a `next_retry_at` configuration which says if the
|
||||
event delivery fails, the event should be retried after 30 seconds.
|
||||
|
||||
At `2021-01-01 10:01:30`: the event was fetched from the Hasura Event tables, picked up by the HTTP worker, and the
|
||||
delivery was attempted. The delivery failed and the `next_retry_at` of `2021-01-01 10:02:00` was set for the event.
|
||||
|
||||
Now at `2021-01-01 10:02:00`: the event was fetched again from the Hasura Event tables, picked up by the HTTP worker,
|
||||
and the delivery was attempted at `2021-01-01 10:03:30`. This time, the delivery was successful.
|
||||
|
||||
The processing time for the second delivery attempt would be:
|
||||
|
||||
Processing Time = event delivery time - event's scheduled retry time
|
||||
|
||||
Processing Time = `2021-01-01 10:03:30` - `2021-01-01 10:02:00` = `90 seconds`
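
As an illustration, a per-trigger p99 of this histogram could be computed with a Prometheus recording rule similar to the following sketch (the rule name is hypothetical):

```yaml
# Illustrative: p99 end-to-end event processing time per trigger over 5 minutes.
groups:
  - name: hasura-event-triggers
    rules:
      - record: hasura:event_processing_time_seconds:p99 # hypothetical rule name
        expr: |
          histogram_quantile(
            0.99,
            sum by (le, trigger_name, source_name) (rate(hasura_event_processing_time_seconds_bucket[5m]))
          )
```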
|
||||
|
||||
#### Event queue time
|
||||
|
||||
Hasura fetches the events from the Hasura Event tables in the database and adds them to the Hasura Events queue. The event
|
||||
queue time represents the time taken for an event to be picked up by the HTTP worker after it has been added to the
|
||||
"Events Queue".
|
||||
|
||||
A higher value of this metric implies slow event processing. In this case, you can consider increasing the
|
||||
[HTTP pool size](/deployment/graphql-engine-flags/reference.mdx/#events-http-pool-size) or optimizing the webhook
|
||||
server.
|
||||
|
||||
| | |
|
||||
| ------ | --------------------------------------------------------------------- |
|
||||
| Name | `hasura_event_queue_time_seconds` |
|
||||
| Type | Histogram<br /><br />Buckets: 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100 |
|
||||
| Labels | `trigger_name`, `source_name` |
|
||||
|
||||
#### Event Triggers HTTP Workers
|
||||
|
||||
Current number of active Event Trigger HTTP workers. Compare this number to the
|
||||
[HTTP pool size](/deployment/graphql-engine-flags/reference.mdx/#events-http-pool-size). Consider increasing it if the
|
||||
metric is near the current configured value.
|
||||
|
||||
| | |
|
||||
| ------ | ----------------------------------- |
|
||||
| Name | `hasura_event_trigger_http_workers` |
|
||||
| Type | Gauge |
|
||||
| Labels | none |
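
For example, you could alert when the number of active workers approaches the pool size. The sketch below assumes a pool size of 100 (the usual default); adjust the threshold to your configured value:

```yaml
# Illustrative alert: event HTTP workers close to an assumed pool size of 100.
groups:
  - name: hasura-event-workers
    rules:
      - alert: HasuraEventHTTPWorkersSaturated # hypothetical alert name
        expr: hasura_event_trigger_http_workers >= 90
        for: 5m
        labels:
          severity: warning
```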
|
||||
|
||||
#### Event webhook processing time
|
||||
|
||||
The time from when an HTTP worker picks up an event for delivery to when it sends the event payload to the webhook.
|
||||
|
||||
A higher processing time indicates a slow webhook; you should try to optimize the event webhook.
|
||||
|
||||
| | |
|
||||
| ------ | ------------------------------------------------------------ |
|
||||
| Name | `hasura_event_webhook_processing_time_seconds` |
|
||||
| Type | Histogram<br /><br />Buckets: 0.01, 0.03, 0.1, 0.3, 1, 3, 10 |
|
||||
| Labels | `trigger_name`, `source_name` |
|
||||
|
||||
#### Events fetched per batch
|
||||
|
||||
Number of events fetched from the Hasura Event tables in the database per batch. This number should be equal to or less
|
||||
than the [events fetch batch size](/deployment/graphql-engine-flags/reference.mdx/#events-fetch-batch-size).
|
||||
|
||||
| | |
|
||||
| ------ | --------------------------------- |
|
||||
| Name | `hasura_events_fetched_per_batch` |
|
||||
| Type | Gauge |
|
||||
| Labels | none |
|
||||
|
||||
Since continuously polling the database to check for pending events is an expensive operation, Hasura only polls the
database when there are pending events. This metric can be used to understand whether there are any pending events in the
Hasura Event tables.
|
||||
|
||||
:::info Dependent on pending events
|
||||
|
||||
Note that Hasura only fetches events from the Hasura Event tables if there are any pending events. If there are no
|
||||
pending events, this metric will be 0.
|
||||
|
||||
:::
|
||||
|
||||
### Subscription metrics
|
||||
|
||||
See more details on subscriptions observability [here](/subscriptions/observability-and-performance.mdx).
|
||||
|
||||
#### Active Subscriptions
|
||||
|
||||
Current number of active subscriptions, representing the subscription load on the server.
|
||||
|
||||
| | |
|
||||
| ------ | ------------------------------------------------------------------------------------------ |
|
||||
| Name | `hasura_active_subscriptions` |
|
||||
| Type | Gauge |
|
||||
| Labels | `subscription_kind`: streaming \| live-query, `operation_name`, `parameterized_query_hash` |
|
||||
|
||||
#### Active Subscription Pollers
|
||||
|
||||
Current number of active subscription pollers. A subscription poller
|
||||
[multiplexes](https://github.com/hasura/graphql-engine/blob/master/architecture/live-queries.md#idea-3-batch-multiple-live-queries-into-one-sql-query)
|
||||
similar subscriptions together. The value of this metric should be proportional to the number of uniquely parameterized
|
||||
subscriptions (i.e., subscriptions with the same selection set, but with different input arguments and session variables
|
||||
are multiplexed on the same poller). If this metric is high then it may be an indication that there are too many
|
||||
uniquely parameterized subscriptions which could be optimized for better performance.
|
||||
|
||||
| | |
|
||||
| ------ | -------------------------------------------- |
|
||||
| Name | `hasura_active_subscription_pollers` |
|
||||
| Type | Gauge |
|
||||
| Labels | `subscription_kind`: streaming \| live-query |
|
||||
|
||||
#### Active Subscription Pollers in Error State
|
||||
|
||||
Current number of active subscription pollers that are in the error state. A subscription poller
|
||||
[multiplexes](https://github.com/hasura/graphql-engine/blob/master/architecture/live-queries.md#idea-3-batch-multiple-live-queries-into-one-sql-query)
|
||||
similar subscriptions together. A non-zero value of this metric indicates that there are runtime errors in at least one
of the subscription pollers running in Hasura. In most cases, runtime errors in subscriptions are caused by changes at
the data model layer, and fixing the issue at the data model layer should automatically resolve the runtime errors.
|
||||
|
||||
| | |
|
||||
| ------ | --------------------------------------------------- |
|
||||
| Name | `hasura_active_subscription_pollers_in_error_state` |
|
||||
| Type | Gauge |
|
||||
| Labels | `subscription_kind`: streaming \| live-query |
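
Since any non-zero value warrants attention, a simple alert can be set on this gauge. A minimal sketch (alert name and severity are arbitrary):

```yaml
# Illustrative alert: at least one subscription poller is in an error state.
groups:
  - name: hasura-subscription-errors
    rules:
      - alert: HasuraSubscriptionPollerErrors # hypothetical alert name
        expr: hasura_active_subscription_pollers_in_error_state > 0
        for: 1m
        labels:
          severity: warning
```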
|
||||
|
||||
#### Subscription Total Time
|
||||
|
||||
The time taken to complete one run of a subscription poller.
|
||||
|
||||
A subscription poller
|
||||
[multiplexes](https://github.com/hasura/graphql-engine/blob/master/architecture/live-queries.md#idea-3-batch-multiple-live-queries-into-one-sql-query)
|
||||
similar subscriptions together. This subscription poller runs every 1 second by default and queries the database with
|
||||
the multiplexed query to fetch the latest data. In a polling instance, the poller not only queries the database but does
|
||||
other operations like splitting similar queries into batches (by default 100) before fetching their data from the
|
||||
database, etc. **This metric is the total time taken to complete all the operations in a single poll.**
|
||||
|
||||
In a single poll, the subscription poller splits the different variables for the multiplexed query into batches (by
|
||||
default 100) and executes the batches. We use the `hasura_subscription_db_execution_time_seconds` metric to observe the
|
||||
time taken for each batch to execute on the database. This means for a single poll there can be multiple values for
|
||||
`hasura_subscription_db_execution_time_seconds` metric.
|
||||
|
||||
Let's look at an example to understand these metrics better:
|
||||
|
||||
Say we have 650 subscriptions with the same selection set but different input arguments. These 650 subscriptions will be
|
||||
grouped to form one multiplexed query. A single poller would be created to run this multiplexed query. This poller will
|
||||
run every 1 second.
|
||||
|
||||
The default batch size in Hasura is 100, so the 650 subscriptions will be split into 7 batches for execution during a
|
||||
single poll.
|
||||
|
||||
During a single poll:
|
||||
|
||||
- Batch 1: `hasura_subscription_db_execution_time_seconds` = 0.002 seconds
|
||||
- Batch 2: `hasura_subscription_db_execution_time_seconds` = 0.001 seconds
|
||||
- Batch 3: `hasura_subscription_db_execution_time_seconds` = 0.003 seconds
|
||||
- Batch 4: `hasura_subscription_db_execution_time_seconds` = 0.001 seconds
|
||||
- Batch 5: `hasura_subscription_db_execution_time_seconds` = 0.002 seconds
|
||||
- Batch 6: `hasura_subscription_db_execution_time_seconds` = 0.001 seconds
|
||||
- Batch 7: `hasura_subscription_db_execution_time_seconds` = 0.002 seconds
|
||||
|
||||
The `hasura_subscription_total_time_seconds` would be the sum of all the database execution times shown in the batches, plus
some extra processing time for the other tasks the poller does during a single poll. In this case, it would be approximately
0.013 seconds.
|
||||
|
||||
| | |
|
||||
| ------ | ------------------------------------------------------------------------------------------ |
|
||||
| Name | `hasura_subscription_total_time_seconds` |
|
||||
| Type | Histogram<br /><br />Buckets: 0.000001, 0.0001, 0.01, 0.1, 0.3, 1, 3, 10, 30, 100 |
|
||||
| Labels | `subscription_kind`: streaming \| live-query, `operation_name`, `parameterized_query_hash` |
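
As a sketch, a p99 of the poller's total time per operation could be recorded with a rule like the following (the rule name is hypothetical):

```yaml
# Illustrative: p99 time per poll, per operation, over the last 5 minutes.
groups:
  - name: hasura-subscriptions
    rules:
      - record: hasura:subscription_total_time_seconds:p99 # hypothetical rule name
        expr: |
          histogram_quantile(
            0.99,
            sum by (le, operation_name) (rate(hasura_subscription_total_time_seconds_bucket[5m]))
          )
```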
|
||||
|
||||
#### Subscription Database Execution Time
|
||||
|
||||
The time taken to run the subscription's multiplexed query in the database for a single batch.
|
||||
|
||||
A subscription poller
|
||||
[multiplexes](https://github.com/hasura/graphql-engine/blob/master/architecture/live-queries.md#idea-3-batch-multiple-live-queries-into-one-sql-query)
|
||||
similar subscriptions together. During every run (every 1 second by default), the poller splits the different variables
|
||||
for the multiplexed query into batches (by default 100) and executes the batches. This metric observes the time taken for
|
||||
each batch to execute on the database.
|
||||
|
||||
If this metric is high, it may be an indication that the database is not performing as expected; you should consider
investigating the subscription query and checking whether indexes can help improve performance.
|
||||
|
||||
| | |
|
||||
| ------ | ------------------------------------------------------------------------------------------ |
|
||||
| Name | `hasura_subscription_db_execution_time_seconds` |
|
||||
| Type | Histogram<br /><br />Buckets: 0.000001, 0.0001, 0.01, 0.1, 0.3, 1, 3, 10, 30, 100 |
|
||||
| Labels | `subscription_kind`: streaming \| live-query, `operation_name`, `parameterized_query_hash` |
|
||||
|
||||
### Cache metrics
|
||||
|
||||
See more details on caching metrics [here](/caching/caching-metrics.mdx)
|
||||
|
||||
#### Hasura cache request count
|
||||
|
||||
Tracks cache hit and miss requests, which helps in monitoring and optimizing cache utilization.
|
||||
|
||||
| | |
|
||||
| ------ | ---------------------------- |
|
||||
| Name | `hasura_cache_request_count` |
|
||||
| Type | Counter |
|
||||
| Labels | `status`: hit \| miss |
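
For example, a cache hit ratio can be derived from this counter with a recording rule similar to the sketch below (the rule name is hypothetical):

```yaml
# Illustrative recording rule: cache hit ratio over the last 5 minutes.
groups:
  - name: hasura-caching
    rules:
      - record: hasura:cache_hit_ratio:5m # hypothetical rule name
        expr: |
          sum(rate(hasura_cache_request_count{status="hit"}[5m]))
            /
          sum(rate(hasura_cache_request_count[5m]))
```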
|
||||
|
||||
### Cron trigger metrics
|
||||
|
||||
#### Hasura cron events invocation total
|
||||
|
||||
Total number of cron events invoked, representing the number of invocations made for cron events.
|
||||
|
||||
| | |
|
||||
| ------ | ------------------------------------- |
|
||||
| Name | `hasura_cron_events_invocation_total` |
|
||||
| Type | Counter |
|
||||
| Labels | `status`: success \| failed |
|
||||
|
||||
#### Hasura cron events processed total
|
||||
|
||||
Total number of cron events processed. Compare this to `hasura_cron_events_invocation_total`; a high difference between the
two metrics indicates a high failure rate of the cron webhook.
|
||||
|
||||
| | |
|
||||
| ------ | ------------------------------------ |
|
||||
| Name | `hasura_cron_events_processed_total` |
|
||||
| Type | Counter |
|
||||
| Labels | `status`: success \| failed |
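
As an illustration, failed cron webhook invocations can be alerted on directly via the `status` label documented above (the alert name and window are assumptions):

```yaml
# Illustrative alert: cron webhook invocations are failing.
groups:
  - name: hasura-cron
    rules:
      - alert: HasuraCronWebhookFailures # hypothetical alert name
        expr: sum(rate(hasura_cron_events_invocation_total{status="failed"}[10m])) > 0
        for: 15m
        labels:
          severity: warning
```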
|
||||
|
||||
|
||||
### One-off Scheduled events metrics
|
||||
|
||||
#### Hasura one-off events invocation total
|
||||
|
||||
Total number of one-off events invoked, representing the number of invocations made for one-off events.
|
||||
|
||||
| | |
|
||||
| ------ | --------------------------------------- |
|
||||
| Name | `hasura_oneoff_events_invocation_total` |
|
||||
| Type | Counter |
|
||||
| Labels | `status`: success \| failed |
|
||||
|
||||
#### Hasura one-off events processed total
|
||||
|
||||
Total number of one-off scheduled events processed. Compare this to `hasura_oneoff_events_invocation_total`; a high
difference between the two metrics indicates a high failure rate of the one-off webhook.
|
||||
|
||||
| | |
|
||||
| ------ | -------------------------------------- |
|
||||
| Name | `hasura_oneoff_events_processed_total` |
|
||||
| Type | Counter |
|
||||
| Labels | `status`: success \| failed |
|
||||
|
||||
|
||||
### Hasura HTTP connections
|
||||
|
||||
Current number of active HTTP connections (excluding WebSocket connections), representing the HTTP load on the server.
|
||||
|
||||
| | |
|
||||
| ------ | ------------------------- |
|
||||
| Name | `hasura_http_connections` |
|
||||
| Type | Gauge |
|
||||
| Labels | none |
|
||||
|
||||
### Hasura WebSocket connections
|
||||
|
||||
Current number of active WebSocket connections, representing the WebSocket load on the server.
|
||||
|
||||
| | |
|
||||
| ------ | ------------------------------ |
|
||||
| Name | `hasura_websocket_connections` |
|
||||
| Type | Gauge |
|
||||
| Labels | none |
|
||||
|
||||
|
||||
### Hasura Postgres connections
|
||||
|
||||
Current number of active PostgreSQL connections. Compare this to
|
||||
[pool settings](/api-reference/syntax-defs.mdx/#pgpoolsettings).
|
||||
|
||||
| | |
|
||||
| ------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Name | `hasura_postgres_connections` |
|
||||
| Type | Gauge |
|
||||
| Labels | `source_name`: name of the database<br />`conn_info`: connection url string (password omitted) or name of the connection url environment variable<br />`role`: primary \| replica |
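
For example, you could alert when open connections per source approach your configured pool limit. The sketch below assumes a limit of 50; substitute your own pool settings:

```yaml
# Illustrative alert: Postgres connections per source near an assumed pool limit of 50.
groups:
  - name: hasura-postgres
    rules:
      - alert: HasuraPostgresConnectionsHigh # hypothetical alert name
        expr: sum by (source_name) (hasura_postgres_connections) > 45
        for: 10m
        labels:
          severity: warning
```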
|
||||
|
||||
### Hasura source health
|
||||
|
||||
Health check status of a particular data source, corresponding to the output of `/healthz/sources`, with possible values
|
||||
0 through 3 indicating, respectively: OK, TIMEOUT, FAILED, ERROR. See the
|
||||
[Source Health Check API Reference](/api-reference/source-health.mdx) for details.
|
||||
|
||||
| | |
|
||||
| ------ | ----------------------------------- |
|
||||
| Name | `hasura_source_health` |
|
||||
| Type | Gauge |
|
||||
| Labels | `source_name`: name of the database |
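
Because `0` means OK, any persistently non-zero value can be alerted on. A minimal sketch (alert name and timing are arbitrary):

```yaml
# Illustrative alert: a data source is reporting anything other than OK (0).
groups:
  - name: hasura-sources
    rules:
      - alert: HasuraSourceUnhealthy # hypothetical alert name
        expr: hasura_source_health > 0
        for: 2m
        labels:
          severity: critical
```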
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
sidebar_position: 10
|
||||
sidebar_position: 3
|
||||
sidebar_label: Pre-built Dashboards
|
||||
title: 'Pre-built Dashboards'
|
||||
description: Pre-built observability dashboards and alerting rules using Prometheus, Grafana, Alert Manager, Jaeger
|
@ -1,5 +0,0 @@
|
||||
{
|
||||
"label": "Integrations",
|
||||
"position": 3,
|
||||
"className": "cloud-icon"
|
||||
}
|
@ -1,11 +1,11 @@
|
||||
---
|
||||
title: 'Best Practices: Observability'
|
||||
description: Observability best practices
|
||||
keywords:
|
||||
- hasura
|
||||
- docs
|
||||
- best practices
|
||||
sidebar_label: Observability
|
||||
sidebar_label: Best Practices
|
||||
sidebar_position: 2
|
||||
---
|
||||
|
||||
# Observability Best Practices
|
||||
@ -52,20 +52,20 @@ Hasura Cloud projects include dashboards for observability. You will find your m
|
||||
The following default observability options are enabled on your Hasura Cloud project:
|
||||
|
||||
- [Stats Overview](/observability/overview.mdx)
|
||||
- [Errors](/observability/errors.mdx)
|
||||
- [Usage Summaries](/observability/usage.mdx)
|
||||
- [Operations](/observability/operations.mdx)
|
||||
- [Websockets](/observability/websockets.mdx)
|
||||
- [Subscription Workers](/observability/subscription-workers.mdx)
|
||||
- [Distributed Tracing](/observability/tracing.mdx)
|
||||
- [Query Tags](/observability/query-tags.mdx)
|
||||
- [Errors](/observability/cloud-monitoring/errors.mdx)
|
||||
- [Usage Summaries](/observability/cloud-monitoring/usage.mdx)
|
||||
- [Operations](/observability/cloud-monitoring/operations.mdx)
|
||||
- [Websockets](/observability/cloud-monitoring/websockets.mdx)
|
||||
- [Subscription Workers](/observability/cloud-monitoring/subscription-workers.mdx)
|
||||
- [Distributed Tracing](/observability/cloud-monitoring/tracing.mdx)
|
||||
- [Query Tags](/observability/cloud-monitoring/query-tags.mdx)
|
||||
|
||||
#### Third-party observability platforms
|
||||
|
||||
If your organization has multiple applications and systems that need to be monitored, the most efficient way to do so is
|
||||
via an observability platform. Hasura provides first-party integrations with multiple observability platforms and is
|
||||
fully open-telemetry compliant. You can find a list of third-party observability platforms supported by Hasura
|
||||
[here](/observability/integrations/index.mdx).
|
||||
[here](/observability/cloud/index.mdx).
|
||||
|
||||
### Hasura Enterprise (self-hosted)
|
||||
|
||||
@ -84,7 +84,7 @@ to be exported to your observability platform using the appropriate log drivers.
|
||||
|
||||
You can export metrics of your Hasura Cloud project to Prometheus. You can configure this on the `Integrations` tab on
|
||||
the project's settings page. You can find more information on this
|
||||
[here](/observability/prometheus/cloud-integration.mdx).
|
||||
[here](/observability/cloud/prometheus.mdx).
|
||||
|
||||
## Database observability
|
||||
|
||||
@ -105,12 +105,12 @@ be implemented:
|
||||
- Memory
|
||||
- Query Tags
|
||||
|
||||
[Query Tags](/observability/query-tags.mdx) are SQL comments that consist of `key=value` pairs that are appended to
|
||||
[Query Tags](/observability/cloud-monitoring/query-tags.mdx) are SQL comments that consist of `key=value` pairs that are appended to
|
||||
generated SQL statements. When you issue a query or mutation with query tags, the generated SQL has some extra
|
||||
information. Database analytics tools can use that information (metadata) in these comments to analyze DB load and track
|
||||
or monitor performance.
|
||||
information. Database analytics tools can use that information (metadata) in these comments to analyze DB load
|
||||
and track or monitor performance.
|
||||
|
||||
### Using Query Tags and pganalyze
|
||||
### Using Query Tags and **pganalyze**
|
||||
|
||||
- Refer to documentation from [pganalyze](https://pganalyze.com/docs) for information on how to connect your database to
|
||||
the analyzer.
|
@ -38,18 +38,18 @@ import Observability from '@site/static/icons/features/observability.svg';
|
||||
For our Enterprise customers, we have a set of pre-built dashboards and alerting rules configured with the
Prometheus, Grafana, and Jaeger stack, with which you can monitor and debug Hasura. These dashboards will be available
soon and integrated with Hasura Cloud too. You can read more and explore these dashboards
|
||||
<VersionedLink to="/observability/prometheus/pre-built-dashboards/">here</VersionedLink>.
|
||||
<VersionedLink to="/observability/enterprise-edition/prometheus/pre-built-dashboards/">here</VersionedLink>.
|
||||
</p>
|
||||
<h4>Quick Links</h4>
|
||||
<ul>
|
||||
<li>
|
||||
<VersionedLink to="/observability/integrations/index">Connect an integration.</VersionedLink>
|
||||
<VersionedLink to="/observability/cloud-monitoring/index/">Hasura Cloud Built-in Monitoring</VersionedLink>
|
||||
</li>
|
||||
<li>
|
||||
<VersionedLink to="/observability/how-it-works">Learn how Observability works.</VersionedLink>
|
||||
<VersionedLink to="/observability/enterprise-edition/prometheus/pre-built-dashboards">Pre-built dashboards with Hasura EE</VersionedLink>
|
||||
</li>
|
||||
<li>
|
||||
<VersionedLink to="/observability/prometheus/pre-built-dashboards">Pre-built dashboards.</VersionedLink>
|
||||
<VersionedLink to="/observability/db-observability/">Database observability</VersionedLink>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
@ -64,22 +64,22 @@ import Observability from '@site/static/icons/features/observability.svg';
|
||||
## Using Observability
|
||||
|
||||
<div className="overview-gallery">
|
||||
<VersionedLink to="/observability/integrations/opentelemetry/">
|
||||
<VersionedLink to="/observability/cloud/opentelemetry/">
|
||||
<div className="card">
|
||||
<h3>OpenTelemetry</h3>
|
||||
<p>Connect your Hasura GraphQL API to OpenTelemetry-compliant services.</p>
|
||||
<p>Connect your Hasura Cloud project to OpenTelemetry-compliant services.</p>
|
||||
</div>
|
||||
</VersionedLink>
|
||||
<VersionedLink to="/observability/prometheus/cloud-integration/">
|
||||
<VersionedLink to="/observability/cloud/prometheus/">
|
||||
<div className="card">
|
||||
<h3>Prometheus</h3>
|
||||
<p>Connect your Hasura GraphQL API to Prometheus.</p>
|
||||
<p>Connect your Hasura Cloud project to Prometheus.</p>
|
||||
</div>
|
||||
</VersionedLink>
|
||||
<VersionedLink to="/observability/integrations/datadog">
|
||||
<VersionedLink to="/observability/cloud/datadog">
|
||||
<div className="card">
|
||||
<h3>Datadog</h3>
|
||||
<p>Connect your Hasura GraphQL API to Datadog.</p>
|
||||
<p>Connect your Hasura Cloud project to Datadog.</p>
|
||||
</div>
|
||||
</VersionedLink>
|
||||
</div>
|
||||
|
@ -1,5 +0,0 @@
|
||||
{
|
||||
"label": "Prometheus",
|
||||
"position": 2,
|
||||
"className": "cloud-and-enterprise-icon"
|
||||
}
|
@ -1,208 +0,0 @@
|
||||
---
|
||||
sidebar_label: Available Metrics
|
||||
description: Metrics via Prometheus for Hasura Enterprise Edition
|
||||
title: 'Enterprise Edition: Metrics via Prometheus'
|
||||
keywords:
|
||||
- hasura
|
||||
- docs
|
||||
- enterprise
|
||||
sidebar_position: 1
|
||||
---
|
||||
|
||||
import ProductBadge from '@site/src/components/ProductBadge';
|
||||
|
||||
# Metrics via Prometheus
|
||||
|
||||
<ProductBadge self />
|
||||
|
||||
## Enable metrics endpoint
|
||||
|
||||
By default the Prometheus metrics endpoint is disabled. To enable Prometheus metrics, configure the environment variable
|
||||
below:
|
||||
|
||||
```bash
|
||||
HASURA_GRAPHQL_ENABLED_APIS=metadata,graphql,config,metrics
|
||||
```
|
||||
|
||||
Secure the Prometheus metrics endpoint with a secret:
|
||||
|
||||
```bash
|
||||
HASURA_GRAPHQL_METRICS_SECRET=<secret>
|
||||
```
|
||||
|
||||
```bash
|
||||
curl 'http://127.0.0.1:8080/v1/metrics' -H 'Authorization: Bearer <secret>'
|
||||
```
|
||||
|
||||
:::info Configure a secret
|
||||
|
||||
The metrics endpoint should be configured with a secret to prevent misuse and should not be exposed over the internet.
|
||||
|
||||
:::
|
||||
|
||||
:::tip High-cardinality Labels
|
||||
|
||||
Starting in `v2.26.0`, Hasura GraphQL Engine exposes metrics with high-cardinality labels by default.
|
||||
|
||||
You can disable
|
||||
[the cardinality of labels for metrics](/deployment/graphql-engine-flags/reference.mdx#enable-high-cardinality-labels-for-metrics)
|
||||
if you are experiencing high memory usage, which can be due to a large number of labels in the metrics (typically more
|
||||
than 10000).
|
||||
|
||||
:::
|
||||
|
||||
## Metrics exported
|
||||
|
||||
The following metrics are exported by Hasura GraphQL Engine:
|
||||
|
||||
### Hasura Event Triggers Metrics
|
||||
|
||||
The following metrics can be used to monitor the performance of Hasura Event Triggers system:
|
||||
|
||||
- [`hasura_event_fetch_time_per_batch_seconds`](/event-triggers/observability-and-performace.mdx/#event-fetch-time-per-batch)
|
||||
- [`hasura_event_invocations_total`](/event-triggers/observability-and-performace.mdx/#event-invocations-total)
|
||||
- [`hasura_event_processed_total`](/event-triggers/observability-and-performace.mdx/#event-processed-total)
|
||||
- [`hasura_event_processing_time_seconds`](/event-triggers/observability-and-performace.mdx/#event-processing-time)
|
||||
- [`hasura_event_queue_time_seconds`](/event-triggers/observability-and-performace.mdx/#event-queue-time)
|
||||
- [`hasura_event_trigger_http_workers`](/event-triggers/observability-and-performace.mdx/#event-triggers-http-workers)
|
||||
- [`hasura_event_webhook_processing_time_seconds`](/event-triggers/observability-and-performace.mdx/#event-webhook-processing-time)
|
||||
- [`hasura_events_fetched_per_batch`](/event-triggers/observability-and-performace.mdx/#events-fetched-per-batch)
|
||||
|
||||
### Subscription Metrics
|
||||
|
||||
The following metrics can be used to monitor the performance of subscriptions:
|
||||
|
||||
- [`hasura_active_subscriptions`](/subscriptions/observability-and-performance.mdx#active-subscriptions)
|
||||
- [`hasura_active_subscription_pollers`](/subscriptions/observability-and-performance.mdx#active-subscription-pollers)
|
||||
- [`hasura_active_subscription_pollers_in_error_state`](/subscriptions/observability-and-performance.mdx#active-subscription-pollers-in-error-state)
|
||||
- [`hasura_subscription_db_execution_time_seconds`](/subscriptions/observability-and-performance.mdx#subscription-database-execution-time)
|
||||
- [`hasura_subscription_total_time_seconds`](/subscriptions/observability-and-performance.mdx#subscription-total-time)
|
||||
|
||||
### Hasura cache request count
|
||||
|
||||
Tracks cache hit and miss requests, which helps in monitoring and optimizing cache utilization. You can read more about
|
||||
this [here](/caching/caching-metrics.mdx).
|
||||
|
||||
| | |
|
||||
| ------ | ---------------------------- |
|
||||
| Name | `hasura_cache_request_count` |
|
||||
| Type | Counter |
|
||||
| Labels | `status`: hit \| miss |
|
||||
|
||||
### Hasura cron events invocation total
|
||||
|
||||
Total number of cron events invoked, representing the number of invocations made for cron events.
|
||||
|
||||
| | |
|
||||
| ------ | ------------------------------------- |
|
||||
| Name | `hasura_cron_events_invocation_total` |
|
||||
| Type | Counter |
|
||||
| Labels | `status`: success \| failed |
|
||||
|
||||
### Hasura cron events processed total
|
||||
|
||||
Total number of cron events processed, representing the number of invocations made for cron events. Compare this to
|
||||
`hasura_cron_events_invocation_total`. A high difference between the two metrics indicates high failure rate of the cron
|
||||
webhook.
|
||||
|
||||
| | |
|
||||
| ------ | ------------------------------------ |
|
||||
| Name | `hasura_cron_events_processed_total` |
|
||||
| Type | Counter |
|
||||
| Labels | `status`: success \| failed |
|
||||
|
||||
### Hasura GraphQL execution time seconds
|
||||
|
||||
Execution time of successful GraphQL requests (excluding subscriptions). If more requests are falling in the higher
|
||||
buckets, you should consider [tuning the performance](/deployment/performance-tuning.mdx).
|
||||
|
||||
| | |
|
||||
| ------ | ------------------------------------------------------------ |
|
||||
| Name | `hasura_graphql_execution_time_seconds` |
|
||||
| Type | Histogram<br /><br />Buckets: 0.01, 0.03, 0.1, 0.3, 1, 3, 10 |
|
||||
| Labels | `operation_type`: query \| mutation |
|
||||
|
||||
### Hasura GraphQL requests total
|
||||
|
||||
Number of GraphQL requests received, representing the GraphQL query/mutation traffic on the server.
|
||||
|
||||
| | |
|
||||
| ------ | -------------------------------------------------------------- |
|
||||
| Name | `hasura_graphql_requests_total` |
|
||||
| Type | Counter |
|
||||
| Labels | `operation_type`: query \| mutation \| subscription \| unknown |
|
||||
|
||||
The `unknown` operation type will be returned for queries that fail authorization, parsing, or certain validations. The
|
||||
`response_status` label will be `success` for successful requests and `failed` for failed requests.
|
||||
|
||||
### Hasura HTTP connections
|
||||
|
||||
Current number of active HTTP connections (excluding WebSocket connections), representing the HTTP load on the server.
|
||||
|
||||
| | |
|
||||
| ------ | ------------------------- |
|
||||
| Name | `hasura_http_connections` |
|
||||
| Type | Gauge |
|
||||
| Labels | none |
|
||||
|
||||
### Hasura one-off events invocation total
|
||||
|
||||
Total number of one-off events invoked, representing the number of invocations made for one-off events.
|
||||
|
||||
| | |
|
||||
| ------ | --------------------------------------- |
|
||||
| Name | `hasura_oneoff_events_invocation_total` |
|
||||
| Type | Counter |
|
||||
| Labels | `status`: success \| failed |
|
||||
|
||||
### Hasura one-off events processed total
|
||||
|
||||
Total number of one-off events processed, representing the number of invocations made for one-off events. Compare this
|
||||
to `hasura_oneoff_events_invocation_total`. A high difference between the two metrics indicates high failure rate of the
|
||||
one-off webhook.
|
||||
|
||||
| | |
|
||||
| ------ | -------------------------------------- |
|
||||
| Name | `hasura_oneoff_events_processed_total` |
|
||||
| Type | Counter |
|
||||
| Labels | `status`: success \| failed |
|
||||
|
||||
### Hasura postgres connections
|
||||
|
||||
Current number of active PostgreSQL connections. Compare this to
|
||||
[pool settings](/api-reference/syntax-defs.mdx/#pgpoolsettings).
|
||||
|
||||
| | |
|
||||
| ------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Name | `hasura_postgres_connections` |
|
||||
| Type | Gauge |
|
||||
| Labels | `source_name`: name of the database<br />`conn_info`: connection url string (password omitted) or name of the connection url environment variable<br />`role`: primary \| replica |
|
||||
|
||||
### Hasura source health
|
||||
|
||||
Health check status of a particular data source, corresponding to the output of `/healthz/sources`, with possible values
|
||||
0 through 3 indicating, respectively: OK, TIMEOUT, FAILED, ERROR. See the
|
||||
[Source Health Check API Reference](/api-reference/source-health.mdx) for details.
|
||||
|
||||
| | |
|
||||
| ------ | ----------------------------------- |
|
||||
| Name | `hasura_source_health` |
|
||||
| Type | Gauge |
|
||||
| Labels | `source_name`: name of the database |
|
||||
|
||||
### Hasura WebSocket connections
|
||||
|
||||
Current number of active WebSocket connections, representing the WebSocket load on the server.
|
||||
|
||||
| | |
|
||||
| ------ | ------------------------------ |
|
||||
| Name | `hasura_websocket_connections` |
|
||||
| Type | Gauge |
|
||||
| Labels | none |
|
||||
|
||||
:::info GraphQL request execution time
|
||||
|
||||
- Uses wall-clock time, so it includes time spent waiting on I/O.
|
||||
- Includes authorization, parsing, validation, planning, and execution (calls to databases, Remote Schemas).
|
||||
|
||||
:::
|
@ -6,7 +6,7 @@ keywords:
|
||||
- docs
|
||||
- deployment
|
||||
- allow list
|
||||
sidebar_position: 8
|
||||
sidebar_position: 4
|
||||
---
|
||||
|
||||
import Tabs from '@theme/Tabs';
|
||||
|
@ -7,7 +7,7 @@ keywords:
|
||||
- enterprise
|
||||
- security
|
||||
- limits
|
||||
sidebar_position: 2
|
||||
sidebar_position: 5
|
||||
sidebar_label: API limits
|
||||
sidebar_class_name: cloud-and-enterprise-icon
|
||||
title: 'Cloud & Enterprise Edition: API Limits'
|
||||
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
title: 'Cloud: Marketplaces'
|
||||
description: Hasura Cloud Marketplaces
|
||||
title: 'Proactive vulnerability scanning'
|
||||
description: Proactive vulnerability scanning
|
||||
keywords:
|
||||
- hasura
|
||||
- Vulnerability
|
||||
@ -11,6 +11,7 @@ keywords:
|
||||
- cloud
|
||||
sidebar_label: Proactive vulnerability scanning
|
||||
sidebar_class_name: cloud-and-enterprise-icon
|
||||
sidebar_position: 7
|
||||
---
|
||||
|
||||
import ProductBadge from '@site/src/components/ProductBadge';
|
||||
|
@ -10,7 +10,7 @@ keywords:
|
||||
- introspection
|
||||
- disable
|
||||
- GraphQL
|
||||
sidebar_position: 3
|
||||
sidebar_position: 6
|
||||
sidebar_label: Disable GraphQL introspection
|
||||
sidebar_class_name: cloud-and-enterprise-icon
|
||||
---
|
||||
|
@ -29,7 +29,7 @@ import Security from '@site/static/icons/features/security.svg';
|
||||
<h4>Quick Links</h4>
|
||||
<ul>
|
||||
<li>
|
||||
<VersionedLink to="/deployment/best-practices/security">
|
||||
<VersionedLink to="/security/security-best-practices">
|
||||
Get started with Security best practices.
|
||||
</VersionedLink>
|
||||
</li>
|
||||
@ -52,7 +52,7 @@ import Security from '@site/static/icons/features/security.svg';
|
||||
<p>Use the Allow List of operations to restrict the operations that can be performed by a role.</p>
|
||||
</div>
|
||||
</VersionedLink>
|
||||
<VersionedLink to="/security/multiple-jwt-secrets">
|
||||
<VersionedLink to="/auth/authentication/multiple-jwt-secrets">
|
||||
<div className="card">
|
||||
<h3>Multiple JWT Secrets</h3>
|
||||
<p>Use multiple JWT secrets to support multiple JWT issuers.</p>
|
||||
|
@ -4,7 +4,9 @@ keywords:
|
||||
- hasura
|
||||
- docs
|
||||
- best practices
|
||||
sidebar_label: Security
|
||||
sidebar_label: Best Practices
|
||||
toc_max_heading_level: 3
|
||||
sidebar_position: 3
|
||||
---
|
||||
|
||||
import Thumbnail from '@site/src/components/Thumbnail';
|
||||
@ -36,14 +38,14 @@ Specifics about each security best practice can be found below.
|
||||
|
||||
## Hasura GraphQL Engine
|
||||
|
||||
#### Restrict Access
|
||||
### Restrict Access
|
||||
|
||||
Restrict knowledge of admin secrets to the minimum required team members, as an admin secret provides unrestricted
|
||||
access to the Hasura GraphQL Engine. SSO collaboration should be used to grant project access without sharing an admin
|
||||
key. Subsequently, implement a plan to rotate admin secrets to limit the exposure of an admin secret being shared too
|
||||
broadly.
|
||||
|
||||
[Multiple admin secrets](/security/multiple-admin-secrets.mdx) should be used in situations where admin secrets have
|
||||
[Multiple admin secrets](/auth/authentication/multiple-admin-secrets.mdx) should be used in situations where admin secrets have
|
||||
different rotation timelines or when granting temporary access is needed.
|
||||
|
||||
Leverage [allowed operations lists](https://www.graphql-code-generator.com/plugins/other/hasura-allow-list) whenever
|
||||
@ -61,7 +63,7 @@ The admin role will bypass the allowed operations list.
|
||||
|
||||
:::
|
||||
|
||||
#### Limit the API
|
||||
### Limit the API
|
||||
|
||||
The allowed operations lists workflow is ideal for private/internal APIs or APIs with well understood and clearly
|
||||
defined operations. Public APIs or APIs with less defined expected operations should additionally configure
|
||||
@ -72,7 +74,7 @@ defined operations. Public APIs or APIs with less defined expected operations sh
|
||||
|
||||
- [Limit rows](/auth/authorization/permissions/row-fetch-limit.mdx) returned by a select operation.
|
||||
|
||||
#### Permissions
|
||||
### Permissions
|
||||
|
||||
The row-based access control configuration dictates permissions for the GraphQL API. It is critical that these
|
||||
permissions be configured correctly in order to prevent unauthorized or unintended access to the GraphQL API.
|
||||
@ -86,7 +88,7 @@ permissions be configured correctly in order to prevent unauthorized or unintend
|
||||
[allowed operations lists](https://www.graphql-code-generator.com/plugins/other/hasura-allow-list) and
|
||||
[disabling schema introspection](/security/disable-graphql-introspection.mdx).
|
||||
|
||||
#### Disable development components
|
||||
### Disable development components
|
||||
|
||||
There are several components of Hasura GraphQL Engine that are crucial for development efforts but should be disabled
|
||||
for a production environment. However, it should be expected that some of these components may need to be temporarily
|
||||
@ -100,7 +102,7 @@ re-enabled if a situation arises where a production environment specific issue r
|
||||
|
||||
- [Disable schema introspection](/security/disable-graphql-introspection.mdx).
|
||||
|
||||
#### Additional environment variables
|
||||
### Additional environment variables
|
||||
|
||||
There are specific environment variables that should be configured to ensure appropriate communication to the Hasura
|
||||
GraphQL Engine server.
|
@ -51,112 +51,23 @@ For more details on how Hasura executes subscriptions, refer to the
|
||||
|
||||
<ProductBadge self />
|
||||
|
||||
Hasura exposes a set of Prometheus Metrics that can be used to monitor the subscriptions system and help diagnose
|
||||
performance issues.
|
||||
Hasura EE exposes a set of [Prometheus Metrics](/observability/enterprise-edition/prometheus/metrics.mdx/#subscription-metrics)
|
||||
that can be used to monitor the subscriptions system and help diagnose performance issues.
|
||||
|
||||
### Active Subscriptions
|
||||
:::info More on Observability
|
||||
|
||||
Current number of active subscriptions, representing the subscription load on the server.
|
||||
To find out more about observability, including best practices, check out the
|
||||
[observability docs section](/observability/overview.mdx).
|
||||
|
||||
| | |
|
||||
| ------ | ------------------------------------------------------------------------------------------ |
|
||||
| Name | `hasura_active_subscriptions` |
|
||||
| Type | Gauge |
|
||||
| Labels | `subscription_kind`: streaming \| live-query, `operation_name`, `parameterized_query_hash` |
|
||||
The following metrics can be used to monitor the performance of subscriptions:
|
||||
|
||||
### Active Subscription Pollers
|
||||
:::
|
||||
|
||||
Current number of active subscription pollers. A subscription poller
|
||||
[multiplexes](https://github.com/hasura/graphql-engine/blob/master/architecture/live-queries.md#idea-3-batch-multiple-live-queries-into-one-sql-query)
|
||||
similar subscriptions together. The value of this metric should be proportional to the number of uniquely parameterized
|
||||
subscriptions (i.e., subscriptions with the same selection set, but with different input arguments and session variables
|
||||
are multiplexed on the same poller). If this metric is high then it may be an indication that there are too many
|
||||
uniquely parameterized subscriptions which could be optimized for better performance.
|
||||
|
||||
| | |
|
||||
| ------ | -------------------------------------------- |
|
||||
| Name | `hasura_active_subscription_pollers` |
|
||||
| Type | Gauge |
|
||||
| Labels | `subscription_kind`: streaming \| live-query |
|
||||
|
||||
### Active Subscription Pollers in Error State
|
||||
|
||||
Current number of active subscription pollers that are in the error state. A subscription poller
|
||||
[multiplexes](https://github.com/hasura/graphql-engine/blob/master/architecture/live-queries.md#idea-3-batch-multiple-live-queries-into-one-sql-query)
|
||||
similar subscriptions together. A non-zero value of this metric indicates that there are runtime errors in atleast one
|
||||
of the subscription pollers that are running in Hasura. In most of the cases, runtime errors in subscriptions are caused
|
||||
due to the changes at the data model layer and fixing the issue at the data model layer should automatically fix the
|
||||
runtime errors.
|
||||
|
||||
| | |
|
||||
| ------ | --------------------------------------------------- |
|
||||
| Name | `hasura_active_subscription_pollers_in_error_state` |
|
||||
| Type | Gauge |
|
||||
| Labels | `subscription_kind`: streaming \| live-query |
|
||||
|
||||
### Subscription Total Time
|
||||
|
||||
The time taken to complete running of one subscription poller.
|
||||
|
||||
A subscription poller
|
||||
[multiplexes](https://github.com/hasura/graphql-engine/blob/master/architecture/live-queries.md#idea-3-batch-multiple-live-queries-into-one-sql-query)
|
||||
similar subscriptions together. This subscription poller runs every 1 second by default and queries the database with
|
||||
the multiplexed query to fetch the latest data. In a polling instance, the poller not only queries the database but does
|
||||
other operations like splitting similar queries into batches (by default 100) before fetching their data from the
|
||||
database, etc. **This metric is the total time taken to complete all the operations in a single poll.**
|
||||
|
||||
In a single poll, the subscription poller splits the different variables for the multiplexed query into batches (by
|
||||
default 100) and executes the batches. We use the `hasura_subscription_db_execution_time_seconds` metric to observe the
|
||||
time taken for each batch to execute on the database. This means for a single poll there can be multiple values for
|
||||
`hasura_subscription_db_execution_time_seconds` metric.
|
||||
|
||||
Let's look at an example to understand these metrics better:
|
||||
|
||||
Say we have 650 subscriptions with the same selection set but different input arguments. These 650 subscriptions will be
|
||||
grouped to form one multiplexed query. A single poller would be created to run this multiplexed query. This poller will
|
||||
run every 1 second.
|
||||
|
||||
The default batch size in hasura is 100, so the 650 subscriptions will be split into 7 batches for execution during a
|
||||
single poll.
|
||||
|
||||
During a single poll:
|
||||
|
||||
- Batch 1: `hasura_subscription_db_execution_time_seconds` = 0.002 seconds
|
||||
- Batch 2: `hasura_subscription_db_execution_time_seconds` = 0.001 seconds
|
||||
- Batch 3: `hasura_subscription_db_execution_time_seconds` = 0.003 seconds
|
||||
- Batch 4: `hasura_subscription_db_execution_time_seconds` = 0.001 seconds
|
||||
- Batch 5: `hasura_subscription_db_execution_time_seconds` = 0.002 seconds
|
||||
- Batch 6: `hasura_subscription_db_execution_time_seconds` = 0.001 seconds
|
||||
- Batch 7: `hasura_subscription_db_execution_time_seconds` = 0.002 seconds
|
||||
|
||||
The `hasura_subscription_total_time_seconds` would be sum of all the database execution times shown in the batches, plus
|
||||
some extra process time for other tasks the poller does during a single poll. In this case, it would be approximately
|
||||
0.013 seconds.
|
||||
|
||||
| | |
|
||||
| ------ | ------------------------------------------------------------------------------------------ |
|
||||
| Name | `hasura_subscription_total_time_seconds` |
|
||||
| Type | Histogram<br /><br />Buckets: 0.000001, 0.0001, 0.01, 0.1, 0.3, 1, 3, 10, 30, 100 |
|
||||
| Labels | `subscription_kind`: streaming \| live-query, `operation_name`, `parameterized_query_hash` |
|
||||
|
||||
### Subscription Database Execution Time
|
||||
|
||||
The time taken to run the subscription's multiplexed query in the database for a single batch.
|
||||
|
||||
A subscription poller
|
||||
[multiplexes](https://github.com/hasura/graphql-engine/blob/master/architecture/live-queries.md#idea-3-batch-multiple-live-queries-into-one-sql-query)
|
||||
similar subscriptions together. During every run (every 1 second by default), the poller splits the different variables
|
||||
for the multiplexed query into batches (by default 100) and execute the batches. This metric observes the time taken for
|
||||
each batch to execute on the database.
|
||||
|
||||
If this metric is high, then it may be an indication that the database is not performing as expected, you should
|
||||
consider investigating the subscription query and see if indexes can help improve performance.
|
||||
|
||||
| | |
|
||||
| ------ | ------------------------------------------------------------------------------------------ |
|
||||
| Name | `hasura_subscription_db_execution_time_seconds` |
|
||||
| Type | Histogram<br /><br />Buckets: 0.000001, 0.0001, 0.01, 0.1, 0.3, 1, 3, 10, 30, 100 |
|
||||
| Labels | `subscription_kind`: streaming \| live-query, `operation_name`, `parameterized_query_hash` |
|
||||
- [`hasura_active_subscriptions`](/observability/enterprise-edition/prometheus/metrics.mdx#active-subscriptions)
|
||||
- [`hasura_active_subscription_pollers`](/observability/enterprise-edition/prometheus/metrics.mdx#active-subscription-pollers)
|
||||
- [`hasura_active_subscription_pollers_in_error_state`](/observability/enterprise-edition/prometheus/metrics.mdx#active-subscription-pollers-in-error-state)
|
||||
- [`hasura_subscription_db_execution_time_seconds`](/observability/enterprise-edition/prometheus/metrics.mdx#subscription-database-execution-time)
|
||||
- [`hasura_subscription_total_time_seconds`](/observability/enterprise-edition/prometheus/metrics.mdx#subscription-total-time)
|
||||
|
||||
## Golden Signals for subscriptions
|
||||
|
||||
@ -191,7 +102,7 @@ number of Hasura instances to handle the load.
|
||||
|
||||
Errors in subscriptions can be monitored using the following metrics
|
||||
|
||||
- [`hasura_graphql_requests_total{type="subscription",response_status="error"}`](/observability/prometheus/metrics.mdx#hasura-graphql-requests-total):
|
||||
- [`hasura_graphql_requests_total{type="subscription",response_status="error"}`](/observability/enterprise-edition/prometheus/metrics.mdx#hasura-graphql-requests-total):
|
||||
Total number of errors that happen before the subscription is started (i.e. validation, parsing and authorization
|
||||
errors).
|
||||
- [`hasura_active_subscription_pollers_in_error_state`](#active-subscription-pollers-in-error-state): Number of
|
||||
@ -210,7 +121,7 @@ To monitor the saturation for subscriptions, you can monitor the following:
|
||||
|
||||
- CPU and memory usage of Hasura instances.
|
||||
- For postgres backends, you can monitor the
|
||||
[`hasura_postgres_connections`](/observability/prometheus/metrics.mdx#hasura-postgres-connections) metric to see the
|
||||
[`hasura_postgres_connections`](/observability/enterprise-edition/prometheus/metrics.mdx#hasura-postgres-connections) metric to see the
|
||||
number of connections opened by Hasura with the database.
|
||||
- P99 of the [`hasura_subscription_total_time_seconds`](#subscription-total-time) metric.
|
||||
|
||||
|
@ -27,7 +27,7 @@ In order to find out about the origins of an error, it can be helpful to check t
|
||||
:::info Metrics and distributed tracing in Hasura Cloud
|
||||
|
||||
Hasura Cloud includes metrics and distributed tracing which makes troubleshooting faster. For more information, see
|
||||
[Metrics](/observability/overview.mdx) and [Tracing](/observability/tracing.mdx) in Hasura Cloud.
|
||||
[Metrics](/observability/overview.mdx) and [Tracing](/observability/cloud-monitoring/tracing.mdx) in Hasura Cloud.
|
||||
|
||||
:::
|
||||
|
||||
|
docs/src/components/BetaTag/BetaTag.tsx (new file, 13 lines)
@ -0,0 +1,13 @@
|
||||
import React from 'react';
|
||||
import styles from './styles.module.scss';
|
||||
const BetaTag: React.FC = (): React.ReactElement => {
|
||||
|
||||
return (
|
||||
<div className={styles['beta-tag']}>
|
||||
Beta
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
export default BetaTag;
|
docs/src/components/BetaTag/styles.module.scss (new file, 9 lines)
@ -0,0 +1,9 @@
|
||||
.beta-tag {
|
||||
font-size: 0.7rem;
|
||||
font-weight: normal;
|
||||
margin-left: 5px;
|
||||
color: white;
|
||||
background-color: var(--ifm-color-gray-600);
|
||||
padding: 2px 5px;
|
||||
border-radius: 5px;
|
||||
}
|
@ -11,6 +11,7 @@ import EnterpriseLight from '@site/static/icons/enterprise-dark.svg';
|
||||
import EnterpriseDark from '@site/static/icons/enterprise-light.svg';
|
||||
import CloudLight from '@site/static/icons/cloud-dark.svg';
|
||||
import CloudDark from '@site/static/icons/cloud-light.svg';
|
||||
import BetaTag from "@site/src/components/BetaTag/BetaTag";
|
||||
export default function DocSidebarItemLink({ item, onItemClick, activePath, level, index, ...props }) {
|
||||
const { href, label, className, autoAddBaseUrl } = item;
|
||||
const isActive = isActiveSidebarItem(item, activePath);
|
||||
@ -24,6 +25,20 @@ export default function DocSidebarItemLink({ item, onItemClick, activePath, leve
|
||||
return isDarkTheme ? <EnterpriseDark /> : <EnterpriseLight />;
|
||||
case 'cloud-icon':
|
||||
return isDarkTheme ? <CloudDark /> : <CloudLight />;
|
||||
case 'enterprise-icon-and-beta':
|
||||
return (
|
||||
<div className={styles['sidebar_link_wrapper']}>
|
||||
{isDarkTheme ? (
|
||||
<>
|
||||
<EnterpriseDark />{' '}<BetaTag/>
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<EnterpriseLight />{' '}<BetaTag/>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
case 'cloud-and-enterprise-icon':
|
||||
return (
|
||||
<div className={styles['cloud-ee-container']}>
|
||||
@ -38,6 +53,12 @@ export default function DocSidebarItemLink({ item, onItemClick, activePath, leve
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
case 'beta-icon':
|
||||
return (
|
||||
<div className={styles['sidebar_link_wrapper']}>
|
||||
<BetaTag/>
|
||||
</div>
|
||||
);
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
|
File diff suppressed because it is too large