docs: restructure observability, security & caching sections

PR-URL: https://github.com/hasura/graphql-engine-mono/pull/9554
Co-authored-by: Rikin Kachhia <54616969+rikinsk@users.noreply.github.com>
GitOrigin-RevId: d14c65920049a0cc39ddc9488af34fc3d6b67467
This commit is contained in:
Sean Park-Ross 2023-06-27 00:26:24 +07:00 committed by hasura-bot
parent 3b16bcb3ab
commit b53f1d7406
66 changed files with 781 additions and 3767 deletions

View File

@ -57,7 +57,7 @@ X-Hasura-Role: admin
| analyze_query_variables | false | boolean | Enables logging of the values of the query variables provided for each request. Default is `false`. |
Please see the corresponding
[feature documentation for the usage of these configurations](/observability/operations.mdx#capture-query-variables).
[feature documentation for the usage of these configurations](/observability/cloud-monitoring/operations.mdx#capture-query-variables).
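As a rough sketch of how this option might be set, the following Metadata API call enables query variable analysis. The project URL and admin secret are placeholders, and the exact argument set should be checked against the table above:

```bash
# Hypothetical illustration: enable logging of query variable values via set_metrics_config.
# Replace the host and admin secret with your project's values.
curl -s -X POST 'https://<project>.hasura.app/v1/metadata' \
  -H 'Content-Type: application/json' \
  -H 'X-Hasura-Admin-Secret: <admin-secret>' \
  -H 'X-Hasura-Role: admin' \
  -d '{
        "type": "set_metrics_config",
        "args": {
          "analyze_query_variables": true
        }
      }'
```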
## remove_metrics_config {#metadata-remove-metrics-config}
@ -77,7 +77,7 @@ X-Hasura-Role: admin
<ProductBadge self />
The OpenTelemetry configuration enables export of [distributed traces](/observability/tracing.mdx) to an
The OpenTelemetry configuration enables export of [distributed traces](/observability/cloud-monitoring/tracing.mdx) to an
[OpenTelemetry](https://opentelemetry.io/) compliant APM receiver.
## set_opentelemetry_config {#metadata-set-opentelemetry-config}
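As a hedged illustration of the shape such a call can take, the snippet below enables trace export to an OTLP receiver. The endpoint, protocol, and batch settings are placeholders, and the exact argument schema should be verified against the arguments documented for this API:

```bash
# Hypothetical illustration: export traces to an OpenTelemetry-compliant collector.
# The collector URL and batch size are placeholders.
curl -s -X POST 'https://<project>.hasura.app/v1/metadata' \
  -H 'Content-Type: application/json' \
  -H 'X-Hasura-Admin-Secret: <admin-secret>' \
  -d '{
        "type": "set_opentelemetry_config",
        "args": {
          "status": "enabled",
          "data_types": ["traces"],
          "exporter_otlp": {
            "otlp_traces_endpoint": "https://otel-collector.example.com:4318/v1/traces",
            "protocol": "http/protobuf"
          },
          "batch_span_processor": {
            "max_export_batch_size": 512
          }
        }
      }'
```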

View File

@ -708,7 +708,7 @@ needs to have the public key.
By providing a set of JWT secrets to GraphQL Engine on Cloud or Enterprise versions, you can set it up to authenticate
using various JWT issuers. This configuration option allows for greater flexibility in authentication. Read more here:
[Multiple JWT Secrets](/security/multiple-jwt-secrets.mdx).
[Multiple JWT Secrets](/auth/authentication/multiple-jwt-secrets.mdx).
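For illustration only, a minimal sketch of supplying two JWT secrets via the corresponding environment variable; the issuer URLs, JWKS URL, and key are placeholders:

```bash
# Hypothetical illustration: two JWT secrets, each tied to a different issuer.
# Replace the JWKS URL, key, and issuer values with those of your providers.
export HASURA_GRAPHQL_JWT_SECRETS='[
  { "type": "RS256", "jwks_url": "https://tenant-a.example.com/.well-known/jwks.json", "issuer": "https://tenant-a.example.com/" },
  { "type": "HS256", "key": "<long-random-shared-secret>", "issuer": "https://tenant-b.example.com/" }
]'
```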
## Security considerations

View File

@ -12,7 +12,7 @@ keywords:
- multiple
- admin
- secrets
sidebar_position: 4
sidebar_position: 50
sidebar_label: Multiple Admin Secrets
sidebar_class_name: cloud-and-enterprise-icon
---

View File

@ -11,7 +11,7 @@ keywords:
- multiple
- JWT
- secrets
sidebar_position: 5
sidebar_position: 60
sidebar_label: Multiple JWT Secrets
sidebar_class_name: cloud-and-enterprise-icon
---

View File

@ -95,4 +95,4 @@ In the above example, this configuration restricts the number of accessible rows
`{"id":{"_eq":"X-Hasura-User-Id"}}`) to 1.
Setting row fetch limits is useful for preventing abuse of your API, especially if it is exposed to the public. You
can [also configure other limits](/deployment/best-practices/security.mdx#limit-the-api).
can [also configure other limits](/security/security-best-practices.mdx#limit-the-api).
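A sketch of how such a permission could be created through the Metadata API, assuming a hypothetical Postgres table `users` and role `user`; the same row limit can also be set from the Console permissions UI:

```bash
# Hypothetical illustration: a select permission that restricts rows to the
# requesting user and caps the number of fetched rows at 1.
curl -s -X POST 'https://<project>.hasura.app/v1/metadata' \
  -H 'Content-Type: application/json' \
  -H 'X-Hasura-Admin-Secret: <admin-secret>' \
  -d '{
        "type": "pg_create_select_permission",
        "args": {
          "source": "default",
          "table": "users",
          "role": "user",
          "permission": {
            "columns": ["id", "name"],
            "filter": { "id": { "_eq": "X-Hasura-User-Id" } },
            "limit": 1
          }
        }
      }'
```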

View File

@ -23,7 +23,8 @@ Schemas.
Cached responses are stored for a period of time in an LRU (least-recently used) cache, and removed from the cache as per
a user-specified TTL (time-to-live) which defaults to 60 seconds.
For self-hosted Enterprise Edition, refer to the [enable caching](/enterprise/caching.mdx) documentation configure
For self-hosted Enterprise Edition, refer to the [enable caching](/caching/enterprise-caching.mdx) documentation to
configure
various parameters.
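For example, a request opting into caching with an explicit TTL might look like the following; the operation and field names are placeholders:

```bash
# Hypothetical illustration: cache this query's response for 120 seconds instead
# of the default 60. Whether the result came from cache can be checked via the
# caching metrics and response headers described in these docs.
curl -s -X POST 'https://<project>.hasura.app/v1/graphql' \
  -H 'Content-Type: application/json' \
  -H 'X-Hasura-Admin-Secret: <admin-secret>' \
  -d '{
        "query": "query ProductList @cached(ttl: 120) { products { id name } }"
      }'
```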
## Getting started

View File

@ -6,7 +6,8 @@ keywords:
- metrics
- prometheus
- grafana
sidebar_position: 5
sidebar_position: 6
sidebar_class_name: enterprise-icon
---
import Thumbnail from '@site/src/components/Thumbnail';
@ -24,14 +25,14 @@ This can help towards monitoring and further optimization of the cache utilizati
## Exposed metrics
The graphql engine exposes the `hasura_cache_request_count` Prometheus metric.
It represents a `counter` and is incremented every time a request with `@cached` directive is served.
The GraphQL Engine exposes the [hasura_cache_request_count](/observability/enterprise-edition/prometheus/metrics.mdx/#hasura-cache-request-count)
Prometheus metric. It represents a `counter` and is incremented every time a request with the `@cached` directive is served.
It has one label `status`, which can have values of either `hit` or `miss`.
| status | description |
|-----------|-------------------|
|--------|-----------------------------------------------------|
| `hit` | request served from the cache |
| `miss` | request served from the source (not found in cache) |
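As an illustration, the metric can be inspected directly from the metrics endpoint, and a cache hit ratio can be derived with a PromQL expression such as the one below; the endpoints and the secret are placeholders:

```bash
# Fetch the raw metric from the Hasura metrics endpoint (Enterprise Edition):
curl -s 'http://localhost:8080/v1/metrics' \
  -H 'Authorization: Bearer <metrics-secret>' | grep hasura_cache_request_count

# Illustrative PromQL for the cache hit ratio over the last 5 minutes,
# evaluated through the Prometheus HTTP query API:
curl -sG 'http://localhost:9090/api/v1/query' --data-urlencode \
  'query=sum(rate(hasura_cache_request_count{status="hit"}[5m])) / sum(rate(hasura_cache_request_count[5m]))'
```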

View File

@ -1,8 +1,9 @@
---
sidebar_label: Enable caching
sidebar_label: Enable caching in EE
sidebar_position: 5
description: Hasura Enterprise Edition caching
description: Caching in Hasura Enterprise Edition
title: 'Enterprise Edition: Enable GraphQL caching'
sidebar_class_name: enterprise-icon
keywords:
- hasura
- docs
@ -15,7 +16,7 @@ keywords:
import HeadingIcon from '@site/src/components/HeadingIcon';
import ProductBadge from '@site/src/components/ProductBadge';
# Enable GraphQL Caching
# Enable Caching in Hasura Enterprise Edition
<ProductBadge standard pro ee self />

View File

@ -10,7 +10,6 @@ keywords:
- caching
sidebar_position: 1
hide_table_of_contents: true
sidebar_class_name: cloud-and-enterprise-icon
---
import HeadingIcon from '@site/src/components/HeadingIcon';
@ -26,7 +25,7 @@ import Caching from '@site/static/icons/features/caching.svg';
<div className="overview-text">
<p>
Hasura Cloud and Enterprise Editions provide a caching layer that can be used to cache the response of a GraphQL
query. This can help reduce the number of requests to your datasources and improve the performance of your
query. This can help reduce the number of requests to your data sources and improve the performance of your
application.
</p>
<p>

View File

@ -41,6 +41,7 @@ Schema Registry is available on Hasura Cloud from `v2.26.0-cloud.1` and above.
## How it works
Whenever there is any operation on the Hasura Engine that could change the GraphQL schema, Hasura sends an event to the
Schema Registry along with the GraphQL schemas for all defined roles. Operations which could change the GraphQL schema
include:
@ -84,6 +85,7 @@ The changes between subsequent schemas are computed using the open source
Breaking changes are typically the changes that could potentially break your GraphQL operations (queries, mutations or
subscriptions) at the GraphQL operation validation layer.
For example, if a field `name` is removed from a GraphQL object type `user`, that counts as a breaking change as it
could potentially fail an existing GraphQL operation that queries the `name` field in the `user` type.

View File

@ -16,7 +16,7 @@ sidebar_position: 105
Best practices are the goal of all organizations, with many different facets benefiting from those practices. This is
particularly true of enterprise software, and Hasura is no different. The guides below are broken down by category.
- [Database Observability](/deployment/best-practices/db-observability.mdx)
- [Metadata](/deployment/best-practices/metadata.mdx)
- [Observability](/deployment/best-practices/observability.mdx)
- [Security](/deployment/best-practices/security.mdx)
- [Database Observability](/observability/db-observability.mdx)
- [Metadata](/migrations-metadata-seeds/metadata-best-practices.mdx)
- [Observability](/observability/observability-best-practices.mdx)
- [Security](/security/security-best-practices.mdx)

View File

@ -113,7 +113,7 @@ When you use [webhook or JWT mode for authentication](/auth/authentication/index
### Admin Secrets
A list of valid admin [secret keys](/security/multiple-admin-secrets.mdx) any one of which can be used to access the
A list of valid admin [secret keys](/auth/authentication/multiple-admin-secrets.mdx), any one of which can be used to access the
Hasura instance.
| | |
@ -367,7 +367,7 @@ Enable the Hasura Console (served by the server on `/` and `/console`).
### Enable High-cardinality Labels for Metrics
Enable high-cardinality labels for [Prometheus Metrics](/observability/prometheus/metrics.mdx). Enabling this setting
Enable high-cardinality labels for [Prometheus Metrics](/observability/enterprise-edition/prometheus/metrics.mdx). Enabling this setting
will add more labels to some of the metrics (e.g. the `operation_name` label for GraphQL subscription metrics).
| | |
@ -381,7 +381,7 @@ will add more labels to some of the metrics (e.g. `operation_name` label for Gra
### Enable Log Compression
Enable sending compressed logs to [metrics server](/observability/prometheus/metrics.mdx).
Enable sending compressed logs to the [metrics server](/observability/enterprise-edition/prometheus/metrics.mdx).
| | |
| ------------------- | ------------------------------------------ |
@ -578,7 +578,7 @@ Having an [admin secret](#admin-secret-key) set is mandatory for setting this va
### JWT Secrets
List of [JWT secrets](/security/multiple-jwt-secrets.mdx) to authenticate with different JWT issuers.
List of [JWT secrets](/auth/authentication/multiple-jwt-secrets.mdx) to authenticate with different JWT issuers.
| | |
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
@ -624,7 +624,7 @@ This variable sets the level for [Hasura's logs](/deployment/logging.mdx#logging
### Max Cache Size
The [maximum cache size](/enterprise/caching.mdx), measured in MB, for queries.
The [maximum cache size](/caching/enterprise-caching.mdx), measured in MB, for queries.
| | |
| ------------------- | ------------------------------------------------------------------------------------------------- |
@ -804,7 +804,7 @@ The maximum number of query plans that can be cached, where `0` disables the cac
### Rate-Limit Redis TLS Hostname
The hostname to use for SNI when connecting to a rate-limiting [Redis instance over TLS](/enterprise/caching.mdx).
The hostname to use for SNI when connecting to a rate-limiting [Redis instance over TLS](/caching/enterprise-caching.mdx).
| | |
| ------------------- | ---------------------------------------------- |
@ -816,7 +816,7 @@ The hostname to use for SNI when connecting to a rate-limiting [Redis instance o
### Rate-Limit Redis URL
The [Redis URL](/enterprise/caching.mdx) to use for rate limiting.
The [Redis URL](/caching/enterprise-caching.mdx) to use for rate limiting.
| | |
| ------------------- | ---------------------------------------- |
@ -829,7 +829,7 @@ The [Redis URL](/enterprise/caching.mdx) to use for rate limiting.
### Rate-Limit Redis Use TLS
Whether to use TLS to connect to a caching [Redis instance](/enterprise/caching.mdx).
Whether to use TLS to connect to a caching [Redis instance](/caching/enterprise-caching.mdx).
| | |
| ------------------- | -------------------------------------------- |
@ -860,7 +860,7 @@ The [URL for a read replica](/databases/database-config/read-replicas.mdx#adding
### Redis TLS Hostname
The hostname to use for SNI when connecting to a [caching Redis instance over TLS](/enterprise/caching.mdx).
The hostname to use for SNI when connecting to a [caching Redis instance over TLS](/caching/enterprise-caching.mdx).
| | |
| ------------------- | ----------------------------------- |
@ -873,7 +873,7 @@ The hostname to use for SNI when connecting to a [caching Redis instance over TL
### Redis TLS Shared CA Store Path
The path to a shared CA store to use to connect to both (caching and rate-limiting)
[Redis URLs over TLS](/enterprise/caching.mdx).
[Redis URLs over TLS](/caching/enterprise-caching.mdx).
| | |
| ------------------- | ------------------------------------------------------------ |
@ -885,8 +885,8 @@ The path to a shared CA store to use to connect to both (caching and rate-limiti
### Redis URL
The Redis URL to use for [query caching](/enterprise/caching.mdx) and
[Webhook Auth Caching](/auth/authentication/webhook.mdx#webhook-auth-caching).
The Redis URL to use for [query caching](/caching/enterprise-caching.mdx) and [Webhook Auth
Caching](/auth/authentication/webhook.mdx#webhook-auth-caching).
| | |
| ------------------- | ---------------------------------------- |
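Taken together, a hedged sketch of a caching and rate-limiting setup using the variables described in this reference might look like the following; the hostnames and cache size are placeholders, and the variable names should be double-checked against the tables above:

```bash
# Hypothetical illustration: Redis-backed query caching and rate limiting for a
# self-hosted Enterprise Edition deployment. All values are placeholders.
export HASURA_GRAPHQL_REDIS_URL='redis://redis-cache.internal:6379'
export HASURA_GRAPHQL_RATE_LIMIT_REDIS_URL='redis://redis-ratelimit.internal:6379'
export HASURA_GRAPHQL_MAX_CACHE_SIZE='200'   # maximum cache size in MB
```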

View File

@ -74,7 +74,7 @@ The Enterprise Edition log-types that can be enabled/disabled are:
| `api-limit-log` | Logs errors in [API limit](/security/api-limits.mdx) | `error` |
| `livequery-poller-log` | Logs information for active subscriptions (poller-id, generated sql, polling time, subscriber count, subscription kind, etc.) | `info` |
| `response-caching-log` | Logs response information and errors from [query caching](/caching/overview.mdx) | `info`, `error` and `debug` |
| `tracing-log` | Logs information about [tracing spans](/observability/tracing.mdx) | `info` |
| `tracing-log` | Logs information about [tracing spans](/observability/cloud-monitoring/tracing.mdx) | `info` |
| `metrics` | Logs tenant metrics information | `info` |
| `health-check-log` | Logs source Health Check events which includes health status of a data source | `info` and `warn` |
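As an illustration, selected Enterprise Edition log types can be enabled alongside the default ones via the corresponding environment variable; the exact list below is only an example:

```bash
# Hypothetical illustration: enable caching and tracing logs in addition to the
# default startup/http-log/webhook-log/websocket-log set.
export HASURA_GRAPHQL_ENABLED_LOG_TYPES='startup,http-log,webhook-log,websocket-log,query-log,response-caching-log,tracing-log'
```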
@ -924,4 +924,4 @@ The `subscription_options` field is an object with the following properties:
You can integrate the logs emitted by the Hasura Engine with the external monitoring tools of your choice for better
visibility.
For some examples, see [Guides: Integrating with monitoring frameworks](/observability/integrations/index.mdx)
For some examples, see [Guides: Integrating with monitoring frameworks](/observability/cloud/index.mdx)

View File

@ -141,7 +141,7 @@ Observability tools help us track issues, alert us to errors, and allow us to mo
is critical in production. There are many open-source and commercial services. However, you may have to combine many
tools because of the architectural complexity. For more information, check out our
[observability section](/observability/overview.mdx) and our
[observability best practices](/deployment/best-practices/observability.mdx).
[observability best practices](/observability/observability-best-practices.mdx).
## Software architecture and best practices

View File

@ -27,6 +27,6 @@ access to your GraphQL endpoint and the Hasura Console:
If you're looking at adding access control rules for your data to your GraphQL API then head to
[Authentication / access control](/auth/overview.mdx). You can also find more information about
[Hasura security in general here](/security/overview.mdx) and best practices
[here](/deployment/best-practices/security.mdx).
[here](/security/security-best-practices.mdx).
:::

View File

@ -33,4 +33,4 @@ When you are ready to move Hasura to production, check out our
continuous availability.
- We recommend running Hasura with at least 4 CPU cores and a minimum of 8 GB RAM in production. Please set autoscaling
on CPU.
- [Enable and consume metrics](/observability/prometheus/index.mdx).
- [Enable and consume metrics](/observability/enterprise-edition/prometheus/index.mdx).

View File

@ -97,7 +97,7 @@ import Enterprise from '@site/static/icons/features/enterprise.svg';
</div>
</VersionedLink>
<h2 style={{ gridColumn: `1 / -1`, marginTop: `1.2rem`, marginBottom: `.3rem`, justifySelf: `start`, fontSize: `1.8rem` }}>Performance</h2>
<VersionedLink to="/enterprise/caching/">
<VersionedLink to="/caching/enterprise-caching/">
<div className="card">
<h3>Caching</h3>
<p>Learn how to configure caching in Hasura Enterprise Edition to improve the performance of your GraphQL API.</p>
@ -137,13 +137,13 @@ import Enterprise from '@site/static/icons/features/enterprise.svg';
<p>Prevent unauthorized access to your GraphQL API by disabling GraphQL introspection in Hasura Enterprise.</p>
</div>
</VersionedLink>
<VersionedLink to="/security/multiple-admin-secrets/">
<VersionedLink to="/auth/authentication/multiple-admin-secrets/">
<div className="card">
<h3>Multiple Admin Secrets</h3>
<p>Configure multiple admin secrets in Hasura Enterprise Edition.</p>
</div>
</VersionedLink>
<VersionedLink to="/security/multiple-jwt-secrets/">
<VersionedLink to="/auth/authentication/multiple-jwt-secrets/">
<div className="card">
<h3>Multiple JWT Secrets</h3>
<p>Configure multiple JWT secrets in Hasura Enterprise Edition to support multiple authentication providers.</p>
@ -165,13 +165,13 @@ import Enterprise from '@site/static/icons/features/enterprise.svg';
</p>
</div>
</VersionedLink>
<VersionedLink to="/observability/prometheus/index/">
<VersionedLink to="/observability/enterprise-edition/prometheus/index/">
<div className="card">
<h3>Metrics via Prometheus</h3>
<p>Learn how to configure Prometheus in Hasura Enterprise Edition to monitor your GraphQL API.</p>
</div>
</VersionedLink>
<VersionedLink to="/enterprise/opentelemetry/">
<VersionedLink to="/observability/enterprise-edition/opentelemetry/">
<div className="card">
<h3>Traces via OpenTelemetry</h3>
<p>Learn how to configure OpenTelemetry in Hasura Enterprise Edition.</p>

View File

@ -50,145 +50,19 @@ events queue to the webhook.
<ProductBadge self />
Hasura exposes a set of [Prometheus metrics](/observability/prometheus/metrics.mdx) that can be used to monitor the
Event Trigger system and help diagnose performance issues.
Hasura EE exposes a set of [Prometheus metrics](/observability/enterprise-edition/prometheus/metrics.mdx/#hasura-event-triggers-metrics)
that can be used to monitor the Event Trigger system and help diagnose performance issues.
### Event fetch time per batch
The following metrics can be used to monitor the performance of the Hasura Event Triggers system:
Hasura fetches the events in batches (by default 100) from the Hasura Event tables in the database. This metric
represents the time taken to fetch a batch of events from the database.
A higher metric indicates slower polling of events from the database, you should consider looking into the performance
of your database.
| | |
| ------ | ------------------------------------------------------------------------------------------ |
| Name | `hasura_event_fetch_time_per_batch_seconds` |
| Type | Histogram<br /><br />Buckets: 0.0001, 0.0003, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10 |
| Labels | none |
### Event invocations total
This metric represents the number of HTTP requests that have been made to the webhook server for delivering events.
| | |
| ------ | ---------------------------------------------------------- |
| Name | `hasura_event_invocations_total` |
| Type | Counter |
| Labels | `status`: success \| failed, `trigger_name`, `source_name` |
### Event processed total
Total number of events processed. Represents the Event Trigger egress.
| | |
| ------ | ---------------------------------------------------------- |
| Name | `hasura_event_processed_total` |
| Type | Counter |
| Labels | `status`: success \| failed, `trigger_name`, `source_name` |
### Event processing time
Time time taken for an event to be processed.
| | |
| ------ | --------------------------------------------------------------------- |
| Name | `hasura_event_processing_time_seconds` |
| Type | Histogram<br /><br />Buckets: 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100 |
| Labels | `trigger_name`, `source_name` |
The processing of an event involves the following steps:
1. Hasura Engine fetching the event from Hasura Event tables in the database and adding it to the Hasura Events queue
2. An HTTP worker picking up the event from the Hasura Events queue
3. An HTTP worker delivering the event to the webhook
:::info Event delivery failure
Note, if the delivery of the event fails - the delivery of the event is retried based on its `next_retry_at`
configuration.
:::
This metric represent the time taken for an event to be delivered since it was created (if the first attempt) or retried
(after the first attempt). **This metric can be considered as the end-to-end processing time for an event.**
For e.g., say an event was created at `2021-01-01 10:00:30` and it has a `next_retry_at` configuration which says if the
event delivery fails, the event should be retried after 30 seconds.
At `2021-01-01 10:01:30`: the event was fetched from the Hasura Event tables, picked up by the HTTP worker, and the
delivery was attempted. The delivery failed and the `next_retry_at` of `2021-01-01 10:02:00` was set for the event.
Now at `2021-01-01 10:02:00`: the event was fetched again from the Hasura Event tables, picked up by the HTTP worker,
and the delivery was attempted at `2021-01-01 10:03: 30`. This time, the delivery was successful.
The processing time for the second delivery try would be:
Processing Time = event delivery time - event next retried time
Processing Time = `2021-01-01 10:03:30` - `2021-01-01 10:02:00` = `90 seconds`
### Event queue time
Hasura fetches the events from the Hasura Event tables in the database and adds it to the Hasura Events queue. The event
queue time represents the time taken for an event to be picked up by the HTTP worker after it has been added to the
"Events Queue".
Higher value of this metric implies slow event processing. In this case, you can consider increasing the
[HTTP pool size](/deployment/graphql-engine-flags/reference.mdx/#events-http-pool-size) or optimizing the webhook
server.
| | |
| ------ | --------------------------------------------------------------------- |
| Name | `hasura_event_queue_time_seconds` |
| Type | Histogram<br /><br />Buckets: 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100 |
| Labels | `trigger_name`, `source_name` |
### Event Triggers HTTP Workers
Current number of active Event Trigger HTTP workers. Compare this number to the
[HTTP pool size](/deployment/graphql-engine-flags/reference.mdx/#events-http-pool-size). Consider increasing it if the
metric is near the current configured value.
| | |
| ------ | ----------------------------------- |
| Name | `hasura_event_trigger_http_workers` |
| Type | Gauge |
| Labels | none |
### Event webhook processing time
The time between when an HTTP worker picks an event for delivery to the time it sends the event payload to the webhook.
A higher processing time indicates slow webhook, you should try to optimize the event webhook.
| | |
| ------ | ------------------------------------------------------------ |
| Name | `hasura_event_webhook_processing_time_seconds` |
| Type | Histogram<br /><br />Buckets: 0.01, 0.03, 0.1, 0.3, 1, 3, 10 |
| Labels | `trigger_name`, `source_name` |
### Events fetched per batch
Number of events fetched from the Hasura Event tables in the database per batch. This number should be equal or less
than the [events fetch batch size](/deployment/graphql-engine-flags/reference.mdx/#events-fetch-batch-size).
| | |
| ------ | --------------------------------- |
| Name | `hasura_events_fetched_per_batch` |
| Type | Gauge |
| Labels | none |
Since polling the database to continuously check if there are any pending events is an expensive operation, Hasura only
polls the database if there are any pending events. This metric can be used to understand if there are any pending
events in the Hasura Event Tables.
:::info Dependent on pending events
Note that Hasura only fetches events from the Hasura Event tables if there are any pending events. If there are no
pending events, this metric will be 0.
:::
- [`hasura_event_fetch_time_per_batch_seconds`](/observability/enterprise-edition/prometheus/metrics.mdx/#event-fetch-time-per-batch)
- [`hasura_event_invocations_total`](/observability/enterprise-edition/prometheus/metrics.mdx/#event-invocations-total)
- [`hasura_event_processed_total`](/observability/enterprise-edition/prometheus/metrics.mdx/#event-processed-total)
- [`hasura_event_processing_time_seconds`](/observability/enterprise-edition/prometheus/metrics.mdx/#event-processing-time)
- [`hasura_event_queue_time_seconds`](/observability/enterprise-edition/prometheus/metrics.mdx/#event-queue-time)
- [`hasura_event_trigger_http_workers`](/observability/enterprise-edition/prometheus/metrics.mdx/#event-triggers-http-workers)
- [`hasura_event_webhook_processing_time_seconds`](/observability/enterprise-edition/prometheus/metrics.mdx/#event-webhook-processing-time)
- [`hasura_events_fetched_per_batch`](/observability/enterprise-edition/prometheus/metrics.mdx/#events-fetched-per-batch)
## Golden signals for Hasura Event Triggers

View File

@ -122,11 +122,11 @@ The following environment variables can be utilized to configure different value
the CLI:
| Environment variable | Config file key | Description |
| ------------------------------------------------ | --------------------------------- |-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|--------------------------------------------------|-----------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `HASURA_GRAPHQL_VERSION` | `version` | Config version to be used. |
| `HASURA_GRAPHQL_ENDPOINT` | `endpoint` | http(s) endpoint for Hasura GraphQL Engine. |
| `HASURA_GRAPHQL_ADMIN_SECRET` | `admin_secret` | Admin secret for Hasura GraphQL Engine. |
| `HASURA_GRAPHQL_ADMIN_SECRETS` | `admin_secrets` | [Admin secrets](/security/multiple-admin-secrets.mdx) for Hasura GraphQL Engine _(Cloud/Enterprise Edition only)_. eg: `HASURA_GRAPHQL_ADMIN_SECRETS='["foo", "bar", "baz"]'` |
| `HASURA_GRAPHQL_ADMIN_SECRETS` | `admin_secrets` | [Admin secrets](/auth/authentication/multiple-admin-secrets.mdx) for Hasura GraphQL Engine _(Cloud/Enterprise Edition only)_. eg: `HASURA_GRAPHQL_ADMIN_SECRETS='["foo", "bar", "baz"]'` |
| `HASURA_GRAPHQL_ACCESS_KEY` | `access_key` | Access key for Hasura GraphQL Engine. Note: Deprecated. Use admin secret instead. |
| `HASURA_GRAPHQL_INSECURE_SKIP_TLS_VERIFY` | `insecure_skip_tls_verify` | Skip verifying SSL certificate for the Hasura endpoint. Useful if you have a self-signed certificate and don't have access to the CA cert. |
| `HASURA_GRAPHQL_CERTIFICATE_AUTHORITY` | `certificate_authority` | Path to the CA certificate for validating the self-signed certificate for the Hasura endpoint. |

View File

@ -89,7 +89,7 @@ to your project now should be executed by the version control process. If you ha
Console for any reason, please follow the `metadata export` command above and update your version control with the
latest Metadata. Otherwise, you may lose the manual changes in your project the next time GitHub integration runs.
You can learn more in our [Metadata Best Practices guide](/deployment/best-practices/metadata.mdx).
You can learn more in our [Metadata Best Practices guide](/migrations-metadata-seeds/metadata-best-practices.mdx).
:::
@ -163,7 +163,7 @@ You need to ensure the your Cloud project has been appropriately set up to execu
added to your Cloud project as well.
- Ensure that you have connected the required database(s) with the right name and connection params as you have in the
Metadata to the Cloud project.
- Follow the steps in our [Metadata Best Practices](/deployment/best-practices/metadata.mdx) guide to ensure that your
- Follow the steps in our [Metadata Best Practices](/migrations-metadata-seeds/metadata-best-practices.mdx) guide to ensure that your
Metadata is in the right format and structure.
## Troubleshooting failures {#github-integration-troubleshooting}

View File

@ -108,7 +108,7 @@ import Cloud from '@site/static/icons/features/cloud.svg';
<VersionedLink to="/hasura-cloud/plans/">API request duration (up to 60 seconds)</VersionedLink>
</li>
<li>
<VersionedLink to="/observability/websockets/">Concurrent websocket connections (up to 10)</VersionedLink>
<VersionedLink to="/observability/cloud-monitoring/websockets/">Concurrent websocket connections (up to 10)</VersionedLink>
</li>
<li>
<VersionedLink to="/api-reference/restified/">RESTified endpoints</VersionedLink>
@ -193,7 +193,7 @@ import Cloud from '@site/static/icons/features/cloud.svg';
<VersionedLink to="/hasura-cloud/plans/">API request duration (up to 120 seconds)</VersionedLink>
</li>
<li>
<VersionedLink to="/observability/websockets/">Concurrent websocket connections (up to 100)</VersionedLink>
<VersionedLink to="/observability/cloud-monitoring/websockets/">Concurrent websocket connections (up to 100)</VersionedLink>
</li>
<li>
<VersionedLink to="/caching/overview/">Query caching (up to 100 MB)</VersionedLink>
@ -228,16 +228,16 @@ import Cloud from '@site/static/icons/features/cloud.svg';
<VersionedLink to="/security/disable-graphql-introspection/">Disable GraphQL introspection</VersionedLink>
</li>
<li>
<VersionedLink to="/security/multiple-jwt-secrets/">Multiple JWT secrets</VersionedLink>
<VersionedLink to="/auth/authentication/multiple-jwt-secrets/">Multiple JWT secrets</VersionedLink>
</li>
<li>
<VersionedLink to="/security/multiple-admin-secrets/">Multiple admin keys</VersionedLink>
<VersionedLink to="/auth/authentication/multiple-admin-secrets/">Multiple admin keys</VersionedLink>
</li>
<li>
<VersionedLink to="/observability/overview/">Metrics, logs, and traces</VersionedLink>
</li>
<li>
<VersionedLink to="/observability/integrations/index/">Observability integration</VersionedLink>
<VersionedLink to="/observability/cloud/index/">Observability integration</VersionedLink>
</li>
<li>
<VersionedLink to="/api-reference/metadata-api/observability/#logs-and-metrics-configuration">
@ -282,7 +282,7 @@ import Cloud from '@site/static/icons/features/cloud.svg';
<VersionedLink to="/hasura-cloud/plans/">API request duration (no limit)</VersionedLink>
</li>
<li>
<VersionedLink to="/observability/websockets/">Concurrent websocket connections (no limit)</VersionedLink>
<VersionedLink to="/observability/cloud-monitoring/websockets/">Concurrent websocket connections (no limit)</VersionedLink>
</li>
<li>
<VersionedLink to="/caching/overview/">Query caching (no limit)</VersionedLink>

View File

@ -4,7 +4,7 @@ keywords:
- hasura
- docs
- best practices
sidebar_label: Metadata
sidebar_label: Metadata Best Practices
---
# Metadata Best Practices

View File

@ -1,5 +1,5 @@
{
"label": "Observability",
"position": 63,
"className": "cloud-icon"
"className": "cloud-and-enterprise-icon"
}

View File

@ -0,0 +1,5 @@
{
"label": "Built-in Monitoring",
"position": 4,
"className": "cloud-icon"
}

View File

@ -1,14 +1,16 @@
---
description: Hasura Observability is a set of tools that help you monitor and debug your GraphQL API
title: How It Works
title: Monitoring on Hasura Cloud
keywords:
- hasura
- docs
- observability
sidebar_position: 2
- monitoring
sidebar_position: 1
slug: index
---
# Observability in Hasura Cloud
# Built-in Monitoring in Hasura Cloud
Observability is a critical aspect of any application, and Hasura Cloud provides developers with a powerful set of tools
to monitor and debug their applications. In this document, we'll explore the observability features available in Hasura
@ -16,42 +18,42 @@ Cloud and how they can help you build better applications.
## Error Reporting
Hasura Cloud provides detailed [error reporting](/observability/errors.mdx) for GraphQL queries and mutations. Whenever
Hasura Cloud provides detailed [error reporting](errors.mdx) for GraphQL queries and mutations. Whenever
an error occurs, Hasura Cloud captures the error message, query, and other relevant information, allowing you to quickly
identify and fix the issue. This feature is particularly useful when debugging complex GraphQL queries and mutations.
## Usage Summaries
Hasura Cloud provides [usage summaries](/observability/usage.mdx) for your GraphQL operations, allowing you to monitor
Hasura Cloud provides [usage summaries](usage.mdx) for your GraphQL operations, allowing you to monitor
the performance of your application and identify any performance bottlenecks. The usage summaries can be filtered by
time range, operation type, and other parameters, making it easy to pinpoint performance issues.
## GraphQL Operations
Hasura Cloud provides detailed metrics for your [GraphQL operations](/observability/operations.mdx), including query
Hasura Cloud provides detailed metrics for your [GraphQL operations](operations.mdx), including query
latency, request count, and error rate. This information can be used to monitor the performance of your application and
identify any issues that may be impacting your users.
## Websockets
Hasura Cloud supports [WebSockets](/observability/websockets.mdx), allowing you to build real-time applications that can
Hasura Cloud supports [WebSockets](websockets.mdx), allowing you to build real-time applications that can
push data updates to the client in real-time without having to continuously poll the server. Hasura Cloud provides
detailed metrics for your WebSocket connections, including connection count, message count, and error rate.
## Subscription Workers
Hasura Cloud provides [subscription workers](/observability/subscription-workers.mdx) that can be used to process
Hasura Cloud provides [subscription workers](subscription-workers.mdx) that can be used to process
subscriptions and deliver real-time updates to your clients. The subscription workers are fully managed and can be
scaled up or down based on your application's needs.
## Distributed Tracing
Hasura Cloud provides [distributed tracing](/observability/tracing.mdx) capabilities, allowing you to trace requests
Hasura Cloud provides [distributed tracing](/observability/cloud-monitoring/tracing.mdx) capabilities, allowing you to trace requests
across multiple services and identify any performance bottlenecks. The tracing information can be used to optimize your
application's performance and ensure that it is running smoothly.
## Query Tags
Hasura Cloud provides [query tags](/observability/query-tags.mdx), which can be used to tag your GraphQL queries and
Hasura Cloud provides [query tags](query-tags.mdx), which can be used to tag your GraphQL queries and
mutations with metadata. This metadata can be used to filter and group your usage summaries and metrics, making it easy
to identify trends and patterns in your application's usage.

View File

@ -0,0 +1,5 @@
{
"label": "Hasura Cloud integrations",
"position": 5,
"className": "cloud-icon"
}

View File

@ -1,5 +1,5 @@
---
sidebar_position: 3
sidebar_position: 6
description: Azure monitor Integration on Hasura Cloud
title: 'Cloud: Azure Monitor Integration'
keywords:
@ -17,7 +17,7 @@ import Thumbnail from '@site/src/components/Thumbnail';
import HeadingIcon from '@site/src/components/HeadingIcon';
import ProductBadge from '@site/src/components/ProductBadge';
# Azure Monitor Integration
# Azure Monitor Integration on Hasura Cloud
<ProductBadge standard pro ee />

View File

@ -1,6 +1,6 @@
---
sidebar_label: Datadog
sidebar_position: 2
sidebar_position: 4
description: Datadog Integration on Hasura Cloud
title: 'Cloud: Datadog Integration'
keywords:
@ -17,7 +17,7 @@ import Thumbnail from '@site/src/components/Thumbnail';
import HeadingIcon from '@site/src/components/HeadingIcon';
import ProductBadge from '@site/src/components/ProductBadge';
# Datadog Integration
# Datadog Integration on Hasura Cloud
<ProductBadge standard pro ee />

View File

@ -1,8 +1,7 @@
---
sidebar_position: 1
slug: index
title: 'Cloud: Integrations with external services'
description: Configure integrations with Hasura Cloud
title: 'Cloud: Observability integrations with external services'
description: Configure observability integrations with Hasura Cloud
keywords:
- hasura
- docs
@ -13,12 +12,14 @@ keywords:
- observability
- monitoring
- monitoring framework
sidebar_class_name: cloud-icon
slug: index
---
import HeadingIcon from '@site/src/components/HeadingIcon';
import ProductBadge from '@site/src/components/ProductBadge';
# Integrations with External Services
# Hasura Cloud observability integrations with external services
<ProductBadge standard pro ee />
@ -27,6 +28,17 @@ import ProductBadge from '@site/src/components/ProductBadge';
To be able to effectively monitor, diagnose and troubleshoot your application stack in production, Hasura Cloud will
export metrics, logs and traces to observability tools / APM vendors.
## Supported integrations
Check out the following guides on how to export telemetry data from Hasura Cloud to the observability tool of your
choice:
- [Datadog](datadog.mdx)
- [New Relic](newrelic.mdx)
- [Azure monitor](azure-monitor.mdx)
- [Prometheus](prometheus.mdx)
- [OpenTelemetry](opentelemetry.mdx)
## Log types
Hasura Cloud combines various
@ -128,7 +140,7 @@ Hasura Cloud APM integrations export the following metrics:
## Traces
Hasura Cloud APM integrations export the same trace logs as Hasura GraphQL Engine. You can find more information about
tracing [here](/observability/tracing.mdx).
tracing [here](/observability/cloud-monitoring/tracing.mdx).
:::info Sampling
@ -213,14 +225,3 @@ Hasura samples all trace logs and exports only 5% to your configured APM provide
```
</details>
## Supported integrations
Check out the following guides on how to export telemetry data from Hasura Cloud to the observability tool of your
choice:
- [Datadog](/observability/integrations/datadog.mdx)
- [New Relic](/observability/integrations/newrelic.mdx)
- [Azure monitor](/observability/integrations/azure-monitor.mdx)
- [Prometheus](/observability/prometheus/cloud-integration.mdx)
- [OpenTelemetry](/observability/integrations/opentelemetry.mdx)

View File

@ -1,5 +1,5 @@
---
sidebar_position: 2
sidebar_position: 5
description: New Relic Integration on Hasura Cloud
title: 'Cloud: New Relic Integration'
keywords:
@ -18,7 +18,7 @@ import Thumbnail from '@site/src/components/Thumbnail';
import HeadingIcon from '@site/src/components/HeadingIcon';
import ProductBadge from '@site/src/components/ProductBadge';
# New Relic Integration
# New Relic Integration on Hasura Cloud
<ProductBadge standard pro ee />

View File

@ -1,6 +1,6 @@
---
sidebar_label: OpenTelemetry
sidebar_position: 5
sidebar_position: 3
description: OpenTelemetry Integration on Hasura Cloud
title: 'Cloud: OpenTelemetry Integration'
keywords:
@ -18,7 +18,7 @@ import Thumbnail from '@site/src/components/Thumbnail';
import HeadingIcon from '@site/src/components/HeadingIcon';
import ProductBadge from '@site/src/components/ProductBadge';
# OpenTelemetry Integration
# Export Traces to OpenTelemetry Compliant Receiver from Hasura Cloud
<ProductBadge standard pro ee />
@ -29,7 +29,7 @@ receiver. This can be configured on the Integrations tab on the project's settin
:::info Note
For Hasura Cloud projects, the OpenTelemetry Integration is only available on the `Standard` (pay-as-you-go) tier and
For Hasura Cloud projects, the OpenTelemetry Integration is only available on the `Professional` tier and
above.
:::

View File

@ -1,6 +1,6 @@
---
sidebar_label: Integrate with Hasura Cloud
sidebar_position: 3
sidebar_label: Prometheus
sidebar_position: 2
description: Prometheus Integration on Hasura Cloud
title: 'Cloud: Prometheus Integration'
keywords:

View File

@ -1,5 +1,5 @@
---
title: 'Best Practices: Database observability'
title: 'Database observability'
description: Database observability best practices
keywords:
- hasura
@ -8,6 +8,7 @@ keywords:
- observability
- database observability
sidebar_label: Database observability
sidebar_position: 6
---
# Database Observability
@ -45,8 +46,8 @@ tables.
In this example, we will use Postgres and Datadog:
Log into a `psql` session as a user who has
[CREATEROLE privileges](https://www.postgresql.org/docs/current/static/app-createuser.html), create a `datadog`user and
password, and grant it read access to `pg_stat_database.`
[CREATEROLE privileges](https://www.postgresql.org/docs/current/static/app-createuser.html), create a `datadog` user and
password, and grant it read access to `pg_stat_database`.
```
create user datadog with password '<PASSWORD>';
@ -134,7 +135,7 @@ import Thumbnail from '@site/src/components/Thumbnail';
/>
- [Sign up](https://www.datadoghq.com) for Datadog.
- [Enable Datadog integration for your Hasura Project](/observability/integrations/datadog.mdx).
- [Enable Datadog integration for your Hasura Project](/observability/cloud/datadog.mdx).
- Set up Tags.
- [Enable Hasura Integration for Datadog](https://app.datadoghq.com/integrations).
- Create a New Dashboard and import the JSON file by following the instructions

View File

@ -0,0 +1,5 @@
{
"label": "Hasura EE integrations",
"position": 5,
"className": "enterprise-icon"
}

View File

@ -0,0 +1,38 @@
---
sidebar_position: 1
title: 'EE: Observability integrations with external services'
description: Configure observability integrations with Hasura EE
keywords:
- hasura
- docs
- cloud
- integrations
- exporter
- integration
- observability
- monitoring
- monitoring framework
sidebar_class_name: ee-icon
slug: index
---
import HeadingIcon from '@site/src/components/HeadingIcon';
import ProductBadge from '@site/src/components/ProductBadge';
# Hasura EE observability integrations with external services
<ProductBadge standard pro ee />
## Overview
To be able to effectively monitor, diagnose and troubleshoot your application stack in production, Hasura Enterprise
Edition will export metrics, logs and traces to observability tools / APM vendors.
## Supported integrations
Check out the following guides on how to export telemetry data from Hasura Enterprise Edition to the observability
tool of your choice:
- [Prometheus](prometheus/index.mdx)
- [OpenTelemetry](opentelemetry.mdx)

View File

@ -1,8 +1,8 @@
---
sidebar_label: Traces via OpenTelemetry
sidebar_label: OpenTelemetry
description: Traces via OpenTelemetry for Hasura Enterprise Edition
title: 'Traces via OpenTelemetry'
sidebar_class_name: beta-tag
sidebar_class_name: beta-icon
keywords:
- hasura
- docs
@ -18,7 +18,7 @@ import TabItem from '@theme/TabItem';
import Thumbnail from '@site/src/components/Thumbnail';
import ProductBadge from '@site/src/components/ProductBadge';
# Export Traces to OpenTelemetry Compliant Receiver
# Export Traces to OpenTelemetry Compliant Receiver from Hasura EE
<div className="badge-container">
<ProductBadge self />
@ -27,7 +27,7 @@ import ProductBadge from '@site/src/components/ProductBadge';
## Introduction
[Distributed traces](/observability/tracing.mdx) track and map journeys of user requests across various services or
[Distributed traces](/observability/cloud-monitoring/tracing.mdx) track and map journeys of user requests across various services or
components which can then be analyzed via observability tools.
Traces are typically used to diagnose or debug which part of your application could potentially be responsible for a

View File

@ -0,0 +1,4 @@
{
"label": "Prometheus",
"position": 2
}

View File

@ -1,7 +1,7 @@
---
slug: index
title: 'EE: Integrations with Prometheus'
description: Configure integrations with Hasura Enterprise Edition
description: Prometheus Integration on Hasura EE
keywords:
- hasura
- docs
@ -19,20 +19,16 @@ keywords:
import HeadingIcon from '@site/src/components/HeadingIcon';
import ProductBadge from '@site/src/components/ProductBadge';
# Integrate Prometheus with Hasura Enterprise Edition
<ProductBadge self pro ee />
# Prometheus Integration with Hasura EE
## Overview
In this section, you'll find information on how to integrate [Prometheus](https://prometheus.io/) with Hasura Enterprise
Edition:
In this section, you'll find information on how to integrate [Prometheus](https://prometheus.io/) with Hasura
Enterprise Edition:
- [Available metrics](/observability/prometheus/metrics.mdx): Learn about metrics available to monitor the health,
performance and reliability of the Hasura GraphQL Engine.
- [Integrate with Hasura Cloud](/observability/prometheus/cloud-integration.mdx): Configure Prometheus integration with
Hasura Enterprise Edition.
- [Integrate Prometheus with Hasura EE and build a Grafana Dashboard](/observability/prometheus/grafana-dashboard.mdx):
- [Integrate Prometheus with Hasura EE and build a Grafana Dashboard](integrate-prometheus-grafana.mdx):
Configure Prometheus integration with Hasura Enterprise Edition.
- [Pre-built dashboards](/observability/prometheus/pre-built-dashboards.mdx): Learn about pre-built dashboards available
- [Pre-built dashboards](pre-built-dashboards.mdx): Learn about pre-built dashboards available
for Hasura Enterprise Edition.
- [Available metrics](metrics.mdx): Learn about metrics available to monitor the health,
performance and reliability of the Hasura GraphQL Engine.

View File

@ -1,7 +1,7 @@
---
sidebar_position: 2
sidebar_label: Integrate with Hasura EE
title: 'Integrate Prometheus with Hasura EE'
sidebar_label: Integrate and build a Grafana Dashboard
title: 'Integrate Prometheus with Hasura EE and build a Grafana Dashboard'
description: Install Prometheus server and Grafana to create a basic observability dashboard for Hasura.
keywords:
- hasura
@ -15,6 +15,7 @@ keywords:
- grafana
- monitoring
- monitoring framework
toc_max_heading_level: 4
---
import HeadingIcon from '@site/src/components/HeadingIcon';
@ -35,11 +36,47 @@ approaches depending on your use case:
- **Containerized**: If you are running Prometheus and Grafana in a containerized environment, follow the
[containerized installation](#containerized-installation) instructions.
## Self-hosted installation
## Step 1: Enable metrics endpoint
### Install and configure Prometheus
By default, the Prometheus metrics endpoint is disabled. To enable Prometheus metrics, configure the environment variable
below:
#### Step 1. Set up the environment
```bash
HASURA_GRAPHQL_ENABLED_APIS=metadata,graphql,config,metrics
```
Secure the Prometheus metrics endpoint with a secret:
```bash
HASURA_GRAPHQL_METRICS_SECRET=<secret>
```
The metrics can then be fetched from the `/v1/metrics` endpoint by passing the secret as a bearer token, for example:

```bash
curl 'http://127.0.0.1:8080/v1/metrics' -H 'Authorization: Bearer <secret>'
```
:::info Configure a secret
The metrics endpoint should be configured with a secret to prevent misuse and should not be exposed over the internet.
:::
:::tip High-cardinality Labels
Starting in `v2.26.0`, Hasura GraphQL Engine exposes metrics with high-cardinality labels by default.
You can disable
[the cardinality of labels for metrics](/deployment/graphql-engine-flags/reference.mdx#enable-high-cardinality-labels-for-metrics)
if you are experiencing high memory usage, which can be due to a large number of labels in the metrics (typically more
than 10000).
:::
## Option 1: Self-hosted installation
### Step 2: Install and configure Prometheus
#### Step 2.1: Set up the environment
You will need to create a Prometheus user and group, and a directory for Prometheus to store its data. You will also
need to create a directory for Prometheus to store its configuration files.
@ -54,7 +91,7 @@ sudo mkdir /var/lib/prometheus
for i in rules rules.d files_sd; do sudo mkdir -p /etc/prometheus/${i}; done
```
#### Step 2. Install Prometheus
#### Step 2.2: Install Prometheus
The following set of commands will help you download and install Prometheus:
@ -75,7 +112,7 @@ You can check to see if Prometheus is installed correctly by running the followi
prometheus --version
```
#### Step 3. Connect Prometheus to Hasura
#### Step 2.3: Connect Prometheus to Hasura
To connect Prometheus to Hasura, you will need to create a configuration file for Prometheus. The following commands
will help you do this:
@ -124,7 +161,7 @@ scrape_configs:
- targets: ['hasura_deployment_url:8080']
```
#### Step 4. Set firewall rules
#### Step 2.4: Set firewall rules
If you are using a firewall, you will need to set the following rules:
@ -132,7 +169,7 @@ If you are using a firewall, you will need to set the following rules:
sudo ufw allow 9090/tcp
```
#### Step 5. Set up a password for Prometheus web access
#### Step 2.5: Set up a password for Prometheus web access
To set up a password for Prometheus web access, you will need to create a hashed password. First, we'll create the YAML
file which will store the password. Inside `/etc/prometheus/`, run the following:
@ -177,7 +214,7 @@ To check yourself, use `promtool` to check the configuration file:
promtool check web-config /etc/prometheus/web.yml
```
#### Step 6. Restart Prometheus
#### Step 2.6: Restart Prometheus
To restart Prometheus, run the following command:
@ -201,9 +238,9 @@ go_gc_duration_seconds{quantile="0.25"} 0
# etc...
```
### Install and configure Grafana
### Step 3: Install and configure Grafana
#### Step 7. Install Grafana
#### Step 3.1: Install Grafana
You can install Grafana by running the following commands:
@ -224,7 +261,7 @@ After logging in, you will be prompted to change the default password. Set your
:::
#### Step 8. Create a Prometheus data source
#### Step 3.2: Create a Prometheus data source
In Grafana, from the settings icon on the sidebar, open the `Configuration` menu and select `Data Sources`. Then, click
on `Add data source` and select `Prometheus` as the type.
@ -238,7 +275,7 @@ everything is working correctly, you should see a green `Data source is working`
width="1000px"
/>
#### Step 9. Create a Prometheus graph
#### Step 3.3: Create a Prometheus graph
Click the graph title and select `Edit`. Then, select the `Metrics` tab and select your Prometheus data source. Then,
enter any Prometheus expression into the `Query` field while using the `Metric` field to look up metrics via autocomplete.
@ -263,11 +300,11 @@ status labels of a returned query result, separated by a dash, you could use the
:::
## Containerized installation
## Option 2: Containerized installation
### Install and configure Prometheus and Grafana
### Step 2: Install and configure Prometheus and Grafana
#### Step 1. Prepare the Prometheus configuration file
#### Step 2.1: Prepare the Prometheus configuration file
Create a file named `prometheus.yml` on your host with the following information:
@ -302,7 +339,7 @@ scrape_configs:
- targets: ['ip_address_of_hasura_installation:8080']
```
#### Step 2. Pull the Prometheus and Grafana Docker containers
#### Step 2.2: Pull the Prometheus and Grafana Docker containers
For Prometheus, run the following command:
@ -316,9 +353,9 @@ Then, for Grafana, run the following:
docker run -d -p 3000:3000 grafana/grafana-enterprise
```
### Configure Grafana
### Step 3: Configure Grafana
#### Step 3. Adding a Prometheus as a data source in Grafana
#### Step 3.1: Add Prometheus as a data source in Grafana
In Grafana, from the settings icon on the sidebar, open the `Configuration` menu and select `Data Sources`. Then, click
on `Add data source` and select `Prometheus` as the type.
@ -332,7 +369,7 @@ everything is working correctly, you should see a green `Alerting supported` mes
width="1000px"
/>
#### Step 5. Add Hasura metrics to the dashboard
#### Step 3.2: Add Hasura metrics to the dashboard
Click on the `Add Panel` icon in the top-right corner of the Grafana dashboard. Then, select `Add New Panel` or
`Add New Row`.

View File

@ -0,0 +1,412 @@
---
sidebar_label: Available Metrics
description: Metrics via Prometheus for Hasura Enterprise Edition
title: 'Enterprise Edition: Metrics via Prometheus'
keywords:
- hasura
- docs
- enterprise
sidebar_position: 4
toc_max_heading_level: 4
---
import ProductBadge from '@site/src/components/ProductBadge';
# Metrics exported via Prometheus
<ProductBadge self />
## Metrics exported
The following metrics are exported by Hasura GraphQL Engine:
### GraphQL request metrics
#### Hasura GraphQL execution time seconds
Execution time of successful GraphQL requests (excluding subscriptions). If more requests are falling in the higher
buckets, you should consider [tuning the performance](/deployment/performance-tuning.mdx).
| | |
| ------ | ------------------------------------------------------------ |
| Name | `hasura_graphql_execution_time_seconds` |
| Type | Histogram<br /><br />Buckets: 0.01, 0.03, 0.1, 0.3, 1, 3, 10 |
| Labels | `operation_type`: query \| mutation |
:::info GraphQL request execution time
- Uses wall-clock time, so it includes time spent waiting on I/O.
- Includes authorization, parsing, validation, planning, and execution (calls to databases, Remote Schemas).
:::
#### Hasura GraphQL requests total
Number of GraphQL requests received, representing the GraphQL query/mutation traffic on the server.
| | |
| ------ | -------------------------------------------------------------- |
| Name | `hasura_graphql_requests_total` |
| Type | Counter |
| Labels | `operation_type`: query \| mutation \| subscription \| unknown |
The `unknown` operation type will be returned for queries that fail authorization, parsing, or certain validations. The
`response_status` label will be `success` for successful requests and `failed` for failed requests.
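A couple of illustrative PromQL expressions built on these metrics, issued here through the Prometheus HTTP query API; the Prometheus host and the time window are placeholders:

```bash
# p95 execution time of queries/mutations over the last 5 minutes:
curl -sG 'http://localhost:9090/api/v1/query' --data-urlencode \
  'query=histogram_quantile(0.95, sum(rate(hasura_graphql_execution_time_seconds_bucket[5m])) by (le))'

# Request throughput broken down by operation type:
curl -sG 'http://localhost:9090/api/v1/query' --data-urlencode \
  'query=sum(rate(hasura_graphql_requests_total[5m])) by (operation_type)'
```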
### Hasura Event Triggers metrics
See more details on Event trigger observability [here](/event-triggers/observability-and-performance.mdx).
#### Event fetch time per batch
Hasura fetches the events in batches (by default 100) from the Hasura Event tables in the database. This metric
represents the time taken to fetch a batch of events from the database.
A higher value indicates slower polling of events from the database; you should consider looking into the performance
of your database.
| | |
| ------ | ------------------------------------------------------------------------------------------ |
| Name | `hasura_event_fetch_time_per_batch_seconds` |
| Type | Histogram<br /><br />Buckets: 0.0001, 0.0003, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10 |
| Labels | none |
#### Event invocations total
This metric represents the number of HTTP requests that have been made to the webhook server for delivering events.
| | |
| ------ | ---------------------------------------------------------- |
| Name | `hasura_event_invocations_total` |
| Type | Counter |
| Labels | `status`: success \| failed, `trigger_name`, `source_name` |
#### Event processed total
Total number of events processed. Represents the Event Trigger egress.
| | |
| ------ | ---------------------------------------------------------- |
| Name | `hasura_event_processed_total` |
| Type | Counter |
| Labels | `status`: success \| failed, `trigger_name`, `source_name` |
#### Event processing time
Time taken for an event to be processed.
| | |
| ------ | --------------------------------------------------------------------- |
| Name | `hasura_event_processing_time_seconds` |
| Type | Histogram<br /><br />Buckets: 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100 |
| Labels | `trigger_name`, `source_name` |
The processing of an event involves the following steps:
1. Hasura Engine fetching the event from Hasura Event tables in the database and adding it to the Hasura Events queue
2. An HTTP worker picking up the event from the Hasura Events queue
3. An HTTP worker delivering the event to the webhook
:::info Event delivery failure
Note, if the delivery of the event fails - the delivery of the event is retried based on its `next_retry_at`
configuration.
:::
This metric represents the time taken for an event to be delivered since it was created (if the first attempt) or retried
(after the first attempt). **This metric can be considered as the end-to-end processing time for an event.**
For example, say an event was created at `2021-01-01 10:00:30` and it has a `next_retry_at` configuration which says if the
event delivery fails, the event should be retried after 30 seconds.
At `2021-01-01 10:01:30`: the event was fetched from the Hasura Event tables, picked up by the HTTP worker, and the
delivery was attempted. The delivery failed and the `next_retry_at` of `2021-01-01 10:02:00` was set for the event.
Now at `2021-01-01 10:02:00`: the event was fetched again from the Hasura Event tables, picked up by the HTTP worker,
and the delivery was attempted at `2021-01-01 10:03:30`. This time, the delivery was successful.
The processing time for the second delivery try would be:
Processing Time = event delivery time - event next retried time
Processing Time = `2021-01-01 10:03:30` - `2021-01-01 10:02:00` = `90 seconds`
#### Event queue time
Hasura fetches the events from the Hasura Event tables in the database and adds it to the Hasura Events queue. The event
queue time represents the time taken for an event to be picked up by the HTTP worker after it has been added to the
"Events Queue".
A higher value of this metric implies slow event processing. In this case, you can consider increasing the
[HTTP pool size](/deployment/graphql-engine-flags/reference.mdx/#events-http-pool-size) or optimizing the webhook
server.
| | |
| ------ | --------------------------------------------------------------------- |
| Name | `hasura_event_queue_time_seconds` |
| Type | Histogram<br /><br />Buckets: 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100 |
| Labels | `trigger_name`, `source_name` |
#### Event Triggers HTTP Workers
Current number of active Event Trigger HTTP workers. Compare this number to the
[HTTP pool size](/deployment/graphql-engine-flags/reference.mdx/#events-http-pool-size). Consider increasing it if the
metric is near the current configured value.
| | |
| ------ | ----------------------------------- |
| Name | `hasura_event_trigger_http_workers` |
| Type | Gauge |
| Labels | none |
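To see how close the worker pool is to saturation, you can compare the peak of this gauge against your configured pool size. A sketch, assuming Prometheus scrapes Hasura's `/v1/metrics` endpoint and is reachable at `localhost:9090`:
```bash
# Peak number of active Event Trigger HTTP workers over the last 5 minutes;
# compare the result against your configured events HTTP pool size.
curl -sG 'http://localhost:9090/api/v1/query' \
  --data-urlencode 'query=max_over_time(hasura_event_trigger_http_workers[5m])'
```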
#### Event webhook processing time
The time between an HTTP worker picking up an event for delivery and the event payload being sent to the webhook.
A higher processing time indicates a slow webhook; you should try to optimize the event webhook.
| | |
| ------ | ------------------------------------------------------------ |
| Name | `hasura_event_webhook_processing_time_seconds` |
| Type | Histogram<br /><br />Buckets: 0.01, 0.03, 0.1, 0.3, 1, 3, 10 |
| Labels | `trigger_name`, `source_name` |
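A quick way to spot slow webhooks is to look at a high percentile of this histogram per trigger. A minimal sketch, assuming Prometheus scrapes Hasura's `/v1/metrics` endpoint and is reachable at `localhost:9090`:
```bash
# p95 webhook processing time per Event Trigger over the last 5 minutes.
curl -sG 'http://localhost:9090/api/v1/query' \
  --data-urlencode 'query=histogram_quantile(0.95, sum by (le, trigger_name) (rate(hasura_event_webhook_processing_time_seconds_bucket[5m])))'
```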
#### Events fetched per batch
Number of events fetched from the Hasura Event tables in the database per batch. This number should be equal to or less
than the [events fetch batch size](/deployment/graphql-engine-flags/reference.mdx/#events-fetch-batch-size).
| | |
| ------ | --------------------------------- |
| Name | `hasura_events_fetched_per_batch` |
| Type | Gauge |
| Labels | none |
Since continuously polling the database to check for pending events is an expensive operation, Hasura only polls the
database when there are pending events. This metric can be used to understand whether there are any pending events in
the Hasura Event tables.
:::info Dependent on pending events
Note that Hasura only fetches events from the Hasura Event tables if there are any pending events. If there are no
pending events, this metric will be 0.
:::
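One way to read this gauge is to check whether it stays pinned at the configured fetch batch size over a window, which suggests a persistent backlog of pending events. A sketch, assuming Prometheus at `localhost:9090` scrapes the Hasura metrics endpoint:
```bash
# If this stays at the configured events fetch batch size (e.g. 100) for a
# sustained period, events are being created faster than they are processed.
curl -sG 'http://localhost:9090/api/v1/query' \
  --data-urlencode 'query=min_over_time(hasura_events_fetched_per_batch[15m])'
```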
### Subscription metrics
See more details on subscriptions observability [here](/subscriptions/observability-and-performance.mdx).
#### Active Subscriptions
Current number of active subscriptions, representing the subscription load on the server.
| | |
| ------ | ------------------------------------------------------------------------------------------ |
| Name | `hasura_active_subscriptions` |
| Type | Gauge |
| Labels | `subscription_kind`: streaming \| live-query, `operation_name`, `parameterized_query_hash` |
#### Active Subscription Pollers
Current number of active subscription pollers. A subscription poller
[multiplexes](https://github.com/hasura/graphql-engine/blob/master/architecture/live-queries.md#idea-3-batch-multiple-live-queries-into-one-sql-query)
similar subscriptions together. The value of this metric should be proportional to the number of uniquely parameterized
subscriptions (i.e., subscriptions with the same selection set, but with different input arguments and session variables
are multiplexed on the same poller). If this metric is high, it may be an indication that there are too many uniquely
parameterized subscriptions, which could be optimized for better performance.
| | |
| ------ | -------------------------------------------- |
| Name | `hasura_active_subscription_pollers` |
| Type | Gauge |
| Labels | `subscription_kind`: streaming \| live-query |
#### Active Subscription Pollers in Error State
Current number of active subscription pollers that are in the error state. A subscription poller
[multiplexes](https://github.com/hasura/graphql-engine/blob/master/architecture/live-queries.md#idea-3-batch-multiple-live-queries-into-one-sql-query)
similar subscriptions together. A non-zero value of this metric indicates that there are runtime errors in at least one
of the subscription pollers running in Hasura. In most cases, runtime errors in subscriptions are caused by changes at
the data model layer, and fixing the issue at the data model layer should automatically fix the runtime errors.
| | |
| ------ | --------------------------------------------------- |
| Name | `hasura_active_subscription_pollers_in_error_state` |
| Type | Gauge |
| Labels | `subscription_kind`: streaming \| live-query |
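Since any non-zero value here signals runtime errors, this metric is a natural candidate for an alert. A minimal sketch of an ad-hoc check, assuming Prometheus at `localhost:9090`:
```bash
# Returns a result only when at least one subscription poller is in an error
# state; an empty result set means all pollers are healthy.
curl -sG 'http://localhost:9090/api/v1/query' \
  --data-urlencode 'query=hasura_active_subscription_pollers_in_error_state > 0'
```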
#### Subscription Total Time
The time taken to complete one run of a subscription poller.
A subscription poller
[multiplexes](https://github.com/hasura/graphql-engine/blob/master/architecture/live-queries.md#idea-3-batch-multiple-live-queries-into-one-sql-query)
similar subscriptions together. This subscription poller runs every 1 second by default and queries the database with
the multiplexed query to fetch the latest data. In a single poll, the poller not only queries the database but also
performs other operations, such as splitting similar queries into batches (100 by default) before fetching their data
from the database. **This metric is the total time taken to complete all the operations in a single poll.**
In a single poll, the subscription poller splits the different variables for the multiplexed query into batches (by
default 100) and executes the batches. We use the `hasura_subscription_db_execution_time_seconds` metric to observe the
time taken for each batch to execute on the database. This means that for a single poll there can be multiple values
for the `hasura_subscription_db_execution_time_seconds` metric.
Let's look at an example to understand these metrics better:
Say we have 650 subscriptions with the same selection set but different input arguments. These 650 subscriptions will be
grouped to form one multiplexed query. A single poller would be created to run this multiplexed query. This poller will
run every 1 second.
The default batch size in Hasura is 100, so the 650 subscriptions will be split into 7 batches for execution during a
single poll.
During a single poll:
- Batch 1: `hasura_subscription_db_execution_time_seconds` = 0.002 seconds
- Batch 2: `hasura_subscription_db_execution_time_seconds` = 0.001 seconds
- Batch 3: `hasura_subscription_db_execution_time_seconds` = 0.003 seconds
- Batch 4: `hasura_subscription_db_execution_time_seconds` = 0.001 seconds
- Batch 5: `hasura_subscription_db_execution_time_seconds` = 0.002 seconds
- Batch 6: `hasura_subscription_db_execution_time_seconds` = 0.001 seconds
- Batch 7: `hasura_subscription_db_execution_time_seconds` = 0.002 seconds
The `hasura_subscription_total_time_seconds` would be the sum of all the database execution times shown in the batches,
plus some extra processing time for other tasks the poller does during a single poll. In this case, it would be
approximately 0.013 seconds.
| | |
| ------ | ------------------------------------------------------------------------------------------ |
| Name | `hasura_subscription_total_time_seconds` |
| Type | Histogram<br /><br />Buckets: 0.000001, 0.0001, 0.01, 0.1, 0.3, 1, 3, 10, 30, 100 |
| Labels | `subscription_kind`: streaming \| live-query, `operation_name`, `parameterized_query_hash` |
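To track how long polls take in aggregate, a high percentile of this histogram is usually more useful than the average. A sketch, assuming Prometheus at `localhost:9090` scrapes the Hasura metrics endpoint:
```bash
# p99 of subscription poller total time over the last 5 minutes,
# broken down by subscription kind.
curl -sG 'http://localhost:9090/api/v1/query' \
  --data-urlencode 'query=histogram_quantile(0.99, sum by (le, subscription_kind) (rate(hasura_subscription_total_time_seconds_bucket[5m])))'
```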
#### Subscription Database Execution Time
The time taken to run the subscription's multiplexed query in the database for a single batch.
A subscription poller
[multiplexes](https://github.com/hasura/graphql-engine/blob/master/architecture/live-queries.md#idea-3-batch-multiple-live-queries-into-one-sql-query)
similar subscriptions together. During every run (every 1 second by default), the poller splits the different variables
for the multiplexed query into batches (100 by default) and executes the batches. This metric observes the time taken for
each batch to execute on the database.
If this metric is high, it may be an indication that the database is not performing as expected; you should consider
investigating the subscription query and checking whether indexes can help improve performance.
| | |
| ------ | ------------------------------------------------------------------------------------------ |
| Name | `hasura_subscription_db_execution_time_seconds` |
| Type | Histogram<br /><br />Buckets: 0.000001, 0.0001, 0.01, 0.1, 0.3, 1, 3, 10, 30, 100 |
| Labels | `subscription_kind`: streaming \| live-query, `operation_name`, `parameterized_query_hash` |
### Cache metrics
See more details on caching metrics [here](/caching/caching-metrics.mdx)
#### Hasura cache request count
Tracks cache hit and miss requests, which helps in monitoring and optimizing cache utilization.
| | |
| ------ | ---------------------------- |
| Name | `hasura_cache_request_count` |
| Type | Counter |
| Labels | `status`: hit \| miss |
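A common derived signal is the cache hit ratio over a recent window. The query below is a sketch that assumes Prometheus at `localhost:9090` is scraping Hasura's `/v1/metrics` endpoint:
```bash
# Fraction of cache requests that were hits over the last 5 minutes.
curl -sG 'http://localhost:9090/api/v1/query' \
  --data-urlencode 'query=sum(rate(hasura_cache_request_count{status="hit"}[5m])) / sum(rate(hasura_cache_request_count[5m]))'
```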
### Cron trigger metrics
#### Hasura cron events invocation total
Total number of cron events invoked, representing the number of invocations made for cron events.
| | |
| ------ | ------------------------------------- |
| Name | `hasura_cron_events_invocation_total` |
| Type | Counter |
| Labels | `status`: success \| failed |
#### Hasura cron events processed total
Total number of cron events processed. Compare this to `hasura_cron_events_invocation_total`; a large difference between
the two metrics indicates a high failure rate of the cron webhook.
| | |
| ------ | ------------------------------------ |
| Name | `hasura_cron_events_processed_total` |
| Type | Counter |
| Labels | `status`: success \| failed |
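Instead of eyeballing the difference between the two counters, you can also watch the failure rate directly via the `status` label. A sketch, assuming Prometheus at `localhost:9090`:
```bash
# Rate of failed cron event invocations over the last 5 minutes.
curl -sG 'http://localhost:9090/api/v1/query' \
  --data-urlencode 'query=sum(rate(hasura_cron_events_invocation_total{status="failed"}[5m]))'
```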
### One-off Scheduled events metrics
#### Hasura one-off events invocation total
Total number of one-off events invoked, representing the number of invocations made for one-off events.
| | |
| ------ | --------------------------------------- |
| Name | `hasura_oneoff_events_invocation_total` |
| Type | Counter |
| Labels | `status`: success \| failed |
#### Hasura one-off events processed total
Total number of one-off events processed. Compare this to `hasura_oneoff_events_invocation_total`; a large difference
between the two metrics indicates a high failure rate of the one-off webhook.
| | |
| ------ | -------------------------------------- |
| Name | `hasura_oneoff_events_processed_total` |
| Type | Counter |
| Labels | `status`: success \| failed |
### Hasura HTTP connections
Current number of active HTTP connections (excluding WebSocket connections), representing the HTTP load on the server.
| | |
| ------ | ------------------------- |
| Name | `hasura_http_connections` |
| Type | Gauge |
| Labels | none |
### Hasura WebSocket connections
Current number of active WebSocket connections, representing the WebSocket load on the server.
| | |
| ------ | ------------------------------ |
| Name | `hasura_websocket_connections` |
| Type | Gauge |
| Labels | none |
### Hasura Postgres connections
Current number of active PostgreSQL connections. Compare this to
[pool settings](/api-reference/syntax-defs.mdx/#pgpoolsettings).
| | |
| ------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Name | `hasura_postgres_connections` |
| Type | Gauge |
| Labels | `source_name`: name of the database<br />`conn_info`: connection url string (password omitted) or name of the connection url environment variable<br />`role`: primary \| replica |
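To compare connection usage against your configured pool settings, it can help to aggregate the gauge per source and role. A sketch, assuming Prometheus at `localhost:9090` scrapes the Hasura metrics endpoint:
```bash
# Active Postgres connections per data source and role (primary vs. replica).
curl -sG 'http://localhost:9090/api/v1/query' \
  --data-urlencode 'query=sum by (source_name, role) (hasura_postgres_connections)'
```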
### Hasura source health
Health check status of a particular data source, corresponding to the output of `/healthz/sources`, with possible values
0 through 3 indicating, respectively: OK, TIMEOUT, FAILED, ERROR. See the
[Source Health Check API Reference](/api-reference/source-health.mdx) for details.
| | |
| ------ | ----------------------------------- |
| Name | `hasura_source_health` |
| Type | Gauge |
| Labels | `source_name`: name of the database |
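Because `0` means OK, any positive value is a sign that a source needs attention, which makes this gauge easy to alert on. A sketch, assuming Prometheus at `localhost:9090`:
```bash
# Lists data sources whose health status is anything other than OK (0).
curl -sG 'http://localhost:9090/api/v1/query' \
  --data-urlencode 'query=hasura_source_health > 0'
```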

View File

@ -1,5 +1,5 @@
---
sidebar_position: 10
sidebar_position: 3
sidebar_label: Pre-built Dashboards
title: 'Pre-built Dashboards'
description: Pre-built observability dashboards and alerting rules using Prometheus, Grafana, Alert Manager, Jeager

View File

@ -1,5 +0,0 @@
{
"label": "Integrations",
"position": 3,
"className": "cloud-icon"
}

View File

@ -1,11 +1,11 @@
---
title: 'Best Practices: Observability'
description: Observability best practices
keywords:
- hasura
- docs
- best practices
sidebar_label: Observability
sidebar_label: Best Practices
sidebar_position: 2
---
# Observability Best Practices
@ -52,20 +52,20 @@ Hasura Cloud projects include dashboards for observability. You will find your m
The following default observability options are enabled on your Hasura Cloud project:
- [Stats Overview](/observability/overview.mdx)
- [Errors](/observability/errors.mdx)
- [Usage Summaries](/observability/usage.mdx)
- [Operations](/observability/operations.mdx)
- [Websockets](/observability/websockets.mdx)
- [Subscription Workers](/observability/subscription-workers.mdx)
- [Distributed Tracing](/observability/tracing.mdx)
- [Query Tags](/observability/query-tags.mdx)
- [Errors](/observability/cloud-monitoring/errors.mdx)
- [Usage Summaries](/observability/cloud-monitoring/usage.mdx)
- [Operations](/observability/cloud-monitoring/operations.mdx)
- [Websockets](/observability/cloud-monitoring/websockets.mdx)
- [Subscription Workers](/observability/cloud-monitoring/subscription-workers.mdx)
- [Distributed Tracing](/observability/cloud-monitoring/tracing.mdx)
- [Query Tags](/observability/cloud-monitoring/query-tags.mdx)
#### Third-party observability platforms
If your organization has multiple applications and systems that need to be monitored, the most efficient way to do so is
via an observability platform. Hasura provides first-party integrations with multiple observability platforms and is
fully open-telemetry compliant. You can find a list of third-party observability platforms supported by Hasura
[here](/observability/integrations/index.mdx).
[here](/observability/cloud/index.mdx).
### Hasura Enterprise (self-hosted)
@ -84,7 +84,7 @@ to be exported to your observability platform using the appropriate log drivers.
You can export metrics of your Hasura Cloud project to Prometheus. You can configure this on the `Integrations` tab on
the project's settings page. You can find more information on this
[here](/observability/prometheus/cloud-integration.mdx).
[here](/observability/cloud/prometheus.mdx).
## Database observability
@ -105,12 +105,12 @@ be implemented:
- Memory
- Query Tags
[Query Tags](/observability/query-tags.mdx) are SQL comments that consist of `key=value` pairs that are appended to
[Query Tags](/observability/cloud-monitoring/query-tags.mdx) are SQL comments that consist of `key=value` pairs that are appended to
generated SQL statements. When you issue a query or mutation with query tags, the generated SQL has some extra
information. Database analytics tools can use that information (metadata) in these comments to analyze DB load and track
or monitor performance.
information. Database analytics tools can use that information (metadata) in these comments to analyze DB load
and track or monitor performance.
### Using Query Tags and pganalyze
### Using Query Tags and **pganalyze**
- Refer to documentation from [pganalyze](https://pganalyze.com/docs) for information on how to connect your database to
the analyzer.

View File

@ -38,18 +38,18 @@ import Observability from '@site/static/icons/features/observability.svg';
For our Enterprise customers, we have a set of pre-built dashboards and alerting rules configured with the
Prometheus Grafana Jaeger stack, with which you can monitor and debug Hasura. These dashboards will be available
soon and integrated with Hasura Cloud too. You can read more and explore these dashboards &nbsp;
<VersionedLink to="/observability/prometheus/pre-built-dashboards/">here</VersionedLink>.
<VersionedLink to="/observability/enterprise-edition/prometheus/pre-built-dashboards/">here</VersionedLink>.
</p>
<h4>Quick Links</h4>
<ul>
<li>
<VersionedLink to="/observability/integrations/index">Connect an integration.</VersionedLink>
<VersionedLink to="/observability/cloud-monitoring/index/">Hasura Cloud Built-in Monitoring</VersionedLink>
</li>
<li>
<VersionedLink to="/observability/how-it-works">Learn how Observability works.</VersionedLink>
<VersionedLink to="/observability/enterprise-edition/prometheus/pre-built-dashboards">Pre-built dashboards with Hasura EE</VersionedLink>
</li>
<li>
<VersionedLink to="/observability/prometheus/pre-built-dashboards">Pre-built dashboards.</VersionedLink>
<VersionedLink to="/observability/db-observability/">Database observability</VersionedLink>
</li>
</ul>
</div>
@ -64,22 +64,22 @@ import Observability from '@site/static/icons/features/observability.svg';
## Using Observability
<div className="overview-gallery">
<VersionedLink to="/observability/integrations/opentelemetry/">
<VersionedLink to="/observability/cloud/opentelemetry/">
<div className="card">
<h3>OpenTelemetry</h3>
<p>Connect your Hasura GraphQL API to OpenTelemetry-compliant services.</p>
<p>Connect your Hasura Cloud project to OpenTelemetry-compliant services.</p>
</div>
</VersionedLink>
<VersionedLink to="/observability/prometheus/cloud-integration/">
<VersionedLink to="/observability/cloud/prometheus/">
<div className="card">
<h3>Prometheus</h3>
<p>Connect your Hasura GraphQL API to Prometheus.</p>
<p>Connect your Hasura Cloud project to Prometheus.</p>
</div>
</VersionedLink>
<VersionedLink to="/observability/integrations/datadog">
<VersionedLink to="/observability/cloud/datadog">
<div className="card">
<h3>Datadog</h3>
<p>Connect your Hasura GraphQL API to Datadog.</p>
<p>Connect your Hasura Cloud project to Datadog.</p>
</div>
</VersionedLink>
</div>

View File

@ -1,5 +0,0 @@
{
"label": "Prometheus",
"position": 2,
"className": "cloud-and-enterprise-icon"
}

View File

@ -1,208 +0,0 @@
---
sidebar_label: Available Metrics
description: Metrics via Prometheus for Hasura Enterprise Edition
title: 'Enterprise Edition: Metrics via Prometheus'
keywords:
- hasura
- docs
- enterprise
sidebar_position: 1
---
import ProductBadge from '@site/src/components/ProductBadge';
# Metrics via Prometheus
<ProductBadge self />
## Enable metrics endpoint
By default the Prometheus metrics endpoint is disabled. To enable Prometheus metrics, configure the environment variable
below:
```bash
HASURA_GRAPHQL_ENABLED_APIS=metadata,graphql,config,metrics
```
Secure the Prometheus metrics endpoint with a secret:
```bash
HASURA_GRAPHQL_METRICS_SECRET=<secret>
```
```bash
curl 'http://127.0.0.1:8080/v1/metrics' -H 'Authorization: Bearer <secret>'
```
:::info Configure a secret
The metrics endpoint should be configured with a secret to prevent misuse and should not be exposed over the internet.
:::
:::tip High-cardinality Labels
Starting in `v2.26.0`, Hasura GraphQL Engine exposes metrics with high-cardinality labels by default.
You can disable
[the cardinality of labels for metrics](/deployment/graphql-engine-flags/reference.mdx#enable-high-cardinality-labels-for-metrics)
if you are experiencing high memory usage, which can be due to a large number of labels in the metrics (typically more
than 10000).
:::
## Metrics exported
The following metrics are exported by Hasura GraphQL Engine:
### Hasura Event Triggers Metrics
The following metrics can be used to monitor the performance of Hasura Event Triggers system:
- [`hasura_event_fetch_time_per_batch_seconds`](/event-triggers/observability-and-performace.mdx/#event-fetch-time-per-batch)
- [`hasura_event_invocations_total`](/event-triggers/observability-and-performace.mdx/#event-invocations-total)
- [`hasura_event_processed_total`](/event-triggers/observability-and-performace.mdx/#event-processed-total)
- [`hasura_event_processing_time_seconds`](/event-triggers/observability-and-performace.mdx/#event-processing-time)
- [`hasura_event_queue_time_seconds`](/event-triggers/observability-and-performace.mdx/#event-queue-time)
- [`hasura_event_trigger_http_workers`](/event-triggers/observability-and-performace.mdx/#event-triggers-http-workers)
- [`hasura_event_webhook_processing_time_seconds`](/event-triggers/observability-and-performace.mdx/#event-webhook-processing-time)
- [`hasura_events_fetched_per_batch`](/event-triggers/observability-and-performace.mdx/#events-fetched-per-batch)
### Subscription Metrics
The following metrics can be used to monitor the performance of subscriptions:
- [`hasura_active_subscriptions`](/subscriptions/observability-and-performance.mdx#active-subscriptions)
- [`hasura_active_subscription_pollers`](/subscriptions/observability-and-performance.mdx#active-subscription-pollers)
- [`hasura_active_subscription_pollers_in_error_state`](/subscriptions/observability-and-performance.mdx#active-subscription-pollers-in-error-state)
- [`hasura_subscription_db_execution_time_seconds`](/subscriptions/observability-and-performance.mdx#subscription-database-execution-time)
- [`hasura_subscription_total_time_seconds`](/subscriptions/observability-and-performance.mdx#subscription-total-time)
### Hasura cache request count
Tracks cache hit and miss requests, which helps in monitoring and optimizing cache utilization. You can read more about
this [here](/caching/caching-metrics.mdx).
| | |
| ------ | ---------------------------- |
| Name | `hasura_cache_request_count` |
| Type | Counter |
| Labels | `status`: hit \| miss |
### Hasura cron events invocation total
Total number of cron events invoked, representing the number of invocations made for cron events.
| | |
| ------ | ------------------------------------- |
| Name | `hasura_cron_events_invocation_total` |
| Type | Counter |
| Labels | `status`: success \| failed |
### Hasura cron events processed total
Total number of cron events processed, representing the number of invocations made for cron events. Compare this to
`hasura_cron_events_invocation_total`. A high difference between the two metrics indicates high failure rate of the cron
webhook.
| | |
| ------ | ------------------------------------ |
| Name | `hasura_cron_events_processed_total` |
| Type | Counter |
| Labels | `status`: success \| failed |
### Hasura GraphQL execution time seconds
Execution time of successful GraphQL requests (excluding subscriptions). If more requests are falling in the higher
buckets, you should consider [tuning the performance](/deployment/performance-tuning.mdx).
| | |
| ------ | ------------------------------------------------------------ |
| Name | `hasura_graphql_execution_time_seconds` |
| Type | Histogram<br /><br />Buckets: 0.01, 0.03, 0.1, 0.3, 1, 3, 10 |
| Labels | `operation_type`: query \| mutation |
### Hasura GraphQL requests total
Number of GraphQL requests received, representing the GraphQL query/mutation traffic on the server.
| | |
| ------ | -------------------------------------------------------------- |
| Name | `hasura_graphql_requests_total` |
| Type | Counter |
| Labels | `operation_type`: query \| mutation \| subscription \| unknown |
The `unknown` operation type will be returned for queries that fail authorization, parsing, or certain validations. The
`response_status` label will be `success` for successful requests and `failed` for failed requests.
### Hasura HTTP connections
Current number of active HTTP connections (excluding WebSocket connections), representing the HTTP load on the server.
| | |
| ------ | ------------------------- |
| Name | `hasura_http_connections` |
| Type | Gauge |
| Labels | none |
### Hasura one-off events invocation total
Total number of one-off events invoked, representing the number of invocations made for one-off events.
| | |
| ------ | --------------------------------------- |
| Name | `hasura_oneoff_events_invocation_total` |
| Type | Counter |
| Labels | `status`: success \| failed |
### Hasura one-off events processed total
Total number of one-off events processed, representing the number of invocations made for one-off events. Compare this
to `hasura_oneoff_events_invocation_total`. A high difference between the two metrics indicates high failure rate of the
one-off webhook.
| | |
| ------ | -------------------------------------- |
| Name | `hasura_oneoff_events_processed_total` |
| Type | Counter |
| Labels | `status`: success \| failed |
### Hasura postgres connections
Current number of active PostgreSQL connections. Compare this to
[pool settings](/api-reference/syntax-defs.mdx/#pgpoolsettings).
| | |
| ------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Name | `hasura_postgres_connections` |
| Type | Gauge |
| Labels | `source_name`: name of the database<br />`conn_info`: connection url string (password omitted) or name of the connection url environment variable<br />`role`: primary \| replica |
### Hasura source health
Health check status of a particular data source, corresponding to the output of `/healthz/sources`, with possible values
0 through 3 indicating, respectively: OK, TIMEOUT, FAILED, ERROR. See the
[Source Health Check API Reference](/api-reference/source-health.mdx) for details.
| | |
| ------ | ----------------------------------- |
| Name | `hasura_source_health` |
| Type | Gauge |
| Labels | `source_name`: name of the database |
### Hasura WebSocket connections
Current number of active WebSocket connections, representing the WebSocket load on the server.
| | |
| ------ | ------------------------------ |
| Name | `hasura_websocket_connections` |
| Type | Gauge |
| Labels | none |
:::info GraphQL request execution time
- Uses wall-clock time, so it includes time spent waiting on I/O.
- Includes authorization, parsing, validation, planning, and execution (calls to databases, Remote Schemas).
:::

View File

@ -6,7 +6,7 @@ keywords:
- docs
- deployment
- allow list
sidebar_position: 8
sidebar_position: 4
---
import Tabs from '@theme/Tabs';

View File

@ -7,7 +7,7 @@ keywords:
- enterprise
- security
- limits
sidebar_position: 2
sidebar_position: 5
sidebar_label: API limits
sidebar_class_name: cloud-and-enterprise-icon
title: 'Cloud & Enterprise Edition: API Limits'

View File

@ -1,6 +1,6 @@
---
title: 'Cloud: Marketplaces'
description: Hasura Cloud Marketplaces
title: 'Proactive vulnerability scanning'
description: Proactive vulnerability scanning
keywords:
- hasura
- Vulnerability
@ -11,6 +11,7 @@ keywords:
- cloud
sidebar_label: Proactive vulnerability scanning
sidebar_class_name: cloud-and-enterprise-icon
sidebar_position: 7
---
import ProductBadge from '@site/src/components/ProductBadge';

View File

@ -10,7 +10,7 @@ keywords:
- introspection
- disable
- GraphQL
sidebar_position: 3
sidebar_position: 6
sidebar_label: Disable GraphQL introspection
sidebar_class_name: cloud-and-enterprise-icon
---

View File

@ -29,7 +29,7 @@ import Security from '@site/static/icons/features/security.svg';
<h4>Quick Links</h4>
<ul>
<li>
<VersionedLink to="/deployment/best-practices/security">
<VersionedLink to="/security/security-best-practices">
Get started with Security best practices.
</VersionedLink>
</li>
@ -52,7 +52,7 @@ import Security from '@site/static/icons/features/security.svg';
<p>Use the Allow List of operations to restrict the operations that can be performed by a role.</p>
</div>
</VersionedLink>
<VersionedLink to="/security/multiple-jwt-secrets">
<VersionedLink to="/auth/authentication/multiple-jwt-secrets">
<div className="card">
<h3>Multiple JWT Secrets</h3>
<p>Use multiple JWT secrets to support multiple JWT issuers.</p>

View File

@ -4,7 +4,9 @@ keywords:
- hasura
- docs
- best practices
sidebar_label: Security
sidebar_label: Best Practices
toc_max_heading_level: 3
sidebar_position: 3
---
import Thumbnail from '@site/src/components/Thumbnail';
@ -36,14 +38,14 @@ Specifics about each security best practice can be found below.
## Hasura GraphQL Engine
#### Restrict Access
### Restrict Access
Restrict knowledge of admin secrets to the minimally required team members as an admin secret provides unrestricted
access to the Hasura GraphQL Engine. SSO collaboration should be used to grant project access without sharing an admin
key. Subsequently, implement a plan to rotate admin secrets to limit the exposure of an admin secret being shared too
broadly.
[Multiple admin secrets](/security/multiple-admin-secrets.mdx) should be used in situations where admin secrets have
[Multiple admin secrets](/auth/authentication/multiple-admin-secrets.mdx) should be used in situations where admin secrets have
different rotation timelines or when granting temporary access is needed.
Leverage [allowed operations lists](https://www.graphql-code-generator.com/plugins/other/hasura-allow-list) whenever
@ -61,7 +63,7 @@ The admin role will bypass the allowed operations list.
:::
#### Limit the API
### Limit the API
The allowed operations lists workflow is ideal for private/internal APIs or APIs with well understood and clearly
defined operations. Public APIs or APIs with less defined expected operations should additionally configure
@ -72,7 +74,7 @@ defined operations. Public APIs or APIs with less defined expected operations sh
- [Limit rows](/auth/authorization/permissions/row-fetch-limit.mdx) returned by a select operation.
#### Permissions
### Permissions
The row-based access control configuration dictates permissions for the GraphQL API. It is critical that these
permissions be configured correctly in order to prevent unauthorized or unintended access to the GraphQL API.
@ -86,7 +88,7 @@ permissions be configured correctly in order to prevent unauthorized or unintend
[allowed operations lists](https://www.graphql-code-generator.com/plugins/other/hasura-allow-list) and
[disabling schema introspection](/security/disable-graphql-introspection.mdx).
#### Disable development components
### Disable development components
There are several components of Hasura GraphQL Engine that are crucial for development efforts but should be disabled
for a production environment. However, it should be expected that some of these components may need to be temporarily
@ -100,7 +102,7 @@ re-enabled if a situation arises where a production environment specific issue r
- [Disable schema introspection](/security/disable-graphql-introspection.mdx).
#### Additional environment variables
### Additional environment variables
There are specific environment variables that should be configured to ensure appropriate communication to the Hasura
GraphQL Engine server.

View File

@ -51,112 +51,23 @@ For more details on how Hasura executes subscriptions, refer to the
<ProductBadge self />
Hasura exposes a set of Prometheus Metrics that can be used to monitor the subscriptions system and help diagnose
performance issues.
Hasura EE exposes a set of [Prometheus Metrics](/observability/enterprise-edition/prometheus/metrics.mdx/#subscription-metrics)
that can be used to monitor the subscriptions system and help diagnose performance issues.
### Active Subscriptions
:::info More on Observability
Current number of active subscriptions, representing the subscription load on the server.
To find out more about observability, including best practices, check out
[observability docs section](/observability/overview.mdx).
| | |
| ------ | ------------------------------------------------------------------------------------------ |
| Name | `hasura_active_subscriptions` |
| Type | Gauge |
| Labels | `subscription_kind`: streaming \| live-query, `operation_name`, `parameterized_query_hash` |
The following metrics can be used to monitor the performance of subscriptions:
### Active Subscription Pollers
:::
Current number of active subscription pollers. A subscription poller
[multiplexes](https://github.com/hasura/graphql-engine/blob/master/architecture/live-queries.md#idea-3-batch-multiple-live-queries-into-one-sql-query)
similar subscriptions together. The value of this metric should be proportional to the number of uniquely parameterized
subscriptions (i.e., subscriptions with the same selection set, but with different input arguments and session variables
are multiplexed on the same poller). If this metric is high then it may be an indication that there are too many
uniquely parameterized subscriptions which could be optimized for better performance.
| | |
| ------ | -------------------------------------------- |
| Name | `hasura_active_subscription_pollers` |
| Type | Gauge |
| Labels | `subscription_kind`: streaming \| live-query |
### Active Subscription Pollers in Error State
Current number of active subscription pollers that are in the error state. A subscription poller
[multiplexes](https://github.com/hasura/graphql-engine/blob/master/architecture/live-queries.md#idea-3-batch-multiple-live-queries-into-one-sql-query)
similar subscriptions together. A non-zero value of this metric indicates that there are runtime errors in atleast one
of the subscription pollers that are running in Hasura. In most of the cases, runtime errors in subscriptions are caused
due to the changes at the data model layer and fixing the issue at the data model layer should automatically fix the
runtime errors.
| | |
| ------ | --------------------------------------------------- |
| Name | `hasura_active_subscription_pollers_in_error_state` |
| Type | Gauge |
| Labels | `subscription_kind`: streaming \| live-query |
### Subscription Total Time
The time taken to complete running of one subscription poller.
A subscription poller
[multiplexes](https://github.com/hasura/graphql-engine/blob/master/architecture/live-queries.md#idea-3-batch-multiple-live-queries-into-one-sql-query)
similar subscriptions together. This subscription poller runs every 1 second by default and queries the database with
the multiplexed query to fetch the latest data. In a polling instance, the poller not only queries the database but does
other operations like splitting similar queries into batches (by default 100) before fetching their data from the
database, etc. **This metric is the total time taken to complete all the operations in a single poll.**
In a single poll, the subscription poller splits the different variables for the multiplexed query into batches (by
default 100) and executes the batches. We use the `hasura_subscription_db_execution_time_seconds` metric to observe the
time taken for each batch to execute on the database. This means for a single poll there can be multiple values for
`hasura_subscription_db_execution_time_seconds` metric.
Let's look at an example to understand these metrics better:
Say we have 650 subscriptions with the same selection set but different input arguments. These 650 subscriptions will be
grouped to form one multiplexed query. A single poller would be created to run this multiplexed query. This poller will
run every 1 second.
The default batch size in hasura is 100, so the 650 subscriptions will be split into 7 batches for execution during a
single poll.
During a single poll:
- Batch 1: `hasura_subscription_db_execution_time_seconds` = 0.002 seconds
- Batch 2: `hasura_subscription_db_execution_time_seconds` = 0.001 seconds
- Batch 3: `hasura_subscription_db_execution_time_seconds` = 0.003 seconds
- Batch 4: `hasura_subscription_db_execution_time_seconds` = 0.001 seconds
- Batch 5: `hasura_subscription_db_execution_time_seconds` = 0.002 seconds
- Batch 6: `hasura_subscription_db_execution_time_seconds` = 0.001 seconds
- Batch 7: `hasura_subscription_db_execution_time_seconds` = 0.002 seconds
The `hasura_subscription_total_time_seconds` would be sum of all the database execution times shown in the batches, plus
some extra process time for other tasks the poller does during a single poll. In this case, it would be approximately
0.013 seconds.
| | |
| ------ | ------------------------------------------------------------------------------------------ |
| Name | `hasura_subscription_total_time_seconds` |
| Type | Histogram<br /><br />Buckets: 0.000001, 0.0001, 0.01, 0.1, 0.3, 1, 3, 10, 30, 100 |
| Labels | `subscription_kind`: streaming \| live-query, `operation_name`, `parameterized_query_hash` |
### Subscription Database Execution Time
The time taken to run the subscription's multiplexed query in the database for a single batch.
A subscription poller
[multiplexes](https://github.com/hasura/graphql-engine/blob/master/architecture/live-queries.md#idea-3-batch-multiple-live-queries-into-one-sql-query)
similar subscriptions together. During every run (every 1 second by default), the poller splits the different variables
for the multiplexed query into batches (by default 100) and execute the batches. This metric observes the time taken for
each batch to execute on the database.
If this metric is high, then it may be an indication that the database is not performing as expected, you should
consider investigating the subscription query and see if indexes can help improve performance.
| | |
| ------ | ------------------------------------------------------------------------------------------ |
| Name | `hasura_subscription_db_execution_time_seconds` |
| Type | Histogram<br /><br />Buckets: 0.000001, 0.0001, 0.01, 0.1, 0.3, 1, 3, 10, 30, 100 |
| Labels | `subscription_kind`: streaming \| live-query, `operation_name`, `parameterized_query_hash` |
- [`hasura_active_subscriptions`](/observability/enterprise-edition/prometheus/metrics.mdx#active-subscriptions)
- [`hasura_active_subscription_pollers`](/observability/enterprise-edition/prometheus/metrics.mdx#active-subscription-pollers)
- [`hasura_active_subscription_pollers_in_error_state`](/observability/enterprise-edition/prometheus/metrics.mdx#active-subscription-pollers-in-error-state)
- [`hasura_subscription_db_execution_time_seconds`](/observability/enterprise-edition/prometheus/metrics.mdx#subscription-database-execution-time)
- [`hasura_subscription_total_time_seconds`](/observability/enterprise-edition/prometheus/metrics.mdx#subscription-total-time)
## Golden Signals for subscriptions
@ -191,7 +102,7 @@ number of Hasura instances to handle the load.
Errors in subscriptions can be monitored using the following metrics
- [`hasura_graphql_requests_total{type="subscription",response_status="error"}`](/observability/prometheus/metrics.mdx#hasura-graphql-requests-total):
- [`hasura_graphql_requests_total{type="subscription",response_status="error"}`](/observability/enterprise-edition/prometheus/metrics.mdx#hasura-graphql-requests-total):
Total number of errors that happen before the subscription is started (i.e. validation, parsing and authorization
errors).
- [`hasura_active_subscription_pollers_in_error_state`](#active-subscription-pollers-in-error-state): Number of
@ -210,7 +121,7 @@ To monitor the saturation for subscriptions, you can monitor the following:
- CPU and memory usage of Hasura instances.
- For postgres backends, you can monitor the
[`hasura_postgres_connections`](/observability/prometheus/metrics.mdx#hasura-postgres-connections) metric to see the
[`hasura_postgres_connections`](/observability/enterprise-edition/prometheus/metrics.mdx#hasura-postgres-connections) metric to see the
number of connections opened by Hasura with the database.
- P99 of the [`hasura_subscription_total_time_seconds`](#subscription-total-time) metric.

View File

@ -27,7 +27,7 @@ In order to find out about the origins of an error, it can be helpful to check t
:::info Metrics and distributed tracing in Hasura Cloud
Hasura Cloud includes metrics and distributed tracing which makes troubleshooting faster. For more information, see
[Metrics](/observability/overview.mdx) and [Tracing](/observability/tracing.mdx) in Hasura Cloud.
[Metrics](/observability/overview.mdx) and [Tracing](/observability/cloud-monitoring/tracing.mdx) in Hasura Cloud.
:::

View File

@ -0,0 +1,13 @@
import React from 'react';
import styles from './styles.module.scss';
const BetaTag: React.FC = (): React.ReactElement => {
return (
<div className={styles['beta-tag']}>
Beta
</div>
)
}
export default BetaTag;

View File

@ -0,0 +1,9 @@
.beta-tag {
font-size: 0.7rem;
font-weight: normal;
margin-left: 5px;
color: white;
background-color: var(--ifm-color-gray-600);
padding: 2px 5px;
border-radius: 5px;
}

View File

@ -11,6 +11,7 @@ import EnterpriseLight from '@site/static/icons/enterprise-dark.svg';
import EnterpriseDark from '@site/static/icons/enterprise-light.svg';
import CloudLight from '@site/static/icons/cloud-dark.svg';
import CloudDark from '@site/static/icons/cloud-light.svg';
import BetaTag from "@site/src/components/BetaTag/BetaTag";
export default function DocSidebarItemLink({ item, onItemClick, activePath, level, index, ...props }) {
const { href, label, className, autoAddBaseUrl } = item;
const isActive = isActiveSidebarItem(item, activePath);
@ -24,6 +25,20 @@ export default function DocSidebarItemLink({ item, onItemClick, activePath, leve
return isDarkTheme ? <EnterpriseDark /> : <EnterpriseLight />;
case 'cloud-icon':
return isDarkTheme ? <CloudDark /> : <CloudLight />;
case 'enterprise-icon-and-beta':
return (
<div className={styles['sidebar_link_wrapper']}>
{isDarkTheme ? (
<>
<EnterpriseDark />{' '}<BetaTag/>
</>
) : (
<>
<EnterpriseLight />{' '}<BetaTag/>
</>
)}
</div>
);
case 'cloud-and-enterprise-icon':
return (
<div className={styles['cloud-ee-container']}>
@ -38,6 +53,12 @@ export default function DocSidebarItemLink({ item, onItemClick, activePath, leve
)}
</div>
);
case 'beta-icon':
return (
<div className={styles['sidebar_link_wrapper']}>
<BetaTag/>
</div>
);
default:
return null;
}

File diff suppressed because it is too large Load Diff