mirror of
https://github.com/hasura/graphql-engine.git
synced 2024-12-14 17:02:49 +03:00
docs,community: update the observability boilerplate to use OpenTelemetry logs exporter
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/10318 Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Sean Park-Ross <94021366+seanparkross@users.noreply.github.com> GitOrigin-RevId: c5a5bddc25e1b9add3712f6345883d379b468751
This commit is contained in:
parent
33bafdf450
commit
61b527033d
@ -1 +1,2 @@
|
||||
.env
|
||||
.env
|
||||
gcp-sa.json
|
@ -1,12 +1,20 @@
|
||||
# Hasura Enterprise Observability Demo
|
||||
|
||||
An all-in-one observability demo for Hasura GraphQL Engine Enterprise Edition with Prometheus, Jaeger, Alert-manager, Node Exporter, Loki Logs and Grafana dashboards.
|
||||
An all-in-one observability demo for Hasura GraphQL Engine Enterprise Edition with Prometheus, Jaeger, Alert-manager, Node Exporter, Loki Logs, OpenTelemetry and Grafana dashboards.
|
||||
|
||||
## Get Started
|
||||
|
||||
- Copy `dotenv` to `.env` and configure secrets and EE License Key.
|
||||
- Try `docker-compose` locally with `docker-compose up -d`, or run the Docker Swarm stack with scripts that are in the `util` folder.
|
||||
|
||||
The default configuration enables the native Prometheus exporter. If you want to push metrics to 3rd-party services that support OpenTelemetry, first disable the `metrics` API, then point the OpenTelemetry metrics endpoint at the `otel-collector` service: in the console, browse to `Settings -> OpenTelemetry Exporter` and configure the metrics endpoint.
|
||||
|
||||
```ini
|
||||
HGE_ENABLED_APIS=metadata,graphql,config
|
||||
```
|
||||
|
||||
![OpenTelemetry Exporter](./assets/images/opentelemetry-export-config.png)
|
||||
|
||||
## Components
|
||||
|
||||
Here's a list of all the services that are created:
|
||||
|
Binary file not shown.
After Width: | Height: | Size: 102 KiB |
@ -15,7 +15,7 @@ services:
|
||||
- "${REDIS_PORT}:6379"
|
||||
|
||||
postgres:
|
||||
container_name: hge-metadata-pg
|
||||
container_name: hge-postgres
|
||||
image: ${PG_IMAGE}
|
||||
restart: always
|
||||
ports:
|
||||
@ -31,6 +31,7 @@ services:
|
||||
POSTGRESQL_DATABASE: ${PG_DATABASE}
|
||||
|
||||
postgres-replica:
|
||||
container_name: hge-postgres-replica
|
||||
image: "bitnami/postgresql:15"
|
||||
restart: always
|
||||
volumes:
|
||||
@ -54,16 +55,12 @@ services:
|
||||
- postgres
|
||||
- postgres-replica
|
||||
- redis
|
||||
labels:
|
||||
logging: "promtail"
|
||||
logging_jobname: "graphql-engine"
|
||||
restart: always
|
||||
## uncomment the auto migration image and volumes to explore the example
|
||||
volumes:
|
||||
- ./examples/hasura/migrations:/hasura-migrations
|
||||
- ./examples/hasura/metadata:/hasura-metadata
|
||||
environment:
|
||||
## generate a Hasura Pro Key from the Hasura EE Control Plane for this project and uncomment the below line
|
||||
HASURA_GRAPHQL_EE_LICENSE_KEY: ${HGE_EE_LICENSE_KEY}
|
||||
HASURA_GRAPHQL_ADMIN_SECRET: ${HGE_ADMIN_SECRET}
|
||||
## The metadata database for this Hasura GraphQL project. Can be changed to a managed postgres instance
|
||||
@ -77,7 +74,7 @@ services:
|
||||
## enable debugging mode. It is recommended to disable this in production
|
||||
HASURA_GRAPHQL_DEV_MODE: "true"
|
||||
HASURA_GRAPHQL_ENABLED_LOG_TYPES: ${HGE_ENABLED_LOG_TYPES}
|
||||
HASURA_GRAPHQL_LOG_LEVEL: debug
|
||||
HASURA_GRAPHQL_LOG_LEVEL: info
|
||||
HASURA_GRAPHQL_CONSOLE_ASSETS_DIR: "/srv/console-assets"
|
||||
HASURA_GRAPHQL_REDIS_URL: "redis://redis:6379"
|
||||
HASURA_GRAPHQL_RATE_LIMIT_REDIS_URL: "redis://redis:6379"
|
||||
@ -86,6 +83,9 @@ services:
|
||||
HASURA_GRAPHQL_ENABLED_APIS: ${HGE_ENABLED_APIS}
|
||||
HASURA_GRAPHQL_UNAUTHORIZED_ROLE: anonymous
|
||||
|
||||
JAEGER_HOST: http://jaeger:4318
|
||||
OTEL_COLLECTOR_HOST: http://otel-collector:4318
|
||||
|
||||
## enable basic authorization for the Prometheus metrics endpoint
|
||||
## remember to set the secret credential in the Prometheus target config
|
||||
# HASURA_GRAPHQL_METRICS_SECRET: ${HGE_METRICS_SECRET}
|
||||
@ -192,10 +192,24 @@ services:
|
||||
- ${LOKI_PORT}:3100
|
||||
command: -config.file=/etc/loki/local-config.yaml
|
||||
|
||||
promtail:
|
||||
image: ${PROMTAIL_IMAGE}
|
||||
otel-collector:
|
||||
container_name: hge-otel-collector
|
||||
image: ${OTEL_COLLECTOR_IMAGE}
|
||||
command:
|
||||
- --config=/etc/otel-collector-config.yaml
|
||||
volumes:
|
||||
- ./promtail/config.yaml:/etc/promtail/config.yml:ro
|
||||
- /var/lib/docker/containers:/var/lib/docker/containers:ro
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
restart: always
|
||||
- ./otel-collector/otel-collector-config.yaml:/etc/otel-collector-config.yaml
|
||||
# - ./gcp-sa.json:/etc/otel/gcp-sa.json
|
||||
ports:
|
||||
- "1888" # pprof extension
|
||||
- "8888" # Prometheus metrics exposed by the collector
|
||||
- "8889" # Prometheus exporter metrics
|
||||
- "13133" # health_check extension
|
||||
- "4317" # OTLP gRPC receiver
|
||||
- "4318" # OTLP http receiver
|
||||
- "55679" # zpages extension
|
||||
environment:
|
||||
DD_API_KEY: ${DD_API_KEY}
|
||||
HONEYCOMB_API_KEY: ${HONEYCOMB_API_KEY}
|
||||
HONEYCOMB_DATASET: ${HONEYCOMB_DATASET}
|
||||
# GOOGLE_APPLICATION_CREDENTIALS: /etc/otel/gcp-sa.json
|
@ -1,5 +1,5 @@
|
||||
# HGE
|
||||
HGE_IMAGE=hasura/graphql-engine:v2.33.0-beta.1.cli-migrations-v3
|
||||
HGE_IMAGE=hasura/graphql-engine:v2.35.0.cli-migrations-v3
|
||||
HGE_PORT=8080
|
||||
HGE_EE_LICENSE_KEY=
|
||||
HGE_ADMIN_SECRET=myadminsecretkey
|
||||
@ -53,3 +53,13 @@ LOKI_PORT=3100
|
||||
|
||||
# Promtail
|
||||
PROMTAIL_IMAGE=grafana/promtail:2.8.2
|
||||
|
||||
# OpenTelemetry Collector
|
||||
OTEL_COLLECTOR_IMAGE=otel/opentelemetry-collector-contrib:0.84.0
|
||||
|
||||
# DataDog
|
||||
DD_API_KEY=
|
||||
|
||||
# HoneyComb
|
||||
HONEYCOMB_API_KEY=
|
||||
HONEYCOMB_DATASET=
|
@ -3,7 +3,9 @@ data_types:
|
||||
- traces
|
||||
exporter_otlp:
|
||||
headers: []
|
||||
otlp_traces_endpoint: http://jaeger:4318/v1/traces
|
||||
otlp_logs_endpoint: "{{OTEL_COLLECTOR_HOST}}/v1/logs"
|
||||
otlp_metrics_endpoint: "{{OTEL_COLLECTOR_HOST}}/v1/metrics"
|
||||
otlp_traces_endpoint: "{{JAEGER_HOST}}/v1/traces"
|
||||
protocol: http/protobuf
|
||||
resource_attributes: []
|
||||
batch_span_processor:
|
||||
|
@ -1,52 +1,52 @@
|
||||
{
|
||||
"__elements": {},
|
||||
"__inputs": [
|
||||
{
|
||||
"description": "",
|
||||
"label": "Loki",
|
||||
"name": "DS_LOKI",
|
||||
"label": "Loki",
|
||||
"description": "",
|
||||
"type": "datasource",
|
||||
"pluginId": "loki",
|
||||
"pluginName": "Loki",
|
||||
"type": "datasource"
|
||||
"pluginName": "Loki"
|
||||
},
|
||||
{
|
||||
"description": "",
|
||||
"label": "Jaeger",
|
||||
"name": "DS_JAEGER",
|
||||
"label": "Jaeger",
|
||||
"description": "",
|
||||
"type": "datasource",
|
||||
"pluginId": "jaeger",
|
||||
"pluginName": "Jaeger",
|
||||
"type": "datasource"
|
||||
"pluginName": "Jaeger"
|
||||
}
|
||||
],
|
||||
"__elements": {},
|
||||
"__requires": [
|
||||
{
|
||||
"type": "grafana",
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"type": "grafana",
|
||||
"version": "9.5.2"
|
||||
},
|
||||
{
|
||||
"type": "datasource",
|
||||
"id": "jaeger",
|
||||
"name": "Jaeger",
|
||||
"type": "datasource",
|
||||
"version": "1.0.0"
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "logs",
|
||||
"name": "Logs",
|
||||
"type": "panel",
|
||||
"version": ""
|
||||
},
|
||||
{
|
||||
"type": "datasource",
|
||||
"id": "loki",
|
||||
"name": "Loki",
|
||||
"type": "datasource",
|
||||
"version": "1.0.0"
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "traces",
|
||||
"name": "Traces",
|
||||
"type": "panel",
|
||||
"version": ""
|
||||
}
|
||||
],
|
||||
@ -102,8 +102,8 @@
|
||||
"type": "loki",
|
||||
"uid": "${DS_LOKI}"
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"expr": "{job=~\"$job\", instance=~\"$instance\"} |~ `(?i)$search_keyword` | json | type =~ `$log_type`",
|
||||
"editorMode": "code",
|
||||
"expr": "{job=~\"$job\", exporter=~\"$exporter\"} |~ `(?i)$search_keyword` | json | attributes_type =~ `$log_type`",
|
||||
"queryType": "range",
|
||||
"refId": "A"
|
||||
}
|
||||
@ -141,12 +141,14 @@
|
||||
"refresh": "",
|
||||
"schemaVersion": 38,
|
||||
"style": "dark",
|
||||
"tags": ["hasura"],
|
||||
"tags": [
|
||||
"hasura"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"selected": true,
|
||||
"text": "Loki",
|
||||
"value": "Loki"
|
||||
},
|
||||
@ -158,6 +160,7 @@
|
||||
"name": "DS_LOKI",
|
||||
"options": [],
|
||||
"query": "loki",
|
||||
"queryValue": "",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
@ -217,12 +220,12 @@
|
||||
"definition": "",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Instance",
|
||||
"label": "Exporter",
|
||||
"multi": true,
|
||||
"name": "instance",
|
||||
"name": "exporter",
|
||||
"options": [],
|
||||
"query": {
|
||||
"label": "instance",
|
||||
"label": "exporter",
|
||||
"refId": "LokiVariableQueryEditor-VariableQuery",
|
||||
"stream": "",
|
||||
"type": 1
|
||||
@ -257,8 +260,12 @@
|
||||
{
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": ["All"],
|
||||
"value": ["$__all"]
|
||||
"text": [
|
||||
"All"
|
||||
],
|
||||
"value": [
|
||||
"$__all"
|
||||
]
|
||||
},
|
||||
"description": "Hasura Log Type",
|
||||
"hide": 0,
|
||||
@ -337,7 +344,7 @@
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-5m",
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
@ -346,4 +353,4 @@
|
||||
"uid": "b4a37738-ba1b-4252-8905-516de01093ea",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
}
|
@ -25,8 +25,25 @@ datasources:
|
||||
url: http://jaeger:16686/
|
||||
# basicAuth: true
|
||||
# basicAuthUser: my_user
|
||||
# editable: true
|
||||
editable: true
|
||||
isDefault: false
|
||||
jsonData:
|
||||
tracesToLogsV2:
|
||||
datasourceUid: 'Loki'
|
||||
spanStartTimeShift: '1h'
|
||||
spanEndTimeShift: '-1h'
|
||||
filterByTraceID: false
|
||||
filterBySpanID: false
|
||||
customQuery: true
|
||||
query: "{exporter=\"OTLP\"} | json | traceid=`$${__span.traceId}`"
|
||||
nodeGraph:
|
||||
enabled: true
|
||||
traceQuery:
|
||||
timeShiftEnabled: true
|
||||
spanStartTimeShift: '1h'
|
||||
spanEndTimeShift: '-1h'
|
||||
spanBar:
|
||||
type: 'None'
|
||||
|
||||
- name: Loki
|
||||
type: loki
|
||||
@ -34,4 +51,4 @@ datasources:
|
||||
url: http://loki:3100
|
||||
version: 1
|
||||
editable: false
|
||||
isDefault: true
|
||||
isDefault: false
|
||||
|
@ -0,0 +1,102 @@
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
http:
|
||||
|
||||
exporters:
|
||||
prometheus:
|
||||
endpoint: "0.0.0.0:8889"
|
||||
|
||||
# Data sources: traces, metrics, logs
|
||||
logging:
|
||||
verbosity: detailed
|
||||
|
||||
loki:
|
||||
endpoint: http://loki:3100/loki/api/v1/push
|
||||
|
||||
# datadog:
|
||||
# api:
|
||||
# key: ${DD_API_KEY}
|
||||
|
||||
## deprecated: can push traces directly to http://jaeger:4318/v1/traces
|
||||
## use this pipeline if you want to export traces to multiple destinations
|
||||
##
|
||||
jaeger:
|
||||
endpoint: jaeger:14250
|
||||
tls:
|
||||
insecure: true
|
||||
|
||||
## Honeycomb exporter
|
||||
## Note: logs are sent to the service.name in trace (hasura), not to x-honeycomb-dataset
|
||||
# otlp:
|
||||
# endpoint: api.honeycomb.io:443
|
||||
# headers:
|
||||
# x-honeycomb-team: ${HONEYCOMB_API_KEY}
|
||||
# x-honeycomb-dataset: ${HONEYCOMB_DATASET}
|
||||
|
||||
## Google Cloud
|
||||
## You need to create a service account with the following roles
|
||||
## - roles/monitoring.metricWriter
|
||||
## - roles/cloudtrace.agent
|
||||
## - roles/logging.logWriter
|
||||
## download and rename to gcp-sa.json so the otel-collector can mount the service account
|
||||
##
|
||||
# googlecloud:
|
||||
# log:
|
||||
# default_log_name: example-otel.io/hasura-exported-log
|
||||
|
||||
processors:
|
||||
batch:
|
||||
send_batch_max_size: 100
|
||||
send_batch_size: 10
|
||||
timeout: 10s
|
||||
|
||||
transform:
|
||||
error_mode: ignore
|
||||
metric_statements:
|
||||
# OpenTelemetry doesn't automatically convert resource.attributes to label,
|
||||
# so we need to assign them to data_point attributes
|
||||
# {
|
||||
# "resource_metrics": [
|
||||
# {
|
||||
# "resource": {
|
||||
# "attributes": [
|
||||
# {
|
||||
# "key": "host.name",
|
||||
# "value": {
|
||||
# "string_value": "abc:8080"
|
||||
# }
|
||||
# },
|
||||
# ]
|
||||
# }
|
||||
# }
|
||||
# ]
|
||||
# }
|
||||
- context: datapoint
|
||||
statements:
|
||||
- set(attributes["instance"], resource.attributes["host.name"])
|
||||
|
||||
memory_limiter:
|
||||
check_interval: 1s
|
||||
limit_percentage: 65
|
||||
spike_limit_percentage: 20
|
||||
|
||||
service:
|
||||
pipelines:
|
||||
## enable exporting traces to Datadog if required
|
||||
##
|
||||
traces:
|
||||
receivers: [otlp]
|
||||
processors: [memory_limiter, batch]
|
||||
exporters: [jaeger]
|
||||
|
||||
logs:
|
||||
receivers: [otlp]
|
||||
processors: [memory_limiter, batch]
|
||||
exporters: [logging, loki]
|
||||
|
||||
metrics:
|
||||
receivers: [otlp]
|
||||
processors: [memory_limiter, transform, batch]
|
||||
exporters: [prometheus]
|
@ -76,3 +76,14 @@ scrape_configs:
|
||||
target_label: instance
|
||||
- target_label: __address__
|
||||
replacement: blackbox_exporter:9115
|
||||
|
||||
- job_name: "otel-collector"
|
||||
scrape_interval: 10s
|
||||
static_configs:
|
||||
- targets: ["otel-collector:8888"]
|
||||
|
||||
- job_name: "otel-collector-hasura"
|
||||
scrape_interval: 10s
|
||||
honor_labels: true
|
||||
static_configs:
|
||||
- targets: ["otel-collector:8889"]
|
||||
|
@ -1,26 +0,0 @@
|
||||
# https://grafana.com/docs/loki/latest/clients/promtail/configuration/
|
||||
# https://docs.docker.com/engine/api/v1.41/#operation/ContainerList
|
||||
server:
|
||||
http_listen_port: 9080
|
||||
grpc_listen_port: 0
|
||||
|
||||
positions:
|
||||
filename: /tmp/positions.yaml
|
||||
|
||||
clients:
|
||||
- url: http://loki:3100/loki/api/v1/push
|
||||
|
||||
scrape_configs:
|
||||
- job_name: flog_scrape
|
||||
docker_sd_configs:
|
||||
- host: unix:///var/run/docker.sock
|
||||
refresh_interval: 5s
|
||||
filters:
|
||||
- name: label
|
||||
values: ["logging=promtail"]
|
||||
relabel_configs:
|
||||
- source_labels: ["__meta_docker_container_name"]
|
||||
regex: "/(.*)"
|
||||
target_label: "instance"
|
||||
- source_labels: ["__meta_docker_container_label_logging_jobname"]
|
||||
target_label: "job"
|
@ -289,3 +289,75 @@ send the data to it. The list of
|
||||
be found in the [OpenTelemetry Collector repository](https://github.com/open-telemetry/opentelemetry-collector-contrib).
|
||||
|
||||
:::
|
||||
|
||||
## Connect Logs and Traces
|
||||
|
||||
The Trace ID and Span ID are included in the root of the log body. GraphQL Engine follows
|
||||
[OpenTelemetry's data model](https://opentelemetry.io/docs/specs/otel/logs/data-model/#log-and-event-record-definition)
|
||||
so that OpenTelemetry-compliant services can automatically correlate logs with Traces. However, some services need
|
||||
extra configurations.
|
||||
|
||||
### Jaeger
|
||||
|
||||
Grafana can link Jaeger with log data sources such as Loki, Elasticsearch, Splunk, etc. To do that, you need to
|
||||
configure `Trace to logs` in Jaeger's data source settings.
|
||||
|
||||
<Thumbnail
|
||||
src="/img/enterprise/open-telemetry-jaeger-loki-correlation-config.png"
|
||||
alt="Jaeger traces and Loki logs correlation config"
|
||||
width="1000px"
|
||||
/>
|
||||
|
||||
In this example, traces are linked to Loki logs by the `traceid`. You can also copy the configuration below to the data
|
||||
source file.
|
||||
|
||||
```yaml
|
||||
datasources:
|
||||
- name: Jaeger
|
||||
type: jaeger
|
||||
url: http://jaeger:16686/
|
||||
jsonData:
|
||||
tracesToLogsV2:
|
||||
datasourceUid: 'Loki'
|
||||
spanStartTimeShift: '1h'
|
||||
spanEndTimeShift: '-1h'
|
||||
filterByTraceID: false
|
||||
filterBySpanID: false
|
||||
customQuery: true
|
||||
query: "{exporter=\"OTLP\"} | json | traceid=`$${__span.traceId}`"
|
||||
traceQuery:
|
||||
timeShiftEnabled: true
|
||||
spanStartTimeShift: '1h'
|
||||
spanEndTimeShift: '-1h'
|
||||
```
|
||||
|
||||
You will see the `Logs for this span` button enabled when exploring the trace detail.
|
||||
|
||||
<Thumbnail
|
||||
src="/img/enterprise/open-telemetry-jaeger-loki-correlation.png"
|
||||
alt="Jaeger traces and Loki logs correlation"
|
||||
width="1200px"
|
||||
/>
|
||||
|
||||
### Datadog
|
||||
|
||||
If Datadog can't correlate traces with logs, you should verify the Trace ID attribute mapping.
|
||||
Read more at [the troubleshooting section](https://docs.datadoghq.com/tracing/troubleshooting/correlated-logs-not-showing-up-in-the-trace-id-panel/?tab=jsonlogs#trace-id-option) on Datadog.
|
||||
|
||||
<Thumbnail
|
||||
src="/img/enterprise/open-telemetry-datadog-trace-log.png"
|
||||
alt="Datadog traces and logs correlation"
|
||||
width="1000px"
|
||||
/>
|
||||
|
||||
### Honeycomb
|
||||
|
||||
Traces and logs can't be correlated if they are exported to different datasets.
|
||||
Note that Honeycomb will use the `service.name` attribute as the dataset where logs are exported.
|
||||
Therefore, the `x-honeycomb-dataset` header must match that attribute.
|
||||
|
||||
<Thumbnail
|
||||
src="/img/enterprise/open-telemetry-honeycomb-trace-log.png"
|
||||
alt="Honeycomb traces and logs correlation"
|
||||
width="1000px"
|
||||
/>
|
||||
|
BIN
docs/static/img/enterprise/open-telemetry-datadog-trace-log.png
vendored
Normal file
BIN
docs/static/img/enterprise/open-telemetry-datadog-trace-log.png
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 136 KiB |
BIN
docs/static/img/enterprise/open-telemetry-honeycomb-trace-log.png
vendored
Normal file
BIN
docs/static/img/enterprise/open-telemetry-honeycomb-trace-log.png
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 104 KiB |
BIN
docs/static/img/enterprise/open-telemetry-jaeger-loki-correlation-config.png
vendored
Normal file
BIN
docs/static/img/enterprise/open-telemetry-jaeger-loki-correlation-config.png
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 62 KiB |
BIN
docs/static/img/enterprise/open-telemetry-jaeger-loki-correlation.png
vendored
Normal file
BIN
docs/static/img/enterprise/open-telemetry-jaeger-loki-correlation.png
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 190 KiB |
Loading…
Reference in New Issue
Block a user