diff --git a/infra/es_cluster.tf b/infra/es_cluster.tf deleted file mode 100644 index 58002cea2d..0000000000 --- a/infra/es_cluster.tf +++ /dev/null @@ -1,1098 +0,0 @@ -# Copyright (c) 2022 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 - -resource "google_compute_network" "es" { - name = "es-network" -} - -/* -Instruct ES to move all data out of blue nodes: -PUT _cluster/settings -{ - "transient" : { - "cluster.routing.allocation.exclude._name" : "es-blue-*" - } -} -use null to reset -*/ - -locals { - es_ssh = 0 - es_feed = 1 - es_clusters = [ - { - suffix = "-blue", - ubuntu_version = "2004", - size = 5, - init = "[]", - type = "n2-highmem-2", - xmx = "12g", - disk_size = 800, - }, - { - suffix = "-green", - ubuntu_version = "2004", - size = 0, - init = "[]", - type = "n2-highmem-2", - xmx = "12g", - disk_size = 800, - }, - { - suffix = "-init", - ubuntu_version = "2004", - size = 0, - init = "[\"$(hostname)\"]", - type = "e2-standard-2", - xmx = "6g", - disk_size = 200, - }, - ] - - es_ports = [ - { name = "es", port = "9200" }, - { name = "kibana", port = "5601" }, - { name = "cerebro", port = "9000" }, - ] -} - -resource "google_compute_firewall" "es-ssh" { - ## Disabled by default - count = local.es_ssh - name = "es-ssh" - network = google_compute_network.es.name - log_config { - metadata = "INCLUDE_ALL_METADATA" - } - allow { - protocol = "tcp" - ports = ["22"] - } - source_ranges = [ # VPNs - "35.194.81.56/32", # North Virginia - "35.189.40.124/32", # Sydney - "35.198.147.95/32", # Frankfurt - "18.210.210.130/32", # consultant - ] -} - -resource "google_compute_firewall" "es-http" { - name = "es-http" - network = google_compute_network.es.name - target_tags = ["es"] - - source_ranges = [ - ## Google Load Balancer - "130.211.0.0/22", - "35.191.0.0/16", - ] - - allow { - protocol = "tcp" - ports = [for p in local.es_ports : p.port] - } -} - -resource "google_compute_firewall" "es-internal" { - name = "es-internal" - network = google_compute_network.es.name - target_tags = ["es"] - - source_ranges = [ - ## Internal - "10.128.0.0/9", - ] - - allow { - protocol = "tcp" - ports = ["9300"] - } -} - -resource "google_service_account" "es-discovery" { - # account_id allows - but not _ - account_id = "es-discovery" - display_name = "es-discovery" -} - -resource "google_project_iam_custom_role" "es-discovery" { - # role_id allows _ but not - - role_id = "es_discovery" - title = "es-discovery" - description = "es-discovery" - permissions = [ - # Cloud logging - "logging.logEntries.create", - # ES discovery - "compute.instances.get", - "compute.instances.list", - ] -} - -resource "google_project_iam_member" "es-discovery" { - project = local.project - role = google_project_iam_custom_role.es-discovery.id - member = "serviceAccount:${google_service_account.es-discovery.email}" -} - -resource "google_compute_instance_template" "es" { - count = length(local.es_clusters) - name_prefix = "es${local.es_clusters[count.index].suffix}-" - machine_type = local.es_clusters[count.index].type - tags = ["es"] - labels = local.machine-labels - - disk { - boot = true - disk_size_gb = local.es_clusters[count.index].disk_size - source_image = "ubuntu-os-cloud/ubuntu-${local.es_clusters[count.index].ubuntu_version}-lts" - } - - metadata_startup_script = <<STARTUP -cat <<CONFIG > /etc/docker/daemon.json -{ - "log-driver": "json-file", - "log-opts": {"max-size": "10m", "max-file": "3"} -} -CONFIG - -apt-get install -y \ - apt-transport-https \ - ca-certificates \ - gnupg 
\ - lsb-release -curl -fsSL https://download.docker.com/linux/ubuntu/gpg \ - | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg -echo \ - "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu \ - $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null -apt-get update -apt-get install -y docker-ce docker-ce-cli containerd.io - -## Set up ES - -sysctl -w vm.max_map_count=262144 - -mkdir -p /root/es-docker -cd /root/es-docker - -cat <<EOF > es.yml -cluster.name: es -node.name: $(hostname) -cluster.initial_master_nodes: ${local.es_clusters[count.index].init} -discovery.seed_providers: gce -discovery.gce.tags: es -cloud.gce.project_id: ${local.project} -cloud.gce.zone: ${local.zone} -network.host: 0.0.0.0 -network.publish_host: _gce_ -http.max_content_length: 500mb -EOF - -cat <<EOF > Dockerfile -FROM docker.elastic.co/elasticsearch/elasticsearch:7.13.2 - -RUN bin/elasticsearch-plugin install --batch discovery-gce -COPY es.yml /usr/share/elasticsearch/config/elasticsearch.yml -EOF - -mkdir -p /root/es-data -chown 1000:0 /root/es-data - -docker build -t es . -docker run -d \ - --restart on-failure \ - --name es \ - -p 9200:9200 \ - -p 9300:9300 \ - -e ES_JAVA_OPTS="-Xmx${local.es_clusters[count.index].xmx} -Xms${local.es_clusters[count.index].xmx} -Dlog4j2.formatMsgNoLookups=true" \ - -v /root/es-data:/usr/share/elasticsearch/data \ - es - -docker run -d \ - --restart on-failure \ - --name kibana \ - -p 5601:5601 \ - --link es:elasticsearch \ - -e TELEMETRY_ENABLED=false \ - docker.elastic.co/kibana/kibana:7.13.2 - -docker run -d \ - -p 9000:9000 \ - --link es \ - --name cerebro \ - lmenezes/cerebro:0.9.4 - -## Getting container output directly to the GCP console - -( exec 1> >(while IFS= read -r line; do echo "elastic: $line"; done); docker logs -f es ) & -( exec 1> >(while IFS= read -r line; do echo "kibana: $line"; done); docker logs -f kibana ) & -( exec 1> >(while IFS= read -r line; do echo "cerebro: $line"; done); docker logs -f cerebro ) & - -for job in $(jobs -p); do - wait $job -done - -STARTUP - - - - network_interface { - network = google_compute_network.es.name - access_config {} - } - - service_account { - email = google_service_account.es-discovery.email - scopes = [ - # Required for cloud logging - "logging-write", - # Required per ES documentation - "compute-rw", - ] - } - - lifecycle { - create_before_destroy = true - } -} - -resource "google_compute_instance_group_manager" "es" { - provider = google-beta - count = length(local.es_clusters) - name = "es${local.es_clusters[count.index].suffix}" - base_instance_name = "es${local.es_clusters[count.index].suffix}" - zone = local.zone - target_size = local.es_clusters[count.index].size - - version { - name = "es${local.es_clusters[count.index].suffix}" - instance_template = google_compute_instance_template.es[count.index].self_link - } - - dynamic "named_port" { - for_each = local.es_ports - content { - name = named_port.value["name"] - port = named_port.value["port"] - } - } - - update_policy { - type = "PROACTIVE" - minimal_action = "REPLACE" - max_unavailable_percent = 100 - } -} - -resource "google_compute_address" "es" { - count = length(local.es_ports) - name = "es-${local.es_ports[count.index].name}" - network_tier = "STANDARD" -} - -resource "google_compute_health_check" "es-http" { - count = length(local.es_ports) - name = "es-http-${local.es_ports[count.index].name}" - check_interval_sec = 10 - timeout_sec = 1 - - tcp_health_check 
{ - port = local.es_ports[count.index].port - } -} - -resource "google_compute_backend_service" "es-http" { - count = length(local.es_ports) - name = "es-http-${local.es_ports[count.index].name}" - health_checks = [google_compute_health_check.es-http[count.index].self_link] - port_name = local.es_ports[count.index].name - security_policy = google_compute_security_policy.es.self_link - load_balancing_scheme = "EXTERNAL" - - dynamic "backend" { - for_each = local.es_clusters - content { - group = google_compute_instance_group_manager.es[backend.key].instance_group - balancing_mode = "UTILIZATION" - capacity_scaler = 1 - } - } -} - -resource "google_compute_url_map" "es-http" { - count = length(local.es_ports) - name = "es-http-${local.es_ports[count.index].name}" - default_service = google_compute_backend_service.es-http[count.index].self_link -} - -resource "google_compute_target_http_proxy" "es-http" { - count = length(local.es_ports) - name = "es-http-${local.es_ports[count.index].name}" - url_map = google_compute_url_map.es-http[count.index].self_link -} - -resource "google_compute_forwarding_rule" "es-http" { - count = length(local.es_ports) - name = "es-http-${local.es_ports[count.index].name}" - target = google_compute_target_http_proxy.es-http[count.index].self_link - ip_address = google_compute_address.es[count.index].address - port_range = "80" - network_tier = "STANDARD" -} - -## The proxy implied by the forwarding rule sits outside our network, but we -## still want to limit to VPNs. -resource "google_compute_security_policy" "es" { - name = "es" - - rule { - action = "deny(403)" - priority = "2147483647" - match { - versioned_expr = "SRC_IPS_V1" - config { - src_ip_ranges = ["*"] - } - } - description = "Default: deny all" - } - - rule { - action = "allow" - priority = "1000" - match { - versioned_expr = "SRC_IPS_V1" - config { - src_ip_ranges = [ # VPNs - "35.194.81.56/32", # North Virginia - "35.189.40.124/32", # Sydney - "35.198.147.95/32", # Frankfurt - "18.210.210.130/32", # consultant - "${google_compute_address.es-feed.address}/32" - ] - } - } - description = "Allow VPNs" - } -} - -output "es_addresses" { - value = { for idx, p in local.es_ports : p.name => google_compute_address.es[idx].address } -} - -resource "google_compute_address" "es-feed" { - name = "es-feed" -} - -resource "google_service_account" "es-feed" { - account_id = "es-feed" - display_name = "es-feed" -} - -resource "google_project_iam_custom_role" "es-feed" { - role_id = "es_feed" - title = "es-feed" - description = "es-feed" - permissions = [ - # Cloud logging - "logging.logEntries.create", - # Access GCS bucket - "storage.objects.get", - "storage.objects.list", - ] -} - -resource "google_project_iam_member" "es-feed" { - project = local.project - role = google_project_iam_custom_role.es-feed.id - member = "serviceAccount:${google_service_account.es-feed.email}" -} - -resource "google_project_iam_custom_role" "es-feed-write" { - role_id = "es_feed_write" - title = "es-feed-write" - description = "es-feed-write" - permissions = [ - "storage.objects.create" - ] -} - -resource "google_project_iam_member" "es-feed-write" { - project = local.project - role = google_project_iam_custom_role.es-feed-write.id - member = "serviceAccount:${google_service_account.es-feed.email}" - - condition { - title = "es_feed_write" - description = "es_feed_write" - expression = "resource.name.startsWith(\"projects/_/buckets/${google_storage_bucket.data.name}/objects/kibana-export\")" - } -} - -resource 
"google_compute_instance_group_manager" "es-feed" { - provider = google-beta - name = "es-feed" - base_instance_name = "es-feed" - zone = local.zone - target_size = local.es_feed - - version { - name = "es-feed" - instance_template = google_compute_instance_template.es-feed.self_link - } - - update_policy { - type = "PROACTIVE" - minimal_action = "REPLACE" - max_unavailable_percent = 100 - } -} - -resource "google_compute_instance_template" "es-feed" { - name_prefix = "es-feed" - machine_type = "e2-standard-2" - tags = ["es"] - labels = local.machine-labels - - disk { - boot = true - source_image = "ubuntu-os-cloud/ubuntu-2004-lts" - disk_size_gb = "200" - } - - metadata_startup_script = </root/cron.sh -#!/usr/bin/env bash -set -euo pipefail - -emit_mappings() { - jq -nc '{ - mappings: { - properties: { - job: { - properties: { - timestamp: { type: "date" }, - id: { type: "keyword" }, - agent_id: { type: "keyword" }, - agent_job_name: { type: "keyword" }, - agent_machine_name: { type: "keyword" }, - agent_name: { type: "keyword" }, - agent_os: { type: "keyword" }, - agent_os_architecture: { type: "keyword" }, - build_build_id: { type: "keyword" }, - build_build_number: { type: "keyword" }, - build_definition_name: { type: "keyword" }, - build_source_branch: { type: "keyword" }, - build_source_branch_name: { type: "keyword" }, - build_source_version: { type: "keyword" }, - system_job_attempt: { type: "keyword" }, - system_job_display_name: { type: "keyword" }, - system_job_id: { type: "keyword" }, - system_job_name: { type: "keyword" }, - system_pullRequest_pullRequestId: { type: "keyword" }, - system_pullRequest_pullRequestNumber: { type: "keyword" }, - system_pullRequest_mergedAt: { type: "keyword" }, - system_pullRequest_sourceBranch: { type: "keyword" }, - system_pullRequest_targetBranch: { type: "keyword" }, - system_pullRequest_sourceRepositoryUri: { type: "keyword" }, - system_pullRequest_sourceCommitId: { type: "keyword" }, - git_branch_sha: { type: "keyword" }, - git_main_sha: { type: "keyword" }, - git_fork_point: { type: "keyword" }, - git_current_branch: { type: "keyword" }, - git_current_commit: { type: "keyword" }, - git_current_tree: { type: "keyword" }, - } - }, - command: { - properties: { - name: { type: "keyword" } - } - }, - buildEvent: { - properties: { - id: { type: "object" }, - children: { type: "nested" }, - lastMessage: { type: "boolean" }, - progress: { - properties: { - stdout: { type: "text" }, - stderr: { type: "text" }, - } - }, - aborted: { - properties: { - reason: { type: "keyword" }, - description: { type: "text" }, - } - }, - started: { - properties: { - uuid: { type: "keyword" }, - startTimeMillis: { - type: "date", - format: "epoch_millis" - }, - buildToolVersion: { type: "keyword" }, - optionsDescription: { type: "text" }, - command: { type: "keyword" }, - workingDirectory: { type: "keyword" }, - workspaceDirectory: { type: "keyword" }, - serverPid: { type: "keyword" }, - } - }, - unstructuredCommandLine: { - properties: { - args: { type: "keyword" }, - } - }, - structuredCommandLine: { - properties: { - sections: { type: "nested" }, - }, - }, - optionsParsed: { type: "object" }, - workspaceStatus: { - properties: { - item: { - type: "nested", - properties: { - key: { type: "keyword" }, - value: { type: "text" }, - }, - }, - }, - }, - fetch: { type: "object" }, - configuration: { type: "object" }, - expanded: { type: "object" }, - configured: { type: "object" }, - action: { - properties: { - actionMetadataLogs: { type: "nested" }, - }, - }, - 
namedSetOfFiles: { type: "object" }, - completed: { - properties: { - success: { type: "boolean" }, - outputGroup: { type: "nested" }, - importantOutput: { type: "nested" }, - directoryOutput: { type: "nested" }, - testTimeoutSeconds: { type: "long" }, - }, - }, - testResult: { - properties: { - cachedLocally: { type: "boolean" }, - testAttemptStartMillisEpoch: { - type: "date", - format: "epoch_millis", - }, - testAttemptDurationMillis: { type: "long" }, - testActionOutput: { type: "nested" }, - executionInfo: { - properties: { - timeoutSeconds: { type: "integer" }, - cachedRemotely: { type: "boolean" }, - exitCode: { type: "integer" }, - timingBreakdown: { type: "nested" }, - resourceUsage: { type: "nested" }, - }, - }, - }, - }, - testSummary: { - properties: { - overallStatus: { type: "keyword" }, - totalRunCount: { type: "integer" }, - runCount: { type: "integer" }, - shardCount: { type: "integer" }, - passed: { type: "nested" }, - failed: { type: "nested" }, - totalNumCached: { type: "integer" }, - firstStartTimeMillis: { - type: "date", - format: "epoch_millis", - }, - lastStopTimeMillis: { - type: "date", - format: "epoch_millis", - }, - totalRunDurationMillis: { type: "long" }, - }, - }, - finished: { - properties: { - overallSuccess: { type: "boolean" }, - exitCode: { - properties: { - name: { type: "keyword" }, - code: { type: "integer" }, - }, - }, - finishTimeMillis: { - type: "date", - format: "epoch_millis", - }, - anomalyReport: { - properties: { - wasSuspended: { type: "boolean" }, - }, - }, - }, - }, - buildToolLogs: { - properties: { - log: { type: "nested" }, - }, - }, - buildMetrics: { - properties: { - actionSummary: { - properties: { - actionsCreated: { type: "long" }, - actionsExecuted: { type: "long" }, - }, - }, - memoryMetrics: { - properties: { - usedHeapSizePostBuild: { type: "long" }, - peakPostGcHeapSize: { type: "long" }, - }, - }, - targetMetrics: { - properties: { - targetsLoaded: { type: "long" }, - targetsConfigured: { type: "long" }, - }, - }, - packageMetrics: { - properties: { - packagesLoaded: { type: "long" }, - }, - }, - timingMetrics: { - properties: { - cpuTimeInMs: { type: "long" }, - wallTimeInMs: { type: "long" }, - }, - }, - }, - }, - workspaceConfig: { type: "object" }, - buildMetadata: { type: "object" }, - convenienceSymlinksIdentified: { - properties: { - convenienceSymlinks: { type: "nested" }, - }, - }, - } - }, - traceEvent: { - properties: { - cat: { type: "keyword" }, - name: { type: "keyword" }, - ph: { type: "keyword" }, - pid: { type: "integer" }, - tid: { type: "integer" }, - args: { type: "object" }, - ts: { type: "long" }, - dur: { type: "long" }, - args: { - properties: { - name: { type: "keyword" }, - target: { type: "keyword" }, - }, - }, - }, - }, - } - }, - settings: { - number_of_replicas: 1, - number_of_shards: 3, - "mapping.nested_objects.limit": 100000 - } - }' -} - -ensure_index() { - local job index - job="$1" - index="$2" - if ! 
[ -f $DONE/$index ]; then - if curl -s --fail -I http://$ES_IP/$index >/dev/null; then - echo "$job: index $index already exists" - else - echo "$job: creating index $index" - emit_mappings | curl -XPUT http://$ES_IP/$index \ - -s \ - -H 'Content-Type: application/json' \ - --fail \ - --data-binary @- >/dev/null - fi - touch $DONE/$index - fi -} - -emit_build_events() { - local job cmd file - job="$1" - cmd="$2" - file="$3" - jq -c \ - --slurpfile job_md "$job/job-md.json" \ - --arg cmd "$cmd" \ - --arg index "$(index "$job")" \ - --arg job "$job" \ - < "$file" \ - ' - { index: { _index: $index, _id: ($job + "-" + $cmd + "-events-" + (input_line_number | tostring)) } }, - { job: $job_md[0], - command: { name: $cmd }, - buildEvent: . - } - ' -} - -emit_trace_events() { - local job cmd index file - job="$1" - cmd="$2" - file="$3" - jq -c \ - --slurpfile job_md "$job/job-md.json" \ - --arg cmd "$cmd" \ - --arg index "$(index "$job")" \ - --arg job "$job" \ - < "$file" \ - ' - .traceEvents - | to_entries[] - | { index: { _index: $index, _id: ($job + "-" + $cmd + "-profile-" + (.key | tostring)) } }, - { job: $job_md[0], - command: { name: $cmd }, - traceEvent: .value - } - ' -} - -bulk_upload() ( - - ## Uploads a bunch of JSON objects, subject to these constraints: - ## - ## 1. The input file has one JSON object per line. We cannot bbreak lines, as - ## that would result in incomplete JSON objects. - ## 2. JSON objects go in pairs: the first line is metadata for how ES should - ## ingest the second line. So we can't split in the middle of a pair - ## either. - ## 3. The maximum size for a single upload is 500mb (set in the ES - ## configuration a bit higher in this file), so if a file is larger than - ## that we need to split it, respecting constraints 1 and 2. - ## - ## Because this function is defined with () rather than the usual {}, it runs - ## in a subshell and can define its own scoped inner functions, as well as - ## its own traps. Also, all variables are local. - - tmp=$(mktemp) - chunk=$(mktemp) - trap 'rm -f $tmp $chunk' EXIT - cat - > $tmp - lines_to_process=$(wc -l $tmp | awk '{print $1'}) - processed_lines=0 - lines_per_chunk=$lines_to_process - - push_chunk() { - curl -X POST "http://$ES_IP/_bulk?filter_path=errors,items.*.status" \ - -H 'Content-Type: application/json' \ - --fail \ - -s \ - --data-binary @$chunk \ - | jq -r '.items[].index.status' | sort | uniq -c - processed_lines=$(( processed_lines + lines_per_chunk )) - lines_per_chunk=$(( lines_per_chunk * 2 )) - } - - get_next_chunk() { - ( - # tail -n +N drops the first N-1 lines - # tail is expected to fail with 141 (pipe closed) on intermediate - # iterations - tail -n +$(( processed_lines + 1)) $tmp || (( $? == 141)) - ) \ - | head -n $lines_per_chunk \ - > $chunk - } - - all_lines_have_been_processed() (( processed_lines >= lines_to_process )) - - # limit chunk size to 50MB - # This will fail dramatically if we ever have a single line over 50MB - chunk_is_too_big() (( $(du $chunk | awk '{print $1}') > 50000 )) - - reduce_chunk_size() { - # divide by two, but keep an even number - lines_per_chunk=$(( lines_per_chunk / 4 * 2)) - } - - until all_lines_have_been_processed; do - get_next_chunk - if chunk_is_too_big; then - reduce_chunk_size - else - push_chunk - fi - done -) - -patch() { - local job map file - job="$1" - # Replace shortened Scala test names by their long names. - # See //bazel_tools:scala.bzl%da_scala_test_short_name_aspect. - map="scala-test-suite-name-map.json" - if ! 
[[ -f "$job/$map" ]]; then - echo "$job: no $map" - else - echo "$job: applying $map" - # Generates a sed command to replace short labels by long labels. - jq_command='to_entries | map("s|\(.key)\\b|\(.value)|g") | join(";")' - sed_command="$(jq -r "$jq_command" <"$job/$map")" - for f in build-events build-profile test-events test-profile; do - file="$job/$f.json" - if [ -f "$file" ]; then - sed -i "$sed_command" "$file" - fi - done - fi -} - -push() { - local job f pids - job="$1" - pids="" - for cmd in "build" "test"; do - - f="$job/$cmd-events.json" - if ! [[ -f "$f" ]]; then - echo "$job: no $cmd-events.json" - elif ! jq . >/dev/null 2>&1 < $f; then - echo "$job: $cmd-events.json exists but is not valid json, skipping" - else - echo "$job: pushing $cmd-events.json" - (emit_build_events "$job" "$cmd" "$f" | bulk_upload) & - pids="$pids $!" - fi - - f="$job/$cmd-profile.json" - if ! [[ -f "$f" ]]; then - echo "$job: no $cmd-profile.json" - elif ! jq . >/dev/null 2>&1 < $f; then - echo "$job: $cmd-profile.json exists but is not valid json, skipping" - else - echo "$job: pushing $cmd-profile.json" - (emit_trace_events "$job" "$cmd" "$f" | bulk_upload) & - pids="$pids $!" - fi - done - for pid in $pids; do - wait $pid - done -} - -index() { - local job prefix - job="$1" - echo "events-$(echo $job | cut -c1-10)" -} - -pid=$$ -exec 2> >(while IFS= read -r line; do echo "$(date -uIs) [ingest] [$pid] [err]: $line"; done) -exec 1> >(while IFS= read -r line; do echo "$(date -uIs) [ingest] [$pid] [out]: $line"; done) - -LOCK=/root/lock - -if [ -f $LOCK ]; then - echo "Already running; skipping." - exit 0 -else - touch $LOCK - trap "rm $LOCK; echo exited" EXIT - echo "Starting..." -fi - -echo "Running rsync..." -$GSUTIL -q -m rsync -r gs://daml-data/bazel-metrics/ $DATA/ -echo "Total data size: $(du -hs $DATA | awk '{print $1}')." - -todo=$(find $DATA -type f -name \*.tar.gz | sort) -comm=$(comm -23 <(for f in $todo; do basename $${f%.tar.gz}; done | sort) <(ls $DONE | sort)) - -echo "Need to push $(echo "$comm" | sed '/^$/d' | wc -l) files out of $(echo "$todo" | sed '/^$/d' | wc -l)." - -for tar in $todo; do - job=$(basename $${tar%.tar.gz}) - cd $(dirname $tar) - if ! [ -f $DONE/$job ]; then - ensure_index "$job" "$(index "$job")" - tar --force-local -x -z -f "$(basename "$tar")" - patch "$job" - push "$job" - rm -rf $job - r=$(curl -H 'Content-Type: application/json' \ - --fail \ - -s \ - "http://$ES_IP/done/_doc/$job" \ - -d '{}') - echo "$job: $(echo $r | jq '.result')" - touch "$DONE/$job" - fi -done -CRON - -cat <<'HOURLY' >/root/hourly.sh -#!/usr/bin/env bash -set -euo pipefail - -pid=$$ -exec 2> >(while IFS= read -r line; do echo "$(date -uIs) [kibex] [$pid] [err]: $line"; done) -exec 1> >(while IFS= read -r line; do echo "$(date -uIs) [kibex] [$pid] [out]: $line"; done) - -HOUR="$(date -u -Is | cut -c 1-13)" -TMP=$(mktemp) -TARGET="gs://daml-data/kibana-export/$HOUR.gz" - -echo "Starting Kibana export..." - -# Kibana export API does not support wildcard, so we list all of the object -# types that exist as of Kibana 7.13. -curl http://$KIBANA_IP/api/saved_objects/_export \ - -XPOST \ - -d'{"excludeExportDetails": true, - "type": ["visualization", "dashboard", "search", "index-pattern", - "config", "timelion-sheet"]}' \ - -H 'kbn-xsrf: true' \ - -H 'Content-Type: application/json' \ - --fail \ - --silent \ - | gzip -9 > $TMP - - -echo "Pushing $TARGET" - -$GSUTIL -q cp $TMP $TARGET - -echo "Done." 
-HOURLY - -chmod +x /root/cron.sh -chmod +x /root/hourly.sh - -ES_IP=${google_compute_address.es[0].address} -KIB_IP=${google_compute_address.es[1].address} - -DATA=/root/data -mkdir -p $DATA - -DONE=/root/done -mkdir -p $DONE - -echo "Synchronizing with cluster state..." -found=0 -for prefix in jobs events; do - for idx in $(curl --fail "http://$ES_IP/_cat/indices/$prefix-*?format=json" -s | jq -r '.[] | .index'); do - found=$((found + 1)) - touch $DONE/$idx; - done -done -echo "Found $found indices." - -if curl -s --fail -I "http://$ES_IP/done" >/dev/null; then - found=0 - res=$(curl --fail "http://$ES_IP/done/_search?_source=false&size=1000&scroll=5m" -s) - while (echo $res | jq -e '.hits.hits != []' >/dev/null); do - for id in $(echo $res | jq -r '.hits.hits[]._id'); do - found=$((found + 1)) - touch $DONE/$id - done - scroll_id=$(echo $res | jq -r '._scroll_id') - res=$(curl "http://$ES_IP/_search/scroll" \ - -s \ - --fail \ - -d "$(jq --arg id "$scroll_id" \ - -n \ - '{scroll: "5m", scroll_id: $id}')" \ - -H 'Content-Type: application/json') - done - echo "Found $found jobs." -else - echo "No done index; creating..." - r=$(curl -XPUT "http://$ES_IP/done" \ - -d '{"settings": {"number_of_replicas": 2}}' \ - --fail \ - -s \ - -H 'Content-Type: application/json') - echo $r -fi - -cat <<CRONTAB >> /etc/crontab -* * * * * root GSUTIL="$(which gsutil)" DONE="$DONE" DATA="$DATA" ES_IP="$ES_IP" /root/cron.sh >> /root/log 2>&1 -1 * * * * root GSUTIL="$(which gsutil)" KIBANA_IP="$KIB_IP" /root/hourly.sh >> /root/log 2>&1 -CRONTAB - -echo "Waiting for first run..." > /root/log -tail -f /root/log - -STARTUP - - network_interface { - network = google_compute_network.es.name - access_config { - nat_ip = google_compute_address.es-feed.address - } - } - - service_account { - email = google_service_account.es-feed.email - scopes = [ - # Required for cloud logging - "logging-write", - # Read access to storage - "storage-rw", - ] - } - - scheduling { - automatic_restart = false - on_host_maintenance = "TERMINATE" - preemptible = true - } - - lifecycle { - create_before_destroy = true - } - -}
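
Note on the blue/green procedure described in the comment at the top of the deleted file: the drain and the reset are plain calls to the Elasticsearch cluster-settings API. A minimal sketch, assuming the `es` address from the `es_addresses` output is stored in `ES_IP` and the caller is on one of the allowed VPN ranges:

    # Drain: ask ES to relocate all shards away from the blue nodes.
    curl -s -XPUT "http://$ES_IP/_cluster/settings" \
      -H 'Content-Type: application/json' \
      -d '{"transient": {"cluster.routing.allocation.exclude._name": "es-blue-*"}}'

    # Watch shards leave the blue nodes; their per-node count should drop to zero.
    curl -s "http://$ES_IP/_cat/shards?format=json" | jq -r '.[].node' | sort | uniq -c

    # Reset: use null to remove the exclusion once the green cluster holds the data.
    curl -s -XPUT "http://$ES_IP/_cluster/settings" \
      -H 'Content-Type: application/json' \
      -d '{"transient": {"cluster.routing.allocation.exclude._name": null}}'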
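The `bulk_upload` helper in `cron.sh` relies on the `_bulk` convention that its input is NDJSON in pairs: an action line naming the target index and document id, then the document itself, which is why chunks may only be split on even line boundaries. Roughly what `emit_build_events` feeds it, with placeholder values rather than real data:

    { "index": { "_index": "events-<first 10 chars of job>", "_id": "<job>-build-events-1" } }
    { "job": { "id": "<job>" }, "command": { "name": "build" }, "buildEvent": { "lastMessage": false } }

    # A chunk of such pairs is then posted the same way push_chunk does:
    curl -s -X POST "http://$ES_IP/_bulk?filter_path=errors,items.*.status" \
      -H 'Content-Type: application/json' \
      --fail \
      --data-binary @chunk.ndjson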
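To check how far the feed machine has gotten, the `done` marker index and the `events-*` indices can be queried through the `es` forwarding rule. A hedged example, assuming the Terraform outputs are available locally and the caller is on an allowed VPN range:

    ES=$(terraform output -json es_addresses | jq -r '.es')
    # Number of jobs recorded as fully pushed.
    curl -s "http://$ES/done/_count" | jq '.count'
    # Event indices created so far by cron.sh.
    curl -s "http://$ES/_cat/indices/events-*?format=json" | jq -r '.[].index' | sort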
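The hourly job only exports Kibana saved objects to gs://daml-data/kibana-export/; nothing in this file restores them. A possible restore sketch, assuming a dump named in the `<YYYY-MM-DDTHH>.gz` form produced by hourly.sh (the date below is illustrative) and the `kibana` address from `es_addresses` in `KIBANA_IP`:

    gsutil cp "gs://daml-data/kibana-export/2022-01-01T00.gz" - | gunzip -c > export.ndjson
    curl -s -XPOST "http://$KIBANA_IP/api/saved_objects/_import?overwrite=true" \
      -H 'kbn-xsrf: true' \
      --form file=@export.ndjson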