Clean up leftover BigQuery datasets

PR-URL: https://github.com/hasura/graphql-engine-mono/pull/8543
GitOrigin-RevId: 9463e50aa4bf62e12e39b4bebdf551e11c824897
Tom Harding 2023-03-28 16:41:35 +01:00 committed by hasura-bot
parent bbf71665d3
commit d80f5b0cdd
2 changed files with 57 additions and 0 deletions

scripts/cleanup-bigquery.js

@@ -0,0 +1,44 @@
// Delete a bunch of BigQuery test datasets.
//
// Note that we can only select the first 50 datasets at a time, so you might
// have to run this a few times to clear it out completely.
const { exec } = require('child_process')
const util = require('util')
const allDatasetsCommand = "bq ls --project_id regency-polecat-beehive --format=prettyjson | jq '.[] | .datasetReference.datasetId | select(startswith(\"hasura_test_\"))'"
const singleDatasetCommand = dataset => `bq show --format=prettyjson regency-polecat-beehive:${dataset}`
const removeDatasetCommand = dataset => `bq rm --dataset=true --force=true --recursive=true regency-polecat-beehive:${dataset}`
const now = Date.now()
const removeDataset = dataset =>
  util.promisify(exec)(removeDatasetCommand(dataset))
    .then(_ => console.log('Deleted ' + dataset))

const checkDataset = dataset =>
  util.promisify(exec)(singleDatasetCommand(dataset))
    .then(({ stdout }) => {
      const parsed = JSON.parse(stdout)

      // lastModifiedTime is epoch milliseconds; coerce it in case `bq show`
      // returns it as a string.
      const lastUpdate = Number(parsed.lastModifiedTime)

      // Only delete datasets untouched for at least six hours, presumably so
      // datasets belonging to an in-flight test run are left alone.
      if (now - (1000 * 60 * 60 * 6) > lastUpdate) {
        console.log('Deleting ' + dataset)
        removeDataset(dataset)
      } else {
        console.log('Not deleting ' + dataset)
      }
    })
    .catch(_ => console.log('Skipping ' + dataset))

util.promisify(exec)(allDatasetsCommand)
  .then(({ stdout }) => {
    // jq prints each matching dataset id as a JSON-quoted string, one per
    // line, so drop empty lines and strip the surrounding quotes.
    const datasets =
      stdout
        .split('\n')
        .filter(x => x !== '')
        .map(x => x.substring(1, x.length - 1))

    console.log('Found ' + datasets.length + ' datasets')
    datasets.forEach(checkDataset)
  })
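
For context, here is a hedged sketch of what the allDatasetsCommand pipeline above produces (the dataset ids below are invented): jq prints each matching id as a JSON-quoted string, one per line, which is why the script filters out empty lines and strips the first and last character of each remaining one.

# Illustrative only: the same pipeline the script runs, with made-up output.
bq ls --project_id regency-polecat-beehive --format=prettyjson \
  | jq '.[] | .datasetReference.datasetId | select(startswith("hasura_test_"))'
# Example output:
#   "hasura_test_abc123"
#   "hasura_test_def456"
# (Passing -r to jq would emit the ids without quotes; the script trims the
# quotes in JavaScript instead.)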

scripts/cleanup-bigquery.sh Executable file

@@ -0,0 +1,13 @@
#!/usr/bin/env bash
# Clean up old BigQuery test datasets.
# If the BigQuery API test suite is ended abruptly, the datasets it created
# won't be cleaned up, and we end up with a bunch of `hasura_test_*` datasets
# that hang around forever. When too many of these accumulate, we can run this
# script to delete them in batches. See the JavaScript file for more
# information.

set -euo pipefail

# Authenticate, point at the test project, and run the cleanup script that
# lives next to this one.
gcloud auth login
gcloud config set project regency-polecat-beehive
node "$(dirname "$0")/cleanup-bigquery.js"
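
A hedged usage sketch, assuming gcloud, bq, jq, and node are on the PATH and you have access to the regency-polecat-beehive project. Because only the first 50 datasets are listed per pass, the script may need to be re-run a few times:

# Illustrative invocation from the repository root; repeat until the
# JavaScript reports "Found 0 datasets" (datasets newer than six hours are
# reported as "Not deleting ..." and left in place).
./scripts/cleanup-bigquery.sh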