Cron cleanup (#1925)

Co-authored-by: Azeem Shaikh <azeems@google.com>
Authored by Azeem Shaikh on 2022-05-18 09:48:40 -07:00; committed by GitHub
parent fc7157e38a
commit 8fdb0e767e
11 changed files with 15 additions and 159 deletions

View File

@@ -36,20 +36,17 @@ const (
 	// TransferStatusFilename file identifies if shard transfer to BigQuery is completed.
 	TransferStatusFilename string = ".transfer_complete"
 	projectID              string = "SCORECARD_PROJECT_ID"
-	resultDataBucketURL    string = "SCORECARD_DATA_BUCKET_URL"
 	requestTopicURL        string = "SCORECARD_REQUEST_TOPIC_URL"
 	requestSubscriptionURL string = "SCORECARD_REQUEST_SUBSCRIPTION_URL"
 	bigqueryDataset        string = "SCORECARD_BIGQUERY_DATASET"
-	bigqueryTable          string = "SCORECARD_BIGQUERY_TABLE"
 	completionThreshold    string = "SCORECARD_COMPLETION_THRESHOLD"
 	shardSize              string = "SCORECARD_SHARD_SIZE"
 	webhookURL             string = "SCORECARD_WEBHOOK_URL"
 	metricExporter         string = "SCORECARD_METRIC_EXPORTER"
 	ciiDataBucketURL       string = "SCORECARD_CII_DATA_BUCKET_URL"
 	blacklistedChecks      string = "SCORECARD_BLACKLISTED_CHECKS"
-	// v2 results.
-	bigqueryTableV2        string = "SCORECARD_BIGQUERY_TABLEV2"
-	resultDataBucketURLV2  string = "SCORECARD_DATA_BUCKET_URLV2"
+	bigqueryTable          string = "SCORECARD_BIGQUERY_TABLE"
+	resultDataBucketURL    string = "SCORECARD_DATA_BUCKET_URL"
 	// Raw results.
 	rawBigqueryTable       string = "RAW_SCORECARD_BIGQUERY_TABLE"
 	rawResultDataBucketURL string = "RAW_SCORECARD_DATA_BUCKET_URL"
@@ -64,7 +61,7 @@ var (
 	configYAML []byte
 )
 
-//nolint
+// nolint
 type config struct {
 	ProjectID           string `yaml:"project-id"`
 	ResultDataBucketURL string `yaml:"result-data-bucket-url"`
@@ -78,9 +75,6 @@ type config struct {
 	BlacklistedChecks      string `yaml:"blacklisted-checks"`
 	MetricExporter         string `yaml:"metric-exporter"`
 	ShardSize              int    `yaml:"shard-size"`
-	// UPGRADEv2: to remove.
-	ResultDataBucketURLV2  string `yaml:"result-data-bucket-url-v2"`
-	BigQueryTableV2        string `yaml:"bigquery-table-v2"`
 	// Raw results.
 	RawResultDataBucketURL string `yaml:"raw-result-data-bucket-url"`
 	RawBigQueryTable       string `yaml:"raw-bigquery-table"`
@@ -190,18 +184,6 @@ func GetCompletionThreshold() (float64, error) {
 	return getFloat64ConfigValue(completionThreshold, configYAML, "CompletionThreshold", "completion-threshold")
 }
 
-// GetBigQueryTableV2 returns the table name to transfer cron job results.
-// UPGRADEv2: to remove.
-func GetBigQueryTableV2() (string, error) {
-	return getStringConfigValue(bigqueryTableV2, configYAML, "BigQueryTableV2", "bigquery-table-v2")
-}
-
-// GetResultDataBucketURLV2 returns the bucketURL for storing cron job results.
-// UPGRADEv2: to remove.
-func GetResultDataBucketURLV2() (string, error) {
-	return getStringConfigValue(resultDataBucketURLV2, configYAML, "ResultDataBucketURLV2", "result-data-bucket-url-v2")
-}
-
 // GetRawBigQueryTable returns the table name to transfer cron job results.
 func GetRawBigQueryTable() (string, error) {
 	return getStringConfigValue(rawBigqueryTable, configYAML,

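Note on how these getters behave: each one checks the corresponding SCORECARD_* environment variable first and falls back to the embedded config.yaml only when the variable is unset, which is what lets the Kubernetes manifests further down override the defaults. A minimal, runnable sketch of that precedence, using a hypothetical lookup helper rather than the package's actual getStringConfigValue signature:

package main

import (
	"fmt"
	"os"

	"gopkg.in/yaml.v2"
)

// lookup mimics the env-then-YAML precedence; it is a hypothetical
// helper, not the scorecard implementation.
func lookup(envVar, yamlKey string, configYAML []byte) (string, error) {
	// The environment variable wins when set.
	if v, ok := os.LookupEnv(envVar); ok {
		return v, nil
	}
	values := map[string]string{}
	if err := yaml.Unmarshal(configYAML, &values); err != nil {
		return "", fmt.Errorf("parsing config YAML: %w", err)
	}
	if v, ok := values[yamlKey]; ok {
		return v, nil
	}
	return "", fmt.Errorf("config value %q not found", yamlKey)
}

func main() {
	configYAML := []byte("bigquery-table: scorecard-v2\n")
	table, err := lookup("SCORECARD_BIGQUERY_TABLE", "bigquery-table", configYAML)
	if err != nil {
		panic(err)
	}
	fmt.Println(table) // prints "scorecard-v2" unless the env var overrides it
}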
View File

@@ -13,11 +13,10 @@
 # limitations under the License.
 
 project-id: openssf
-result-data-bucket-url: gs://ossf-scorecard-data
 request-topic-url: gcppubsub://projects/openssf/topics/scorecard-batch-requests
 request-subscription-url: gcppubsub://projects/openssf/subscriptions/scorecard-batch-worker
 bigquery-dataset: scorecardcron
-bigquery-table: scorecard
+bigquery-table: scorecard-v2
 completion-threshold: 0.99
 shard-size: 10
 webhook-url:
@@ -26,9 +25,7 @@ cii-data-bucket-url: gs://ossf-scorecard-cii-data
 # TODO(#859): Re-add Contributors after fixing inconsistencies.
 blacklisted-checks: SAST,CI-Tests,Contributors
 metric-exporter: stackdriver
-# UPGRADEv2: to remove.
-result-data-bucket-url-v2: gs://ossf-scorecard-data2
-bigquery-table-v2: scorecard-v2
+result-data-bucket-url: gs://ossf-scorecard-data2
 # Raw results.
 raw-result-data-bucket-url: gs://ossf-scorecard-rawdata
 raw-bigquery-table: scorecard-rawdata

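The practical effect of these value changes: the former v2 locations (gs://ossf-scorecard-data2, scorecard-v2) become the defaults, so callers switch from the deleted V2 getters to the base ones. A caller-side sketch, assuming the v4-era module path github.com/ossf/scorecard/v4/cron/config and that GetResultDataBucketURL and GetBigQueryTable are the surviving accessors:

package main

import (
	"fmt"

	"github.com/ossf/scorecard/v4/cron/config"
)

func main() {
	// Before this commit, callers used config.GetResultDataBucketURLV2()
	// and config.GetBigQueryTableV2(); both getters are deleted above.
	bucket, err := config.GetResultDataBucketURL() // gs://ossf-scorecard-data2
	if err != nil {
		panic(err)
	}
	table, err := config.GetBigQueryTable() // scorecard-v2
	if err != nil {
		panic(err)
	}
	fmt.Println(bucket, table)
}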
View File

@@ -25,20 +25,17 @@ import (
 const (
 	testEnvVar              string = "TEST_ENV_VAR"
 	prodProjectID                  = "openssf"
-	prodBucket                     = "gs://ossf-scorecard-data"
+	prodBucket                     = "gs://ossf-scorecard-data2"
 	prodTopic                      = "gcppubsub://projects/openssf/topics/scorecard-batch-requests"
 	prodSubscription               = "gcppubsub://projects/openssf/subscriptions/scorecard-batch-worker"
 	prodBigQueryDataset            = "scorecardcron"
-	prodBigQueryTable              = "scorecard"
+	prodBigQueryTable              = "scorecard-v2"
 	prodCompletionThreshold        = 0.99
 	prodWebhookURL                 = ""
 	prodCIIDataBucket              = "gs://ossf-scorecard-cii-data"
 	prodBlacklistedChecks          = "SAST,CI-Tests,Contributors"
 	prodShardSize           int    = 10
 	prodMetricExporter      string = "stackdriver"
-	// UPGRADEv2: to remove.
-	prodBucketV2                   = "gs://ossf-scorecard-data2"
-	prodBigQueryTableV2            = "scorecard-v2"
 	// Raw results.
 	prodRawBucket                  = "gs://ossf-scorecard-rawdata"
 	prodRawBigQueryTable           = "scorecard-rawdata"
@@ -75,9 +72,6 @@ func TestYAMLParsing(t *testing.T) {
 				BlacklistedChecks:      prodBlacklistedChecks,
 				ShardSize:              prodShardSize,
 				MetricExporter:         prodMetricExporter,
-				// UPGRADEv2: to remove.
-				ResultDataBucketURLV2:  prodBucketV2,
-				BigQueryTableV2:        prodBigQueryTableV2,
 				RawResultDataBucketURL: prodRawBucket,
 				RawBigQueryTable:       prodRawBigQueryTable,
 			},

View File

@@ -118,11 +118,6 @@ func main() {
 		panic(err)
 	}
 
-	bucket2, err := config.GetResultDataBucketURLV2()
-	if err != nil {
-		panic(err)
-	}
-
 	rawBucket, err := config.GetRawResultDataBucketURL()
 	if err != nil {
 		panic(err)
@@ -150,17 +145,6 @@ func main() {
 		panic(fmt.Errorf("error writing to BlobStore: %w", err))
 	}
 
-	// UPGRADEv2: to remove.
-	*metadata.ShardLoc = bucket2 + "/" + data.GetBlobFilename("", t)
-	metadataJSON, err = protojson.Marshal(&metadata)
-	if err != nil {
-		panic(fmt.Errorf("error during protojson.Marshal2: %w", err))
-	}
-	err = data.WriteToBlobStore(ctx, bucket2, data.GetShardMetadataFilename(t), metadataJSON)
-	if err != nil {
-		panic(fmt.Errorf("error writing to BlobStore2: %w", err))
-	}
-
 	// Raw data.
 	*metadata.ShardLoc = rawBucket + "/" + data.GetBlobFilename("", t)
 	metadataJSON, err = protojson.Marshal(&metadata)

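What survives in the controller: shard metadata is marshaled and written once per remaining bucket, the score bucket and the raw bucket. A condensed sketch that factors the repeated block into a loop; the helper name writeShardMetadata is hypothetical, and the metadata value is assumed to be the data package's ShardMetadata proto:

package controller

import (
	"context"
	"fmt"
	"time"

	"google.golang.org/protobuf/encoding/protojson"

	"github.com/ossf/scorecard/v4/cron/data"
)

// writeShardMetadata stores shard metadata at the root of each bucket,
// mirroring the surviving write plus the raw-data write in main above.
func writeShardMetadata(ctx context.Context, metadata *data.ShardMetadata,
	t time.Time, buckets ...string,
) error {
	for _, bucket := range buckets {
		*metadata.ShardLoc = bucket + "/" + data.GetBlobFilename("", t)
		metadataJSON, err := protojson.Marshal(metadata)
		if err != nil {
			return fmt.Errorf("error during protojson.Marshal: %w", err)
		}
		if err := data.WriteToBlobStore(ctx, bucket,
			data.GetShardMetadataFilename(t), metadataJSON); err != nil {
			return fmt.Errorf("error writing to BlobStore: %w", err)
		}
	}
	return nil
}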
View File

@@ -93,7 +93,6 @@ func AsJSON(r *pkg.ScorecardResult, showDetails bool, logLevel log.Level, writer
 		Metadata: r.Metadata,
 	}
 
-
 	for _, checkResult := range r.Checks {
 		tmpResult := jsonCheckResult{
 			Name: checkResult.Name,
@@ -142,7 +141,6 @@ func AsJSON2(r *pkg.ScorecardResult, showDetails bool,
 		AggregateScore: jsonFloatScore(score),
 	}
 
-
 	for _, checkResult := range r.Checks {
 		doc, e := checkDocs.GetCheck(checkResult.Name)
 		if e != nil {

View File

@@ -33,9 +33,4 @@ spec:
                 memory: 1Gi
               requests:
                 memory: 1Gi
-            env:
-            - name: SCORECARD_BIGQUERY_TABLE
-              value: "scorecard-v2"
-            - name: SCORECARD_DATA_BUCKET_URL
-              value: "gs://ossf-scorecard-data2"
           restartPolicy: OnFailure

View File

@@ -36,6 +36,8 @@ spec:
               value: "scorecard_releasetest2"
             - name: SCORECARD_COMPLETION_THRESHOLD
               value: "0.9"
+            - name: SCORECARD_WEBHOOK_URL
+              value: "http://10.4.8.246/"
             resources:
               limits:
                 memory: 1Gi

View File

@@ -1,45 +0,0 @@
-# Copyright 2021 Security Scorecard Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-apiVersion: batch/v1
-kind: CronJob
-metadata:
-  name: scorecard-bq-transfer-releasetest
-spec:
-  # Daily at 9am and 6pm PST.
-  schedule: "0 1,16 * * *"
-  concurrencyPolicy: "Forbid"
-  jobTemplate:
-    spec:
-      template:
-        spec:
-          restartPolicy: OnFailure
-          containers:
-          - name: bq-transfer-releasetest
-            image: gcr.io/openssf/scorecard-bq-transfer:latest
-            imagePullPolicy: Always
-            env:
-            - name: SCORECARD_DATA_BUCKET_URL
-              value: "gs://ossf-scorecard-data-releasetest"
-            - name: SCORECARD_BIGQUERY_TABLE
-              value: "scorecard_releasetest"
-            - name: SCORECARD_COMPLETION_THRESHOLD
-              value: "0.9"
-            - name: SCORECARD_WEBHOOK_URL
-              value: "http://10.4.8.246/"
-            resources:
-              limits:
-                memory: 1Gi
-              requests:
-                memory: 1Gi

View File

@@ -1,36 +0,0 @@
-# Copyright 2021 Security Scorecard Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-apiVersion: batch/v1
-kind: CronJob
-metadata:
-  name: scorecard-bq-transfer
-spec:
-  # At 02:00UTC on Monday and Thursday.
-  schedule: "0 2 * * 1,4"
-  concurrencyPolicy: "Forbid"
-  jobTemplate:
-    spec:
-      template:
-        spec:
-          containers:
-          - name: bq-transfer
-            image: gcr.io/openssf/scorecard-bq-transfer:stable
-            imagePullPolicy: Always
-            resources:
-              limits:
-                memory: 1Gi
-              requests:
-                memory: 1Gi
-          restartPolicy: OnFailure

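Both deleted manifests above carried human-readable schedule comments ("Daily at 9am and 6pm PST", "At 02:00UTC on Monday and Thursday"). A quick way to sanity-check cron expressions like these is the robfig/cron parser; this is purely illustrative, scorecard itself does not depend on it:

package main

import (
	"fmt"
	"time"

	"github.com/robfig/cron/v3"
)

func main() {
	// The schedules from the two deleted CronJob manifests.
	for _, spec := range []string{"0 1,16 * * *", "0 2 * * 1,4"} {
		sched, err := cron.ParseStandard(spec)
		if err != nil {
			panic(err)
		}
		// Print the next two firings in UTC to check the comments.
		first := sched.Next(time.Now().UTC())
		second := sched.Next(first)
		fmt.Printf("%s -> %s, then %s\n", spec,
			first.Format(time.RFC1123), second.Format(time.RFC1123))
	}
}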
View File

@@ -33,16 +33,11 @@ spec:
         imagePullPolicy: Always
         env:
         - name: SCORECARD_DATA_BUCKET_URL
          value: "gs://ossf-scorecard-data-releasetest"
-        # UPGRADEv2: to remove.
-        - name: SCORECARD_DATA_BUCKET_URLV2
-          value: "gs://ossf-scorecard-data-releasetest2"
         - name: RAW_SCORECARD_DATA_BUCKET_URL
           value: "gs://ossf-scorecard-rawdata-releasetest"
         - name: SCORECARD_REQUEST_SUBSCRIPTION_URL
           value: "gcppubsub://projects/openssf/subscriptions/scorecard-batch-worker-releasetest"
-        - name: SCORECARD_BLACKLISTED_CHECKS
-          value: "SAST,CI-Tests,Contributors"
         - name: SCORECARD_METRIC_EXPORTER
           value: "printer"
         - name: GITHUB_AUTH_SERVER

View File

@@ -48,7 +48,7 @@ var ignoreRuntimeErrors = flag.Bool("ignoreRuntimeErrors", false, "if set to tru
 // nolint: gocognit
 func processRequest(ctx context.Context,
 	batchRequest *data.ScorecardBatchRequest,
-	blacklistedChecks []string, bucketURL, bucketURL2, rawBucketURL string,
+	blacklistedChecks []string, bucketURL, rawBucketURL string,
 	checkDocs docs.Doc,
 	repoClient clients.RepoClient, ossFuzzRepoClient clients.RepoClient,
 	ciiClient clients.CIIBestPracticesClient,
@@ -59,22 +59,17 @@ func processRequest(ctx context.Context,
 		fmt.Sprintf("shard-%07d", batchRequest.GetShardNum()),
 		batchRequest.GetJobTime().AsTime())
 	// Sanity check - make sure we are not re-processing an already processed request.
-	exists1, err := data.BlobExists(ctx, bucketURL, filename)
+	existsScore, err := data.BlobExists(ctx, bucketURL, filename)
 	if err != nil {
 		return fmt.Errorf("error during BlobExists: %w", err)
 	}
 
-	exists2, err := data.BlobExists(ctx, bucketURL2, filename)
+	existsRaw, err := data.BlobExists(ctx, rawBucketURL, filename)
 	if err != nil {
 		return fmt.Errorf("error during BlobExists: %w", err)
 	}
-
-	exists3, err := data.BlobExists(ctx, rawBucketURL, filename)
-	if err != nil {
-		return fmt.Errorf("error during BlobExists: %w", err)
-	}
 
-	if exists1 && exists2 && exists3 {
+	if existsScore && existsRaw {
 		logger.Info(fmt.Sprintf("Already processed shard %s. Nothing to do.", filename))
 		// We have already processed this request, nothing to do.
 		return nil
@@ -140,7 +135,7 @@ func processRequest(ctx context.Context,
 		}
 	}
 
-	if err := data.WriteToBlobStore(ctx, bucketURL2, filename, buffer2.Bytes()); err != nil {
+	if err := data.WriteToBlobStore(ctx, bucketURL, filename, buffer2.Bytes()); err != nil {
 		return fmt.Errorf("error during WriteToBlobStore2: %w", err)
 	}
@@ -197,11 +192,6 @@ func main() {
 		panic(err)
 	}
 
-	bucketURL2, err := config.GetResultDataBucketURLV2()
-	if err != nil {
-		panic(err)
-	}
-
 	rawBucketURL, err := config.GetRawResultDataBucketURL()
 	if err != nil {
 		panic(err)
@@ -252,7 +242,7 @@
 			break
 		}
 		if err := processRequest(ctx, req, blacklistedChecks,
-			bucketURL, bucketURL2, rawBucketURL, checkDocs,
+			bucketURL, rawBucketURL, checkDocs,
 			repoClient, ossFuzzRepoClient, ciiClient, vulnsClient, logger); err != nil {
 			// TODO(log): Previously Warn. Consider logging an error here.
 			logger.Info(fmt.Sprintf("error processing request: %v", err))
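The renames above (exists1/exists2/exists3 to existsScore/existsRaw) leave the worker's idempotency check with one existence probe per surviving bucket. The same check factored into a helper; shardAlreadyProcessed is a hypothetical name, while data.BlobExists is the call used in the diff:

package worker

import (
	"context"
	"fmt"

	"github.com/ossf/scorecard/v4/cron/data"
)

// shardAlreadyProcessed reports whether the shard result blob exists in
// every bucket the worker writes to (score and raw, after this cleanup).
func shardAlreadyProcessed(ctx context.Context, filename string,
	bucketURLs ...string,
) (bool, error) {
	for _, bucketURL := range bucketURLs {
		exists, err := data.BlobExists(ctx, bucketURL, filename)
		if err != nil {
			return false, fmt.Errorf("error during BlobExists: %w", err)
		}
		if !exists {
			return false, nil
		}
	}
	return true, nil
}

processRequest would then reduce its check to a single call: done, err := shardAlreadyProcessed(ctx, filename, bucketURL, rawBucketURL).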