From f979097a1fd767718f9d05489cda6251486ca74c Mon Sep 17 00:00:00 2001
From: Spencer Schrock
Date: Wed, 19 Oct 2022 14:01:42 -0700
Subject: [PATCH] =?UTF-8?q?=F0=9F=8C=B1=20cron:=20generalize=20and=20expos?=
 =?UTF-8?q?e=20worker=20(6/n)=20(#2317)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* WIP

Signed-off-by: Spencer Schrock

* Appease linter.

Signed-off-by: Spencer Schrock

* Update Makefile for worker

Signed-off-by: Spencer Schrock

* Extract already completed request sanity check.

Signed-off-by: Spencer Schrock

* Add worker test.

Signed-off-by: Spencer Schrock

* remove logger from worker interface

Signed-off-by: Spencer Schrock

* move cron data and worker out of cron/internal

Signed-off-by: Spencer Schrock

* Move config out of internal.

Signed-off-by: Spencer Schrock

* Document worker interface.

Signed-off-by: Spencer Schrock

* Fix typo which prevented metadata from going to cron job.

Signed-off-by: Spencer Schrock

* Address feedback.

Signed-off-by: Spencer Schrock

* Revert "Fix typo which prevented metadata from going to cron job."

This reverts commit 876acb062e8a562cb9ed0d2e3e2a76c2babd850b.

Will send separate PR.

Signed-off-by: Spencer Schrock

* Fix linter.

Signed-off-by: Spencer Schrock

Signed-off-by: Spencer Schrock
---
 Makefile | 22 +-
 cron/{internal => }/config/config.go | 0
 cron/{internal => }/config/config.yaml | 0
 cron/{internal => }/config/config_test.go | 0
 .../{internal => }/config/testdata/basic.yaml | 0
 .../config/testdata/missing_field.yaml | 0
 .../config/testdata/optional_maps.yaml | 0
 cron/{internal => }/data/README.md | 0
 cron/{internal => }/data/blob.go | 2 +-
 cron/{internal => }/data/blob_test.go | 0
 cron/{internal => }/data/format.go | 0
 cron/{internal => }/data/format_test.go | 0
 cron/{internal => }/data/iterator.go | 0
 cron/{internal => }/data/iterator_test.go | 0
 cron/data/metadata.pb.go | 185 +++++++++++++++
 cron/{internal => }/data/metadata.proto | 2 +-
 cron/{internal => }/data/request.pb.go | 124 +++++-----
 cron/{internal => }/data/request.proto | 2 +-
 cron/{internal => }/data/testdata/basic.csv | 0
 .../data/testdata/blob_test/key1.txt | 0
 .../data/testdata/blob_test/key2.txt | 0
 .../data/testdata/blob_test/key3.txt | 0
 .../data/testdata/blob_test/subdir/key4.txt | 0
 cron/{internal => }/data/testdata/comment.csv | 0
 .../data/testdata/empty_row.csv | 0
 .../data/testdata/extra_column.csv | 0
 .../data/testdata/failing_urls.csv | 0
 .../data/testdata/ignore_header.csv | 0
 .../data/testdata/no_header.csv | 0
 .../data/testdata/only_header.csv | 0
 .../data/testdata/split_file.csv | 0
 .../data/testdata/split_file_empty.csv | 0
 cron/{internal => }/data/writer.go | 0
 cron/{internal => }/data/writer_test.go | 0
 cron/internal/bq/main.go | 4 +-
 cron/internal/cii/main.go | 4 +-
 cron/internal/controller/main.go | 4 +-
 cron/internal/data/add/main.go | 2 +-
 cron/internal/data/add/main_test.go | 2 +-
 cron/internal/data/metadata.pb.go | 186 ---------------
 cron/internal/data/update/dependency.go | 2 +-
 cron/internal/data/update/main.go | 2 +-
 cron/internal/data/validate/main.go | 2 +-
 cron/internal/monitoring/exporter.go | 2 +-
 cron/internal/pubsub/publisher.go | 2 +-
 cron/internal/pubsub/publisher_test.go | 2 +-
 cron/internal/pubsub/subscriber.go | 2 +-
 cron/internal/pubsub/subscriber_gcs.go | 2 +-
 cron/internal/pubsub/subscriber_gocloud.go | 2 +-
 .../pubsub/subscriber_gocloud_test.go | 2 +-
 cron/internal/shuffle/main.go | 2 +-
 cron/internal/webhook/main.go | 2 +-
 cron/internal/worker/main.go | 223 +++++++-----------
cron/k8s/README.md | 2 +- cron/worker/worker.go | 160 +++++++++++++ cron/worker/worker_test.go | 57 +++++ 56 files changed, 586 insertions(+), 419 deletions(-) rename cron/{internal => }/config/config.go (100%) rename cron/{internal => }/config/config.yaml (100%) rename cron/{internal => }/config/config_test.go (100%) rename cron/{internal => }/config/testdata/basic.yaml (100%) rename cron/{internal => }/config/testdata/missing_field.yaml (100%) rename cron/{internal => }/config/testdata/optional_maps.yaml (100%) rename cron/{internal => }/data/README.md (100%) rename cron/{internal => }/data/blob.go (99%) rename cron/{internal => }/data/blob_test.go (100%) rename cron/{internal => }/data/format.go (100%) rename cron/{internal => }/data/format_test.go (100%) rename cron/{internal => }/data/iterator.go (100%) rename cron/{internal => }/data/iterator_test.go (100%) create mode 100644 cron/data/metadata.pb.go rename cron/{internal => }/data/metadata.proto (92%) rename cron/{internal => }/data/request.pb.go (52%) rename cron/{internal => }/data/request.proto (93%) rename cron/{internal => }/data/testdata/basic.csv (100%) rename cron/{internal => }/data/testdata/blob_test/key1.txt (100%) rename cron/{internal => }/data/testdata/blob_test/key2.txt (100%) rename cron/{internal => }/data/testdata/blob_test/key3.txt (100%) rename cron/{internal => }/data/testdata/blob_test/subdir/key4.txt (100%) rename cron/{internal => }/data/testdata/comment.csv (100%) rename cron/{internal => }/data/testdata/empty_row.csv (100%) rename cron/{internal => }/data/testdata/extra_column.csv (100%) rename cron/{internal => }/data/testdata/failing_urls.csv (100%) rename cron/{internal => }/data/testdata/ignore_header.csv (100%) rename cron/{internal => }/data/testdata/no_header.csv (100%) rename cron/{internal => }/data/testdata/only_header.csv (100%) rename cron/{internal => }/data/testdata/split_file.csv (100%) rename cron/{internal => }/data/testdata/split_file_empty.csv (100%) rename cron/{internal => }/data/writer.go (100%) rename cron/{internal => }/data/writer_test.go (100%) delete mode 100644 cron/internal/data/metadata.pb.go create mode 100644 cron/worker/worker.go create mode 100644 cron/worker/worker_test.go diff --git a/Makefile b/Makefile index 9682d266..a36aa11b 100644 --- a/Makefile +++ b/Makefile @@ -129,11 +129,11 @@ build: ## Build all binaries and images in the repo. build: $(build-targets) build-proto: ## Compiles and generates all required protobufs -build-proto: cron/internal/data/request.pb.go cron/internal/data/metadata.pb.go -cron/internal/data/request.pb.go: cron/internal/data/request.proto | $(PROTOC) $(PROTOC_GEN_GO) - $(PROTOC) --plugin=$(PROTOC_GEN_GO) --go_out=. --go_opt=paths=source_relative cron/internal/data/request.proto -cron/internal/data/metadata.pb.go: cron/internal/data/metadata.proto | $(PROTOC) $(PROTOC_GEN_GO) - $(PROTOC) --plugin=$(PROTOC_GEN_GO) --go_out=. --go_opt=paths=source_relative cron/internal/data/metadata.proto +build-proto: cron/data/request.pb.go cron/data/metadata.pb.go +cron/data/request.pb.go: cron/data/request.proto | $(PROTOC) $(PROTOC_GEN_GO) + $(PROTOC) --plugin=$(PROTOC_GEN_GO) --go_out=. --go_opt=paths=source_relative cron/data/request.proto +cron/data/metadata.pb.go: cron/data/metadata.proto | $(PROTOC) $(PROTOC_GEN_GO) + $(PROTOC) --plugin=$(PROTOC_GEN_GO) --go_out=. --go_opt=paths=source_relative cron/data/metadata.proto generate-mocks: ## Compiles and generates all mocks using mockgen. 
generate-mocks: clients/mockclients/repo_client.go \ @@ -219,14 +219,14 @@ cron/internal/cii/cii-worker.docker: cron/internal/cii/Dockerfile $(CRON_CII_DEP --tag $(IMAGE_NAME)-cii-worker && \ touch cron/internal/cii/cii-worker.docker -CRON_SHUFFLER_DEPS = $(shell find cron/internal/data/ cron/internal/shuffle/ -iname "*.go") +CRON_SHUFFLER_DEPS = $(shell find cron/data/ cron/internal/shuffle/ -iname "*.go") build-shuffler: ## Build cron shuffle script build-shuffler: cron/internal/shuffle/shuffle cron/internal/shuffle/shuffle: $(CRON_SHUFFLER_DEPS) # Run go build on the cron shuffle script cd cron/internal/shuffle && CGO_ENABLED=0 go build -trimpath -a -ldflags '$(LDFLAGS)' -o shuffle -CRON_TRANSFER_DEPS = $(shell find cron/internal/data/ cron/internal/config/ cron/internal/bq/ -iname "*.go") +CRON_TRANSFER_DEPS = $(shell find cron/data/ cron/config/ cron/internal/bq/ -iname "*.go") build-bq-transfer: ## Build cron BQ transfer worker build-bq-transfer: cron/internal/bq/data-transfer cron/internal/bq/data-transfer: $(CRON_TRANSFER_DEPS) @@ -255,7 +255,7 @@ clients/githubrepo/roundtripper/tokens/server/github-auth-server.docker: \ --tag ${IMAGE_NAME}-github-server && \ touch clients/githubrepo/roundtripper/tokens/server/github-auth-server.docker -CRON_WEBHOOK_DEPS = $(shell find cron/internal/webhook/ cron/internal/data/ -iname "*.go") +CRON_WEBHOOK_DEPS = $(shell find cron/internal/webhook/ cron/data/ -iname "*.go") build-webhook: ## Build cron webhook server build-webhook: cron/internal/webhook/webhook cron/internal/webhook/webhook: $(CRON_WEBHOOK_DEPS) @@ -271,19 +271,19 @@ cron/internal/webhook/webhook.docker: cron/internal/webhook/Dockerfile $(CRON_WE build-add-script: ## Runs go build on the add script build-add-script: cron/internal/data/add/add -cron/internal/data/add/add: cron/internal/data/add/*.go cron/internal/data/*.go cron/internal/data/projects.csv +cron/internal/data/add/add: cron/internal/data/add/*.go cron/data/*.go cron/internal/data/projects.csv # Run go build on the add script cd cron/internal/data/add && CGO_ENABLED=0 go build -trimpath -a -ldflags '$(LDFLAGS)' -o add build-validate-script: ## Runs go build on the validate script build-validate-script: cron/internal/data/validate/validate -cron/internal/data/validate/validate: cron/internal/data/validate/*.go cron/internal/data/*.go cron/internal/data/projects.csv +cron/internal/data/validate/validate: cron/internal/data/validate/*.go cron/data/*.go cron/internal/data/projects.csv # Run go build on the validate script cd cron/internal/data/validate && CGO_ENABLED=0 go build -trimpath -a -ldflags '$(LDFLAGS)' -o validate build-update-script: ## Runs go build on the update script build-update-script: cron/internal/data/update/projects-update -cron/internal/data/update/projects-update: cron/internal/data/update/*.go cron/internal/data/*.go +cron/internal/data/update/projects-update: cron/internal/data/update/*.go cron/data/*.go # Run go build on the update script cd cron/internal/data/update && CGO_ENABLED=0 go build -trimpath -a -tags netgo -ldflags '$(LDFLAGS)' -o projects-update diff --git a/cron/internal/config/config.go b/cron/config/config.go similarity index 100% rename from cron/internal/config/config.go rename to cron/config/config.go diff --git a/cron/internal/config/config.yaml b/cron/config/config.yaml similarity index 100% rename from cron/internal/config/config.yaml rename to cron/config/config.yaml diff --git a/cron/internal/config/config_test.go b/cron/config/config_test.go similarity index 100% rename from 
cron/internal/config/config_test.go rename to cron/config/config_test.go diff --git a/cron/internal/config/testdata/basic.yaml b/cron/config/testdata/basic.yaml similarity index 100% rename from cron/internal/config/testdata/basic.yaml rename to cron/config/testdata/basic.yaml diff --git a/cron/internal/config/testdata/missing_field.yaml b/cron/config/testdata/missing_field.yaml similarity index 100% rename from cron/internal/config/testdata/missing_field.yaml rename to cron/config/testdata/missing_field.yaml diff --git a/cron/internal/config/testdata/optional_maps.yaml b/cron/config/testdata/optional_maps.yaml similarity index 100% rename from cron/internal/config/testdata/optional_maps.yaml rename to cron/config/testdata/optional_maps.yaml diff --git a/cron/internal/data/README.md b/cron/data/README.md similarity index 100% rename from cron/internal/data/README.md rename to cron/data/README.md diff --git a/cron/internal/data/blob.go b/cron/data/blob.go similarity index 99% rename from cron/internal/data/blob.go rename to cron/data/blob.go index b4d082ec..94097f57 100644 --- a/cron/internal/data/blob.go +++ b/cron/data/blob.go @@ -28,7 +28,7 @@ import ( // Needed to link in GCP drivers. _ "gocloud.dev/blob/gcsblob" - "github.com/ossf/scorecard/v4/cron/internal/config" + "github.com/ossf/scorecard/v4/cron/config" ) const ( diff --git a/cron/internal/data/blob_test.go b/cron/data/blob_test.go similarity index 100% rename from cron/internal/data/blob_test.go rename to cron/data/blob_test.go diff --git a/cron/internal/data/format.go b/cron/data/format.go similarity index 100% rename from cron/internal/data/format.go rename to cron/data/format.go diff --git a/cron/internal/data/format_test.go b/cron/data/format_test.go similarity index 100% rename from cron/internal/data/format_test.go rename to cron/data/format_test.go diff --git a/cron/internal/data/iterator.go b/cron/data/iterator.go similarity index 100% rename from cron/internal/data/iterator.go rename to cron/data/iterator.go diff --git a/cron/internal/data/iterator_test.go b/cron/data/iterator_test.go similarity index 100% rename from cron/internal/data/iterator_test.go rename to cron/data/iterator_test.go diff --git a/cron/data/metadata.pb.go b/cron/data/metadata.pb.go new file mode 100644 index 00000000..938deaae --- /dev/null +++ b/cron/data/metadata.pb.go @@ -0,0 +1,185 @@ +// Copyright 2021 Security Scorecard Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.28.1 +// protoc v3.21.6 +// source: cron/data/metadata.proto + +package data + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. 
+ _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type ShardMetadata struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + ShardLoc *string `protobuf:"bytes,1,opt,name=shard_loc,json=shardLoc,proto3,oneof" json:"shard_loc,omitempty"` + NumShard *int32 `protobuf:"varint,2,opt,name=num_shard,json=numShard,proto3,oneof" json:"num_shard,omitempty"` + CommitSha *string `protobuf:"bytes,3,opt,name=commit_sha,json=commitSha,proto3,oneof" json:"commit_sha,omitempty"` +} + +func (x *ShardMetadata) Reset() { + *x = ShardMetadata{} + if protoimpl.UnsafeEnabled { + mi := &file_cron_data_metadata_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *ShardMetadata) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ShardMetadata) ProtoMessage() {} + +func (x *ShardMetadata) ProtoReflect() protoreflect.Message { + mi := &file_cron_data_metadata_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ShardMetadata.ProtoReflect.Descriptor instead. +func (*ShardMetadata) Descriptor() ([]byte, []int) { + return file_cron_data_metadata_proto_rawDescGZIP(), []int{0} +} + +func (x *ShardMetadata) GetShardLoc() string { + if x != nil && x.ShardLoc != nil { + return *x.ShardLoc + } + return "" +} + +func (x *ShardMetadata) GetNumShard() int32 { + if x != nil && x.NumShard != nil { + return *x.NumShard + } + return 0 +} + +func (x *ShardMetadata) GetCommitSha() string { + if x != nil && x.CommitSha != nil { + return *x.CommitSha + } + return "" +} + +var File_cron_data_metadata_proto protoreflect.FileDescriptor + +var file_cron_data_metadata_proto_rawDesc = []byte{ + 0x0a, 0x18, 0x63, 0x72, 0x6f, 0x6e, 0x2f, 0x64, 0x61, 0x74, 0x61, 0x2f, 0x6d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x21, 0x6f, 0x73, 0x73, 0x66, + 0x2e, 0x73, 0x63, 0x6f, 0x72, 0x65, 0x63, 0x61, 0x72, 0x64, 0x2e, 0x63, 0x72, 0x6f, 0x6e, 0x2e, + 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x22, 0xa2, 0x01, + 0x0a, 0x0d, 0x53, 0x68, 0x61, 0x72, 0x64, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, + 0x20, 0x0a, 0x09, 0x73, 0x68, 0x61, 0x72, 0x64, 0x5f, 0x6c, 0x6f, 0x63, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x48, 0x00, 0x52, 0x08, 0x73, 0x68, 0x61, 0x72, 0x64, 0x4c, 0x6f, 0x63, 0x88, 0x01, + 0x01, 0x12, 0x20, 0x0a, 0x09, 0x6e, 0x75, 0x6d, 0x5f, 0x73, 0x68, 0x61, 0x72, 0x64, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x05, 0x48, 0x01, 0x52, 0x08, 0x6e, 0x75, 0x6d, 0x53, 0x68, 0x61, 0x72, 0x64, + 0x88, 0x01, 0x01, 0x12, 0x22, 0x0a, 0x0a, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x5f, 0x73, 0x68, + 0x61, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x48, 0x02, 0x52, 0x09, 0x63, 0x6f, 0x6d, 0x6d, 0x69, + 0x74, 0x53, 0x68, 0x61, 0x88, 0x01, 0x01, 0x42, 0x0c, 0x0a, 0x0a, 0x5f, 0x73, 0x68, 0x61, 0x72, + 0x64, 0x5f, 0x6c, 0x6f, 0x63, 0x42, 0x0c, 0x0a, 0x0a, 0x5f, 0x6e, 0x75, 0x6d, 0x5f, 0x73, 0x68, + 0x61, 0x72, 0x64, 0x42, 0x0d, 0x0a, 0x0b, 0x5f, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x5f, 0x73, + 0x68, 0x61, 0x42, 0x25, 0x5a, 0x23, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, + 0x2f, 0x6f, 0x73, 0x73, 0x66, 0x2f, 0x73, 0x63, 0x6f, 0x72, 0x65, 0x63, 0x61, 0x72, 0x64, 0x2f, + 0x63, 0x72, 0x6f, 0x6e, 0x2f, 0x64, 0x61, 0x74, 0x61, 0x62, 
0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x33, +} + +var ( + file_cron_data_metadata_proto_rawDescOnce sync.Once + file_cron_data_metadata_proto_rawDescData = file_cron_data_metadata_proto_rawDesc +) + +func file_cron_data_metadata_proto_rawDescGZIP() []byte { + file_cron_data_metadata_proto_rawDescOnce.Do(func() { + file_cron_data_metadata_proto_rawDescData = protoimpl.X.CompressGZIP(file_cron_data_metadata_proto_rawDescData) + }) + return file_cron_data_metadata_proto_rawDescData +} + +var file_cron_data_metadata_proto_msgTypes = make([]protoimpl.MessageInfo, 1) +var file_cron_data_metadata_proto_goTypes = []interface{}{ + (*ShardMetadata)(nil), // 0: ossf.scorecard.cron.internal.data.ShardMetadata +} +var file_cron_data_metadata_proto_depIdxs = []int32{ + 0, // [0:0] is the sub-list for method output_type + 0, // [0:0] is the sub-list for method input_type + 0, // [0:0] is the sub-list for extension type_name + 0, // [0:0] is the sub-list for extension extendee + 0, // [0:0] is the sub-list for field type_name +} + +func init() { file_cron_data_metadata_proto_init() } +func file_cron_data_metadata_proto_init() { + if File_cron_data_metadata_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_cron_data_metadata_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*ShardMetadata); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + file_cron_data_metadata_proto_msgTypes[0].OneofWrappers = []interface{}{} + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_cron_data_metadata_proto_rawDesc, + NumEnums: 0, + NumMessages: 1, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_cron_data_metadata_proto_goTypes, + DependencyIndexes: file_cron_data_metadata_proto_depIdxs, + MessageInfos: file_cron_data_metadata_proto_msgTypes, + }.Build() + File_cron_data_metadata_proto = out.File + file_cron_data_metadata_proto_rawDesc = nil + file_cron_data_metadata_proto_goTypes = nil + file_cron_data_metadata_proto_depIdxs = nil +} diff --git a/cron/internal/data/metadata.proto b/cron/data/metadata.proto similarity index 92% rename from cron/internal/data/metadata.proto rename to cron/data/metadata.proto index a215278d..38caf1e1 100644 --- a/cron/internal/data/metadata.proto +++ b/cron/data/metadata.proto @@ -16,7 +16,7 @@ syntax = "proto3"; package ossf.scorecard.cron.internal.data; -option go_package = "github.com/ossf/scorecard/cron/internal/data"; +option go_package = "github.com/ossf/scorecard/cron/data"; message ShardMetadata { optional string shard_loc = 1; diff --git a/cron/internal/data/request.pb.go b/cron/data/request.pb.go similarity index 52% rename from cron/internal/data/request.pb.go rename to cron/data/request.pb.go index 9d141fc2..175572a8 100644 --- a/cron/internal/data/request.pb.go +++ b/cron/data/request.pb.go @@ -16,7 +16,7 @@ // versions: // protoc-gen-go v1.28.1 // protoc v3.21.6 -// source: cron/internal/data/request.proto +// source: cron/data/request.proto package data @@ -48,7 +48,7 @@ type Repo struct { func (x *Repo) Reset() { *x = Repo{} if protoimpl.UnsafeEnabled { - mi := &file_cron_internal_data_request_proto_msgTypes[0] + mi := &file_cron_data_request_proto_msgTypes[0] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -61,7 +61,7 @@ func (x *Repo) String() string { func (*Repo) ProtoMessage() {} func (x 
*Repo) ProtoReflect() protoreflect.Message { - mi := &file_cron_internal_data_request_proto_msgTypes[0] + mi := &file_cron_data_request_proto_msgTypes[0] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -74,7 +74,7 @@ func (x *Repo) ProtoReflect() protoreflect.Message { // Deprecated: Use Repo.ProtoReflect.Descriptor instead. func (*Repo) Descriptor() ([]byte, []int) { - return file_cron_internal_data_request_proto_rawDescGZIP(), []int{0} + return file_cron_data_request_proto_rawDescGZIP(), []int{0} } func (x *Repo) GetUrl() string { @@ -111,7 +111,7 @@ type ScorecardBatchRequest struct { func (x *ScorecardBatchRequest) Reset() { *x = ScorecardBatchRequest{} if protoimpl.UnsafeEnabled { - mi := &file_cron_internal_data_request_proto_msgTypes[1] + mi := &file_cron_data_request_proto_msgTypes[1] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -124,7 +124,7 @@ func (x *ScorecardBatchRequest) String() string { func (*ScorecardBatchRequest) ProtoMessage() {} func (x *ScorecardBatchRequest) ProtoReflect() protoreflect.Message { - mi := &file_cron_internal_data_request_proto_msgTypes[1] + mi := &file_cron_data_request_proto_msgTypes[1] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -137,7 +137,7 @@ func (x *ScorecardBatchRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use ScorecardBatchRequest.ProtoReflect.Descriptor instead. func (*ScorecardBatchRequest) Descriptor() ([]byte, []int) { - return file_cron_internal_data_request_proto_rawDescGZIP(), []int{1} + return file_cron_data_request_proto_rawDescGZIP(), []int{1} } func (x *ScorecardBatchRequest) GetRepos() []*Repo { @@ -161,61 +161,59 @@ func (x *ScorecardBatchRequest) GetJobTime() *timestamppb.Timestamp { return nil } -var File_cron_internal_data_request_proto protoreflect.FileDescriptor +var File_cron_data_request_proto protoreflect.FileDescriptor -var file_cron_internal_data_request_proto_rawDesc = []byte{ - 0x0a, 0x20, 0x63, 0x72, 0x6f, 0x6e, 0x2f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x2f, - 0x64, 0x61, 0x74, 0x61, 0x2f, 0x72, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x2e, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x12, 0x21, 0x6f, 0x73, 0x73, 0x66, 0x2e, 0x73, 0x63, 0x6f, 0x72, 0x65, 0x63, 0x61, +var file_cron_data_request_proto_rawDesc = []byte{ + 0x0a, 0x17, 0x63, 0x72, 0x6f, 0x6e, 0x2f, 0x64, 0x61, 0x74, 0x61, 0x2f, 0x72, 0x65, 0x71, 0x75, + 0x65, 0x73, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x21, 0x6f, 0x73, 0x73, 0x66, 0x2e, + 0x73, 0x63, 0x6f, 0x72, 0x65, 0x63, 0x61, 0x72, 0x64, 0x2e, 0x63, 0x72, 0x6f, 0x6e, 0x2e, 0x69, + 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x1a, 0x1f, 0x67, 0x6f, + 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, 0x74, 0x69, + 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x69, 0x0a, + 0x04, 0x52, 0x65, 0x70, 0x6f, 0x12, 0x15, 0x0a, 0x03, 0x75, 0x72, 0x6c, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x48, 0x00, 0x52, 0x03, 0x75, 0x72, 0x6c, 0x88, 0x01, 0x01, 0x12, 0x1b, 0x0a, 0x06, + 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x48, 0x01, 0x52, 0x06, + 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x88, 0x01, 0x01, 0x12, 0x1a, 0x0a, 0x08, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x08, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 
0x74, 0x61, 0x42, 0x06, 0x0a, 0x04, 0x5f, 0x75, 0x72, 0x6c, 0x42, 0x09, 0x0a, + 0x07, 0x5f, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x22, 0xd5, 0x01, 0x0a, 0x15, 0x53, 0x63, 0x6f, + 0x72, 0x65, 0x63, 0x61, 0x72, 0x64, 0x42, 0x61, 0x74, 0x63, 0x68, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x12, 0x3d, 0x0a, 0x05, 0x72, 0x65, 0x70, 0x6f, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x27, 0x2e, 0x6f, 0x73, 0x73, 0x66, 0x2e, 0x73, 0x63, 0x6f, 0x72, 0x65, 0x63, 0x61, 0x72, 0x64, 0x2e, 0x63, 0x72, 0x6f, 0x6e, 0x2e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, - 0x2e, 0x64, 0x61, 0x74, 0x61, 0x1a, 0x1f, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, - 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x69, 0x0a, 0x04, 0x52, 0x65, 0x70, 0x6f, 0x12, 0x15, - 0x0a, 0x03, 0x75, 0x72, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x03, 0x75, - 0x72, 0x6c, 0x88, 0x01, 0x01, 0x12, 0x1b, 0x0a, 0x06, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x18, - 0x03, 0x20, 0x01, 0x28, 0x09, 0x48, 0x01, 0x52, 0x06, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x88, - 0x01, 0x01, 0x12, 0x1a, 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x02, - 0x20, 0x03, 0x28, 0x09, 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x42, 0x06, - 0x0a, 0x04, 0x5f, 0x75, 0x72, 0x6c, 0x42, 0x09, 0x0a, 0x07, 0x5f, 0x63, 0x6f, 0x6d, 0x6d, 0x69, - 0x74, 0x22, 0xd5, 0x01, 0x0a, 0x15, 0x53, 0x63, 0x6f, 0x72, 0x65, 0x63, 0x61, 0x72, 0x64, 0x42, - 0x61, 0x74, 0x63, 0x68, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x3d, 0x0a, 0x05, 0x72, - 0x65, 0x70, 0x6f, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x6f, 0x73, 0x73, - 0x66, 0x2e, 0x73, 0x63, 0x6f, 0x72, 0x65, 0x63, 0x61, 0x72, 0x64, 0x2e, 0x63, 0x72, 0x6f, 0x6e, - 0x2e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x52, - 0x65, 0x70, 0x6f, 0x52, 0x05, 0x72, 0x65, 0x70, 0x6f, 0x73, 0x12, 0x20, 0x0a, 0x09, 0x73, 0x68, - 0x61, 0x72, 0x64, 0x5f, 0x6e, 0x75, 0x6d, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x48, 0x00, 0x52, - 0x08, 0x73, 0x68, 0x61, 0x72, 0x64, 0x4e, 0x75, 0x6d, 0x88, 0x01, 0x01, 0x12, 0x3a, 0x0a, 0x08, - 0x6a, 0x6f, 0x62, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, - 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, - 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x48, 0x01, 0x52, 0x07, 0x6a, 0x6f, - 0x62, 0x54, 0x69, 0x6d, 0x65, 0x88, 0x01, 0x01, 0x42, 0x0c, 0x0a, 0x0a, 0x5f, 0x73, 0x68, 0x61, - 0x72, 0x64, 0x5f, 0x6e, 0x75, 0x6d, 0x42, 0x0b, 0x0a, 0x09, 0x5f, 0x6a, 0x6f, 0x62, 0x5f, 0x74, - 0x69, 0x6d, 0x65, 0x4a, 0x04, 0x08, 0x01, 0x10, 0x02, 0x42, 0x2e, 0x5a, 0x2c, 0x67, 0x69, 0x74, - 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6f, 0x73, 0x73, 0x66, 0x2f, 0x73, 0x63, 0x6f, - 0x72, 0x65, 0x63, 0x61, 0x72, 0x64, 0x2f, 0x63, 0x72, 0x6f, 0x6e, 0x2f, 0x69, 0x6e, 0x74, 0x65, - 0x72, 0x6e, 0x61, 0x6c, 0x2f, 0x64, 0x61, 0x74, 0x61, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x33, + 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x52, 0x65, 0x70, 0x6f, 0x52, 0x05, 0x72, 0x65, 0x70, 0x6f, + 0x73, 0x12, 0x20, 0x0a, 0x09, 0x73, 0x68, 0x61, 0x72, 0x64, 0x5f, 0x6e, 0x75, 0x6d, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x05, 0x48, 0x00, 0x52, 0x08, 0x73, 0x68, 0x61, 0x72, 0x64, 0x4e, 0x75, 0x6d, + 0x88, 0x01, 0x01, 0x12, 0x3a, 0x0a, 0x08, 0x6a, 0x6f, 0x62, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, + 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 
0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, + 0x70, 0x48, 0x01, 0x52, 0x07, 0x6a, 0x6f, 0x62, 0x54, 0x69, 0x6d, 0x65, 0x88, 0x01, 0x01, 0x42, + 0x0c, 0x0a, 0x0a, 0x5f, 0x73, 0x68, 0x61, 0x72, 0x64, 0x5f, 0x6e, 0x75, 0x6d, 0x42, 0x0b, 0x0a, + 0x09, 0x5f, 0x6a, 0x6f, 0x62, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x4a, 0x04, 0x08, 0x01, 0x10, 0x02, + 0x42, 0x25, 0x5a, 0x23, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6f, + 0x73, 0x73, 0x66, 0x2f, 0x73, 0x63, 0x6f, 0x72, 0x65, 0x63, 0x61, 0x72, 0x64, 0x2f, 0x63, 0x72, + 0x6f, 0x6e, 0x2f, 0x64, 0x61, 0x74, 0x61, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( - file_cron_internal_data_request_proto_rawDescOnce sync.Once - file_cron_internal_data_request_proto_rawDescData = file_cron_internal_data_request_proto_rawDesc + file_cron_data_request_proto_rawDescOnce sync.Once + file_cron_data_request_proto_rawDescData = file_cron_data_request_proto_rawDesc ) -func file_cron_internal_data_request_proto_rawDescGZIP() []byte { - file_cron_internal_data_request_proto_rawDescOnce.Do(func() { - file_cron_internal_data_request_proto_rawDescData = protoimpl.X.CompressGZIP(file_cron_internal_data_request_proto_rawDescData) +func file_cron_data_request_proto_rawDescGZIP() []byte { + file_cron_data_request_proto_rawDescOnce.Do(func() { + file_cron_data_request_proto_rawDescData = protoimpl.X.CompressGZIP(file_cron_data_request_proto_rawDescData) }) - return file_cron_internal_data_request_proto_rawDescData + return file_cron_data_request_proto_rawDescData } -var file_cron_internal_data_request_proto_msgTypes = make([]protoimpl.MessageInfo, 2) -var file_cron_internal_data_request_proto_goTypes = []interface{}{ +var file_cron_data_request_proto_msgTypes = make([]protoimpl.MessageInfo, 2) +var file_cron_data_request_proto_goTypes = []interface{}{ (*Repo)(nil), // 0: ossf.scorecard.cron.internal.data.Repo (*ScorecardBatchRequest)(nil), // 1: ossf.scorecard.cron.internal.data.ScorecardBatchRequest (*timestamppb.Timestamp)(nil), // 2: google.protobuf.Timestamp } -var file_cron_internal_data_request_proto_depIdxs = []int32{ +var file_cron_data_request_proto_depIdxs = []int32{ 0, // 0: ossf.scorecard.cron.internal.data.ScorecardBatchRequest.repos:type_name -> ossf.scorecard.cron.internal.data.Repo 2, // 1: ossf.scorecard.cron.internal.data.ScorecardBatchRequest.job_time:type_name -> google.protobuf.Timestamp 2, // [2:2] is the sub-list for method output_type @@ -225,13 +223,13 @@ var file_cron_internal_data_request_proto_depIdxs = []int32{ 0, // [0:2] is the sub-list for field type_name } -func init() { file_cron_internal_data_request_proto_init() } -func file_cron_internal_data_request_proto_init() { - if File_cron_internal_data_request_proto != nil { +func init() { file_cron_data_request_proto_init() } +func file_cron_data_request_proto_init() { + if File_cron_data_request_proto != nil { return } if !protoimpl.UnsafeEnabled { - file_cron_internal_data_request_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + file_cron_data_request_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*Repo); i { case 0: return &v.state @@ -243,7 +241,7 @@ func file_cron_internal_data_request_proto_init() { return nil } } - file_cron_internal_data_request_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + file_cron_data_request_proto_msgTypes[1].Exporter = func(v interface{}, i 
int) interface{} { switch v := v.(*ScorecardBatchRequest); i { case 0: return &v.state @@ -256,24 +254,24 @@ func file_cron_internal_data_request_proto_init() { } } } - file_cron_internal_data_request_proto_msgTypes[0].OneofWrappers = []interface{}{} - file_cron_internal_data_request_proto_msgTypes[1].OneofWrappers = []interface{}{} + file_cron_data_request_proto_msgTypes[0].OneofWrappers = []interface{}{} + file_cron_data_request_proto_msgTypes[1].OneofWrappers = []interface{}{} type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ GoPackagePath: reflect.TypeOf(x{}).PkgPath(), - RawDescriptor: file_cron_internal_data_request_proto_rawDesc, + RawDescriptor: file_cron_data_request_proto_rawDesc, NumEnums: 0, NumMessages: 2, NumExtensions: 0, NumServices: 0, }, - GoTypes: file_cron_internal_data_request_proto_goTypes, - DependencyIndexes: file_cron_internal_data_request_proto_depIdxs, - MessageInfos: file_cron_internal_data_request_proto_msgTypes, + GoTypes: file_cron_data_request_proto_goTypes, + DependencyIndexes: file_cron_data_request_proto_depIdxs, + MessageInfos: file_cron_data_request_proto_msgTypes, }.Build() - File_cron_internal_data_request_proto = out.File - file_cron_internal_data_request_proto_rawDesc = nil - file_cron_internal_data_request_proto_goTypes = nil - file_cron_internal_data_request_proto_depIdxs = nil + File_cron_data_request_proto = out.File + file_cron_data_request_proto_rawDesc = nil + file_cron_data_request_proto_goTypes = nil + file_cron_data_request_proto_depIdxs = nil } diff --git a/cron/internal/data/request.proto b/cron/data/request.proto similarity index 93% rename from cron/internal/data/request.proto rename to cron/data/request.proto index 8c6a7c75..ac3718ec 100644 --- a/cron/internal/data/request.proto +++ b/cron/data/request.proto @@ -18,7 +18,7 @@ package ossf.scorecard.cron.internal.data; import "google/protobuf/timestamp.proto"; -option go_package = "github.com/ossf/scorecard/cron/internal/data"; +option go_package = "github.com/ossf/scorecard/cron/data"; message Repo { optional string url = 1; diff --git a/cron/internal/data/testdata/basic.csv b/cron/data/testdata/basic.csv similarity index 100% rename from cron/internal/data/testdata/basic.csv rename to cron/data/testdata/basic.csv diff --git a/cron/internal/data/testdata/blob_test/key1.txt b/cron/data/testdata/blob_test/key1.txt similarity index 100% rename from cron/internal/data/testdata/blob_test/key1.txt rename to cron/data/testdata/blob_test/key1.txt diff --git a/cron/internal/data/testdata/blob_test/key2.txt b/cron/data/testdata/blob_test/key2.txt similarity index 100% rename from cron/internal/data/testdata/blob_test/key2.txt rename to cron/data/testdata/blob_test/key2.txt diff --git a/cron/internal/data/testdata/blob_test/key3.txt b/cron/data/testdata/blob_test/key3.txt similarity index 100% rename from cron/internal/data/testdata/blob_test/key3.txt rename to cron/data/testdata/blob_test/key3.txt diff --git a/cron/internal/data/testdata/blob_test/subdir/key4.txt b/cron/data/testdata/blob_test/subdir/key4.txt similarity index 100% rename from cron/internal/data/testdata/blob_test/subdir/key4.txt rename to cron/data/testdata/blob_test/subdir/key4.txt diff --git a/cron/internal/data/testdata/comment.csv b/cron/data/testdata/comment.csv similarity index 100% rename from cron/internal/data/testdata/comment.csv rename to cron/data/testdata/comment.csv diff --git a/cron/internal/data/testdata/empty_row.csv b/cron/data/testdata/empty_row.csv similarity index 100% rename from 
cron/internal/data/testdata/empty_row.csv rename to cron/data/testdata/empty_row.csv diff --git a/cron/internal/data/testdata/extra_column.csv b/cron/data/testdata/extra_column.csv similarity index 100% rename from cron/internal/data/testdata/extra_column.csv rename to cron/data/testdata/extra_column.csv diff --git a/cron/internal/data/testdata/failing_urls.csv b/cron/data/testdata/failing_urls.csv similarity index 100% rename from cron/internal/data/testdata/failing_urls.csv rename to cron/data/testdata/failing_urls.csv diff --git a/cron/internal/data/testdata/ignore_header.csv b/cron/data/testdata/ignore_header.csv similarity index 100% rename from cron/internal/data/testdata/ignore_header.csv rename to cron/data/testdata/ignore_header.csv diff --git a/cron/internal/data/testdata/no_header.csv b/cron/data/testdata/no_header.csv similarity index 100% rename from cron/internal/data/testdata/no_header.csv rename to cron/data/testdata/no_header.csv diff --git a/cron/internal/data/testdata/only_header.csv b/cron/data/testdata/only_header.csv similarity index 100% rename from cron/internal/data/testdata/only_header.csv rename to cron/data/testdata/only_header.csv diff --git a/cron/internal/data/testdata/split_file.csv b/cron/data/testdata/split_file.csv similarity index 100% rename from cron/internal/data/testdata/split_file.csv rename to cron/data/testdata/split_file.csv diff --git a/cron/internal/data/testdata/split_file_empty.csv b/cron/data/testdata/split_file_empty.csv similarity index 100% rename from cron/internal/data/testdata/split_file_empty.csv rename to cron/data/testdata/split_file_empty.csv diff --git a/cron/internal/data/writer.go b/cron/data/writer.go similarity index 100% rename from cron/internal/data/writer.go rename to cron/data/writer.go diff --git a/cron/internal/data/writer_test.go b/cron/data/writer_test.go similarity index 100% rename from cron/internal/data/writer_test.go rename to cron/data/writer_test.go diff --git a/cron/internal/bq/main.go b/cron/internal/bq/main.go index c30c9c86..8a6401fb 100644 --- a/cron/internal/bq/main.go +++ b/cron/internal/bq/main.go @@ -28,8 +28,8 @@ import ( "google.golang.org/protobuf/encoding/protojson" - "github.com/ossf/scorecard/v4/cron/internal/config" - "github.com/ossf/scorecard/v4/cron/internal/data" + "github.com/ossf/scorecard/v4/cron/config" + "github.com/ossf/scorecard/v4/cron/data" ) type shardSummary struct { diff --git a/cron/internal/cii/main.go b/cron/internal/cii/main.go index 983a69bc..cece18af 100644 --- a/cron/internal/cii/main.go +++ b/cron/internal/cii/main.go @@ -25,8 +25,8 @@ import ( "strings" "github.com/ossf/scorecard/v4/clients" - "github.com/ossf/scorecard/v4/cron/internal/config" - "github.com/ossf/scorecard/v4/cron/internal/data" + "github.com/ossf/scorecard/v4/cron/config" + "github.com/ossf/scorecard/v4/cron/data" ) const ciiBaseURL = "https://bestpractices.coreinfrastructure.org/projects.json" diff --git a/cron/internal/controller/main.go b/cron/internal/controller/main.go index aaef1f67..da37de62 100644 --- a/cron/internal/controller/main.go +++ b/cron/internal/controller/main.go @@ -28,8 +28,8 @@ import ( "sigs.k8s.io/release-utils/version" "github.com/ossf/scorecard/v4/clients" - "github.com/ossf/scorecard/v4/cron/internal/config" - "github.com/ossf/scorecard/v4/cron/internal/data" + "github.com/ossf/scorecard/v4/cron/config" + "github.com/ossf/scorecard/v4/cron/data" "github.com/ossf/scorecard/v4/cron/internal/pubsub" ) diff --git a/cron/internal/data/add/main.go b/cron/internal/data/add/main.go 
index ce435cba..d214ef1b 100644 --- a/cron/internal/data/add/main.go +++ b/cron/internal/data/add/main.go @@ -22,7 +22,7 @@ import ( "os" "strings" - "github.com/ossf/scorecard/v4/cron/internal/data" + "github.com/ossf/scorecard/v4/cron/data" ) // Script to add new project repositories to the projects.csv file: diff --git a/cron/internal/data/add/main_test.go b/cron/internal/data/add/main_test.go index 08e51792..27a9c042 100644 --- a/cron/internal/data/add/main_test.go +++ b/cron/internal/data/add/main_test.go @@ -22,7 +22,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" - "github.com/ossf/scorecard/v4/cron/internal/data" + "github.com/ossf/scorecard/v4/cron/data" ) func lessThanURI(x, y data.RepoFormat) bool { diff --git a/cron/internal/data/metadata.pb.go b/cron/internal/data/metadata.pb.go deleted file mode 100644 index 1e260e46..00000000 --- a/cron/internal/data/metadata.pb.go +++ /dev/null @@ -1,186 +0,0 @@ -// Copyright 2021 Security Scorecard Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by protoc-gen-go. DO NOT EDIT. -// versions: -// protoc-gen-go v1.28.1 -// protoc v3.21.6 -// source: cron/internal/data/metadata.proto - -package data - -import ( - protoreflect "google.golang.org/protobuf/reflect/protoreflect" - protoimpl "google.golang.org/protobuf/runtime/protoimpl" - reflect "reflect" - sync "sync" -) - -const ( - // Verify that this generated code is sufficiently up-to-date. - _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) - // Verify that runtime/protoimpl is sufficiently up-to-date. - _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) -) - -type ShardMetadata struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - ShardLoc *string `protobuf:"bytes,1,opt,name=shard_loc,json=shardLoc,proto3,oneof" json:"shard_loc,omitempty"` - NumShard *int32 `protobuf:"varint,2,opt,name=num_shard,json=numShard,proto3,oneof" json:"num_shard,omitempty"` - CommitSha *string `protobuf:"bytes,3,opt,name=commit_sha,json=commitSha,proto3,oneof" json:"commit_sha,omitempty"` -} - -func (x *ShardMetadata) Reset() { - *x = ShardMetadata{} - if protoimpl.UnsafeEnabled { - mi := &file_cron_internal_data_metadata_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *ShardMetadata) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*ShardMetadata) ProtoMessage() {} - -func (x *ShardMetadata) ProtoReflect() protoreflect.Message { - mi := &file_cron_internal_data_metadata_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use ShardMetadata.ProtoReflect.Descriptor instead. 
-func (*ShardMetadata) Descriptor() ([]byte, []int) { - return file_cron_internal_data_metadata_proto_rawDescGZIP(), []int{0} -} - -func (x *ShardMetadata) GetShardLoc() string { - if x != nil && x.ShardLoc != nil { - return *x.ShardLoc - } - return "" -} - -func (x *ShardMetadata) GetNumShard() int32 { - if x != nil && x.NumShard != nil { - return *x.NumShard - } - return 0 -} - -func (x *ShardMetadata) GetCommitSha() string { - if x != nil && x.CommitSha != nil { - return *x.CommitSha - } - return "" -} - -var File_cron_internal_data_metadata_proto protoreflect.FileDescriptor - -var file_cron_internal_data_metadata_proto_rawDesc = []byte{ - 0x0a, 0x21, 0x63, 0x72, 0x6f, 0x6e, 0x2f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x2f, - 0x64, 0x61, 0x74, 0x61, 0x2f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x12, 0x21, 0x6f, 0x73, 0x73, 0x66, 0x2e, 0x73, 0x63, 0x6f, 0x72, 0x65, 0x63, - 0x61, 0x72, 0x64, 0x2e, 0x63, 0x72, 0x6f, 0x6e, 0x2e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, - 0x6c, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x22, 0xa2, 0x01, 0x0a, 0x0d, 0x53, 0x68, 0x61, 0x72, 0x64, - 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x20, 0x0a, 0x09, 0x73, 0x68, 0x61, 0x72, - 0x64, 0x5f, 0x6c, 0x6f, 0x63, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x08, 0x73, - 0x68, 0x61, 0x72, 0x64, 0x4c, 0x6f, 0x63, 0x88, 0x01, 0x01, 0x12, 0x20, 0x0a, 0x09, 0x6e, 0x75, - 0x6d, 0x5f, 0x73, 0x68, 0x61, 0x72, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x48, 0x01, 0x52, - 0x08, 0x6e, 0x75, 0x6d, 0x53, 0x68, 0x61, 0x72, 0x64, 0x88, 0x01, 0x01, 0x12, 0x22, 0x0a, 0x0a, - 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x5f, 0x73, 0x68, 0x61, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, - 0x48, 0x02, 0x52, 0x09, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x53, 0x68, 0x61, 0x88, 0x01, 0x01, - 0x42, 0x0c, 0x0a, 0x0a, 0x5f, 0x73, 0x68, 0x61, 0x72, 0x64, 0x5f, 0x6c, 0x6f, 0x63, 0x42, 0x0c, - 0x0a, 0x0a, 0x5f, 0x6e, 0x75, 0x6d, 0x5f, 0x73, 0x68, 0x61, 0x72, 0x64, 0x42, 0x0d, 0x0a, 0x0b, - 0x5f, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x5f, 0x73, 0x68, 0x61, 0x42, 0x2e, 0x5a, 0x2c, 0x67, - 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6f, 0x73, 0x73, 0x66, 0x2f, 0x73, - 0x63, 0x6f, 0x72, 0x65, 0x63, 0x61, 0x72, 0x64, 0x2f, 0x63, 0x72, 0x6f, 0x6e, 0x2f, 0x69, 0x6e, - 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x2f, 0x64, 0x61, 0x74, 0x61, 0x62, 0x06, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x33, -} - -var ( - file_cron_internal_data_metadata_proto_rawDescOnce sync.Once - file_cron_internal_data_metadata_proto_rawDescData = file_cron_internal_data_metadata_proto_rawDesc -) - -func file_cron_internal_data_metadata_proto_rawDescGZIP() []byte { - file_cron_internal_data_metadata_proto_rawDescOnce.Do(func() { - file_cron_internal_data_metadata_proto_rawDescData = protoimpl.X.CompressGZIP(file_cron_internal_data_metadata_proto_rawDescData) - }) - return file_cron_internal_data_metadata_proto_rawDescData -} - -var file_cron_internal_data_metadata_proto_msgTypes = make([]protoimpl.MessageInfo, 1) -var file_cron_internal_data_metadata_proto_goTypes = []interface{}{ - (*ShardMetadata)(nil), // 0: ossf.scorecard.cron.internal.data.ShardMetadata -} -var file_cron_internal_data_metadata_proto_depIdxs = []int32{ - 0, // [0:0] is the sub-list for method output_type - 0, // [0:0] is the sub-list for method input_type - 0, // [0:0] is the sub-list for extension type_name - 0, // [0:0] is the sub-list for extension extendee - 0, // [0:0] is the sub-list for field type_name -} - -func init() { 
file_cron_internal_data_metadata_proto_init() } -func file_cron_internal_data_metadata_proto_init() { - if File_cron_internal_data_metadata_proto != nil { - return - } - if !protoimpl.UnsafeEnabled { - file_cron_internal_data_metadata_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*ShardMetadata); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } - file_cron_internal_data_metadata_proto_msgTypes[0].OneofWrappers = []interface{}{} - type x struct{} - out := protoimpl.TypeBuilder{ - File: protoimpl.DescBuilder{ - GoPackagePath: reflect.TypeOf(x{}).PkgPath(), - RawDescriptor: file_cron_internal_data_metadata_proto_rawDesc, - NumEnums: 0, - NumMessages: 1, - NumExtensions: 0, - NumServices: 0, - }, - GoTypes: file_cron_internal_data_metadata_proto_goTypes, - DependencyIndexes: file_cron_internal_data_metadata_proto_depIdxs, - MessageInfos: file_cron_internal_data_metadata_proto_msgTypes, - }.Build() - File_cron_internal_data_metadata_proto = out.File - file_cron_internal_data_metadata_proto_rawDesc = nil - file_cron_internal_data_metadata_proto_goTypes = nil - file_cron_internal_data_metadata_proto_depIdxs = nil -} diff --git a/cron/internal/data/update/dependency.go b/cron/internal/data/update/dependency.go index 60813138..8f1e13b3 100644 --- a/cron/internal/data/update/dependency.go +++ b/cron/internal/data/update/dependency.go @@ -30,7 +30,7 @@ import ( "golang.org/x/tools/go/vcs" "github.com/ossf/scorecard/v4/clients/githubrepo" - "github.com/ossf/scorecard/v4/cron/internal/data" + "github.com/ossf/scorecard/v4/cron/data" ) var ( diff --git a/cron/internal/data/update/main.go b/cron/internal/data/update/main.go index 07d0d914..d3d9f49e 100644 --- a/cron/internal/data/update/main.go +++ b/cron/internal/data/update/main.go @@ -19,7 +19,7 @@ import ( "bytes" "os" - "github.com/ossf/scorecard/v4/cron/internal/data" + "github.com/ossf/scorecard/v4/cron/data" ) // Adds "project=${PROJECT},dependency=true" to the repositories metadata. diff --git a/cron/internal/data/validate/main.go b/cron/internal/data/validate/main.go index 47ddac04..aeb8141b 100644 --- a/cron/internal/data/validate/main.go +++ b/cron/internal/data/validate/main.go @@ -19,7 +19,7 @@ import ( "log" "os" - "github.com/ossf/scorecard/v4/cron/internal/data" + "github.com/ossf/scorecard/v4/cron/data" ) // Validates data.Iterator used by production PubSub cron job. 
diff --git a/cron/internal/monitoring/exporter.go b/cron/internal/monitoring/exporter.go index 3bc5e483..922ce1b4 100644 --- a/cron/internal/monitoring/exporter.go +++ b/cron/internal/monitoring/exporter.go @@ -24,7 +24,7 @@ import ( "contrib.go.opencensus.io/exporter/stackdriver/monitoredresource/gcp" "go.opencensus.io/stats/view" - "github.com/ossf/scorecard/v4/cron/internal/config" + "github.com/ossf/scorecard/v4/cron/config" ) var errorUndefinedExporter = errors.New("unsupported exporterType") diff --git a/cron/internal/pubsub/publisher.go b/cron/internal/pubsub/publisher.go index fc420049..6310e166 100644 --- a/cron/internal/pubsub/publisher.go +++ b/cron/internal/pubsub/publisher.go @@ -28,7 +28,7 @@ import ( _ "gocloud.dev/pubsub/gcppubsub" "google.golang.org/protobuf/encoding/protojson" - "github.com/ossf/scorecard/v4/cron/internal/data" + "github.com/ossf/scorecard/v4/cron/data" ) var errorPublish = errors.New("total errors when publishing") diff --git a/cron/internal/pubsub/publisher_test.go b/cron/internal/pubsub/publisher_test.go index 39270de5..3f098157 100644 --- a/cron/internal/pubsub/publisher_test.go +++ b/cron/internal/pubsub/publisher_test.go @@ -21,7 +21,7 @@ import ( "gocloud.dev/pubsub" - "github.com/ossf/scorecard/v4/cron/internal/data" + "github.com/ossf/scorecard/v4/cron/data" ) type mockSucceedTopic struct{} diff --git a/cron/internal/pubsub/subscriber.go b/cron/internal/pubsub/subscriber.go index fcca1a51..86ecff60 100644 --- a/cron/internal/pubsub/subscriber.go +++ b/cron/internal/pubsub/subscriber.go @@ -21,7 +21,7 @@ import ( "google.golang.org/protobuf/encoding/protojson" - "github.com/ossf/scorecard/v4/cron/internal/data" + "github.com/ossf/scorecard/v4/cron/data" ) // ErrorInParse indicates there was an error while unmarshalling the protocol buffer message. diff --git a/cron/internal/pubsub/subscriber_gcs.go b/cron/internal/pubsub/subscriber_gcs.go index e3553ba6..b9373e1b 100644 --- a/cron/internal/pubsub/subscriber_gcs.go +++ b/cron/internal/pubsub/subscriber_gcs.go @@ -24,7 +24,7 @@ import ( pubsub "cloud.google.com/go/pubsub/apiv1" pubsubpb "google.golang.org/genproto/googleapis/pubsub/v1" - "github.com/ossf/scorecard/v4/cron/internal/data" + "github.com/ossf/scorecard/v4/cron/data" ) const ( diff --git a/cron/internal/pubsub/subscriber_gocloud.go b/cron/internal/pubsub/subscriber_gocloud.go index 5dbd8801..faaf7ca6 100644 --- a/cron/internal/pubsub/subscriber_gocloud.go +++ b/cron/internal/pubsub/subscriber_gocloud.go @@ -23,7 +23,7 @@ import ( // Needed to link in GCP drivers. 
_ "gocloud.dev/pubsub/gcppubsub" - "github.com/ossf/scorecard/v4/cron/internal/data" + "github.com/ossf/scorecard/v4/cron/data" ) type receiver interface { diff --git a/cron/internal/pubsub/subscriber_gocloud_test.go b/cron/internal/pubsub/subscriber_gocloud_test.go index c593cdff..88ab1288 100644 --- a/cron/internal/pubsub/subscriber_gocloud_test.go +++ b/cron/internal/pubsub/subscriber_gocloud_test.go @@ -23,7 +23,7 @@ import ( "google.golang.org/protobuf/encoding/protojson" "google.golang.org/protobuf/proto" - "github.com/ossf/scorecard/v4/cron/internal/data" + "github.com/ossf/scorecard/v4/cron/data" ) var repo1 = "repo1" diff --git a/cron/internal/shuffle/main.go b/cron/internal/shuffle/main.go index 671f8f0f..805b0c18 100644 --- a/cron/internal/shuffle/main.go +++ b/cron/internal/shuffle/main.go @@ -21,7 +21,7 @@ import ( "strconv" "time" - "github.com/ossf/scorecard/v4/cron/internal/data" + "github.com/ossf/scorecard/v4/cron/data" ) func main() { diff --git a/cron/internal/webhook/main.go b/cron/internal/webhook/main.go index 7b7f03bc..42649a7f 100644 --- a/cron/internal/webhook/main.go +++ b/cron/internal/webhook/main.go @@ -26,7 +26,7 @@ import ( "github.com/google/go-containerregistry/pkg/v1/google" "google.golang.org/protobuf/encoding/protojson" - "github.com/ossf/scorecard/v4/cron/internal/data" + "github.com/ossf/scorecard/v4/cron/data" ) const stableTag = "stable" diff --git a/cron/internal/worker/main.go b/cron/internal/worker/main.go index fe6782a1..2ad727a2 100644 --- a/cron/internal/worker/main.go +++ b/cron/internal/worker/main.go @@ -30,11 +30,11 @@ import ( "github.com/ossf/scorecard/v4/clients" "github.com/ossf/scorecard/v4/clients/githubrepo" githubstats "github.com/ossf/scorecard/v4/clients/githubrepo/stats" - "github.com/ossf/scorecard/v4/cron/internal/config" - "github.com/ossf/scorecard/v4/cron/internal/data" + "github.com/ossf/scorecard/v4/cron/config" + "github.com/ossf/scorecard/v4/cron/data" format "github.com/ossf/scorecard/v4/cron/internal/format" "github.com/ossf/scorecard/v4/cron/internal/monitoring" - "github.com/ossf/scorecard/v4/cron/internal/pubsub" + "github.com/ossf/scorecard/v4/cron/worker" docs "github.com/ossf/scorecard/v4/docs/checks" sce "github.com/ossf/scorecard/v4/errors" "github.com/ossf/scorecard/v4/log" @@ -50,6 +50,81 @@ const ( var ignoreRuntimeErrors = flag.Bool("ignoreRuntimeErrors", false, "if set to true any runtime errors will be ignored") +type ScorecardWorker struct { + ctx context.Context + logger *log.Logger + checkDocs docs.Doc + exporter monitoring.Exporter + repoClient clients.RepoClient + ciiClient clients.CIIBestPracticesClient + ossFuzzRepoClient clients.RepoClient + vulnsClient clients.VulnerabilitiesClient + apiBucketURL string + rawBucketURL string + blacklistedChecks []string +} + +func newScorecardWorker() (*ScorecardWorker, error) { + var err error + sw := &ScorecardWorker{} + if sw.checkDocs, err = docs.Read(); err != nil { + return nil, fmt.Errorf("docs.Read: %w", err) + } + + if sw.rawBucketURL, err = config.GetRawResultDataBucketURL(); err != nil { + return nil, fmt.Errorf("docs.GetRawResultDataBucketURL: %w", err) + } + + if sw.blacklistedChecks, err = config.GetBlacklistedChecks(); err != nil { + return nil, fmt.Errorf("config.GetBlacklistedChecks: %w", err) + } + + var ciiDataBucketURL string + if ciiDataBucketURL, err = config.GetCIIDataBucketURL(); err != nil { + return nil, fmt.Errorf("config.GetCIIDataBucketURL: %w", err) + } + + if sw.apiBucketURL, err = config.GetAPIResultsBucketURL(); err != nil { + 
return nil, fmt.Errorf("config.GetAPIResultsBucketURL: %w", err) + } + + sw.ctx = context.Background() + sw.logger = log.NewLogger(log.InfoLevel) + sw.repoClient = githubrepo.CreateGithubRepoClient(sw.ctx, sw.logger) + sw.ciiClient = clients.BlobCIIBestPracticesClient(ciiDataBucketURL) + if sw.ossFuzzRepoClient, err = githubrepo.CreateOssFuzzRepoClient(sw.ctx, sw.logger); err != nil { + return nil, fmt.Errorf("githubrepo.CreateOssFuzzRepoClient: %w", err) + } + + sw.vulnsClient = clients.DefaultVulnerabilitiesClient() + + if sw.exporter, err = startMetricsExporter(); err != nil { + return nil, fmt.Errorf("startMetricsExporter: %w", err) + } + + // Exposed for monitoring runtime profiles + go func() { + // TODO(log): Previously Fatal. Need to handle the error here. + //nolint:gosec // not internet facing. + sw.logger.Info(fmt.Sprintf("%v", http.ListenAndServe(":8080", nil))) + }() + return sw, nil +} + +func (sw *ScorecardWorker) Close() { + sw.exporter.StopMetricsExporter() + sw.ossFuzzRepoClient.Close() +} + +func (sw *ScorecardWorker) Process(ctx context.Context, req *data.ScorecardBatchRequest, bucketURL string) error { + return processRequest(ctx, req, sw.blacklistedChecks, bucketURL, sw.rawBucketURL, sw.apiBucketURL, + sw.checkDocs, sw.repoClient, sw.ossFuzzRepoClient, sw.ciiClient, sw.vulnsClient, sw.logger) +} + +func (sw *ScorecardWorker) PostProcess() { + sw.exporter.Flush() +} + //nolint:gocognit func processRequest(ctx context.Context, batchRequest *data.ScorecardBatchRequest, @@ -60,25 +135,7 @@ func processRequest(ctx context.Context, vulnsClient clients.VulnerabilitiesClient, logger *log.Logger, ) error { - filename := data.GetBlobFilename( - fmt.Sprintf("shard-%07d", batchRequest.GetShardNum()), - batchRequest.GetJobTime().AsTime()) - // Sanity check - make sure we are not re-processing an already processed request. - existsScore, err := data.BlobExists(ctx, bucketURL, filename) - if err != nil { - return fmt.Errorf("error during BlobExists: %w", err) - } - - existsRaw, err := data.BlobExists(ctx, rawBucketURL, filename) - if err != nil { - return fmt.Errorf("error during BlobExists: %w", err) - } - - if existsScore && existsRaw { - logger.Info(fmt.Sprintf("Already processed shard %s. Nothing to do.", filename)) - // We have already processed this request, nothing to do. - return nil - } + filename := worker.ResultFilename(batchRequest) var buffer2 bytes.Buffer var rawBuffer bytes.Buffer @@ -171,12 +228,14 @@ func processRequest(ctx context.Context, } } - if err := data.WriteToBlobStore(ctx, bucketURL, filename, buffer2.Bytes()); err != nil { + // Raw result. + if err := data.WriteToBlobStore(ctx, rawBucketURL, filename, rawBuffer.Bytes()); err != nil { return fmt.Errorf("error during WriteToBlobStore2: %w", err) } - // Raw result. - if err := data.WriteToBlobStore(ctx, rawBucketURL, filename, rawBuffer.Bytes()); err != nil { + // write to the canonical bucket last, as the presence of filename indicates the job was completed. + // see worker package for details. 
+	if err := data.WriteToBlobStore(ctx, bucketURL, filename, buffer2.Bytes()); err != nil {
 		return fmt.Errorf("error during WriteToBlobStore2: %w", err)
 	}
 
@@ -204,121 +263,15 @@ func startMetricsExporter() (monitoring.Exporter, error) {
 	return exporter, nil
 }
 
-func hasMetadataFile(ctx context.Context, req *data.ScorecardBatchRequest, bucketURL string) (bool, error) {
-	filename := data.GetBlobFilename(config.ShardMetadataFilename, req.GetJobTime().AsTime())
-	exists, err := data.BlobExists(ctx, bucketURL, filename)
-	if err != nil {
-		return false, fmt.Errorf("data.BlobExists: %w", err)
-	}
-	return exists, nil
-}
-
 func main() {
-	ctx := context.Background()
-	flag.Parse()
-	if err := config.ReadConfig(); err != nil {
-		panic(err)
-	}
-
-	checkDocs, err := docs.Read()
+	sw, err := newScorecardWorker()
 	if err != nil {
 		panic(err)
 	}
-
-	subscriptionURL, err := config.GetRequestSubscriptionURL()
-	if err != nil {
-		panic(err)
-	}
-	subscriber, err := pubsub.CreateSubscriber(ctx, subscriptionURL)
-	if err != nil {
-		panic(err)
-	}
-
-	bucketURL, err := config.GetResultDataBucketURL()
-	if err != nil {
-		panic(err)
-	}
-
-	rawBucketURL, err := config.GetRawResultDataBucketURL()
-	if err != nil {
-		panic(err)
-	}
-
-	blacklistedChecks, err := config.GetBlacklistedChecks()
-	if err != nil {
-		panic(err)
-	}
-
-	ciiDataBucketURL, err := config.GetCIIDataBucketURL()
-	if err != nil {
-		panic(err)
-	}
-
-	apiBucketURL, err := config.GetAPIResultsBucketURL()
-	if err != nil {
-		panic(err)
-	}
-
-	logger := log.NewLogger(log.InfoLevel)
-	repoClient := githubrepo.CreateGithubRepoClient(ctx, logger)
-	ciiClient := clients.BlobCIIBestPracticesClient(ciiDataBucketURL)
-	ossFuzzRepoClient, err := githubrepo.CreateOssFuzzRepoClient(ctx, logger)
-	vulnsClient := clients.DefaultVulnerabilitiesClient()
-	if err != nil {
-		panic(err)
-	}
-	defer ossFuzzRepoClient.Close()
-
-	exporter, err := startMetricsExporter()
-	if err != nil {
-		panic(err)
-	}
-	defer exporter.StopMetricsExporter()
-
-	// Exposed for monitoring runtime profiles
-	go func() {
-		// TODO(log): Previously Fatal. Need to handle the error here.
-		//nolint: gosec // internal server.
-		logger.Info(fmt.Sprintf("%v", http.ListenAndServe(":8080", nil)))
-	}()
-
-	for {
-		req, err := subscriber.SynchronousPull()
-		if err != nil {
-			panic(err)
-		}
-
-		logger.Info("Received message from subscription")
-		if req == nil {
-			// TODO(log): Previously Warn. Consider logging an error here.
-			logger.Info("subscription returned nil message during Receive, exiting")
-			break
-		}
-
-		// don't process requests from jobs without metadata files, as the results will never be transferred.
-		// https://github.com/ossf/scorecard/issues/2307
-		if hasMd, err := hasMetadataFile(ctx, req, bucketURL); !hasMd || err != nil {
-			// nack the message so it can be tried later, as the metadata file may not have been created yet.
-			subscriber.Nack()
-			continue
-		}
-
-		if err := processRequest(ctx, req, blacklistedChecks,
-			bucketURL, rawBucketURL, apiBucketURL, checkDocs,
-			repoClient, ossFuzzRepoClient, ciiClient, vulnsClient, logger); err != nil {
-			// TODO(log): Previously Warn. Consider logging an error here.
-			logger.Info(fmt.Sprintf("error processing request: %v", err))
-			// Nack the message so that another worker can retry.
-			subscriber.Nack()
-			continue
-		}
-
-		exporter.Flush()
-		subscriber.Ack()
-	}
-	err = subscriber.Close()
-	if err != nil {
+	defer sw.Close()
+	wl := worker.NewWorkLoop(sw)
+	if err := wl.Run(); err != nil {
 		panic(err)
 	}
 }
diff --git a/cron/k8s/README.md b/cron/k8s/README.md
index 314fd527..aee2f7fe 100644
--- a/cron/k8s/README.md
+++ b/cron/k8s/README.md
@@ -25,7 +25,7 @@ The cluster name is `openssf` which is in zone `us-central1-c`.
 
 ## Creating or updating the ConfigMap using the config.yaml file
 
-We use [ConfigMaps](https://kubernetes.io/docs/concepts/configuration/configmap/) to store our config file (`cron/internal/config/config.yaml`).
+We use [ConfigMaps](https://kubernetes.io/docs/concepts/configuration/configmap/) to store our config file (`cron/config/config.yaml`).
 The file can be created for the first time, or updated, with the same command:
 ```
 kubectl create configmap scorecard-config --from-file=config.yaml -o yaml --dry-run=client | kubectl apply -f -
diff --git a/cron/worker/worker.go b/cron/worker/worker.go
new file mode 100644
index 00000000..71b57d4f
--- /dev/null
+++ b/cron/worker/worker.go
@@ -0,0 +1,160 @@
+// Copyright 2022 Security Scorecard Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package worker implements the generic cron worker logic.
+package worker
+
+import (
+	"context"
+	"flag"
+	"fmt"
+
+	"github.com/ossf/scorecard/v4/cron/config"
+	"github.com/ossf/scorecard/v4/cron/data"
+	"github.com/ossf/scorecard/v4/cron/internal/pubsub"
+	"github.com/ossf/scorecard/v4/log"
+)
+
+// Worker is the interface used to process batch requests.
+//
+// Process does the processing for a batch request. Returning an error will cause the request to be nack'd,
+// allowing it to be re-processed later. If no error is returned, the request will be ack'd, consuming it.
+//
+// PostProcess is called only after an error-free call to Process.
+type Worker interface {
+	Process(ctx context.Context, req *data.ScorecardBatchRequest, bucketURL string) error
+	PostProcess()
+}
+
+// WorkLoop is the entry point into the common cron worker structure.
+type WorkLoop struct {
+	worker Worker
+}
+
+// NewWorkLoop creates a workloop using a specified worker.
+func NewWorkLoop(worker Worker) WorkLoop {
+	return WorkLoop{worker: worker}
+}
+
+// Run initiates the processing performed by the WorkLoop.
+func (wl *WorkLoop) Run() error {
+	ctx := context.Background()
+
+	if !flag.Parsed() {
+		flag.Parse()
+	}
+
+	if err := config.ReadConfig(); err != nil {
+		return fmt.Errorf("config.ReadConfig: %w", err)
+	}
+
+	subscriptionURL, err := config.GetRequestSubscriptionURL()
+	if err != nil {
+		return fmt.Errorf("config.GetRequestSubscriptionURL: %w", err)
+	}
+
+	subscriber, err := pubsub.CreateSubscriber(ctx, subscriptionURL)
+	if err != nil {
+		return fmt.Errorf("pubsub.CreateSubscriber: %w", err)
+	}
+
+	bucketURL, err := config.GetResultDataBucketURL()
+	if err != nil {
+		return fmt.Errorf("config.GetResultDataBucketURL: %w", err)
+	}
+
+	logger := log.NewLogger(log.InfoLevel)
+
+	for {
+		req, err := subscriber.SynchronousPull()
+		if err != nil {
+			return fmt.Errorf("subscriber.SynchronousPull: %w", err)
+		}
+
+		logger.Info("Received message from subscription")
+		if req == nil {
+			// TODO(log): Previously Warn. Consider logging an error here.
+			logger.Info("subscription returned nil message during Receive, exiting")
+			break
+		}
+
+		// don't process requests from jobs without metadata files, as the results will never be transferred.
+		// https://github.com/ossf/scorecard/issues/2307
+		hasMd, err := hasMetadataFile(ctx, req, bucketURL)
+		if err != nil {
+			announceError(err, subscriber, logger)
+			continue
+		}
+
+		if !hasMd {
+			// nack the message so it can be tried later, as the metadata file may not have been created yet.
+			subscriber.Nack()
+			continue
+		}
+
+		exists, err := resultExists(ctx, req, bucketURL)
+		if err != nil {
+			announceError(err, subscriber, logger)
+			continue
+		}
+
+		// Sanity check - make sure we are not re-processing an already processed request.
+		if exists {
+			logger.Info(fmt.Sprintf("Skipping already processed request: %s.", req.String()))
+		} else {
+			if err := wl.worker.Process(ctx, req, bucketURL); err != nil {
+				announceError(err, subscriber, logger)
+				continue
+			}
+		}
+
+		wl.worker.PostProcess()
+		subscriber.Ack()
+	}
+	if err := subscriber.Close(); err != nil {
+		return fmt.Errorf("subscriber.Close: %w", err)
+	}
+	return nil
+}
+
+func announceError(err error, subscriber pubsub.Subscriber, logger *log.Logger) {
+	// TODO(log): Previously Warn. Consider logging an error here.
+	logger.Info(fmt.Sprintf("error processing request: %v", err))
+	// Nack the message so that another worker can retry.
+	subscriber.Nack()
+}
+
+func resultExists(ctx context.Context, sbr *data.ScorecardBatchRequest, bucketURL string) (bool, error) {
+	exists, err := data.BlobExists(ctx, bucketURL, ResultFilename(sbr))
+	if err != nil {
+		return false, fmt.Errorf("error during BlobExists: %w", err)
+	}
+	return exists, nil
+}
+
+// ResultFilename returns the filename where the result from processing a batch request should go.
+// This naming convention is used to detect duplicate requests, as well as transfer the results to BigQuery.
+func ResultFilename(sbr *data.ScorecardBatchRequest) string {
+	shardname := fmt.Sprintf("shard-%07d", sbr.GetShardNum())
+	return data.GetBlobFilename(shardname, sbr.GetJobTime().AsTime())
+}
+
+func hasMetadataFile(ctx context.Context, req *data.ScorecardBatchRequest, bucketURL string) (bool, error) {
+	filename := data.GetShardMetadataFilename(req.GetJobTime().AsTime())
+	exists, err := data.BlobExists(ctx, bucketURL, filename)
+	if err != nil {
+		return false, fmt.Errorf("data.BlobExists: %w", err)
+	}
+	return exists, nil
+}
diff --git a/cron/worker/worker_test.go b/cron/worker/worker_test.go
new file mode 100644
index 00000000..cbe7451f
--- /dev/null
+++ b/cron/worker/worker_test.go
@@ -0,0 +1,57 @@
+// Copyright 2022 Security Scorecard Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package worker
+
+import (
+	"testing"
+	"time"
+
+	"google.golang.org/protobuf/types/known/timestamppb"
+
+	"github.com/ossf/scorecard/v4/cron/data"
+)
+
+func asPointer(i int32) *int32 {
+	return &i
+}
+
+func TestResultFilename(t *testing.T) {
+	t.Parallel()
+	testcases := []struct {
+		name string
+		req  *data.ScorecardBatchRequest
+		want string
+	}{
+		{
+			name: "Basic",
+			req: &data.ScorecardBatchRequest{
+				JobTime:  timestamppb.New(time.Date(1979, time.October, 12, 1, 2, 3, 0, time.UTC)),
+				ShardNum: asPointer(42),
+			},
+			want: "1979.10.12/010203/shard-0000042",
+		},
+	}
+
+	for _, testcase := range testcases {
+		testcase := testcase
+		t.Run(testcase.name, func(t *testing.T) {
+			t.Parallel()
+			got := ResultFilename(testcase.req)
+			if got != testcase.want {
+				t.Errorf("\nexpected: \n%s \ngot: \n%s", testcase.want, got)
+			}
+		})
+	}
+}
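Usage sketch (illustrative only, not part of this patch): a minimal, hypothetical consumer of the new cron/worker package. The exampleWorker type and the main function below are assumptions made for illustration; the patch's real Worker implementation is the ScorecardWorker adapter in cron/internal/worker/main.go.

package main

import (
	"context"
	"fmt"

	"github.com/ossf/scorecard/v4/cron/data"
	"github.com/ossf/scorecard/v4/cron/worker"
)

// exampleWorker is a hypothetical Worker implementation, shown only to illustrate the interface.
type exampleWorker struct{}

// Process handles a single batch request. Returning an error makes the WorkLoop nack the
// message so another worker can retry it; returning nil lets the WorkLoop ack and consume it.
func (e *exampleWorker) Process(ctx context.Context, req *data.ScorecardBatchRequest, bucketURL string) error {
	fmt.Printf("processing shard %d from job %v\n", req.GetShardNum(), req.GetJobTime().AsTime())
	return nil
}

// PostProcess runs only after an error-free Process call, e.g. to flush metrics.
func (e *exampleWorker) PostProcess() {}

func main() {
	// The WorkLoop owns config loading, the pub/sub subscription, the metadata-file and
	// duplicate-request checks, and ack/nack handling; the worker only supplies Process and PostProcess.
	wl := worker.NewWorkLoop(&exampleWorker{})
	if err := wl.Run(); err != nil {
		panic(err)
	}
}

The split keeps config loading, pub/sub handling, duplicate-request detection, and ack/nack policy inside WorkLoop.Run, so a cron binary only needs to supply Process and PostProcess.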