2022-12-02 02:08:48 +03:00
|
|
|
// Copyright 2021 OpenSSF Scorecard Authors
|
2022-11-16 21:34:50 +03:00
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package data
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"fmt"
|
|
|
|
"strings"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"google.golang.org/protobuf/encoding/protojson"
|
|
|
|
|
2024-04-13 00:51:50 +03:00
|
|
|
"github.com/ossf/scorecard/v5/cron/config"
|
2022-11-16 21:34:50 +03:00
|
|
|
)
|
|
|
|
|
|
|
|
// ShardSummary aggregates what is known about one group of shards, i.e. all
// the shard files in a bucket that share a single creation time.
type ShardSummary struct {
	// creationTime is the timestamp shared by every shard in the group.
	creationTime time.Time
	// shardMetadata holds the raw bytes of the group's shard metadata file.
	shardMetadata []byte
	// shardsExpected is the shard count announced by the metadata file;
	// shardsCreated is the number of shard files actually found.
	shardsExpected, shardsCreated int
	// isTransferred records whether a transfer status file was found for the
	// group.
	isTransferred bool
}
|
|
|
|
|
|
|
|
// IsCompleted checks if the percentage of completed shards is over the desired completion threshold.
|
|
|
|
// It also returns false to prevent transfers in cases where the expected number of shards is 0,
|
|
|
|
// as either the .shard_metadata file is missing, or there is nothing to transfer anyway.
|
|
|
|
func (s *ShardSummary) IsCompleted(completionThreshold float64) bool {
|
|
|
|
completedPercentage := float64(s.shardsCreated) / float64(s.shardsExpected)
|
|
|
|
return s.shardsExpected > 0 && completedPercentage >= completionThreshold
|
|
|
|
}
|
|
|
|
|
|
|
|
// IsTransferred returns true if the shards have already been transferred.
|
|
|
|
// A true value indicates that a transfer should not occur, a false value
|
|
|
|
// indicates that a transfer should occur if IsCompleted() also returns true.
|
|
|
|
func (s *ShardSummary) IsTransferred() bool {
|
|
|
|
return s.isTransferred
|
|
|
|
}
|
|
|
|
|
|
|
|
// Metadata returns the raw metadata about the bucket.
|
|
|
|
func (s *ShardSummary) Metadata() []byte {
|
|
|
|
return s.shardMetadata
|
|
|
|
}
|
|
|
|
|
|
|
|
// CreationTime returns the time the shards were created. This corresponds to
|
|
|
|
// the job time generated by the controller.
|
|
|
|
func (s *ShardSummary) CreationTime() time.Time {
|
|
|
|
return s.creationTime
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *ShardSummary) MarkTransferred(ctx context.Context, bucketURL string) error {
|
|
|
|
transferStatusFilename := GetTransferStatusFilename(s.creationTime)
|
|
|
|
if err := WriteToBlobStore(ctx, bucketURL, transferStatusFilename, nil); err != nil {
|
|
|
|
return fmt.Errorf("error during WriteToBlobStore: %w", err)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// BucketSummary contains details about all the shards in a bucket grouped by
|
|
|
|
// their creation time.
|
|
|
|
type BucketSummary struct {
|
|
|
|
shards map[time.Time]*ShardSummary
|
|
|
|
}
|
|
|
|
|
|
|
|
func (summary *BucketSummary) getOrCreate(t time.Time) *ShardSummary {
|
|
|
|
if summary.shards[t] == nil {
|
|
|
|
summary.shards[t] = &ShardSummary{
|
|
|
|
creationTime: t,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return summary.shards[t]
|
|
|
|
}
|
|
|
|
|
|
|
|
// Shards returns a slice of ShardSummary instances for each shard creation time.
|
|
|
|
func (summary *BucketSummary) Shards() []*ShardSummary {
|
|
|
|
var shards []*ShardSummary
|
|
|
|
for _, s := range summary.shards {
|
|
|
|
shards = append(shards, s)
|
|
|
|
}
|
|
|
|
return shards
|
|
|
|
}
|
|
|
|
|
|
|
|
// GetBucketSummary iterates through all files in a bucket and returns a
|
|
|
|
// BucketSummary with details on each set of shards grouped by creation time.
|
|
|
|
func GetBucketSummary(ctx context.Context, bucketURL string) (*BucketSummary, error) {
|
|
|
|
keys, err := GetBlobKeys(ctx, bucketURL)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("error getting BlobKeys: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
summary := BucketSummary{
|
|
|
|
shards: make(map[time.Time]*ShardSummary),
|
|
|
|
}
|
|
|
|
for _, key := range keys {
|
|
|
|
creationTime, filename, err := ParseBlobFilename(key)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("error parsing Blob key: %w", err)
|
|
|
|
}
|
|
|
|
switch {
|
|
|
|
case strings.HasPrefix(filename, "shard-"):
|
|
|
|
summary.getOrCreate(creationTime).shardsCreated++
|
|
|
|
case filename == config.TransferStatusFilename:
|
|
|
|
summary.getOrCreate(creationTime).isTransferred = true
|
|
|
|
case filename == config.ShardMetadataFilename:
|
|
|
|
keyData, err := GetBlobContent(ctx, bucketURL, key)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("error during GetBlobContent: %w", err)
|
|
|
|
}
|
|
|
|
var metadata ShardMetadata
|
|
|
|
if err := protojson.Unmarshal(keyData, &metadata); err != nil {
|
|
|
|
return nil, fmt.Errorf("error parsing data as ShardMetadata: %w", err)
|
|
|
|
}
|
|
|
|
summary.getOrCreate(creationTime).shardsExpected = int(metadata.GetNumShard())
|
|
|
|
summary.getOrCreate(creationTime).shardMetadata = keyData
|
|
|
|
default:
|
2023-11-15 22:44:28 +03:00
|
|
|
//nolint:goerr113
|
2022-11-16 21:34:50 +03:00
|
|
|
return nil, fmt.Errorf("found unrecognized file: %s", key)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return &summary, nil
|
|
|
|
}
|