50k cron repos and allow skipping 404 URLs (#591)

Co-authored-by: Azeem Shaikh <azeems@google.com>
This commit is contained in:
Azeem Shaikh 2021-06-18 16:00:08 -07:00 committed by GitHub
parent c41f068223
commit db02490da4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 23042 additions and 527 deletions

View File

@ -60,7 +60,8 @@ check-linter: $(GOLANGGCI_LINT)
add-projects: ## Adds new projects to ./cron/data/projects.csv
add-projects: ./cron/data/projects.csv | build-add-script
# Add new projects to ./cron/data/projects.csv
./cron/data/add/add ./cron/data/projects.csv
./cron/data/add/add ./cron/data/projects.new.csv
mv ./cron/data/projects.new.csv ./cron/data/projects.csv
validate-projects: ## Validates ./cron/data/projects.csv
validate-projects: ./cron/data/projects.csv | build-validate-script
@ -108,13 +109,13 @@ build-bq-transfer: ./cron/bq/*.go
build-add-script: ## Runs go build on the add script
build-add-script: cron/data/add/add
cron/data/add/add: cron/data/add/*.go cron/data/*.go
cron/data/add/add: cron/data/add/*.go cron/data/*.go cron/data/projects.csv
# Run go build on the add script
cd cron/data/add && CGO_ENABLED=0 go build -a -ldflags '-w -extldflags "-static"' -o add
build-validate-script: ## Runs go build on the validate script
build-validate-script: cron/data/validate/validate
cron/data/validate/validate: cron/data/validate/*.go cron/data/*.go
cron/data/validate/validate: cron/data/validate/*.go cron/data/*.go cron/data/projects.csv
# Run go build on the validate script
cd cron/data/validate && CGO_ENABLED=0 go build -a -ldflags '-w -extldflags "-static"' -o validate

View File

@ -42,7 +42,8 @@ func (client *Client) InitRepo(owner, repoName string) error {
client.repoName = repoName
repo, _, err := client.repoClient.Repositories.Get(client.ctx, client.owner, client.repoName)
if err != nil {
return fmt.Errorf("error during Repositories.Get: %w", err)
// nolint: wrapcheck
return clients.NewRepoUnavailableError(err)
}
client.repo = repo

View File

@ -14,7 +14,28 @@
package clients
import "io"
import (
"fmt"
"io"
)
// ErrRepoUnavailable reports that a repo could not be accessed. It wraps the
// underlying error that caused the access to fail.
type ErrRepoUnavailable struct {
	innerError error
}

// Error implements the error interface, appending the wrapped error's
// description to a fixed "repo cannot be accessed" prefix.
func (e *ErrRepoUnavailable) Error() string {
	return fmt.Sprintf("repo cannot be accessed: %v", e.innerError)
}

// Unwrap returns the wrapped error so that errors.Is and errors.As can
// inspect the underlying cause.
func (e *ErrRepoUnavailable) Unwrap() error {
	return e.innerError
}

// Is reports whether target is an *ErrRepoUnavailable.
//
// errors.Is falls back to pointer equality when an error type has no Is
// method, so a check such as errors.Is(err, &ErrRepoUnavailable{}) would
// never succeed against a freshly allocated target. Matching on the dynamic
// type makes that check behave as callers expect.
func (e *ErrRepoUnavailable) Is(target error) bool {
	_, ok := target.(*ErrRepoUnavailable)
	return ok
}

// NewRepoUnavailableError wraps err in an *ErrRepoUnavailable and returns it
// as an error.
func NewRepoUnavailableError(err error) error {
	return &ErrRepoUnavailable{
		innerError: err,
	}
}
type RepoClient interface {
InitRepo(owner, repo string) error

View File

@ -53,7 +53,7 @@ func main() {
if err := data.SortAndAppendTo(&buf, repoURLs, nil); err != nil {
panic(err)
}
projects, err := os.OpenFile(os.Args[1], os.O_WRONLY, 0o644)
projects, err := os.OpenFile(os.Args[1], os.O_WRONLY|os.O_CREATE, 0o644)
if err != nil {
panic(err)
}

File diff suppressed because it is too large Load Diff

View File

@ -16,6 +16,8 @@ package pkg
import (
"context"
"errors"
"fmt"
"log"
"net/http"
"sync"
@ -82,7 +84,17 @@ func RunScorecards(ctx context.Context,
}
defer logStats(ctx, time.Now())
if err := repoClient.InitRepo(repo.Owner, repo.Repo); err != nil {
var e clients.ErrRepoUnavailable
if err := repoClient.InitRepo(repo.Owner, repo.Repo); errors.Is(err, &e) {
// Unable to access repo URL. Continue.
log.Printf("%s: %v", repo.URL(), err)
return repos.RepoResult{
Repo: repo.URL(),
Date: time.Now().Format("2006-01-02"),
Checks: make([]checker.CheckResult, 0),
Metadata: []string{fmt.Sprintf("%v", err)},
}
} else if err != nil {
log.Panicf("error during InitRepo: %v", err)
}
ret := repos.RepoResult{