From 93761ebaa16e2075b3adadc5c81988e173706bb6 Mon Sep 17 00:00:00 2001 From: naveen <172697+naveensrinivasan@users.noreply.github.com> Date: Fri, 5 Mar 2021 21:18:45 +0000 Subject: [PATCH] Feat- Parameterize cache folder in gitcache The cache temp folder was hardcoded to use the current working directory. With this change it uses the directory that is configured in "TEMP_DIR". The TEMP_DIR would be an emptyDir volume in k8s. Included basic e2e tests for the HTTP endpoint. --- .../{e2e_test.go => e2e_bucketcache_test.go} | 0 gitcache/e2e/e2e_http_test.go | 54 +++++++++++++++++++ gitcache/main.go | 13 +++-- gitcache/pkg/repourl.go | 4 ++ gitcache/pkg/service.go | 15 ++++-- gitcache/pkg/storagepath.go | 19 +++---- 6 files changed, 85 insertions(+), 20 deletions(-) rename gitcache/e2e/{e2e_test.go => e2e_bucketcache_test.go} (100%) create mode 100644 gitcache/e2e/e2e_http_test.go diff --git a/gitcache/e2e/e2e_test.go b/gitcache/e2e/e2e_bucketcache_test.go similarity index 100% rename from gitcache/e2e/e2e_test.go rename to gitcache/e2e/e2e_bucketcache_test.go diff --git a/gitcache/e2e/e2e_http_test.go b/gitcache/e2e/e2e_http_test.go new file mode 100644 index 00000000..ffd3c1e3 --- /dev/null +++ b/gitcache/e2e/e2e_http_test.go @@ -0,0 +1,54 @@ +// Copyright 2020 Security Scorecard Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package e2e + +import ( + "bytes" + "net/http" + + . "github.com/onsi/ginkgo" + . 
"github.com/onsi/gomega" +) + +var _ = Describe("E2E TEST:HTTP endpoint-gitcache", func() { + url := "http://localhost:8080/" + Context("E2E TEST:Validating http endpoint for the cache", func() { + It("Should be able to fetch a valid git repo", func() { + jsonStr := []byte(`{"url":"http://github.com/ossf/scorecard"}`) + req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonStr)) + Expect(err).Should(BeNil()) + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + panic(err) + } + defer resp.Body.Close() + Expect(resp.StatusCode).Should(BeEquivalentTo(200)) + }) + It("Should fail when an invalid git repo is passed", func() { + jsonStr := []byte(`{"url":"http://iiiiaaa.imt/bar/scorecard"}`) + req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonStr)) + Expect(err).Should(BeNil()) + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + panic(err) + } + defer resp.Body.Close() + Expect(resp.StatusCode).Should(BeEquivalentTo(500)) + }) + }) +}) diff --git a/gitcache/main.go b/gitcache/main.go index a77677d8..3b7ec378 100644 --- a/gitcache/main.go +++ b/gitcache/main.go @@ -30,8 +30,8 @@ type cache struct { } var ( - blob string - logf func(s string, f ...interface{}) + blob, tempDir string + logf func(s string, f ...interface{}) ) func handler(w http.ResponseWriter, r *http.Request) { @@ -43,7 +43,7 @@ func handler(w http.ResponseWriter, r *http.Request) { http.Error(w, err.Error(), http.StatusBadRequest) return } - cache, err := pkg.NewCacheService(blob, logf) + cache, err := pkg.NewCacheService(blob, tempDir, logf) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return @@ -77,6 +77,13 @@ func main() { if blob == "" { log.Panic("BLOB_URL env is not set.") } + // tempDir is the storage space for archiving the repository. + // not using the tempfs https://en.wikipedia.org/wiki/Tmpfs as it is in memory and some of the + // repositories can be in large. 
+ tempDir = os.Getenv("TEMP_DIR") + if tempDir == "" { + log.Panic("TEMP_DIR env is not set.") + } sugar.Info("BLOB_URL ", blob) // no need to lock this as it being written only within this method. logf = sugar.Infof diff --git a/gitcache/pkg/repourl.go b/gitcache/pkg/repourl.go index a28af551..bc96346f 100644 --- a/gitcache/pkg/repourl.go +++ b/gitcache/pkg/repourl.go @@ -32,6 +32,10 @@ func (r *RepoURL) String() string { return fmt.Sprintf("%s/%s/%s", r.Host, r.Owner, r.Repo) } +func (r *RepoURL) NonURLString() string { + return fmt.Sprintf("%s-%s-%s", r.Host, r.Owner, r.Repo) +} + func (r *RepoURL) Set(s string) error { // Allow skipping scheme for ease-of-use, default to https. if !strings.Contains(s, "://") { diff --git a/gitcache/pkg/service.go b/gitcache/pkg/service.go index 8e2e8789..5bdeebcb 100644 --- a/gitcache/pkg/service.go +++ b/gitcache/pkg/service.go @@ -14,6 +14,7 @@ import ( type cacheService struct { BlobURL string + TempDir string Logf func(s string, f ...interface{}) } @@ -24,16 +25,20 @@ type CacheService interface { } // NewCacheService returns new CacheService. -func NewCacheService(blobURL string, logf func(s string, f ...interface{})) (CacheService, error) { +func NewCacheService(blobURL, tempDir string, logf func(s string, f ...interface{})) (CacheService, error) { if blobURL == "" { return nil, errors.New("BLOB_URL env cannot be empty") } + if tempDir == "" { + return nil, errors.New("TEMP_DIR env cannot be empty") + } if logf == nil { return nil, errors.New("Log function cannot be nil") } return cacheService{ BlobURL: blobURL, Logf: logf, + TempDir: tempDir, }, nil } @@ -52,7 +57,7 @@ func (c cacheService) UpdateCache(s string) error { } // gets all the path configuration. 
- storage, err := NewStoragePath(repo) + storage, err := NewStoragePath(repo, c.TempDir) if err != nil { return errors.Wrapf(err, "unable get storage") } @@ -63,7 +68,7 @@ func (c cacheService) UpdateCache(s string) error { // checks if there is an existing git repo in the bucket if data, exists := bucket.Get(storage.BlobGitFolderPath); exists { c.Logf("bucket ", c.BlobURL, " already has git folder") - gitRepo, alreadyUptoDate, err = fetchGitRepo(&storage, data) + gitRepo, alreadyUptoDate, err = fetchGitRepo(&storage, data, repo) } else { c.Logf("bucket ", c.BlobURL, " does not have a git folder") gitRepo, err = cloneGitRepo(&storage, repo) @@ -163,7 +168,7 @@ func archiveFolder(folderToArchive, archivePath string) ([]byte, error) { } // fetchGitRepo fetches the git repo. Returns git repository, bool if it is already up to date and error. -func fetchGitRepo(storagePath *StoragePath, data []byte) (*git.Repository, bool, error) { +func fetchGitRepo(storagePath *StoragePath, data []byte, repo RepoURL) (*git.Repository, bool, error) { const fileMode os.FileMode = 0600 if err := ioutil.WriteFile("gitfolder.tar.gz", data, fileMode); err != nil { return nil, false, errors.Wrapf(err, "unable write targz file %s", storagePath.BlobArchiveFile) @@ -172,7 +177,7 @@ func fetchGitRepo(storagePath *StoragePath, data []byte) (*git.Repository, bool, return nil, false, errors.Wrapf(err, "unable unarchive targz file %s in %s", storagePath.BlobArchiveFile, storagePath.BlobArchiveDir) } - p := path.Join(storagePath.GitDir, storagePath.GitDir) + p := path.Join(storagePath.GitDir, repo.NonURLString()) gitRepo, err := git.PlainOpen(p) if err != nil { return nil, false, errors.Wrapf(err, "unable to open the git dir %s", p) diff --git a/gitcache/pkg/storagepath.go b/gitcache/pkg/storagepath.go index b8ab99b2..9c9fa0ca 100644 --- a/gitcache/pkg/storagepath.go +++ b/gitcache/pkg/storagepath.go @@ -15,7 +15,6 @@ package pkg import ( "fmt" - "io/ioutil" "os" "path" @@ -36,24 +35,20 @@ type 
StoragePath struct { } // NewStoragePath returns path for blob, archiving and also creates temp directories for archiving. -func NewStoragePath(repo RepoURL) (StoragePath, error) { - cwd, err := os.Getwd() - if err != nil { - return StoragePath{}, errors.Wrap(err, "unable to the current working dir") - } - +func NewStoragePath(repo RepoURL, tempDir string) (StoragePath, error) { bucketPath := fmt.Sprintf("gitcache/%s/%s/%s", repo.Host, repo.Owner, repo.Repo) - gitDir := repo.Host + repo.Owner + repo.Repo + gitDir := path.Join(tempDir, repo.NonURLString()) - err = os.Mkdir(gitDir, 0755) + err := os.Mkdir(gitDir, 0755) if err != nil { - return StoragePath{}, errors.Wrapf(err, "unable to create temp directory %s", gitDir) + return StoragePath{}, errors.Wrapf(err, "unable to temp directory %s", gitDir) } gitTarPath := path.Join(gitDir, "gitfolder.tar.gz") - blobArchiveDir, err := ioutil.TempDir(cwd, gitDir+"tar") + blobArchiveDir := gitDir + "tar" + err = os.Mkdir(blobArchiveDir, 0755) if err != nil { - return StoragePath{}, errors.Wrapf(err, "unable to create temp directory %s", gitDir+"tar") + return StoragePath{}, errors.Wrapf(err, "unable to create temp directory for blob archive %s", blobArchiveDir) } blobArchivePath := path.Join(blobArchiveDir, fmt.Sprintf("%s.tar.gz", repo.Repo))