From a018c297d9e725c2e8936ffe67a9d18d7ccf86e7 Mon Sep 17 00:00:00 2001 From: Brian McGee Date: Tue, 9 Jul 2024 13:36:26 +0100 Subject: [PATCH 1/4] fix: relative path resolution in filesystem walker Signed-off-by: Brian McGee --- walk/filesystem.go | 6 +-- walk/filesystem_test.go | 95 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 3 deletions(-) create mode 100644 walk/filesystem_test.go diff --git a/walk/filesystem.go b/walk/filesystem.go index 18cc3df..c14fc45 100644 --- a/walk/filesystem.go +++ b/walk/filesystem.go @@ -20,11 +20,11 @@ func (f filesystemWalker) Walk(_ context.Context, fn WalkFunc) error { relPathOffset := len(f.root) + 1 relPathFn := func(path string) (string, error) { - // quick optimisation for the majority of use cases - // todo check that root is a prefix in path? - if len(path) >= relPathOffset { + // quick optimization for the majority of use cases + if len(path) >= relPathOffset && path[:len(f.root)] == f.root { return path[relPathOffset:], nil } + // fallback to proper relative path resolution return filepath.Rel(f.root, path) } diff --git a/walk/filesystem_test.go b/walk/filesystem_test.go new file mode 100644 index 0000000..202f059 --- /dev/null +++ b/walk/filesystem_test.go @@ -0,0 +1,95 @@ +package walk + +import ( + "context" + "os" + "testing" + + "git.numtide.com/numtide/treefmt/test" + "github.com/stretchr/testify/require" +) + +var examplesPaths = []string{ + ".", + "elm", + "elm/elm.json", + "elm/src", + "elm/src/Main.elm", + "go", + "go/go.mod", + "go/main.go", + "haskell", + "haskell/CHANGELOG.md", + "haskell/Foo.hs", + "haskell/Main.hs", + "haskell/Nested", + "haskell/Nested/Foo.hs", + "haskell/Setup.hs", + "haskell/haskell.cabal", + "haskell/treefmt.toml", + "haskell-frontend", + "haskell-frontend/CHANGELOG.md", + "haskell-frontend/Main.hs", + "haskell-frontend/Setup.hs", + "haskell-frontend/haskell-frontend.cabal", + "html", + "html/index.html", + "html/scripts", + "html/scripts/.gitkeep", + "javascript", + "javascript/source", + "javascript/source/hello.js", + "nix", + "nix/sources.nix", + "nixpkgs.toml", + "python", + "python/main.py", + "python/requirements.txt", + "python/virtualenv_proxy.py", + "ruby", + "ruby/bundler.rb", + "rust", + "rust/Cargo.toml", + "rust/src", + "rust/src/main.rs", + "shell", + "shell/foo.sh", + "terraform", + "terraform/main.tf", + "terraform/two.tf", + "touch.toml", + "treefmt.toml", + "yaml", + "yaml/test.yaml", +} + +func TestFilesystemWalker_Walk(t *testing.T) { + tempDir := test.TempExamples(t) + + paths := make(chan string, 1) + go func() { + paths <- tempDir + close(paths) + }() + + as := require.New(t) + + walker, err := NewFilesystem(tempDir, paths) + as.NoError(err) + + idx := 0 + err = walker.Walk(context.Background(), func(file *File, err error) error { + as.Equal(examplesPaths[idx], file.RelPath) + idx += 1 + return nil + }) + as.NoError(err) + + // capture current cwd, so we can replace it after the test is finished + cwd, err := os.Getwd() + as.NoError(err) + t.Cleanup(func() { + // return to the previous working directory + as.NoError(os.Chdir(cwd)) + }) +} From ff8b1ed367717ed70e67309f1fe3c266b65b92fd Mon Sep 17 00:00:00 2001 From: Brian McGee Date: Tue, 9 Jul 2024 19:12:06 +0100 Subject: [PATCH 2/4] feat: refactor relative path function for filesystem walker Signed-off-by: Brian McGee --- walk/filesystem.go | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/walk/filesystem.go b/walk/filesystem.go index c14fc45..35aec7c 100644 --- a/walk/filesystem.go +++ b/walk/filesystem.go @@ -8,35 +8,35 @@ import ( ) type filesystemWalker struct { - root string - pathsCh chan string + root string + pathsCh chan string + relPathOffset int } func (f filesystemWalker) Root() string { return f.root } -func (f filesystemWalker) Walk(_ context.Context, fn WalkFunc) error { - relPathOffset := len(f.root) + 1 - - relPathFn := func(path string) (string, error) { - // quick optimization for the majority of use cases - if len(path) >= relPathOffset && path[:len(f.root)] == f.root { - return path[relPathOffset:], nil - } - // fallback to proper relative path resolution - return filepath.Rel(f.root, path) +func (f filesystemWalker) relPath(path string) (string, error) { + // quick optimization for the majority of use cases + if len(path) >= f.relPathOffset && path[:len(f.root)] == f.root { + return path[f.relPathOffset:], nil } + // fallback to proper relative path resolution + return filepath.Rel(f.root, path) +} +func (f filesystemWalker) Walk(_ context.Context, fn WalkFunc) error { walkFn := func(path string, info fs.FileInfo, _ error) error { if info == nil { return fmt.Errorf("no such file or directory '%s'", path) } - relPath, err := relPathFn(path) + relPath, err := f.relPath(path) if err != nil { return fmt.Errorf("failed to determine a relative path for %s: %w", path, err) } + file := File{ Path: path, RelPath: relPath, @@ -55,5 +55,9 @@ func (f filesystemWalker) Walk(_ context.Context, fn WalkFunc) error { } func NewFilesystem(root string, paths chan string) (Walker, error) { - return filesystemWalker{root, paths}, nil + return filesystemWalker{ + root: root, + pathsCh: paths, + relPathOffset: len(root) + 1, + }, nil } From 0a8ffe0c63c7f214ac93cd740992b23f9187c181 Mon Sep 17 00:00:00 2001 From: Brian McGee Date: Tue, 9 Jul 2024 19:21:09 +0100 Subject: [PATCH 3/4] feat: move filtering of directories and symlinks into walker implementations Signed-off-by: Brian McGee --- cache/cache.go | 5 ----- cli/format.go | 9 +++------ walk/filesystem.go | 6 ++++++ walk/filesystem_test.go | 19 ------------------- walk/git.go | 6 +++++- 5 files changed, 14 insertions(+), 31 deletions(-) diff --git a/cache/cache.go b/cache/cache.go index a6c743f..fe1c1ac 100644 --- a/cache/cache.go +++ b/cache/cache.go @@ -218,11 +218,6 @@ func ChangeSet(ctx context.Context, walker walk.Walker, filesCh chan<- *walk.Fil } } - // ignore symlinks - if file.Info.Mode()&os.ModeSymlink == os.ModeSymlink { - return nil - } - // open a new read tx if there isn't one in progress // we have to periodically open a new read tx to prevent writes from being blocked if tx == nil { diff --git a/cli/format.go b/cli/format.go index 7218a95..95ff37b 100644 --- a/cli/format.go +++ b/cli/format.go @@ -237,12 +237,9 @@ func (f *Format) walkFilesystem(ctx context.Context) func() error { case <-ctx.Done(): return ctx.Err() default: - // ignore symlinks and directories - if !(file.Info.IsDir() || file.Info.Mode()&os.ModeSymlink == os.ModeSymlink) { - stats.Add(stats.Traversed, 1) - stats.Add(stats.Emitted, 1) - f.filesCh <- file - } + stats.Add(stats.Traversed, 1) + stats.Add(stats.Emitted, 1) + f.filesCh <- file return nil } }) diff --git a/walk/filesystem.go b/walk/filesystem.go index 35aec7c..729a497 100644 --- a/walk/filesystem.go +++ b/walk/filesystem.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "io/fs" + "os" "path/filepath" ) @@ -32,6 +33,11 @@ func (f filesystemWalker) Walk(_ context.Context, fn WalkFunc) error { return fmt.Errorf("no such file or directory '%s'", path) } + // ignore directories and symlinks + if info.IsDir() || info.Mode()&os.ModeSymlink == os.ModeSymlink { + return nil + } + relPath, err := f.relPath(path) if err != nil { return fmt.Errorf("failed to determine a relative path for %s: %w", path, err) diff --git a/walk/filesystem_test.go b/walk/filesystem_test.go index 202f059..503ec0d 100644 --- a/walk/filesystem_test.go +++ b/walk/filesystem_test.go @@ -10,56 +10,37 @@ import ( ) var examplesPaths = []string{ - ".", - "elm", "elm/elm.json", - "elm/src", "elm/src/Main.elm", - "go", "go/go.mod", "go/main.go", - "haskell", "haskell/CHANGELOG.md", "haskell/Foo.hs", "haskell/Main.hs", - "haskell/Nested", "haskell/Nested/Foo.hs", "haskell/Setup.hs", "haskell/haskell.cabal", "haskell/treefmt.toml", - "haskell-frontend", "haskell-frontend/CHANGELOG.md", "haskell-frontend/Main.hs", "haskell-frontend/Setup.hs", "haskell-frontend/haskell-frontend.cabal", - "html", "html/index.html", - "html/scripts", "html/scripts/.gitkeep", - "javascript", - "javascript/source", "javascript/source/hello.js", - "nix", "nix/sources.nix", "nixpkgs.toml", - "python", "python/main.py", "python/requirements.txt", "python/virtualenv_proxy.py", - "ruby", "ruby/bundler.rb", - "rust", "rust/Cargo.toml", - "rust/src", "rust/src/main.rs", - "shell", "shell/foo.sh", - "terraform", "terraform/main.tf", "terraform/two.tf", "touch.toml", "treefmt.toml", - "yaml", "yaml/test.yaml", } diff --git a/walk/git.go b/walk/git.go index 58afa4e..cfe5f12 100644 --- a/walk/git.go +++ b/walk/git.go @@ -50,9 +50,13 @@ func (g *gitWalker) Walk(ctx context.Context, fn WalkFunc) error { case <-ctx.Done(): return ctx.Err() default: - path := filepath.Join(g.root, entry.Name) + // we only want regular files, not directories or symlinks + if !entry.Mode.IsRegular() { + continue + } // stat the file + path := filepath.Join(g.root, entry.Name) info, err := os.Lstat(path) file := File{ From 0953dd58f2dd1bf267dcdf58f2c496b859878c6d Mon Sep 17 00:00:00 2001 From: Brian McGee Date: Tue, 9 Jul 2024 19:45:38 +0100 Subject: [PATCH 4/4] feat: refactor relative path function for git walker Signed-off-by: Brian McGee --- walk/git.go | 51 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/walk/git.go b/walk/git.go index cfe5f12..4dc9196 100644 --- a/walk/git.go +++ b/walk/git.go @@ -13,26 +13,26 @@ import ( ) type gitWalker struct { - root string - paths chan string - repo *git.Repository + root string + paths chan string + repo *git.Repository + relPathOffset int } -func (g *gitWalker) Root() string { +func (g gitWalker) Root() string { return g.root } -func (g *gitWalker) Walk(ctx context.Context, fn WalkFunc) error { - // for quick relative paths - relPathOffset := len(g.root) + 1 - - relPathFn := func(path string) (relPath string) { - if len(path) >= relPathOffset { - relPath = path[relPathOffset:] - } - return +func (g gitWalker) relPath(path string) (string, error) { + // quick optimization for the majority of use cases + if len(path) >= g.relPathOffset && path[:len(g.root)] == g.root { + return path[g.relPathOffset:], nil } + // fallback to proper relative path resolution + return filepath.Rel(g.root, path) +} +func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error { idx, err := g.repo.Storer.Index() if err != nil { return fmt.Errorf("failed to open git index: %w", err) @@ -57,11 +57,21 @@ func (g *gitWalker) Walk(ctx context.Context, fn WalkFunc) error { // stat the file path := filepath.Join(g.root, entry.Name) + info, err := os.Lstat(path) + if err != nil { + return fmt.Errorf("failed to stat %s: %w", path, err) + } + + // determine a relative path + relPath, err := g.relPath(path) + if err != nil { + return fmt.Errorf("failed to determine a relative path for %s: %w", path, err) + } file := File{ Path: path, - RelPath: relPathFn(path), + RelPath: relPath, Info: info, } @@ -97,9 +107,9 @@ func (g *gitWalker) Walk(ctx context.Context, fn WalkFunc) error { return nil } - relPath, err := filepath.Rel(g.root, path) + relPath, err := g.relPath(path) if err != nil { - return err + return fmt.Errorf("failed to determine a relative path for %s: %w", path, err) } if _, ok := cache[relPath]; !ok { @@ -109,7 +119,7 @@ func (g *gitWalker) Walk(ctx context.Context, fn WalkFunc) error { file := File{ Path: path, - RelPath: relPathFn(path), + RelPath: relPath, Info: info, } @@ -125,5 +135,10 @@ func NewGit(root string, paths chan string) (Walker, error) { if err != nil { return nil, fmt.Errorf("failed to open git repo: %w", err) } - return &gitWalker{root, paths, repo}, nil + return &gitWalker{ + root: root, + paths: paths, + repo: repo, + relPathOffset: len(root) + 1, + }, nil }