diff --git a/cli/format.go b/cli/format.go index bd85ae3..0a23387 100644 --- a/cli/format.go +++ b/cli/format.go @@ -207,10 +207,28 @@ func updateCache(ctx context.Context) func() error { func walkFilesystem(ctx context.Context) func() error { return func() error { - paths := Cli.Paths + eg, ctx := errgroup.WithContext(ctx) + pathsCh := make(chan string, BatchSize) - // we read paths from stdin if the cli flag has been set and no paths were provided as cli args - if len(paths) == 0 && Cli.Stdin { + walkPaths := func() error { + defer close(pathsCh) + + var idx int + for idx < len(Cli.Paths) { + select { + case <-ctx.Done(): + return ctx.Err() + default: + pathsCh <- Cli.Paths[idx] + idx += 1 + } + } + + return nil + } + + walkStdin := func() error { + defer close(pathsCh) // determine the current working directory cwd, err := os.Getwd() @@ -220,20 +238,35 @@ func walkFilesystem(ctx context.Context) func() error { // read in all the paths scanner := bufio.NewScanner(os.Stdin) - for scanner.Scan() { - path := scanner.Text() - if !strings.HasPrefix(path, "/") { - // append the cwd - path = filepath.Join(cwd, path) - } - // append the fully qualified path to our paths list - paths = append(paths, path) + for scanner.Scan() { + select { + case <-ctx.Done(): + return ctx.Err() + default: + path := scanner.Text() + if !strings.HasPrefix(path, "/") { + // append the cwd + path = filepath.Join(cwd, path) + } + pathsCh <- path + } } + return nil + } + + if len(Cli.Paths) > 0 { + eg.Go(walkPaths) + } else if Cli.Stdin { + eg.Go(walkStdin) + } else { + // no explicit paths to process, so we only need to process root + pathsCh <- Cli.TreeRoot + close(pathsCh) } // create a filesystem walker - walker, err := walk.New(Cli.Walk, Cli.TreeRoot, paths) + walker, err := walk.New(Cli.Walk, Cli.TreeRoot, pathsCh) if err != nil { return fmt.Errorf("failed to create walker: %w", err) } diff --git a/cli/format_test.go b/cli/format_test.go index cd3d990..7a4b8e8 100644 --- a/cli/format_test.go +++ b/cli/format_test.go @@ -488,7 +488,7 @@ func TestPathsArg(t *testing.T) { // specify some explicit paths _, err = cmd(t, "-C", tempDir, "-c", "elm/elm.json", "haskell/Nested/Foo.hs") as.NoError(err) - assertStats(t, as, 4, 4, 4, 0) + assertStats(t, as, 2, 2, 2, 0) // specify a bad path _, err = cmd(t, "-C", tempDir, "-c", "elm/elm.json", "haskell/Nested/Bar.hs") @@ -548,7 +548,7 @@ go/main.go _, err = cmd(t, "-C", tempDir, "--stdin") as.NoError(err) - assertStats(t, as, 6, 6, 6, 0) + assertStats(t, as, 3, 3, 3, 0) } func TestDeterministicOrderingInPipeline(t *testing.T) { diff --git a/walk/filesystem.go b/walk/filesystem.go index 36e9166..c98390f 100644 --- a/walk/filesystem.go +++ b/walk/filesystem.go @@ -3,13 +3,12 @@ package walk import ( "context" "io/fs" - "os" "path/filepath" ) type filesystemWalker struct { - root string - paths []string + root string + pathsCh chan string } func (f filesystemWalker) Root() string { @@ -35,23 +34,8 @@ func (f filesystemWalker) Walk(_ context.Context, fn WalkFunc) error { return fn(&file, err) } - if len(f.paths) == 0 { - return filepath.Walk(f.root, walkFn) - } - - for _, path := range f.paths { - info, err := os.Stat(path) - if err = filepath.Walk(path, walkFn); err != nil { - return err - } - - file := File{ - Path: path, - RelPath: relPathFn(path), - Info: info, - } - - if err = fn(&file, err); err != nil { + for path := range f.pathsCh { + if err := filepath.Walk(path, walkFn); err != nil { return err } } @@ -59,6 +43,6 @@ func (f filesystemWalker) Walk(_ context.Context, fn WalkFunc) error { return nil } -func NewFilesystem(root string, paths []string) (Walker, error) { +func NewFilesystem(root string, paths chan string) (Walker, error) { return filesystemWalker{root, paths}, nil } diff --git a/walk/git.go b/walk/git.go index 32a740e..f5e60a9 100644 --- a/walk/git.go +++ b/walk/git.go @@ -2,21 +2,19 @@ package walk import ( "context" - "errors" "fmt" "io/fs" "os" "path/filepath" "github.com/charmbracelet/log" - "github.com/go-git/go-git/v5/plumbing/format/index" "github.com/go-git/go-git/v5" ) type gitWalker struct { root string - paths []string + paths chan string repo *git.Repository } @@ -40,66 +38,85 @@ func (g *gitWalker) Walk(ctx context.Context, fn WalkFunc) error { return fmt.Errorf("failed to open git index: %w", err) } - if len(g.paths) > 0 { - for _, path := range g.paths { + // cache in-memory whether a path is present in the git index + var cache map[string]bool - err = filepath.Walk(path, func(path string, info fs.FileInfo, err error) error { - if info.IsDir() { - return nil + for path := range g.paths { + + if path == g.root { + // we can just iterate the index entries + for _, entry := range idx.Entries { + select { + case <-ctx.Done(): + return ctx.Err() + default: + path := filepath.Join(g.root, entry.Name) + + // stat the file + info, err := os.Lstat(path) + + file := File{ + Path: path, + RelPath: relPathFn(path), + Info: info, + } + + if err = fn(&file, err); err != nil { + return err + } } + } + continue + } - relPath, err := filepath.Rel(g.root, path) - if err != nil { - return err - } + // otherwise we ensure the git index entries are cached and then check if they are in the git index + if cache == nil { + cache = make(map[string]bool) + for _, entry := range idx.Entries { + cache[entry.Name] = true + } + } - if _, err = idx.Entry(relPath); errors.Is(err, index.ErrEntryNotFound) { - // we skip this path as it's not staged - log.Debugf("Path not found in git index, skipping: %v, %v", relPath, path) - return nil - } + relPath, err := filepath.Rel(g.root, path) + if err != nil { + return fmt.Errorf("failed to find relative path for %v: %w", path, err) + } - file := File{ - Path: path, - RelPath: relPathFn(path), - Info: info, - } + _, ok := cache[relPath] + if !(path == g.root || ok) { + log.Debugf("path %v not found in git index, skipping", path) + continue + } - return fn(&file, err) - }) + return filepath.Walk(path, func(path string, info fs.FileInfo, err error) error { + if info.IsDir() { + return nil + } + + relPath, err := filepath.Rel(g.root, path) if err != nil { return err } - } - } else { - for _, entry := range idx.Entries { - select { - case <-ctx.Done(): - return ctx.Err() - default: - path := filepath.Join(g.root, entry.Name) - - // stat the file - info, err := os.Lstat(path) - - file := File{ - Path: path, - RelPath: relPathFn(path), - Info: info, - } - - if err = fn(&file, err); err != nil { - return err - } + if _, ok := cache[relPath]; !ok { + log.Debugf("path %v not found in git index, skipping", path) + return nil } - } + + file := File{ + Path: path, + RelPath: relPathFn(path), + Info: info, + } + + return fn(&file, err) + }) } return nil } -func NewGit(root string, paths []string) (Walker, error) { +func NewGit(root string, paths chan string) (Walker, error) { repo, err := git.PlainOpen(root) if err != nil { return nil, fmt.Errorf("failed to open git repo: %w", err) diff --git a/walk/walker.go b/walk/walker.go index 0309094..6ff1091 100644 --- a/walk/walker.go +++ b/walk/walker.go @@ -31,24 +31,24 @@ type Walker interface { Walk(ctx context.Context, fn WalkFunc) error } -func New(walkerType Type, root string, paths []string) (Walker, error) { +func New(walkerType Type, root string, pathsCh chan string) (Walker, error) { switch walkerType { case Git: - return NewGit(root, paths) + return NewGit(root, pathsCh) case Auto: - return Detect(root, paths) + return Detect(root, pathsCh) case Filesystem: - return NewFilesystem(root, paths) + return NewFilesystem(root, pathsCh) default: return nil, fmt.Errorf("unknown walker type: %v", walkerType) } } -func Detect(root string, paths []string) (Walker, error) { +func Detect(root string, pathsCh chan string) (Walker, error) { // for now, we keep it simple and try git first, filesystem second - w, err := NewGit(root, paths) + w, err := NewGit(root, pathsCh) if err == nil { return w, err } - return NewFilesystem(root, paths) + return NewFilesystem(root, pathsCh) }