Merge pull request #1 from numtide/feat/improve-stdin-and-path-processing

Streaming processing of paths passed in from stdin
This commit is contained in:
Brian McGee 2024-05-08 08:35:47 +01:00 committed by GitHub
commit 9328b466cc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 123 additions and 89 deletions

View File

@ -207,10 +207,28 @@ func updateCache(ctx context.Context) func() error {
func walkFilesystem(ctx context.Context) func() error {
return func() error {
paths := Cli.Paths
eg, ctx := errgroup.WithContext(ctx)
pathsCh := make(chan string, BatchSize)
// we read paths from stdin if the cli flag has been set and no paths were provided as cli args
if len(paths) == 0 && Cli.Stdin {
walkPaths := func() error {
defer close(pathsCh)
var idx int
for idx < len(Cli.Paths) {
select {
case <-ctx.Done():
return ctx.Err()
default:
pathsCh <- Cli.Paths[idx]
idx += 1
}
}
return nil
}
walkStdin := func() error {
defer close(pathsCh)
// determine the current working directory
cwd, err := os.Getwd()
@ -220,20 +238,35 @@ func walkFilesystem(ctx context.Context) func() error {
// read in all the paths
scanner := bufio.NewScanner(os.Stdin)
for scanner.Scan() {
path := scanner.Text()
if !strings.HasPrefix(path, "/") {
// append the cwd
path = filepath.Join(cwd, path)
}
// append the fully qualified path to our paths list
paths = append(paths, path)
for scanner.Scan() {
select {
case <-ctx.Done():
return ctx.Err()
default:
path := scanner.Text()
if !strings.HasPrefix(path, "/") {
// append the cwd
path = filepath.Join(cwd, path)
}
pathsCh <- path
}
}
return nil
}
if len(Cli.Paths) > 0 {
eg.Go(walkPaths)
} else if Cli.Stdin {
eg.Go(walkStdin)
} else {
// no explicit paths to process, so we only need to process root
pathsCh <- Cli.TreeRoot
close(pathsCh)
}
// create a filesystem walker
walker, err := walk.New(Cli.Walk, Cli.TreeRoot, paths)
walker, err := walk.New(Cli.Walk, Cli.TreeRoot, pathsCh)
if err != nil {
return fmt.Errorf("failed to create walker: %w", err)
}

View File

@ -488,7 +488,7 @@ func TestPathsArg(t *testing.T) {
// specify some explicit paths
_, err = cmd(t, "-C", tempDir, "-c", "elm/elm.json", "haskell/Nested/Foo.hs")
as.NoError(err)
assertStats(t, as, 4, 4, 4, 0)
assertStats(t, as, 2, 2, 2, 0)
// specify a bad path
_, err = cmd(t, "-C", tempDir, "-c", "elm/elm.json", "haskell/Nested/Bar.hs")
@ -548,7 +548,7 @@ go/main.go
_, err = cmd(t, "-C", tempDir, "--stdin")
as.NoError(err)
assertStats(t, as, 6, 6, 6, 0)
assertStats(t, as, 3, 3, 3, 0)
}
func TestDeterministicOrderingInPipeline(t *testing.T) {

View File

@ -3,13 +3,12 @@ package walk
import (
"context"
"io/fs"
"os"
"path/filepath"
)
type filesystemWalker struct {
root string
paths []string
root string
pathsCh chan string
}
func (f filesystemWalker) Root() string {
@ -35,23 +34,8 @@ func (f filesystemWalker) Walk(_ context.Context, fn WalkFunc) error {
return fn(&file, err)
}
if len(f.paths) == 0 {
return filepath.Walk(f.root, walkFn)
}
for _, path := range f.paths {
info, err := os.Stat(path)
if err = filepath.Walk(path, walkFn); err != nil {
return err
}
file := File{
Path: path,
RelPath: relPathFn(path),
Info: info,
}
if err = fn(&file, err); err != nil {
for path := range f.pathsCh {
if err := filepath.Walk(path, walkFn); err != nil {
return err
}
}
@ -59,6 +43,6 @@ func (f filesystemWalker) Walk(_ context.Context, fn WalkFunc) error {
return nil
}
func NewFilesystem(root string, paths []string) (Walker, error) {
func NewFilesystem(root string, paths chan string) (Walker, error) {
return filesystemWalker{root, paths}, nil
}

View File

@ -2,21 +2,19 @@ package walk
import (
"context"
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"github.com/charmbracelet/log"
"github.com/go-git/go-git/v5/plumbing/format/index"
"github.com/go-git/go-git/v5"
)
type gitWalker struct {
root string
paths []string
paths chan string
repo *git.Repository
}
@ -40,66 +38,85 @@ func (g *gitWalker) Walk(ctx context.Context, fn WalkFunc) error {
return fmt.Errorf("failed to open git index: %w", err)
}
if len(g.paths) > 0 {
for _, path := range g.paths {
// cache in-memory whether a path is present in the git index
var cache map[string]bool
err = filepath.Walk(path, func(path string, info fs.FileInfo, err error) error {
if info.IsDir() {
return nil
for path := range g.paths {
if path == g.root {
// we can just iterate the index entries
for _, entry := range idx.Entries {
select {
case <-ctx.Done():
return ctx.Err()
default:
path := filepath.Join(g.root, entry.Name)
// stat the file
info, err := os.Lstat(path)
file := File{
Path: path,
RelPath: relPathFn(path),
Info: info,
}
if err = fn(&file, err); err != nil {
return err
}
}
}
continue
}
relPath, err := filepath.Rel(g.root, path)
if err != nil {
return err
}
// otherwise we ensure the git index entries are cached and then check if they are in the git index
if cache == nil {
cache = make(map[string]bool)
for _, entry := range idx.Entries {
cache[entry.Name] = true
}
}
if _, err = idx.Entry(relPath); errors.Is(err, index.ErrEntryNotFound) {
// we skip this path as it's not staged
log.Debugf("Path not found in git index, skipping: %v, %v", relPath, path)
return nil
}
relPath, err := filepath.Rel(g.root, path)
if err != nil {
return fmt.Errorf("failed to find relative path for %v: %w", path, err)
}
file := File{
Path: path,
RelPath: relPathFn(path),
Info: info,
}
_, ok := cache[relPath]
if !(path == g.root || ok) {
log.Debugf("path %v not found in git index, skipping", path)
continue
}
return fn(&file, err)
})
return filepath.Walk(path, func(path string, info fs.FileInfo, err error) error {
if info.IsDir() {
return nil
}
relPath, err := filepath.Rel(g.root, path)
if err != nil {
return err
}
}
} else {
for _, entry := range idx.Entries {
select {
case <-ctx.Done():
return ctx.Err()
default:
path := filepath.Join(g.root, entry.Name)
// stat the file
info, err := os.Lstat(path)
file := File{
Path: path,
RelPath: relPathFn(path),
Info: info,
}
if err = fn(&file, err); err != nil {
return err
}
if _, ok := cache[relPath]; !ok {
log.Debugf("path %v not found in git index, skipping", path)
return nil
}
}
file := File{
Path: path,
RelPath: relPathFn(path),
Info: info,
}
return fn(&file, err)
})
}
return nil
}
func NewGit(root string, paths []string) (Walker, error) {
func NewGit(root string, paths chan string) (Walker, error) {
repo, err := git.PlainOpen(root)
if err != nil {
return nil, fmt.Errorf("failed to open git repo: %w", err)

View File

@ -31,24 +31,24 @@ type Walker interface {
Walk(ctx context.Context, fn WalkFunc) error
}
func New(walkerType Type, root string, paths []string) (Walker, error) {
func New(walkerType Type, root string, pathsCh chan string) (Walker, error) {
switch walkerType {
case Git:
return NewGit(root, paths)
return NewGit(root, pathsCh)
case Auto:
return Detect(root, paths)
return Detect(root, pathsCh)
case Filesystem:
return NewFilesystem(root, paths)
return NewFilesystem(root, pathsCh)
default:
return nil, fmt.Errorf("unknown walker type: %v", walkerType)
}
}
func Detect(root string, paths []string) (Walker, error) {
func Detect(root string, pathsCh chan string) (Walker, error) {
// for now, we keep it simple and try git first, filesystem second
w, err := NewGit(root, paths)
w, err := NewGit(root, pathsCh)
if err == nil {
return w, err
}
return NewFilesystem(root, paths)
return NewFilesystem(root, pathsCh)
}