mirror of
https://github.com/numtide/treefmt.git
synced 2024-10-05 21:17:37 +03:00
Merge pull request #301 from numtide/feat/better-concurrency-model
feat: simplify pipeline model
This commit is contained in:
commit
06f15f4237
152
cli/format.go
152
cli/format.go
@ -30,11 +30,11 @@ const (
|
||||
)
|
||||
|
||||
var (
|
||||
globalExcludes []glob.Glob
|
||||
formatters map[string]*format.Formatter
|
||||
pipelines map[string]*format.Pipeline
|
||||
filesCh chan *walk.File
|
||||
processedCh chan *walk.File
|
||||
excludes []glob.Glob
|
||||
formatters map[string]*format.Formatter
|
||||
|
||||
filesCh chan *walk.File
|
||||
processedCh chan *walk.File
|
||||
|
||||
ErrFailOnChange = errors.New("unexpected changes detected, --fail-on-change is enabled")
|
||||
)
|
||||
@ -73,21 +73,18 @@ func (f *Format) Run() (err error) {
|
||||
}
|
||||
|
||||
// compile global exclude globs
|
||||
if globalExcludes, err = format.CompileGlobs(cfg.Global.Excludes); err != nil {
|
||||
if excludes, err = format.CompileGlobs(cfg.Global.Excludes); err != nil {
|
||||
return fmt.Errorf("failed to compile global excludes: %w", err)
|
||||
}
|
||||
|
||||
// initialise pipelines
|
||||
pipelines = make(map[string]*format.Pipeline)
|
||||
// initialise formatters
|
||||
formatters = make(map[string]*format.Formatter)
|
||||
|
||||
// iterate the formatters in lexicographical order
|
||||
for _, name := range cfg.Names {
|
||||
// init formatter
|
||||
formatterCfg := cfg.Formatters[name]
|
||||
formatter, err := format.NewFormatter(name, Cli.TreeRoot, formatterCfg, globalExcludes)
|
||||
for name, formatterCfg := range cfg.Formatters {
|
||||
formatter, err := format.NewFormatter(name, Cli.TreeRoot, formatterCfg, excludes)
|
||||
|
||||
if errors.Is(err, format.ErrCommandNotFound) && Cli.AllowMissingFormatter {
|
||||
log.Debugf("formatter not found: %v", name)
|
||||
log.Debugf("formatter command not found: %v", name)
|
||||
continue
|
||||
} else if err != nil {
|
||||
return fmt.Errorf("%w: failed to initialise formatter: %v", err, name)
|
||||
@ -95,23 +92,6 @@ func (f *Format) Run() (err error) {
|
||||
|
||||
// store formatter by name
|
||||
formatters[name] = formatter
|
||||
|
||||
// If no pipeline is configured, we add the formatter to a nominal pipeline of size 1 with the key being the
|
||||
// formatter's name. If a pipeline is configured, we add the formatter to a pipeline keyed by
|
||||
// 'p:<pipeline_name>' in which it is sorted by priority.
|
||||
if formatterCfg.Pipeline == "" {
|
||||
pipeline := format.Pipeline{}
|
||||
pipeline.Add(formatter)
|
||||
pipelines[name] = &pipeline
|
||||
} else {
|
||||
key := fmt.Sprintf("p:%s", formatterCfg.Pipeline)
|
||||
pipeline, ok := pipelines[key]
|
||||
if !ok {
|
||||
pipeline = &format.Pipeline{}
|
||||
pipelines[key] = pipeline
|
||||
}
|
||||
pipeline.Add(formatter)
|
||||
}
|
||||
}
|
||||
|
||||
// open the cache
|
||||
@ -304,37 +284,43 @@ func walkFilesystem(ctx context.Context) func() error {
|
||||
func applyFormatters(ctx context.Context) func() error {
|
||||
// create our own errgroup for concurrent formatting tasks
|
||||
fg, ctx := errgroup.WithContext(ctx)
|
||||
// simple optimization to avoid too many concurrent formatting tasks
|
||||
// we can queue them up faster than the formatters can process them, this paces things a bit
|
||||
fg.SetLimit(runtime.NumCPU())
|
||||
|
||||
// pre-initialise batches keyed by pipeline
|
||||
batches := make(map[string][]*walk.File)
|
||||
for key := range pipelines {
|
||||
batches[key] = make([]*walk.File, 0, BatchSize)
|
||||
}
|
||||
// track batches of formatting task based on their batch keys, which are determined by the unique sequence of
|
||||
// formatters which should be applied to their respective files
|
||||
batches := make(map[string][]*format.Task)
|
||||
|
||||
// for a given pipeline key, add the provided file to the current batch and trigger a format if the batch size has
|
||||
// been reached
|
||||
tryApply := func(key string, file *walk.File) {
|
||||
// append to batch
|
||||
batches[key] = append(batches[key], file)
|
||||
|
||||
// check if the batch is full
|
||||
apply := func(key string, flush bool) {
|
||||
// lookup the batch and exit early if it's empty
|
||||
batch := batches[key]
|
||||
if len(batch) == BatchSize {
|
||||
// get the pipeline
|
||||
pipeline := pipelines[key]
|
||||
if len(batch) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// copy the batch
|
||||
files := make([]*walk.File, len(batch))
|
||||
copy(files, batch)
|
||||
// process the batch if it's full, or we've been asked to flush partial batches
|
||||
if flush || len(batch) == BatchSize {
|
||||
|
||||
// apply to the pipeline
|
||||
// copy the batch as we re-use it for the next batch
|
||||
tasks := make([]*format.Task, len(batch))
|
||||
copy(tasks, batch)
|
||||
|
||||
// asynchronously apply the sequence formatters to the batch
|
||||
fg.Go(func() error {
|
||||
if err := pipeline.Apply(ctx, files); err != nil {
|
||||
return err
|
||||
// iterate the formatters, applying them in sequence to the batch of tasks
|
||||
// we get the formatters list from the first task since they have all the same formatters list
|
||||
for _, f := range tasks[0].Formatters {
|
||||
if err := f.Apply(ctx, tasks); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, path := range files {
|
||||
processedCh <- path
|
||||
|
||||
// pass each file to the processed channel
|
||||
for _, task := range tasks {
|
||||
processedCh <- task.File
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
@ -343,25 +329,12 @@ func applyFormatters(ctx context.Context) func() error {
|
||||
}
|
||||
}
|
||||
|
||||
// format any partial batches
|
||||
flushBatches := func() {
|
||||
for key, pipeline := range pipelines {
|
||||
|
||||
batch := batches[key]
|
||||
pipeline := pipeline // capture for closure
|
||||
|
||||
if len(batch) > 0 {
|
||||
fg.Go(func() error {
|
||||
if err := pipeline.Apply(ctx, batch); err != nil {
|
||||
return fmt.Errorf("%s failure: %w", key, err)
|
||||
}
|
||||
for _, path := range batch {
|
||||
processedCh <- path
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
}
|
||||
tryApply := func(task *format.Task) {
|
||||
// append to batch
|
||||
key := task.BatchKey
|
||||
batches[key] = append(batches[key], task)
|
||||
// try to apply
|
||||
apply(key, false)
|
||||
}
|
||||
|
||||
return func() error {
|
||||
@ -370,35 +343,38 @@ func applyFormatters(ctx context.Context) func() error {
|
||||
close(processedCh)
|
||||
}()
|
||||
|
||||
// iterate the files channel, checking if any pipeline wants it, and attempting to apply if so.
|
||||
// iterate the files channel
|
||||
for file := range filesCh {
|
||||
var matches []string
|
||||
|
||||
for key, pipeline := range pipelines {
|
||||
if !pipeline.Wants(file) {
|
||||
continue
|
||||
// determine a list of formatters that are interested in file
|
||||
var matches []*format.Formatter
|
||||
for _, formatter := range formatters {
|
||||
if formatter.Wants(file) {
|
||||
matches = append(matches, formatter)
|
||||
}
|
||||
matches = append(matches, key)
|
||||
tryApply(key, file)
|
||||
}
|
||||
switch len(matches) {
|
||||
case 0:
|
||||
log.Debugf("no match found: %s", file.Path)
|
||||
|
||||
if len(matches) == 0 {
|
||||
// no match, so we send it direct to the processed channel
|
||||
log.Debugf("no match found: %s", file.Path)
|
||||
processedCh <- file
|
||||
case 1:
|
||||
} else {
|
||||
// record the match
|
||||
stats.Add(stats.Matched, 1)
|
||||
default:
|
||||
return fmt.Errorf("path '%s' matched multiple formatters/pipelines %v", file.Path, matches)
|
||||
// create a new format task, add it to a batch based on its batch key and try to apply if the batch is full
|
||||
task := format.NewTask(file, matches)
|
||||
tryApply(&task)
|
||||
}
|
||||
}
|
||||
|
||||
// flush any partial batches which remain
|
||||
flushBatches()
|
||||
for key := range batches {
|
||||
apply(key, true)
|
||||
}
|
||||
|
||||
// wait for all outstanding formatting tasks to complete
|
||||
if err := fg.Wait(); err != nil {
|
||||
return fmt.Errorf("pipeline processing failure: %w", err)
|
||||
return fmt.Errorf("formatting failure: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
@ -196,37 +196,6 @@ func TestIncludesAndExcludes(t *testing.T) {
|
||||
assertStats(t, as, 31, 31, 2, 0)
|
||||
}
|
||||
|
||||
func TestMatchingMultiplePipelines(t *testing.T) {
|
||||
as := require.New(t)
|
||||
|
||||
tempDir := test.TempExamples(t)
|
||||
configPath := tempDir + "/multiple.toml"
|
||||
|
||||
cfg := config2.Config{
|
||||
Formatters: map[string]*config2.Formatter{
|
||||
"echo": {
|
||||
Command: "echo",
|
||||
Includes: []string{"*"},
|
||||
},
|
||||
"touch": {
|
||||
Command: "touch",
|
||||
Includes: []string{"*"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
test.WriteConfig(t, configPath, cfg)
|
||||
_, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
|
||||
as.ErrorContains(err, "matched multiple formatters/pipelines")
|
||||
as.ErrorContains(err, "echo")
|
||||
as.ErrorContains(err, "touch")
|
||||
|
||||
// run with only one formatter
|
||||
test.WriteConfig(t, configPath, cfg)
|
||||
_, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "-f", "echo")
|
||||
as.NoError(err)
|
||||
}
|
||||
|
||||
func TestCache(t *testing.T) {
|
||||
as := require.New(t)
|
||||
|
||||
@ -604,25 +573,23 @@ func TestDeterministicOrderingInPipeline(t *testing.T) {
|
||||
|
||||
test.WriteConfig(t, configPath, config2.Config{
|
||||
Formatters: map[string]*config2.Formatter{
|
||||
// a and b should execute in lexicographical order as they have default priority 0, with c last since it has
|
||||
// priority 1
|
||||
// a and b have no priority set, which means they default to 0 and should execute first
|
||||
// a and b should execute in lexicographical order
|
||||
// c should execute first since it has a priority of 1
|
||||
"fmt-a": {
|
||||
Command: "test-fmt",
|
||||
Options: []string{"fmt-a"},
|
||||
Includes: []string{"*.py"},
|
||||
Pipeline: "foo",
|
||||
},
|
||||
"fmt-b": {
|
||||
Command: "test-fmt",
|
||||
Options: []string{"fmt-b"},
|
||||
Includes: []string{"*.py"},
|
||||
Pipeline: "foo",
|
||||
},
|
||||
"fmt-c": {
|
||||
Command: "test-fmt",
|
||||
Options: []string{"fmt-c"},
|
||||
Includes: []string{"*.py"},
|
||||
Pipeline: "foo",
|
||||
Priority: 1,
|
||||
},
|
||||
},
|
||||
|
@ -2,7 +2,6 @@ package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
|
||||
"github.com/BurntSushi/toml"
|
||||
)
|
||||
@ -13,7 +12,6 @@ type Config struct {
|
||||
// Excludes is an optional list of glob patterns used to exclude certain files from all formatters.
|
||||
Excludes []string
|
||||
}
|
||||
Names []string `toml:"-"`
|
||||
Formatters map[string]*Formatter `toml:"formatter"`
|
||||
}
|
||||
|
||||
@ -40,12 +38,5 @@ func ReadFile(path string, names []string) (cfg *Config, err error) {
|
||||
cfg.Formatters = filtered
|
||||
}
|
||||
|
||||
// sort the formatter names so that, as we construct pipelines, we add formatters in a determinstic fashion. This
|
||||
// ensures a deterministic order even when all priority values are the same e.g. 0
|
||||
for name := range cfg.Formatters {
|
||||
cfg.Names = append(cfg.Names, name)
|
||||
}
|
||||
sort.Strings(cfg.Names)
|
||||
|
||||
return
|
||||
}
|
||||
|
@ -59,7 +59,6 @@ func TestReadConfigFile(t *testing.T) {
|
||||
as.Nil(alejandra.Options)
|
||||
as.Equal([]string{"*.nix"}, alejandra.Includes)
|
||||
as.Equal([]string{"examples/nix/sources.nix"}, alejandra.Excludes)
|
||||
as.Equal("nix", alejandra.Pipeline)
|
||||
as.Equal(1, alejandra.Priority)
|
||||
|
||||
// deadnix
|
||||
@ -69,7 +68,6 @@ func TestReadConfigFile(t *testing.T) {
|
||||
as.Nil(deadnix.Options)
|
||||
as.Equal([]string{"*.nix"}, deadnix.Includes)
|
||||
as.Nil(deadnix.Excludes)
|
||||
as.Equal("nix", deadnix.Pipeline)
|
||||
as.Equal(2, deadnix.Priority)
|
||||
|
||||
// ruby
|
||||
|
@ -9,8 +9,6 @@ type Formatter struct {
|
||||
Includes []string
|
||||
// Excludes is an optional list of glob patterns used to exclude certain files from this Formatter.
|
||||
Excludes []string
|
||||
// Indicates this formatter should be executed as part of a group of formatters all sharing the same pipeline key.
|
||||
Pipeline string
|
||||
// Indicates the order of precedence when executing as part of a pipeline.
|
||||
// Indicates the order of precedence when executing this Formatter in a sequence of Formatters.
|
||||
Priority int
|
||||
}
|
||||
|
@ -23,19 +23,19 @@ includes = ["*.go"]
|
||||
command = "black"
|
||||
includes = ["*.py"]
|
||||
|
||||
# use the priority field to control the order of execution
|
||||
|
||||
# run shellcheck first
|
||||
[formatter.shellcheck]
|
||||
command = "shellcheck"
|
||||
includes = ["*.sh"]
|
||||
pipeline = "sh"
|
||||
priority = 0
|
||||
priority = 0 # default is 0, but we set it here for clarity
|
||||
|
||||
# shfmt second
|
||||
[formatter.shfmt]
|
||||
command = "shfmt"
|
||||
options = ["-s", "-w"]
|
||||
includes = ["*.sh"]
|
||||
pipeline = "sh"
|
||||
priority = 1
|
||||
```
|
||||
|
||||
@ -49,13 +49,21 @@ priority = 1
|
||||
- `options` - an optional list of args to be passed to `command`.
|
||||
- `includes` - a list of glob patterns used to determine whether the formatter should be applied against a given path.
|
||||
- `excludes` - an optional list of glob patterns used to exclude certain files from this formatter.
|
||||
- `pipeline` - an optional key used to group related formatters together, ensuring they are executed sequentially
|
||||
against a given path.
|
||||
- `priority` - indicates the order of execution when this formatter is operating as part of a pipeline. Greater
|
||||
precedence is given to lower numbers, with the default being `0`.
|
||||
- `priority` - influences the order of execution. Greater precedence is given to lower numbers, with the default being `0`.
|
||||
|
||||
> When two or more formatters in a pipeline have the same priority, they are executed in lexicographical order to
|
||||
> ensure deterministic behaviour over multiple executions.
|
||||
## Same file, multiple formatters?
|
||||
|
||||
For each file, `treefmt` determines a list of formatters based on the configured `includes` / `excludes` rules. This list is
|
||||
then sorted, first by priority (lower the value, higher the precedence) and secondly by formatter name (lexicographically).
|
||||
|
||||
The resultant sequence of formatters is used to create a batch key, and similarly matched files get added to that batch
|
||||
until it is full, at which point the files are passed to each formatter in turn.
|
||||
|
||||
This means that `treefmt` **guarantees only one formatter will be operating on a given file at any point in time**.
|
||||
Another consequence is that formatting is deterministic for a given file and a given `treefmt` configuration.
|
||||
|
||||
By setting the priority fields appropriately, you can control the order in which those formatters are applied for any
|
||||
files they _both happen to match on_.
|
||||
|
||||
## Supported Formatters
|
||||
|
||||
|
@ -40,43 +40,28 @@ func (f *Formatter) Executable() string {
|
||||
return f.executable
|
||||
}
|
||||
|
||||
func (f *Formatter) Apply(ctx context.Context, files []*walk.File, filter bool) error {
|
||||
func (f *Formatter) Name() string {
|
||||
return f.name
|
||||
}
|
||||
|
||||
func (f *Formatter) Priority() int {
|
||||
return f.config.Priority
|
||||
}
|
||||
|
||||
func (f *Formatter) Apply(ctx context.Context, tasks []*Task) error {
|
||||
start := time.Now()
|
||||
|
||||
// construct args, starting with config
|
||||
args := f.config.Options
|
||||
|
||||
// If filter is true it indicates we are executing as part of a pipeline.
|
||||
// In such a scenario each formatter must sub filter the paths provided as different formatters might want different
|
||||
// files in a pipeline.
|
||||
if filter {
|
||||
// reset the batch
|
||||
f.batch = f.batch[:0]
|
||||
// exit early if nothing to process
|
||||
if len(tasks) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// filter paths
|
||||
for _, file := range files {
|
||||
if f.Wants(file) {
|
||||
f.batch = append(f.batch, file.RelPath)
|
||||
}
|
||||
}
|
||||
|
||||
// exit early if nothing to process
|
||||
if len(f.batch) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// append paths to the args
|
||||
args = append(args, f.batch...)
|
||||
} else {
|
||||
// exit early if nothing to process
|
||||
if len(files) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// append paths to the args
|
||||
for _, file := range files {
|
||||
args = append(args, file.RelPath)
|
||||
}
|
||||
// append paths to the args
|
||||
for _, task := range tasks {
|
||||
args = append(args, task.File.RelPath)
|
||||
}
|
||||
|
||||
// execute the command
|
||||
@ -95,7 +80,7 @@ func (f *Formatter) Apply(ctx context.Context, files []*walk.File, filter bool)
|
||||
|
||||
//
|
||||
|
||||
f.log.Infof("%v files processed in %v", len(files), time.Now().Sub(start))
|
||||
f.log.Infof("%v files processed in %v", len(tasks), time.Now().Sub(start))
|
||||
|
||||
return nil
|
||||
}
|
||||
@ -136,10 +121,10 @@ func NewFormatter(
|
||||
f.executable = executable
|
||||
|
||||
// initialise internal state
|
||||
if cfg.Pipeline == "" {
|
||||
f.log = log.WithPrefix(fmt.Sprintf("format | %s", name))
|
||||
if cfg.Priority > 0 {
|
||||
f.log = log.WithPrefix(fmt.Sprintf("format | %s[%d]", name, cfg.Priority))
|
||||
} else {
|
||||
f.log = log.WithPrefix(fmt.Sprintf("format | %s[%s]", cfg.Pipeline, name))
|
||||
f.log = log.WithPrefix(fmt.Sprintf("format | %s", name))
|
||||
}
|
||||
|
||||
f.includes, err = CompileGlobs(cfg.Includes)
|
||||
|
@ -1,40 +0,0 @@
|
||||
package format
|
||||
|
||||
import (
|
||||
"context"
|
||||
"slices"
|
||||
|
||||
"git.numtide.com/numtide/treefmt/walk"
|
||||
)
|
||||
|
||||
type Pipeline struct {
|
||||
sequence []*Formatter
|
||||
}
|
||||
|
||||
func (p *Pipeline) Add(f *Formatter) {
|
||||
p.sequence = append(p.sequence, f)
|
||||
// sort by priority in ascending order
|
||||
slices.SortFunc(p.sequence, func(a, b *Formatter) int {
|
||||
return a.config.Priority - b.config.Priority
|
||||
})
|
||||
}
|
||||
|
||||
func (p *Pipeline) Wants(path *walk.File) bool {
|
||||
var match bool
|
||||
for _, f := range p.sequence {
|
||||
match = f.Wants(path)
|
||||
if match {
|
||||
break
|
||||
}
|
||||
}
|
||||
return match
|
||||
}
|
||||
|
||||
func (p *Pipeline) Apply(ctx context.Context, paths []*walk.File) error {
|
||||
for _, f := range p.sequence {
|
||||
if err := f.Apply(ctx, paths, len(p.sequence) > 1); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
42
format/task.go
Normal file
42
format/task.go
Normal file
@ -0,0 +1,42 @@
|
||||
package format
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"slices"
|
||||
|
||||
"git.numtide.com/numtide/treefmt/walk"
|
||||
)
|
||||
|
||||
type Task struct {
|
||||
File *walk.File
|
||||
Formatters []*Formatter
|
||||
BatchKey string
|
||||
}
|
||||
|
||||
func NewTask(file *walk.File, formatters []*Formatter) Task {
|
||||
// sort by priority in ascending order
|
||||
slices.SortFunc(formatters, func(a, b *Formatter) int {
|
||||
priorityA := a.Priority()
|
||||
priorityB := b.Priority()
|
||||
|
||||
result := priorityA - priorityB
|
||||
if result == 0 {
|
||||
// formatters with the same priority are sorted lexicographically to ensure a deterministic outcome
|
||||
result = cmp.Compare(a.Name(), b.Name())
|
||||
}
|
||||
return result
|
||||
})
|
||||
|
||||
// construct a batch key which represents the unique sequence of formatters to be applied to file
|
||||
var key string
|
||||
for _, f := range formatters {
|
||||
key += f.name + ":"
|
||||
}
|
||||
key = key[:len(key)-1]
|
||||
|
||||
return Task{
|
||||
File: file,
|
||||
Formatters: formatters,
|
||||
BatchKey: key,
|
||||
}
|
||||
}
|
@ -24,17 +24,14 @@
|
||||
|
||||
settings.formatter = {
|
||||
deadnix = {
|
||||
pipeline = "nix";
|
||||
priority = 1;
|
||||
};
|
||||
|
||||
statix = {
|
||||
pipeline = "nix";
|
||||
priority = 2;
|
||||
};
|
||||
|
||||
alejandra = {
|
||||
pipeline = "nix";
|
||||
priority = 3;
|
||||
};
|
||||
|
||||
|
@ -4,11 +4,9 @@
|
||||
command = "deadnix"
|
||||
options = ["--edit"]
|
||||
includes = ["*.nix"]
|
||||
pipeline = "nix"
|
||||
priority = 1
|
||||
|
||||
[formatter.nixpkgs-fmt]
|
||||
command = "nixpkgs-fmt"
|
||||
includes = ["*.nix"]
|
||||
pipeline = "nix"
|
||||
priority = 2
|
@ -31,13 +31,11 @@ command = "alejandra"
|
||||
includes = ["*.nix"]
|
||||
# Act as an example on how to exclude specific files
|
||||
excludes = ["examples/nix/sources.nix"]
|
||||
pipeline = "nix"
|
||||
priority = 1
|
||||
|
||||
[formatter.deadnix]
|
||||
command = "deadnix"
|
||||
includes = ["*.nix"]
|
||||
pipeline = "nix"
|
||||
priority = 2
|
||||
|
||||
[formatter.ruby]
|
||||
|
Loading…
Reference in New Issue
Block a user