Merge pull request #301 from numtide/feat/better-concurrency-model

feat: simplify pipeline model
This commit is contained in:
mergify[bot] 2024-05-26 20:11:48 +00:00 committed by GitHub
commit 06f15f4237
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 147 additions and 229 deletions

View File

@ -30,11 +30,11 @@ const (
)
var (
globalExcludes []glob.Glob
formatters map[string]*format.Formatter
pipelines map[string]*format.Pipeline
filesCh chan *walk.File
processedCh chan *walk.File
excludes []glob.Glob
formatters map[string]*format.Formatter
filesCh chan *walk.File
processedCh chan *walk.File
ErrFailOnChange = errors.New("unexpected changes detected, --fail-on-change is enabled")
)
@ -73,21 +73,18 @@ func (f *Format) Run() (err error) {
}
// compile global exclude globs
if globalExcludes, err = format.CompileGlobs(cfg.Global.Excludes); err != nil {
if excludes, err = format.CompileGlobs(cfg.Global.Excludes); err != nil {
return fmt.Errorf("failed to compile global excludes: %w", err)
}
// initialise pipelines
pipelines = make(map[string]*format.Pipeline)
// initialise formatters
formatters = make(map[string]*format.Formatter)
// iterate the formatters in lexicographical order
for _, name := range cfg.Names {
// init formatter
formatterCfg := cfg.Formatters[name]
formatter, err := format.NewFormatter(name, Cli.TreeRoot, formatterCfg, globalExcludes)
for name, formatterCfg := range cfg.Formatters {
formatter, err := format.NewFormatter(name, Cli.TreeRoot, formatterCfg, excludes)
if errors.Is(err, format.ErrCommandNotFound) && Cli.AllowMissingFormatter {
log.Debugf("formatter not found: %v", name)
log.Debugf("formatter command not found: %v", name)
continue
} else if err != nil {
return fmt.Errorf("%w: failed to initialise formatter: %v", err, name)
@ -95,23 +92,6 @@ func (f *Format) Run() (err error) {
// store formatter by name
formatters[name] = formatter
// If no pipeline is configured, we add the formatter to a nominal pipeline of size 1 with the key being the
// formatter's name. If a pipeline is configured, we add the formatter to a pipeline keyed by
// 'p:<pipeline_name>' in which it is sorted by priority.
if formatterCfg.Pipeline == "" {
pipeline := format.Pipeline{}
pipeline.Add(formatter)
pipelines[name] = &pipeline
} else {
key := fmt.Sprintf("p:%s", formatterCfg.Pipeline)
pipeline, ok := pipelines[key]
if !ok {
pipeline = &format.Pipeline{}
pipelines[key] = pipeline
}
pipeline.Add(formatter)
}
}
// open the cache
@ -304,37 +284,43 @@ func walkFilesystem(ctx context.Context) func() error {
func applyFormatters(ctx context.Context) func() error {
// create our own errgroup for concurrent formatting tasks
fg, ctx := errgroup.WithContext(ctx)
// simple optimization to avoid too many concurrent formatting tasks
// we can queue them up faster than the formatters can process them, this paces things a bit
fg.SetLimit(runtime.NumCPU())
// pre-initialise batches keyed by pipeline
batches := make(map[string][]*walk.File)
for key := range pipelines {
batches[key] = make([]*walk.File, 0, BatchSize)
}
// track batches of formatting task based on their batch keys, which are determined by the unique sequence of
// formatters which should be applied to their respective files
batches := make(map[string][]*format.Task)
// for a given pipeline key, add the provided file to the current batch and trigger a format if the batch size has
// been reached
tryApply := func(key string, file *walk.File) {
// append to batch
batches[key] = append(batches[key], file)
// check if the batch is full
apply := func(key string, flush bool) {
// lookup the batch and exit early if it's empty
batch := batches[key]
if len(batch) == BatchSize {
// get the pipeline
pipeline := pipelines[key]
if len(batch) == 0 {
return
}
// copy the batch
files := make([]*walk.File, len(batch))
copy(files, batch)
// process the batch if it's full, or we've been asked to flush partial batches
if flush || len(batch) == BatchSize {
// apply to the pipeline
// copy the batch as we re-use it for the next batch
tasks := make([]*format.Task, len(batch))
copy(tasks, batch)
// asynchronously apply the sequence formatters to the batch
fg.Go(func() error {
if err := pipeline.Apply(ctx, files); err != nil {
return err
// iterate the formatters, applying them in sequence to the batch of tasks
// we get the formatters list from the first task since they have all the same formatters list
for _, f := range tasks[0].Formatters {
if err := f.Apply(ctx, tasks); err != nil {
return err
}
}
for _, path := range files {
processedCh <- path
// pass each file to the processed channel
for _, task := range tasks {
processedCh <- task.File
}
return nil
})
@ -343,25 +329,12 @@ func applyFormatters(ctx context.Context) func() error {
}
}
// format any partial batches
flushBatches := func() {
for key, pipeline := range pipelines {
batch := batches[key]
pipeline := pipeline // capture for closure
if len(batch) > 0 {
fg.Go(func() error {
if err := pipeline.Apply(ctx, batch); err != nil {
return fmt.Errorf("%s failure: %w", key, err)
}
for _, path := range batch {
processedCh <- path
}
return nil
})
}
}
tryApply := func(task *format.Task) {
// append to batch
key := task.BatchKey
batches[key] = append(batches[key], task)
// try to apply
apply(key, false)
}
return func() error {
@ -370,35 +343,38 @@ func applyFormatters(ctx context.Context) func() error {
close(processedCh)
}()
// iterate the files channel, checking if any pipeline wants it, and attempting to apply if so.
// iterate the files channel
for file := range filesCh {
var matches []string
for key, pipeline := range pipelines {
if !pipeline.Wants(file) {
continue
// determine a list of formatters that are interested in file
var matches []*format.Formatter
for _, formatter := range formatters {
if formatter.Wants(file) {
matches = append(matches, formatter)
}
matches = append(matches, key)
tryApply(key, file)
}
switch len(matches) {
case 0:
log.Debugf("no match found: %s", file.Path)
if len(matches) == 0 {
// no match, so we send it direct to the processed channel
log.Debugf("no match found: %s", file.Path)
processedCh <- file
case 1:
} else {
// record the match
stats.Add(stats.Matched, 1)
default:
return fmt.Errorf("path '%s' matched multiple formatters/pipelines %v", file.Path, matches)
// create a new format task, add it to a batch based on its batch key and try to apply if the batch is full
task := format.NewTask(file, matches)
tryApply(&task)
}
}
// flush any partial batches which remain
flushBatches()
for key := range batches {
apply(key, true)
}
// wait for all outstanding formatting tasks to complete
if err := fg.Wait(); err != nil {
return fmt.Errorf("pipeline processing failure: %w", err)
return fmt.Errorf("formatting failure: %w", err)
}
return nil
}

View File

@ -196,37 +196,6 @@ func TestIncludesAndExcludes(t *testing.T) {
assertStats(t, as, 31, 31, 2, 0)
}
func TestMatchingMultiplePipelines(t *testing.T) {
as := require.New(t)
tempDir := test.TempExamples(t)
configPath := tempDir + "/multiple.toml"
cfg := config2.Config{
Formatters: map[string]*config2.Formatter{
"echo": {
Command: "echo",
Includes: []string{"*"},
},
"touch": {
Command: "touch",
Includes: []string{"*"},
},
},
}
test.WriteConfig(t, configPath, cfg)
_, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
as.ErrorContains(err, "matched multiple formatters/pipelines")
as.ErrorContains(err, "echo")
as.ErrorContains(err, "touch")
// run with only one formatter
test.WriteConfig(t, configPath, cfg)
_, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "-f", "echo")
as.NoError(err)
}
func TestCache(t *testing.T) {
as := require.New(t)
@ -604,25 +573,23 @@ func TestDeterministicOrderingInPipeline(t *testing.T) {
test.WriteConfig(t, configPath, config2.Config{
Formatters: map[string]*config2.Formatter{
// a and b should execute in lexicographical order as they have default priority 0, with c last since it has
// priority 1
// a and b have no priority set, which means they default to 0 and should execute first
// a and b should execute in lexicographical order
// c should execute first since it has a priority of 1
"fmt-a": {
Command: "test-fmt",
Options: []string{"fmt-a"},
Includes: []string{"*.py"},
Pipeline: "foo",
},
"fmt-b": {
Command: "test-fmt",
Options: []string{"fmt-b"},
Includes: []string{"*.py"},
Pipeline: "foo",
},
"fmt-c": {
Command: "test-fmt",
Options: []string{"fmt-c"},
Includes: []string{"*.py"},
Pipeline: "foo",
Priority: 1,
},
},

View File

@ -2,7 +2,6 @@ package config
import (
"fmt"
"sort"
"github.com/BurntSushi/toml"
)
@ -13,7 +12,6 @@ type Config struct {
// Excludes is an optional list of glob patterns used to exclude certain files from all formatters.
Excludes []string
}
Names []string `toml:"-"`
Formatters map[string]*Formatter `toml:"formatter"`
}
@ -40,12 +38,5 @@ func ReadFile(path string, names []string) (cfg *Config, err error) {
cfg.Formatters = filtered
}
// sort the formatter names so that, as we construct pipelines, we add formatters in a determinstic fashion. This
// ensures a deterministic order even when all priority values are the same e.g. 0
for name := range cfg.Formatters {
cfg.Names = append(cfg.Names, name)
}
sort.Strings(cfg.Names)
return
}

View File

@ -59,7 +59,6 @@ func TestReadConfigFile(t *testing.T) {
as.Nil(alejandra.Options)
as.Equal([]string{"*.nix"}, alejandra.Includes)
as.Equal([]string{"examples/nix/sources.nix"}, alejandra.Excludes)
as.Equal("nix", alejandra.Pipeline)
as.Equal(1, alejandra.Priority)
// deadnix
@ -69,7 +68,6 @@ func TestReadConfigFile(t *testing.T) {
as.Nil(deadnix.Options)
as.Equal([]string{"*.nix"}, deadnix.Includes)
as.Nil(deadnix.Excludes)
as.Equal("nix", deadnix.Pipeline)
as.Equal(2, deadnix.Priority)
// ruby

View File

@ -9,8 +9,6 @@ type Formatter struct {
Includes []string
// Excludes is an optional list of glob patterns used to exclude certain files from this Formatter.
Excludes []string
// Indicates this formatter should be executed as part of a group of formatters all sharing the same pipeline key.
Pipeline string
// Indicates the order of precedence when executing as part of a pipeline.
// Indicates the order of precedence when executing this Formatter in a sequence of Formatters.
Priority int
}

View File

@ -23,19 +23,19 @@ includes = ["*.go"]
command = "black"
includes = ["*.py"]
# use the priority field to control the order of execution
# run shellcheck first
[formatter.shellcheck]
command = "shellcheck"
includes = ["*.sh"]
pipeline = "sh"
priority = 0
priority = 0 # default is 0, but we set it here for clarity
# shfmt second
[formatter.shfmt]
command = "shfmt"
options = ["-s", "-w"]
includes = ["*.sh"]
pipeline = "sh"
priority = 1
```
@ -49,13 +49,21 @@ priority = 1
- `options` - an optional list of args to be passed to `command`.
- `includes` - a list of glob patterns used to determine whether the formatter should be applied against a given path.
- `excludes` - an optional list of glob patterns used to exclude certain files from this formatter.
- `pipeline` - an optional key used to group related formatters together, ensuring they are executed sequentially
against a given path.
- `priority` - indicates the order of execution when this formatter is operating as part of a pipeline. Greater
precedence is given to lower numbers, with the default being `0`.
- `priority` - influences the order of execution. Greater precedence is given to lower numbers, with the default being `0`.
> When two or more formatters in a pipeline have the same priority, they are executed in lexicographical order to
> ensure deterministic behaviour over multiple executions.
## Same file, multiple formatters?
For each file, `treefmt` determines a list of formatters based on the configured `includes` / `excludes` rules. This list is
then sorted, first by priority (lower the value, higher the precedence) and secondly by formatter name (lexicographically).
The resultant sequence of formatters is used to create a batch key, and similarly matched files get added to that batch
until it is full, at which point the files are passed to each formatter in turn.
This means that `treefmt` **guarantees only one formatter will be operating on a given file at any point in time**.
Another consequence is that formatting is deterministic for a given file and a given `treefmt` configuration.
By setting the priority fields appropriately, you can control the order in which those formatters are applied for any
files they _both happen to match on_.
## Supported Formatters

View File

@ -40,43 +40,28 @@ func (f *Formatter) Executable() string {
return f.executable
}
func (f *Formatter) Apply(ctx context.Context, files []*walk.File, filter bool) error {
func (f *Formatter) Name() string {
return f.name
}
func (f *Formatter) Priority() int {
return f.config.Priority
}
func (f *Formatter) Apply(ctx context.Context, tasks []*Task) error {
start := time.Now()
// construct args, starting with config
args := f.config.Options
// If filter is true it indicates we are executing as part of a pipeline.
// In such a scenario each formatter must sub filter the paths provided as different formatters might want different
// files in a pipeline.
if filter {
// reset the batch
f.batch = f.batch[:0]
// exit early if nothing to process
if len(tasks) == 0 {
return nil
}
// filter paths
for _, file := range files {
if f.Wants(file) {
f.batch = append(f.batch, file.RelPath)
}
}
// exit early if nothing to process
if len(f.batch) == 0 {
return nil
}
// append paths to the args
args = append(args, f.batch...)
} else {
// exit early if nothing to process
if len(files) == 0 {
return nil
}
// append paths to the args
for _, file := range files {
args = append(args, file.RelPath)
}
// append paths to the args
for _, task := range tasks {
args = append(args, task.File.RelPath)
}
// execute the command
@ -95,7 +80,7 @@ func (f *Formatter) Apply(ctx context.Context, files []*walk.File, filter bool)
//
f.log.Infof("%v files processed in %v", len(files), time.Now().Sub(start))
f.log.Infof("%v files processed in %v", len(tasks), time.Now().Sub(start))
return nil
}
@ -136,10 +121,10 @@ func NewFormatter(
f.executable = executable
// initialise internal state
if cfg.Pipeline == "" {
f.log = log.WithPrefix(fmt.Sprintf("format | %s", name))
if cfg.Priority > 0 {
f.log = log.WithPrefix(fmt.Sprintf("format | %s[%d]", name, cfg.Priority))
} else {
f.log = log.WithPrefix(fmt.Sprintf("format | %s[%s]", cfg.Pipeline, name))
f.log = log.WithPrefix(fmt.Sprintf("format | %s", name))
}
f.includes, err = CompileGlobs(cfg.Includes)

View File

@ -1,40 +0,0 @@
package format
import (
"context"
"slices"
"git.numtide.com/numtide/treefmt/walk"
)
type Pipeline struct {
sequence []*Formatter
}
func (p *Pipeline) Add(f *Formatter) {
p.sequence = append(p.sequence, f)
// sort by priority in ascending order
slices.SortFunc(p.sequence, func(a, b *Formatter) int {
return a.config.Priority - b.config.Priority
})
}
func (p *Pipeline) Wants(path *walk.File) bool {
var match bool
for _, f := range p.sequence {
match = f.Wants(path)
if match {
break
}
}
return match
}
func (p *Pipeline) Apply(ctx context.Context, paths []*walk.File) error {
for _, f := range p.sequence {
if err := f.Apply(ctx, paths, len(p.sequence) > 1); err != nil {
return err
}
}
return nil
}

42
format/task.go Normal file
View File

@ -0,0 +1,42 @@
package format
import (
"cmp"
"slices"
"git.numtide.com/numtide/treefmt/walk"
)
type Task struct {
File *walk.File
Formatters []*Formatter
BatchKey string
}
func NewTask(file *walk.File, formatters []*Formatter) Task {
// sort by priority in ascending order
slices.SortFunc(formatters, func(a, b *Formatter) int {
priorityA := a.Priority()
priorityB := b.Priority()
result := priorityA - priorityB
if result == 0 {
// formatters with the same priority are sorted lexicographically to ensure a deterministic outcome
result = cmp.Compare(a.Name(), b.Name())
}
return result
})
// construct a batch key which represents the unique sequence of formatters to be applied to file
var key string
for _, f := range formatters {
key += f.name + ":"
}
key = key[:len(key)-1]
return Task{
File: file,
Formatters: formatters,
BatchKey: key,
}
}

View File

@ -24,17 +24,14 @@
settings.formatter = {
deadnix = {
pipeline = "nix";
priority = 1;
};
statix = {
pipeline = "nix";
priority = 2;
};
alejandra = {
pipeline = "nix";
priority = 3;
};

View File

@ -4,11 +4,9 @@
command = "deadnix"
options = ["--edit"]
includes = ["*.nix"]
pipeline = "nix"
priority = 1
[formatter.nixpkgs-fmt]
command = "nixpkgs-fmt"
includes = ["*.nix"]
pipeline = "nix"
priority = 2

View File

@ -31,13 +31,11 @@ command = "alejandra"
includes = ["*.nix"]
# Act as an example on how to exclude specific files
excludes = ["examples/nix/sources.nix"]
pipeline = "nix"
priority = 1
[formatter.deadnix]
command = "deadnix"
includes = ["*.nix"]
pipeline = "nix"
priority = 2
[formatter.ruby]