feat: simplify pipeline model

For each path we determine the list of formatters that are interested in formatting it. From there, we sort
the list of formatters first by priority (lower value, higher priority) and then by name (lexicographically).

With this information we create a batch key which is based on the unique sequence of formatters. When enough paths with the same sequence is ready we apply them in order to each formatter.

By default, with no special configuration, this model guarantees that a given path will only be processed by one formatter at a time.

If a user wishes to influence the order in which formatters are applied they can use the priority field.

Signed-off-by: Brian McGee <brian@bmcgee.ie>
This commit is contained in:
Brian McGee 2024-05-24 11:42:40 +01:00
parent 6ce3d27519
commit ce14ee828f
No known key found for this signature in database
GPG Key ID: D49016E76AD1E8C0
12 changed files with 147 additions and 229 deletions

View File

@ -30,11 +30,11 @@ const (
)
var (
globalExcludes []glob.Glob
formatters map[string]*format.Formatter
pipelines map[string]*format.Pipeline
filesCh chan *walk.File
processedCh chan *walk.File
excludes []glob.Glob
formatters map[string]*format.Formatter
filesCh chan *walk.File
processedCh chan *walk.File
ErrFailOnChange = errors.New("unexpected changes detected, --fail-on-change is enabled")
)
@ -73,21 +73,18 @@ func (f *Format) Run() (err error) {
}
// compile global exclude globs
if globalExcludes, err = format.CompileGlobs(cfg.Global.Excludes); err != nil {
if excludes, err = format.CompileGlobs(cfg.Global.Excludes); err != nil {
return fmt.Errorf("failed to compile global excludes: %w", err)
}
// initialise pipelines
pipelines = make(map[string]*format.Pipeline)
// initialise formatters
formatters = make(map[string]*format.Formatter)
// iterate the formatters in lexicographical order
for _, name := range cfg.Names {
// init formatter
formatterCfg := cfg.Formatters[name]
formatter, err := format.NewFormatter(name, Cli.TreeRoot, formatterCfg, globalExcludes)
for name, formatterCfg := range cfg.Formatters {
formatter, err := format.NewFormatter(name, Cli.TreeRoot, formatterCfg, excludes)
if errors.Is(err, format.ErrCommandNotFound) && Cli.AllowMissingFormatter {
log.Debugf("formatter not found: %v", name)
log.Debugf("formatter command not found: %v", name)
continue
} else if err != nil {
return fmt.Errorf("%w: failed to initialise formatter: %v", err, name)
@ -95,23 +92,6 @@ func (f *Format) Run() (err error) {
// store formatter by name
formatters[name] = formatter
// If no pipeline is configured, we add the formatter to a nominal pipeline of size 1 with the key being the
// formatter's name. If a pipeline is configured, we add the formatter to a pipeline keyed by
// 'p:<pipeline_name>' in which it is sorted by priority.
if formatterCfg.Pipeline == "" {
pipeline := format.Pipeline{}
pipeline.Add(formatter)
pipelines[name] = &pipeline
} else {
key := fmt.Sprintf("p:%s", formatterCfg.Pipeline)
pipeline, ok := pipelines[key]
if !ok {
pipeline = &format.Pipeline{}
pipelines[key] = pipeline
}
pipeline.Add(formatter)
}
}
// open the cache
@ -304,37 +284,43 @@ func walkFilesystem(ctx context.Context) func() error {
func applyFormatters(ctx context.Context) func() error {
// create our own errgroup for concurrent formatting tasks
fg, ctx := errgroup.WithContext(ctx)
// simple optimization to avoid too many concurrent formatting tasks
// we can queue them up faster than the formatters can process them, this paces things a bit
fg.SetLimit(runtime.NumCPU())
// pre-initialise batches keyed by pipeline
batches := make(map[string][]*walk.File)
for key := range pipelines {
batches[key] = make([]*walk.File, 0, BatchSize)
}
// track batches of formatting task based on their batch keys, which are determined by the unique sequence of
// formatters which should be applied to their respective files
batches := make(map[string][]*format.Task)
// for a given pipeline key, add the provided file to the current batch and trigger a format if the batch size has
// been reached
tryApply := func(key string, file *walk.File) {
// append to batch
batches[key] = append(batches[key], file)
// check if the batch is full
apply := func(key string, flush bool) {
// lookup the batch and exit early if it's empty
batch := batches[key]
if len(batch) == BatchSize {
// get the pipeline
pipeline := pipelines[key]
if len(batch) == 0 {
return
}
// copy the batch
files := make([]*walk.File, len(batch))
copy(files, batch)
// process the batch if it's full, or we've been asked to flush partial batches
if flush || len(batch) == BatchSize {
// apply to the pipeline
// copy the batch as we re-use it for the next batch
tasks := make([]*format.Task, len(batch))
copy(tasks, batch)
// asynchronously apply the sequence formatters to the batch
fg.Go(func() error {
if err := pipeline.Apply(ctx, files); err != nil {
return err
// iterate the formatters, applying them in sequence to the batch of tasks
// we get the formatters list from the first task since they have all the same formatters list
for _, f := range tasks[0].Formatters {
if err := f.Apply(ctx, tasks); err != nil {
return err
}
}
for _, path := range files {
processedCh <- path
// pass each file to the processed channel
for _, task := range tasks {
processedCh <- task.File
}
return nil
})
@ -343,25 +329,12 @@ func applyFormatters(ctx context.Context) func() error {
}
}
// format any partial batches
flushBatches := func() {
for key, pipeline := range pipelines {
batch := batches[key]
pipeline := pipeline // capture for closure
if len(batch) > 0 {
fg.Go(func() error {
if err := pipeline.Apply(ctx, batch); err != nil {
return fmt.Errorf("%s failure: %w", key, err)
}
for _, path := range batch {
processedCh <- path
}
return nil
})
}
}
tryApply := func(task *format.Task) {
// append to batch
key := task.BatchKey
batches[key] = append(batches[key], task)
// try to apply
apply(key, false)
}
return func() error {
@ -370,35 +343,38 @@ func applyFormatters(ctx context.Context) func() error {
close(processedCh)
}()
// iterate the files channel, checking if any pipeline wants it, and attempting to apply if so.
// iterate the files channel
for file := range filesCh {
var matches []string
for key, pipeline := range pipelines {
if !pipeline.Wants(file) {
continue
// determine a list of formatters that are interested in file
var matches []*format.Formatter
for _, formatter := range formatters {
if formatter.Wants(file) {
matches = append(matches, formatter)
}
matches = append(matches, key)
tryApply(key, file)
}
switch len(matches) {
case 0:
log.Debugf("no match found: %s", file.Path)
if len(matches) == 0 {
// no match, so we send it direct to the processed channel
log.Debugf("no match found: %s", file.Path)
processedCh <- file
case 1:
} else {
// record the match
stats.Add(stats.Matched, 1)
default:
return fmt.Errorf("path '%s' matched multiple formatters/pipelines %v", file.Path, matches)
// create a new format task, add it to a batch based on its batch key and try to apply if the batch is full
task := format.NewTask(file, matches)
tryApply(&task)
}
}
// flush any partial batches which remain
flushBatches()
for key := range batches {
apply(key, true)
}
// wait for all outstanding formatting tasks to complete
if err := fg.Wait(); err != nil {
return fmt.Errorf("pipeline processing failure: %w", err)
return fmt.Errorf("formatting failure: %w", err)
}
return nil
}

View File

@ -196,37 +196,6 @@ func TestIncludesAndExcludes(t *testing.T) {
assertStats(t, as, 31, 31, 2, 0)
}
func TestMatchingMultiplePipelines(t *testing.T) {
as := require.New(t)
tempDir := test.TempExamples(t)
configPath := tempDir + "/multiple.toml"
cfg := config2.Config{
Formatters: map[string]*config2.Formatter{
"echo": {
Command: "echo",
Includes: []string{"*"},
},
"touch": {
Command: "touch",
Includes: []string{"*"},
},
},
}
test.WriteConfig(t, configPath, cfg)
_, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
as.ErrorContains(err, "matched multiple formatters/pipelines")
as.ErrorContains(err, "echo")
as.ErrorContains(err, "touch")
// run with only one formatter
test.WriteConfig(t, configPath, cfg)
_, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "-f", "echo")
as.NoError(err)
}
func TestCache(t *testing.T) {
as := require.New(t)
@ -604,25 +573,23 @@ func TestDeterministicOrderingInPipeline(t *testing.T) {
test.WriteConfig(t, configPath, config2.Config{
Formatters: map[string]*config2.Formatter{
// a and b should execute in lexicographical order as they have default priority 0, with c last since it has
// priority 1
// a and b have no priority set, which means they default to 0 and should execute first
// a and b should execute in lexicographical order
// c should execute first since it has a priority of 1
"fmt-a": {
Command: "test-fmt",
Options: []string{"fmt-a"},
Includes: []string{"*.py"},
Pipeline: "foo",
},
"fmt-b": {
Command: "test-fmt",
Options: []string{"fmt-b"},
Includes: []string{"*.py"},
Pipeline: "foo",
},
"fmt-c": {
Command: "test-fmt",
Options: []string{"fmt-c"},
Includes: []string{"*.py"},
Pipeline: "foo",
Priority: 1,
},
},

View File

@ -2,7 +2,6 @@ package config
import (
"fmt"
"sort"
"github.com/BurntSushi/toml"
)
@ -13,7 +12,6 @@ type Config struct {
// Excludes is an optional list of glob patterns used to exclude certain files from all formatters.
Excludes []string
}
Names []string `toml:"-"`
Formatters map[string]*Formatter `toml:"formatter"`
}
@ -40,12 +38,5 @@ func ReadFile(path string, names []string) (cfg *Config, err error) {
cfg.Formatters = filtered
}
// sort the formatter names so that, as we construct pipelines, we add formatters in a determinstic fashion. This
// ensures a deterministic order even when all priority values are the same e.g. 0
for name := range cfg.Formatters {
cfg.Names = append(cfg.Names, name)
}
sort.Strings(cfg.Names)
return
}

View File

@ -59,7 +59,6 @@ func TestReadConfigFile(t *testing.T) {
as.Nil(alejandra.Options)
as.Equal([]string{"*.nix"}, alejandra.Includes)
as.Equal([]string{"examples/nix/sources.nix"}, alejandra.Excludes)
as.Equal("nix", alejandra.Pipeline)
as.Equal(1, alejandra.Priority)
// deadnix
@ -69,7 +68,6 @@ func TestReadConfigFile(t *testing.T) {
as.Nil(deadnix.Options)
as.Equal([]string{"*.nix"}, deadnix.Includes)
as.Nil(deadnix.Excludes)
as.Equal("nix", deadnix.Pipeline)
as.Equal(2, deadnix.Priority)
// ruby

View File

@ -9,8 +9,6 @@ type Formatter struct {
Includes []string
// Excludes is an optional list of glob patterns used to exclude certain files from this Formatter.
Excludes []string
// Indicates this formatter should be executed as part of a group of formatters all sharing the same pipeline key.
Pipeline string
// Indicates the order of precedence when executing as part of a pipeline.
// Indicates the order of precedence when executing this Formatter in a sequence of Formatters.
Priority int
}

View File

@ -23,19 +23,19 @@ includes = ["*.go"]
command = "black"
includes = ["*.py"]
# use the priority field to control the order of execution
# run shellcheck first
[formatter.shellcheck]
command = "shellcheck"
includes = ["*.sh"]
pipeline = "sh"
priority = 0
priority = 0 # default is 0, but we set it here for clarity
# shfmt second
[formatter.shfmt]
command = "shfmt"
options = ["-s", "-w"]
includes = ["*.sh"]
pipeline = "sh"
priority = 1
```
@ -49,13 +49,21 @@ priority = 1
- `options` - an optional list of args to be passed to `command`.
- `includes` - a list of glob patterns used to determine whether the formatter should be applied against a given path.
- `excludes` - an optional list of glob patterns used to exclude certain files from this formatter.
- `pipeline` - an optional key used to group related formatters together, ensuring they are executed sequentially
against a given path.
- `priority` - indicates the order of execution when this formatter is operating as part of a pipeline. Greater
precedence is given to lower numbers, with the default being `0`.
- `priority` - influences the order of execution. Greater precedence is given to lower numbers, with the default being `0`.
> When two or more formatters in a pipeline have the same priority, they are executed in lexicographical order to
> ensure deterministic behaviour over multiple executions.
## Same file, multiple formatters?
For each file, `treefmt` determines a list of formatters based on the configured `includes` / `excludes` rules. This list is
then sorted, first by priority (lower the value, higher the precedence) and secondly by formatter name (lexicographically).
The resultant sequence of formatters is used to create a batch key, and similarly matched files get added to that batch
until it is full, at which point the files are passed to each formatter in turn.
This means that `treefmt` **guarantees only one formatter will be operating on a given file at any point in time**.
Another consequence is that formatting is deterministic for a given file and a given `treefmt` configuration.
By setting the priority fields appropriately, you can control the order in which those formatters are applied for any
files they _both happen to match on_.
## Supported Formatters

View File

@ -40,43 +40,28 @@ func (f *Formatter) Executable() string {
return f.executable
}
func (f *Formatter) Apply(ctx context.Context, files []*walk.File, filter bool) error {
func (f *Formatter) Name() string {
return f.name
}
func (f *Formatter) Priority() int {
return f.config.Priority
}
func (f *Formatter) Apply(ctx context.Context, tasks []*Task) error {
start := time.Now()
// construct args, starting with config
args := f.config.Options
// If filter is true it indicates we are executing as part of a pipeline.
// In such a scenario each formatter must sub filter the paths provided as different formatters might want different
// files in a pipeline.
if filter {
// reset the batch
f.batch = f.batch[:0]
// exit early if nothing to process
if len(tasks) == 0 {
return nil
}
// filter paths
for _, file := range files {
if f.Wants(file) {
f.batch = append(f.batch, file.RelPath)
}
}
// exit early if nothing to process
if len(f.batch) == 0 {
return nil
}
// append paths to the args
args = append(args, f.batch...)
} else {
// exit early if nothing to process
if len(files) == 0 {
return nil
}
// append paths to the args
for _, file := range files {
args = append(args, file.RelPath)
}
// append paths to the args
for _, task := range tasks {
args = append(args, task.File.RelPath)
}
// execute the command
@ -95,7 +80,7 @@ func (f *Formatter) Apply(ctx context.Context, files []*walk.File, filter bool)
//
f.log.Infof("%v files processed in %v", len(files), time.Now().Sub(start))
f.log.Infof("%v files processed in %v", len(tasks), time.Now().Sub(start))
return nil
}
@ -136,10 +121,10 @@ func NewFormatter(
f.executable = executable
// initialise internal state
if cfg.Pipeline == "" {
f.log = log.WithPrefix(fmt.Sprintf("format | %s", name))
if cfg.Priority > 0 {
f.log = log.WithPrefix(fmt.Sprintf("format | %s[%d]", name, cfg.Priority))
} else {
f.log = log.WithPrefix(fmt.Sprintf("format | %s[%s]", cfg.Pipeline, name))
f.log = log.WithPrefix(fmt.Sprintf("format | %s", name))
}
f.includes, err = CompileGlobs(cfg.Includes)

View File

@ -1,40 +0,0 @@
package format
import (
"context"
"slices"
"git.numtide.com/numtide/treefmt/walk"
)
type Pipeline struct {
sequence []*Formatter
}
func (p *Pipeline) Add(f *Formatter) {
p.sequence = append(p.sequence, f)
// sort by priority in ascending order
slices.SortFunc(p.sequence, func(a, b *Formatter) int {
return a.config.Priority - b.config.Priority
})
}
func (p *Pipeline) Wants(path *walk.File) bool {
var match bool
for _, f := range p.sequence {
match = f.Wants(path)
if match {
break
}
}
return match
}
func (p *Pipeline) Apply(ctx context.Context, paths []*walk.File) error {
for _, f := range p.sequence {
if err := f.Apply(ctx, paths, len(p.sequence) > 1); err != nil {
return err
}
}
return nil
}

42
format/task.go Normal file
View File

@ -0,0 +1,42 @@
package format
import (
"cmp"
"slices"
"git.numtide.com/numtide/treefmt/walk"
)
type Task struct {
File *walk.File
Formatters []*Formatter
BatchKey string
}
func NewTask(file *walk.File, formatters []*Formatter) Task {
// sort by priority in ascending order
slices.SortFunc(formatters, func(a, b *Formatter) int {
priorityA := a.Priority()
priorityB := b.Priority()
result := priorityA - priorityB
if result == 0 {
// formatters with the same priority are sorted lexicographically to ensure a deterministic outcome
result = cmp.Compare(a.Name(), b.Name())
}
return result
})
// construct a batch key which represents the unique sequence of formatters to be applied to file
var key string
for _, f := range formatters {
key += f.name + ":"
}
key = key[:len(key)-1]
return Task{
File: file,
Formatters: formatters,
BatchKey: key,
}
}

View File

@ -24,17 +24,14 @@
settings.formatter = {
deadnix = {
pipeline = "nix";
priority = 1;
};
statix = {
pipeline = "nix";
priority = 2;
};
alejandra = {
pipeline = "nix";
priority = 3;
};

View File

@ -4,11 +4,9 @@
command = "deadnix"
options = ["--edit"]
includes = ["*.nix"]
pipeline = "nix"
priority = 1
[formatter.nixpkgs-fmt]
command = "nixpkgs-fmt"
includes = ["*.nix"]
pipeline = "nix"
priority = 2

View File

@ -31,13 +31,11 @@ command = "alejandra"
includes = ["*.nix"]
# Act as an example on how to exclude specific files
excludes = ["examples/nix/sources.nix"]
pipeline = "nix"
priority = 1
[formatter.deadnix]
command = "deadnix"
includes = ["*.nix"]
pipeline = "nix"
priority = 2
[formatter.ruby]