mirror of
https://github.com/neilotoole/sq.git
synced 2024-12-01 03:14:02 +03:00
4ffaae925f
* CSV now renames duplicate ingest headers * Fix broken test * xlsx ingester now handles duplicate col names * Update CHANGELOG * Additional tests for ingest.column.rename * Removed dead comment in grammar
277 lines
8.1 KiB
Go
277 lines
8.1 KiB
Go
package cli
|
|
|
|
import (
|
|
"context"
|
|
"io"
|
|
"os"
|
|
"path/filepath"
|
|
|
|
"github.com/neilotoole/sq/cli/run"
|
|
|
|
"github.com/neilotoole/sq/cli/config/yamlstore"
|
|
v0_34_0 "github.com/neilotoole/sq/cli/config/yamlstore/upgrades/v0.34.0"
|
|
"github.com/neilotoole/sq/libsq/core/lg/slogbuf"
|
|
"github.com/neilotoole/sq/libsq/core/options"
|
|
|
|
"github.com/neilotoole/sq/cli/config"
|
|
"github.com/neilotoole/sq/cli/flag"
|
|
"github.com/neilotoole/sq/drivers/csv"
|
|
"github.com/neilotoole/sq/drivers/json"
|
|
"github.com/neilotoole/sq/drivers/mysql"
|
|
"github.com/neilotoole/sq/drivers/postgres"
|
|
"github.com/neilotoole/sq/drivers/sqlite3"
|
|
"github.com/neilotoole/sq/drivers/sqlserver"
|
|
"github.com/neilotoole/sq/drivers/userdriver"
|
|
"github.com/neilotoole/sq/drivers/userdriver/xmlud"
|
|
"github.com/neilotoole/sq/drivers/xlsx"
|
|
"github.com/neilotoole/sq/libsq/core/cleanup"
|
|
"github.com/neilotoole/sq/libsq/core/errz"
|
|
"github.com/neilotoole/sq/libsq/core/lg"
|
|
"github.com/neilotoole/sq/libsq/core/lg/lga"
|
|
"github.com/neilotoole/sq/libsq/driver"
|
|
"github.com/neilotoole/sq/libsq/source"
|
|
"github.com/spf13/cobra"
|
|
"golang.org/x/exp/slog"
|
|
)
|
|
|
|
// getRun is a convenience function for getting Run
|
|
// from the cmd.Context().
|
|
func getRun(cmd *cobra.Command) *run.Run {
|
|
ru := run.FromContext(cmd.Context())
|
|
if ru.Cmd == nil {
|
|
// ru.Cmd is usually set by the cmd.preRun that is added
|
|
// by addCmd. But some commands (I'm looking at you __complete) don't
|
|
// interact with that mechanism. So, we set the field here for those
|
|
// odd cases.
|
|
ru.Cmd = cmd
|
|
}
|
|
return ru
|
|
}
|
|
|
|
// newRun returns a run.Run configured with standard values for logging,
|
|
// config, etc. This effectively is the bootstrap mechanism for sq.
|
|
// Note that the run.Run is not fully configured for use by a command
|
|
// until preRun is executed on it.
|
|
//
|
|
// Note: This func always returns a Run, even if an error occurs during
|
|
// bootstrap of the Run (for example if there's a config error). We do this
|
|
// to provide enough framework so that such an error can be logged or
|
|
// printed per the normal mechanisms, if at all possible.
|
|
func newRun(ctx context.Context, stdin *os.File, stdout, stderr io.Writer, args []string,
|
|
) (*run.Run, *slog.Logger, error) {
|
|
// logbuf holds log records until defaultLogging is completed.
|
|
log, logbuf := slogbuf.New()
|
|
log = log.With(lga.Pid, os.Getpid())
|
|
|
|
ru := &run.Run{
|
|
Stdin: stdin,
|
|
Out: stdout,
|
|
ErrOut: stderr,
|
|
OptionsRegistry: &options.Registry{},
|
|
}
|
|
|
|
RegisterDefaultOpts(ru.OptionsRegistry)
|
|
|
|
upgrades := yamlstore.UpgradeRegistry{
|
|
v0_34_0.Version: v0_34_0.Upgrade,
|
|
}
|
|
|
|
ctx = lg.NewContext(ctx, log)
|
|
|
|
var configErr error
|
|
ru.Config, ru.ConfigStore, configErr = yamlstore.Load(ctx,
|
|
args, ru.OptionsRegistry, upgrades)
|
|
|
|
log, logHandler, logCloser, logErr := defaultLogging(ctx, args, ru.Config)
|
|
ru.Cleanup = cleanup.New().AddE(logCloser)
|
|
if logErr != nil {
|
|
stderrLog, h := stderrLogger()
|
|
_ = logbuf.Flush(ctx, h)
|
|
return ru, stderrLog, logErr
|
|
}
|
|
|
|
if logHandler != nil {
|
|
if err := logbuf.Flush(ctx, logHandler); err != nil {
|
|
return ru, log, err
|
|
}
|
|
}
|
|
|
|
if log == nil {
|
|
log = lg.Discard()
|
|
}
|
|
|
|
log = log.With(lga.Pid, os.Getpid())
|
|
|
|
if ru.Config == nil {
|
|
ru.Config = config.New()
|
|
}
|
|
|
|
if configErr != nil {
|
|
// configErr is more important, return that first
|
|
return ru, log, configErr
|
|
}
|
|
|
|
return ru, log, nil
|
|
}
|
|
|
|
// FinishRunInit finishes setting up ru.
|
|
//
|
|
// TODO: This run.Run initialization mechanism is a bit of a mess.
|
|
// There's logic in newRun, preRun, FinishRunInit, as well as testh.Helper.init.
|
|
// Surely the init logic can be consolidated.
|
|
func FinishRunInit(ctx context.Context, ru *run.Run) error {
|
|
if ru.Cleanup == nil {
|
|
ru.Cleanup = cleanup.New()
|
|
}
|
|
|
|
cfg, log := ru.Config, lg.FromContext(ctx)
|
|
|
|
var scratchSrcFunc driver.ScratchSrcFunc
|
|
|
|
// scratchSrc could be nil, and that's ok
|
|
scratchSrc := cfg.Collection.Scratch()
|
|
if scratchSrc == nil {
|
|
scratchSrcFunc = sqlite3.NewScratchSource
|
|
} else {
|
|
scratchSrcFunc = func(_ context.Context, name string) (src *source.Source, clnup func() error, err error) {
|
|
return scratchSrc, nil, nil
|
|
}
|
|
}
|
|
|
|
var err error
|
|
if ru.Files == nil {
|
|
ru.Files, err = source.NewFiles(ctx)
|
|
if err != nil {
|
|
lg.WarnIfFuncError(log, lga.Cleanup, ru.Cleanup.Run)
|
|
return err
|
|
}
|
|
}
|
|
|
|
// Note: it's important that files.Close is invoked
|
|
// after databases.Close (hence added to clnup first),
|
|
// because databases could depend upon the existence of
|
|
// files (such as a sqlite db file).
|
|
ru.Cleanup.AddE(ru.Files.Close)
|
|
ru.Files.AddDriverDetectors(source.DetectMagicNumber)
|
|
|
|
ru.DriverRegistry = driver.NewRegistry(log)
|
|
dr := ru.DriverRegistry
|
|
|
|
ru.Databases = driver.NewDatabases(log, dr, scratchSrcFunc)
|
|
ru.Cleanup.AddC(ru.Databases)
|
|
|
|
dr.AddProvider(sqlite3.Type, &sqlite3.Provider{Log: log})
|
|
dr.AddProvider(postgres.Type, &postgres.Provider{Log: log})
|
|
dr.AddProvider(sqlserver.Type, &sqlserver.Provider{Log: log})
|
|
dr.AddProvider(mysql.Type, &mysql.Provider{Log: log})
|
|
csvp := &csv.Provider{Log: log, Scratcher: ru.Databases, Files: ru.Files}
|
|
dr.AddProvider(csv.TypeCSV, csvp)
|
|
dr.AddProvider(csv.TypeTSV, csvp)
|
|
ru.Files.AddDriverDetectors(csv.DetectCSV, csv.DetectTSV)
|
|
|
|
jsonp := &json.Provider{Log: log, Scratcher: ru.Databases, Files: ru.Files}
|
|
dr.AddProvider(json.TypeJSON, jsonp)
|
|
dr.AddProvider(json.TypeJSONA, jsonp)
|
|
dr.AddProvider(json.TypeJSONL, jsonp)
|
|
sampleSize := driver.OptIngestSampleSize.Get(cfg.Options)
|
|
ru.Files.AddDriverDetectors(
|
|
json.DetectJSON(sampleSize),
|
|
json.DetectJSONA(sampleSize),
|
|
json.DetectJSONL(sampleSize),
|
|
)
|
|
|
|
dr.AddProvider(xlsx.Type, &xlsx.Provider{Log: log, Scratcher: ru.Databases, Files: ru.Files})
|
|
ru.Files.AddDriverDetectors(xlsx.DetectXLSX)
|
|
// One day we may have more supported user driver genres.
|
|
userDriverImporters := map[string]userdriver.ImportFunc{
|
|
xmlud.Genre: xmlud.Import,
|
|
}
|
|
|
|
for i, userDriverDef := range cfg.Ext.UserDrivers {
|
|
userDriverDef := userDriverDef
|
|
|
|
errs := userdriver.ValidateDriverDef(userDriverDef)
|
|
if len(errs) > 0 {
|
|
err := errz.Combine(errs...)
|
|
err = errz.Wrapf(err, "failed validation of user driver definition [%d] {%s} from config",
|
|
i, userDriverDef.Name)
|
|
return err
|
|
}
|
|
|
|
importFn, ok := userDriverImporters[userDriverDef.Genre]
|
|
if !ok {
|
|
return errz.Errorf("unsupported genre {%s} for user driver {%s} specified via config",
|
|
userDriverDef.Genre, userDriverDef.Name)
|
|
}
|
|
|
|
// For each user driver definition, we register a
|
|
// distinct userdriver.Provider instance.
|
|
udp := &userdriver.Provider{
|
|
Log: log,
|
|
DriverDef: userDriverDef,
|
|
ImportFn: importFn,
|
|
Scratcher: ru.Databases,
|
|
Files: ru.Files,
|
|
}
|
|
|
|
ru.DriverRegistry.AddProvider(source.DriverType(userDriverDef.Name), udp)
|
|
ru.Files.AddDriverDetectors(udp.Detectors()...)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// preRun is invoked by cobra prior to the command's RunE being
|
|
// invoked. It sets up the driver registry, databases, writers and related
|
|
// fundamental components. Subsequent invocations of this method
|
|
// are no-op.
|
|
func preRun(cmd *cobra.Command, ru *run.Run) error {
|
|
if ru == nil {
|
|
return errz.New("Run is nil")
|
|
}
|
|
|
|
if ru.Writers != nil {
|
|
// If ru.Writers is already set, then this function has already been
|
|
// called on ru. That's ok, just return.
|
|
return nil
|
|
}
|
|
|
|
if ru.Cleanup == nil {
|
|
ru.Cleanup = cleanup.New()
|
|
}
|
|
|
|
ctx := cmd.Context()
|
|
// If the --output=/some/file flag is set, then we need to
|
|
// override ru.Out (which is typically stdout) to point it at
|
|
// the output destination file.
|
|
if cmdFlagChanged(ru.Cmd, flag.Output) {
|
|
fpath, _ := ru.Cmd.Flags().GetString(flag.Output)
|
|
fpath, err := filepath.Abs(fpath)
|
|
if err != nil {
|
|
return errz.Wrapf(err, "failed to get absolute path for --%s", flag.Output)
|
|
}
|
|
|
|
// Ensure the parent dir exists
|
|
err = os.MkdirAll(filepath.Dir(fpath), os.ModePerm)
|
|
if err != nil {
|
|
return errz.Wrapf(err, "failed to make parent dir for --%s", flag.Output)
|
|
}
|
|
|
|
f, err := os.Create(fpath)
|
|
if err != nil {
|
|
return errz.Wrapf(err, "failed to open file specified by flag --%s", flag.Output)
|
|
}
|
|
|
|
ru.Cleanup.AddC(f) // Make sure the file gets closed eventually
|
|
ru.Out = f
|
|
}
|
|
|
|
cmdOpts, err := getOptionsFromCmd(ru.Cmd)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
ru.Writers, ru.Out, ru.ErrOut = newWriters(ru.Cmd, cmdOpts, ru.Out, ru.ErrOut)
|
|
|
|
return FinishRunInit(ctx, ru)
|
|
}
|