#229: Yet more sq diff implementation. (#237)

* sq config edit: fixed glaring bug that prevented editing a source

* Refine sq diff
This commit is contained in:
Neil O'Toole 2023-05-24 17:36:10 -06:00 committed by GitHub
parent c3a543aa84
commit 76d48154e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 212 additions and 118 deletions

View File

@ -33,6 +33,17 @@ The major feature is the long-gestating `sq diff`.
complex data in some property values.
- CSV format now renders byte sequences as `[777 bytes]` instead of dumping
the raw bytes.
- ☢️ TSV format (`--tsv`) no longer has a shorthand form `-T`. Apparently that
shorthand wasn't used much, and `-T` is needed elsewhere.
- ☢️ Likewise, `--xml` no longer has shorthand `-X`.
- In addition to the format flags `--text`, `--json`, etc., there is now
a `--format=FORMAT` flag, e.g. `--format=json`. This will allow `sq` to
continue to expand the number of output formats, without needing to have
a dedicated flag for each format.
## Fixed
- `sq config edit @source` was failing to save any edits.
## [v0.35.0] - 2023-05-10

View File

@ -247,6 +247,7 @@ func hasMatchingChildCommand(cmd *cobra.Command, s string) bool {
// addCmd adds cmd to parentCmd.
func addCmd(ru *run.Run, parentCmd, cmd *cobra.Command) *cobra.Command {
cmd.DisableFlagsInUseLine = true
cmd.Flags().SortFlags = false
cmd.PersistentFlags().SortFlags = false

View File

@ -180,7 +180,7 @@ func execConfigEditSource(cmd *cobra.Command, args []string) error {
}
src2 := &source.Source{}
if err = ioz.UnmarshallYAML(after, &src2); err != nil {
if err = ioz.UnmarshallYAML(after, src2); err != nil {
return err
}

View File

@ -25,11 +25,11 @@ var OptDiffNumLines = options.NewInt(
)
var OptDiffDataFormat = format.NewOpt(
"diff.data-format",
"data-format",
"diff.data.format",
"format",
'f',
format.Text,
"Diff data format",
"Output format (json, csv…) when comparing data",
`Specify the output format to use when comparing table data.
Available formats:
@ -49,8 +49,8 @@ var diffFormats = []format.Format{
var allDiffElementsFlags = []string{
flag.DiffAll,
flag.DiffSummary,
flag.DiffTable,
flag.DiffOverview,
flag.DiffSchema,
flag.DiffDBProps,
flag.DiffRowCount,
flag.DiffData,
@ -58,66 +58,103 @@ var allDiffElementsFlags = []string{
func newDiffCmd() *cobra.Command {
cmd := &cobra.Command{
Use: "diff @HANDLE1[.TABLE] @HANDLE2[.TABLE]",
Short: "Compare sources, or tables",
Long: `BETA: Compare sources, or tables.
Use: "diff @HANDLE1[.TABLE] @HANDLE2[.TABLE] [--data]",
Short: "BETA: Compare sources, or tables",
Long: `BETA: Compare the metadata or row data of sources or tables.
When comparing sources, by default the source summary, table structure,
and table row counts are compared.
CAUTION: This feature is in BETA testing. Please report any issues:
When comparing tables, by default the table structure and table row counts
are compared.
https://github.com/neilotoole/sq/issues/new/choose
When comparing sources ("source diff"), by default the source overview, schema,
and table row counts are compared. Row data is not compared.
When comparing tables ("table diff"), by default the table schema and table
row counts are compared. Row data is not compared.
Use flags to specify the elements you want to compare. See the examples.
Note that --summary and --dbprops only apply to source diff, not table diff.
Flag --data diffs the values of each row in the compared tables. Use with
caution with large tables.
Use --format with --data to specify the format to render the diff records.
Line-based formats (e.g. "text" or "jsonl") are often the most ergonomic,
although "yaml" may be preferable for comparing column values. The
available formats are:
text, csv, tsv,
json, jsona, jsonl,
markdown, html, xml, yaml
The default format can be changed via:
$ sq config set diff.data.format
The --format flag only applies with data diffs (--data). Metadata diffs are
always output in YAML.
Note that --overview and --dbprops only apply to source diffs, not table diffs.
Flag --unified (-U) controls the number of lines to show surrounding a diff.
The default can be changed via "sq config set diff.lines".`,
The default (3) can be changed via:
$ sq config set diff.lines`,
Args: cobra.ExactArgs(2),
ValidArgsFunction: (&handleTableCompleter{
handleRequired: true,
max: 2,
}).complete,
RunE: execDiff,
Example: ` # Diff sources (compare default elements).
Example: `
Metadata diff
-------------
# Diff sources (compare default elements).
$ sq diff @prod/sakila @staging/sakila
# As above, but show 7 lines surrounding each diff.
$ sq diff @prod/sakila @staging/sakila -U7
# Diff sources, but only compare source summary.
$ sq diff @prod/sakila @staging/sakila --summary
# Diff sources, but only compare source overview.
$ sq diff @prod/sakila @staging/sakila --overview
# Compare source summary, and DB properties.
$ sq diff @prod/sakila @staging/sakila -sp
# Diff sources, but only DB properties.
$ sq diff @prod/sakila @staging/sakila --dbprops
# Compare source overview, and DB properties.
$ sq diff @prod/sakila @staging/sakila -OP
# Diff sources, but only compare schema.
$ sq diff @prod/sakila @staging/sakila --schema
# Compare schema table structure, and row counts.
$ sq diff @prod/sakila @staging/sakila --Tc
$ sq diff @prod/sakila @staging/sakila -SN
# Compare everything, including table data. Caution: this can be slow.
# Compare the data of each table. Caution: may be slow.
$ sq diff @prod/sakila @staging/sakila --data
# Compare everything, including table data. Caution: can be slow.
$ sq diff @prod/sakila @staging/sakila --all
# Compare actor table in prod vs staging
# Compare metadata of actor table in prod vs staging
$ sq diff @prod/sakila.actor @staging/sakila.actor
# Compare data in the actor tables. Caution: this can be slow.
$ sq diff @prod/sakila.actor @staging/sakila.actor --data`,
Row data diff
-------------
# Compare data in the actor tables. Caution: can be slow.
$ sq diff @prod/sakila.actor @staging/sakila.actor --data
# Compare data in the actor tables, but output in JSONL.
$ sq diff @prod/sakila.actor @staging/sakila.actor --data --format jsonl`,
}
addOptionFlag(cmd.Flags(), OptDiffNumLines)
addOptionFlag(cmd.Flags(), OptDiffDataFormat)
panicOn(cmd.RegisterFlagCompletionFunc(
OptDiffDataFormat.Flag(),
completeStrings(-1, stringz.Strings(diffFormats)...),
))
cmd.Flags().BoolP(flag.Header, flag.HeaderShort, true, flag.HeaderUsage)
cmd.Flags().BoolP(flag.NoHeader, flag.NoHeaderShort, false, flag.NoHeaderUsage)
cmd.MarkFlagsMutuallyExclusive(flag.Header, flag.NoHeader)
cmd.Flags().BoolP(flag.DiffSummary, flag.DiffSummaryShort, false, flag.DiffSummaryUsage)
cmd.Flags().BoolP(flag.DiffOverview, flag.DiffOverviewShort, false, flag.DiffOverviewUsage)
cmd.Flags().BoolP(flag.DiffDBProps, flag.DiffDBPropsShort, false, flag.DiffDBPropsUsage)
cmd.Flags().BoolP(flag.DiffTable, flag.DiffTableShort, false, flag.DiffTableUsage)
cmd.Flags().BoolP(flag.DiffSchema, flag.DiffSchemaShort, false, flag.DiffSchemaUsage)
cmd.Flags().BoolP(flag.DiffRowCount, flag.DiffRowCountShort, false, flag.DiffRowCountUsage)
cmd.Flags().BoolP(flag.DiffData, flag.DiffDataShort, false, flag.DiffDataUsage)
cmd.Flags().BoolP(flag.DiffAll, flag.DiffAllShort, false, flag.DiffAllUsage)
@ -128,6 +165,11 @@ The default can be changed via "sq config set diff.lines".`,
cmd.MarkFlagsMutuallyExclusive(flag.DiffAll, nonAllFlags[i])
}
panicOn(cmd.RegisterFlagCompletionFunc(
OptDiffDataFormat.Flag(),
completeStrings(-1, stringz.Strings(diffFormats)...),
))
return cmd
}
@ -184,9 +226,9 @@ func getDiffSourceElements(cmd *cobra.Command) *diff.Elements {
if !isAnyDiffElementsFlagChanged(cmd) {
// Default
return &diff.Elements{
Summary: true,
Overview: true,
DBProperties: false,
Table: true,
Schema: true,
RowCount: true,
Data: false,
}
@ -194,18 +236,18 @@ func getDiffSourceElements(cmd *cobra.Command) *diff.Elements {
if cmdFlagChanged(cmd, flag.DiffAll) {
return &diff.Elements{
Summary: true,
Overview: true,
DBProperties: true,
Table: true,
Schema: true,
RowCount: true,
Data: true,
}
}
return &diff.Elements{
Summary: cmdFlagIsSetTrue(cmd, flag.DiffSummary),
Overview: cmdFlagIsSetTrue(cmd, flag.DiffOverview),
DBProperties: cmdFlagIsSetTrue(cmd, flag.DiffDBProps),
Table: cmdFlagIsSetTrue(cmd, flag.DiffTable),
Schema: cmdFlagIsSetTrue(cmd, flag.DiffSchema),
RowCount: cmdFlagIsSetTrue(cmd, flag.DiffRowCount),
Data: cmdFlagIsSetTrue(cmd, flag.DiffData),
}
@ -215,21 +257,21 @@ func getDiffTableElements(cmd *cobra.Command) *diff.Elements {
if !isAnyDiffElementsFlagChanged(cmd) {
// Default
return &diff.Elements{
Table: true,
Schema: true,
RowCount: true,
}
}
if cmdFlagChanged(cmd, flag.DiffAll) {
return &diff.Elements{
Table: true,
Schema: true,
RowCount: true,
Data: true,
}
}
return &diff.Elements{
Table: cmdFlagIsSetTrue(cmd, flag.DiffTable),
Schema: cmdFlagIsSetTrue(cmd, flag.DiffSchema),
RowCount: cmdFlagIsSetTrue(cmd, flag.DiffRowCount),
Data: cmdFlagIsSetTrue(cmd, flag.DiffData),
}

View File

@ -73,7 +73,7 @@ The exit code is 1 if ping fails for any of the sources.`,
addTextFlags(cmd)
cmd.Flags().BoolP(flag.JSON, flag.JSONShort, false, flag.JSONUsage)
cmd.Flags().BoolP(flag.CSV, flag.CSVShort, false, flag.CSVUsage)
cmd.Flags().BoolP(flag.TSV, flag.TSVShort, false, flag.TSVUsage)
cmd.Flags().Bool(flag.TSV, false, flag.TSVUsage)
cmd.Flags().BoolP(flag.Compact, flag.CompactShort, false, flag.CompactUsage)
cmd.Flags().Duration(flag.PingTimeout, time.Second*10, flag.PingTimeoutUsage)

View File

@ -14,72 +14,75 @@ func newRootCmd() *cobra.Command {
Short: "sq",
Long: `sq is a swiss-army knife for wrangling data.
$ sq '@sakila_pg | .actor | .first_name, .last_name | .[0:10]' --json
Use sq to query Postgres, SQLite, SQLServer, MySQL, CSV, Excel, etc,
and output in text, JSON, CSV, Excel and so on, or
write output to a database table.
and output in text, JSON, CSV, Excel and so on, or write output to a
database table.
You can query using sq's own jq-like syntax, or in native SQL.
Use "sq inspect" to view schema metadata. Use the "sq tbl" commands
to copy, truncate and drop tables.
to copy, truncate and drop tables. Use "sq diff" to compare source metadata
and row data.
See docs and more: https://sq.io`,
Example: ` # pipe an Excel file and output the first 10 rows from sheet1
$ cat data.xlsx | sq '.sheet1 | .[0:10]'
# add Postgres source identified by handle @sakila_pg
Example: `# Add Postgres source identified by handle @sakila_pg
$ sq add --handle=@sakila_pg 'postgres://user:pass@localhost:5432/sakila'
# add SQL Server source; will have generated handle @sakila_mssql
$ sq add 'sqlserver://user:pass@localhost?database=sakila'
# list available data sources
# List available data sources.
$ sq ls
# ping all data sources
$ sq ping all
# set active data source
# Set active data source.
$ sq src @sakila_pg
# get specified cols from table address in active data source
$ sq '.address | .address_id, .city, .country'
# Get specified cols from table address in active data source.
$ sq '.actor | .actor_id, .first_name, .last_name'
# get metadata (schema, stats etc) for data source
# Ping a data source.
$ sq ping @sakila_pg
# View metadata (schema, stats etc) for data source.
$ sq inspect @sakila_pg
# get metadata for a table
$ sq inspect @pg1.person
# View metadata for a table.
$ sq inspect @sakila_pg.actor
# output in JSON
$ sq -j '.person | .uid, .username, .email'
# Output all rows from 'actor' table in JSON.
$ sq -j .actor
# output in text format (with header)
$ sq -th '.person | .uid, .username, .email'
# Alternative way to specify format.
$ sq --format json .actor
# output in text format (no header)
$ sq -t '.person | .uid, .username, .email'
# Output in text format (with header).
$ sq -th .actor
# output to a HTML file
$ sq --html '@sakila_sl3.actor' -o actor.html
# Output in text format (no header).
$ sq -tH .actor
# join across data sources
# Output to a HTML file.
$ sq --html '@sakila_pg.actor' -o actor.html
# Join across data sources.
$ sq '@my1.person, @pg1.address | join(.uid) | .username, .email, .city'
# insert query results into a table in another data source
# Insert query results into a table in another data source.
$ sq --insert=@pg1.person '@my1.person | .username, .email'
# execute a database-native SQL query, specifying the source
# Execute a database-native SQL query, specifying the source.
$ sq sql --src=@pg1 'SELECT uid, username, email FROM person LIMIT 2'
# copy a table (in the same source)
$ sq tbl copy @sakila_sl3.actor .actor2
# Copy a table (in the same source).
$ sq tbl copy @sakila_pg.actor .actor2
# truncate tables
$ sq tbl truncate @sakila_sl3.actor2
# Truncate table.
$ sq tbl truncate @sakila_pg.actor2
# drop table
$ sq tbl drop @sakila_sl3.actor2`,
# Drop table.
$ sq tbl drop @sakila_pg.actor2
# Pipe an Excel file and output the first 10 rows from sheet1
$ cat data.xlsx | sq '.sheet1 | .[0:10]'`,
}
cmd.Flags().SortFlags = false

View File

@ -5,6 +5,8 @@ import (
"fmt"
"strings"
"github.com/neilotoole/sq/cli/output/format"
"github.com/neilotoole/sq/cli/run"
"github.com/neilotoole/sq/libsq/core/lg/lga"
@ -347,21 +349,18 @@ func addTextFlags(cmd *cobra.Command) {
cmd.MarkFlagsMutuallyExclusive(flag.Header, flag.NoHeader)
}
// addQueryCmdFlags sets the common flags for the slq/sql commands.
// addQueryCmdFlags sets the common flags for the slq and sql commands.
func addQueryCmdFlags(cmd *cobra.Command) {
addTextFlags(cmd)
cmd.Flags().BoolP(flag.JSON, flag.JSONShort, false, flag.JSONUsage)
cmd.Flags().BoolP(flag.JSONA, flag.JSONAShort, false, flag.JSONAUsage)
cmd.Flags().BoolP(flag.JSONL, flag.JSONLShort, false, flag.JSONLUsage)
cmd.Flags().BoolP(flag.CSV, flag.CSVShort, false, flag.CSVUsage)
cmd.Flags().BoolP(flag.TSV, flag.TSVShort, false, flag.TSVUsage)
cmd.Flags().Bool(flag.HTML, false, flag.HTMLUsage)
cmd.Flags().Bool(flag.Markdown, false, flag.MarkdownUsage)
cmd.Flags().BoolP(flag.Raw, flag.RawShort, false, flag.RawUsage)
cmd.Flags().BoolP(flag.XLSX, flag.XLSXShort, false, flag.XLSXUsage)
cmd.Flags().BoolP(flag.XML, flag.XMLShort, false, flag.XMLUsage)
cmd.Flags().BoolP(flag.YAML, flag.YAMLShort, false, flag.YAMLUsage)
cmd.Flags().BoolP(flag.Compact, flag.CompactShort, false, flag.CompactUsage)
addOptionFlag(cmd.Flags(), OptFormat)
panicOn(cmd.RegisterFlagCompletionFunc(
OptFormat.Flag(),
completeStrings(-1, stringz.Strings(format.All())...),
))
addResultFormatFlags(cmd)
cmd.MarkFlagsMutuallyExclusive(append(
[]string{OptFormat.Flag()},
flag.OutputFormatFlags...,
)...)
addTimeFormatOptsFlags(cmd)
@ -384,6 +383,28 @@ func addQueryCmdFlags(cmd *cobra.Command) {
panicOn(cmd.RegisterFlagCompletionFunc(flag.CSVDelim, completeStrings(-1, csv.NamedDelims()...)))
}
// addResultFormatFlags registers on cmd the individual boolean flags that
// select the result output format (--json, --csv, and so on), after first
// delegating to addTextFlags. The flags named in flag.OutputFormatFlags are
// then marked mutually exclusive, and finally the --compact flag is added.
//
// The --format flag is deliberately not registered here, because its
// handling differs from command to command.
func addResultFormatFlags(cmd *cobra.Command) {
	addTextFlags(cmd)

	// Registration order is significant, so the table below must keep the
	// flags in this sequence. An empty short means "no shorthand".
	formatFlags := []struct {
		name  string
		short string
		usage string
	}{
		{name: flag.JSON, short: flag.JSONShort, usage: flag.JSONUsage},
		{name: flag.JSONA, short: flag.JSONAShort, usage: flag.JSONAUsage},
		{name: flag.JSONL, short: flag.JSONLShort, usage: flag.JSONLUsage},
		{name: flag.CSV, short: flag.CSVShort, usage: flag.CSVUsage},
		{name: flag.TSV, short: "", usage: flag.TSVUsage},
		{name: flag.HTML, short: "", usage: flag.HTMLUsage},
		{name: flag.Markdown, short: "", usage: flag.MarkdownUsage},
		{name: flag.Raw, short: flag.RawShort, usage: flag.RawUsage},
		{name: flag.XLSX, short: flag.XLSXShort, usage: flag.XLSXUsage},
		{name: flag.XML, short: "", usage: flag.XMLUsage},
		{name: flag.YAML, short: flag.YAMLShort, usage: flag.YAMLUsage},
	}

	fs := cmd.Flags()
	for _, ff := range formatFlags {
		fs.BoolP(ff.name, ff.short, false, ff.usage)
	}

	cmd.MarkFlagsMutuallyExclusive(flag.OutputFormatFlags...)

	fs.BoolP(flag.Compact, flag.CompactShort, false, flag.CompactUsage)
}
// extractFlagArgsValues returns a map {key:value} of predefined variables
// as supplied via --arg. For example:
//

View File

@ -32,16 +32,16 @@ type Config struct {
// Elements determines what source elements to compare.
type Elements struct {
// Summary compares a summary of the sources.
Summary bool
// Overview compares a summary of the sources.
Overview bool
// DBProperties compares DB properties.
DBProperties bool
// Table compares table structure.
Table bool
// Schema compares table/schema structure.
Schema bool
// RowCount compares table row count.
// RowCount compares table row count when comparing schemata.
RowCount bool
// Data compares each row in a table. Caution: this can be slow.

View File

@ -43,7 +43,7 @@ func ExecSourceDiff(ctx context.Context, ru *run.Run, cfg *Config,
return err
}
if elems.Summary {
if elems.Overview {
srcDiff, err := buildSourceSummaryDiff(cfg, sd1, sd2)
if err != nil {
return err
@ -64,7 +64,7 @@ func ExecSourceDiff(ctx context.Context, ru *run.Run, cfg *Config,
}
}
if elems.Table {
if elems.Schema {
tblDiffs, err := buildSourceTableDiffs(ctx, cfg, elems.RowCount, sd1, sd2)
if err != nil {
return err

View File

@ -29,7 +29,7 @@ func ExecTableDiff(ctx context.Context, ru *run.Run, cfg *Config, elems *Element
return err
}
if elems.Table {
if elems.Schema {
g, gCtx := errgroup.WithContext(ctx)
g.Go(func() error {
var gErr error

View File

@ -87,7 +87,6 @@ const (
SQLQueryUsage = "Execute the SQL as a query (as opposed to statement)"
TSV = "tsv"
TSVShort = "T"
TSVUsage = "Output TSV"
Text = "text"
@ -116,7 +115,6 @@ const (
YAMLUsage = "Output YAML"
XML = "xml"
XMLShort = "X"
XMLUsage = "Output XML"
SkipVerify = "skip-verify"
@ -155,21 +153,21 @@ const (
LogLevel = "log.level"
LogLevelUsage = "Log level: one of DEBUG, INFO, WARN, ERROR"
DiffSummary = "summary"
DiffSummaryShort = "s"
DiffSummaryUsage = "Compare source summary"
DiffOverview = "overview"
DiffOverviewShort = "O"
DiffOverviewUsage = "Compare source overview"
DiffTable = "table"
DiffTableShort = "T"
DiffTableUsage = "Compare table structure"
DiffSchema = "schema"
DiffSchemaShort = "S"
DiffSchemaUsage = "Compare schema structure"
DiffDBProps = "dbprops"
DiffDBPropsShort = "p"
DiffDBPropsShort = "P"
DiffDBPropsUsage = "Compare DB properties"
DiffRowCount = "count"
DiffRowCountShort = "c"
DiffRowCountUsage = "When comparing tables, include row counts"
DiffRowCountShort = "N"
DiffRowCountUsage = "When comparing table schema structure, include row counts"
DiffData = "data"
DiffDataShort = "d"
@ -179,3 +177,19 @@ const (
DiffAllShort = "a"
DiffAllUsage = "Compare everything (caution: may be slow)"
)
// OutputFormatFlags is the set of flag names that control output format,
// one entry per format flag (e.g. --text, --json, --csv). Callers pass
// this slice to MarkFlagsMutuallyExclusive so that the format flags
// cannot be combined with one another.
var OutputFormatFlags = []string{
Text,
JSON,
JSONA,
JSONL,
CSV,
TSV,
HTML,
Markdown,
Raw,
XLSX,
XML,
YAML,
}

View File

@ -47,8 +47,8 @@ to certain formats, such as "text" or "csv".`,
OptFormat = format.NewOpt(
"format",
"",
0,
"format",
'f',
format.Text,
"Output format",
`Specify the output format. Some formats are only implemented for a subset of

View File

@ -86,7 +86,9 @@ func (w *RecordWriter) WriteRecords(recs []record.Record) error {
if w.needsHeader {
headerRow := w.recMeta.Names()
for i := range headerRow {
headerRow[i] = w.pr.Header.Sprint(headerRow[i])
}
err := w.cw.Write(headerRow)
if err != nil {
return errz.Wrap(err, "failed to write header record")