#229: sq diff ready for beta release (#239)

* sq config edit: fixed glaring bug that prevented editing a source

* Refine sq diff
This commit is contained in:
Neil O'Toole 2023-05-25 21:58:43 -06:00 committed by GitHub
parent 76d48154e7
commit c810d17eec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 79 additions and 50 deletions

View File

Before

Width:  |  Height:  |  Size: 346 KiB

After

Width:  |  Height:  |  Size: 346 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 392 KiB

View File

@ -7,9 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
Breaking changes are annotated with ☢️.
## Upcoming
## [v0.36.0] - 2023-05-25
The major feature is the long-gestating `sq diff`.
The major feature is the long-gestating [`sq diff`](https://sq.io/docs/diff).
## Added
@ -35,7 +35,7 @@ The major feature is the long-gestating `sq diff`.
the raw bytes.
- ☢️ TSV format (`--tsv`) no longer has a shorthand form `-T`. Apparently that
shorthand wasn't used much, and `-T` is needed elsewhere.
- ☢️ Likewise, `--xml` no longer has shorthand `-X`.
- ☢️ Likewise, `--xml` no longer has shorthand `-X`. And `--markdown` has lost its alias `--md`.
- In addition to the format flags `--text`, `--json`, etc., there is now
a `--format=FORMAT` flag, e.g. `--format=json`. This will allow `sq` to
continue to expand the number of output formats, without needing to have
@ -524,3 +524,4 @@ make working with lots of sources much easier.
[v0.34.1]: https://github.com/neilotoole/sq/compare/v0.34.0...v0.34.1
[v0.34.2]: https://github.com/neilotoole/sq/compare/v0.34.1...v0.34.2
[v0.35.0]: https://github.com/neilotoole/sq/compare/v0.34.2...v0.35.0
[v0.36.0]: https://github.com/neilotoole/sq/compare/v0.35.0...v0.36.0

View File

@ -9,7 +9,7 @@
`sq` is a command line tool that provides jq-style access to
structured data sources: SQL databases, or document formats like CSV or Excel.
![sq](./splash.png)
![sq](.images/splash.png)
`sq` executes jq-like [queries](https://sq.io/docs/query), or database-native [SQL](https://sq.io/docs/cmd/sql/).
It can perform cross-source [joins](https://sq.io/docs/query/#cross-source-joins).
@ -191,11 +191,17 @@ category.csv customer.csv film_actor.csv film_text.csv payment.csv sale
Note that you can also inspect an individual table:
```shell
$ sq inspect -v @sakila.actor
TABLE ROWS TYPE SIZE NUM COLS COL NAMES COL TYPES
actor 200 table - 4 actor_id, first_name, last_name, last_update numeric, VARCHAR(45), VARCHAR(45), TIMESTAMP
$ sq inspect @sakila.actor
TABLE ROWS TYPE SIZE NUM COLS COL NAMES
actor 200 table - 4 actor_id, first_name, last_name, last_update
```
### Diff
Use [`sq diff`](https://sq.io/docs/diff) to compare source metadata, or row data.
![sq diff](.images/sq_diff_table_data.png)
### Insert query results
`sq` query results can be [output](https://sq.io/docs/output) in various formats

View File

@ -60,19 +60,27 @@ func newDiffCmd() *cobra.Command {
cmd := &cobra.Command{
Use: "diff @HANDLE1[.TABLE] @HANDLE2[.TABLE] [--data]",
Short: "BETA: Compare sources, or tables",
Long: `BETA: Compare the metadata or row data of sources or tables.
Long: `BETA: Compare metadata, or row data, of sources and tables.
CAUTION: This feature is in BETA testing. Please report any issues:
CAUTION: This feature is in beta testing. Please report any issues:
https://github.com/neilotoole/sq/issues/new/choose
When comparing sources ("source diff"), by default the source overview, schema,
and table row counts are compared. Row data is not compared.
When comparing sources ("source diff"), the default behavior is to diff the
source overview, schema, and table row counts. Table row data is not compared.
When comparing tables ("table diff"), by default the table schema and table
row counts are compared. Row data is not compared.
When comparing tables ("table diff"), the default is to diff table schema and
row counts. Table row data is not compared.
Use flags to specify the elements you want to compare. See the examples.
Use flags to specify the elements you want to compare. The available
elements are:
--overview source metadata, without schema (source diff only)
--dbprops database/server properties (source diff only)
--schema schema structure, for database or individual table
--counts show row counts when using --schema
--data row data values
--all all of the above
Flag --data diffs the values of each row in the compared tables. Use with
caution with large tables.
@ -88,7 +96,7 @@ available formats are:
The default format can be changed via:
$ sq config set diff.data.format
$ sq config set diff.data.format FORMAT
The --format flag only applies with data diffs (--data). Metadata diffs are
always output in YAML.
@ -98,7 +106,7 @@ Note that --overview and --dbprops only apply to source diffs, not table diffs.
Flag --unified (-U) controls the number of lines to show surrounding a diff.
The default (3) can be changed via:
$ sq config set diff.lines`,
$ sq config set diff.lines N`,
Args: cobra.ExactArgs(2),
ValidArgsFunction: (&handleTableCompleter{
handleRequired: true,
@ -122,7 +130,7 @@ The default (3) can be changed via:
$ sq diff @prod/sakila @staging/sakila --dbprops
# Compare source overview, and DB properties.
$ sq diff @prod/sakila @staging/sakila -OP
$ sq diff @prod/sakila @staging/sakila -OB
# Diff sources, but only compare schema.
$ sq diff @prod/sakila @staging/sakila --schema
@ -130,23 +138,23 @@ The default (3) can be changed via:
# Compare schema table structure, and row counts.
$ sq diff @prod/sakila @staging/sakila -SN
# Compare the data of each table. Caution: may be slow.
$ sq diff @prod/sakila @staging/sakila --data
# Compare everything, including table data. Caution: can be slow.
$ sq diff @prod/sakila @staging/sakila --all
# Compare metadata of actor table in prod vs staging
# Compare metadata of actor table in prod vs staging.
$ sq diff @prod/sakila.actor @staging/sakila.actor
Row data diff
-------------
# Compare data in the actor tables. Caution: can be slow.
# Compare data in the actor tables.
$ sq diff @prod/sakila.actor @staging/sakila.actor --data
# Compare data in the actor tables, but output in JSONL.
$ sq diff @prod/sakila.actor @staging/sakila.actor --data --format jsonl`,
$ sq diff @prod/sakila.actor @staging/sakila.actor --data --format jsonl
# Compare data in all tables and views. Caution: may be slow.
$ sq diff @prod/sakila @staging/sakila --data`,
}
addOptionFlag(cmd.Flags(), OptDiffNumLines)

View File

@ -29,25 +29,31 @@ database. The flag is disregarded when inspecting a table.
Use the --verbose flag to see more detail in some output formats.
If @HANDLE is not provided, the active data source is assumed.`,
Example: ` # Inspect active data source
Example: ` # Inspect active data source.
$ sq inspect
# Inspect @pg1 data source
# Inspect @pg1 data source.
$ sq inspect @pg1
# Inspect @pg1 data source, showing verbose output
# Inspect @pg1 data source, showing verbose output.
$ sq inspect -v @pg1
# Show DB properties for @pg1
# Show output in JSON (useful for piping to jq).
$ sq inspect --json @pg1
# Show output in YAML.
$ sq inspect --yaml @pg1
# Show DB properties for @pg1.
$ sq inspect --dbprops @pg1
# Inspect 'actor' in @pg1 data source
# Inspect table "actor" in @pg1 data source.
$ sq inspect @pg1.actor
# Inspect 'actor' in active data source
# Inspect "actor" in active data source.
$ sq inspect .actor
# Inspect piped data
# Inspect piped data.
$ cat data.xlsx | sq inspect`,
}

View File

@ -88,8 +88,8 @@ func (td *tableData) clone() *tableData { //nolint:unused // REVISIT: no longer
}
}
// sourceDiff is a container for a source diff.
type sourceDiff struct {
// sourceOverviewDiff is a container for a source overview diff.
type sourceOverviewDiff struct {
sd1, sd2 *sourceData
header string
diff string

View File

@ -43,7 +43,7 @@ func renderSourceMeta2YAML(sm *source.Metadata) (string, error) {
DBVersion: sm.DBVersion,
User: sm.User,
Size: sm.Size,
TableCount: int64(len(sm.Tables)),
TableCount: sm.TableCount,
}
b, err := ioz.MarshalYAML(smr)

View File

@ -44,7 +44,7 @@ func ExecSourceDiff(ctx context.Context, ru *run.Run, cfg *Config,
}
if elems.Overview {
srcDiff, err := buildSourceSummaryDiff(cfg, sd1, sd2)
srcDiff, err := buildSourceOverviewDiff(cfg, sd1, sd2)
if err != nil {
return err
}
@ -123,7 +123,7 @@ func buildSourceTableDiffs(ctx context.Context, cfg *Config, showRowCounts bool,
return diffs, nil
}
func buildSourceSummaryDiff(cfg *Config, sd1, sd2 *sourceData) (*sourceDiff, error) {
func buildSourceOverviewDiff(cfg *Config, sd1, sd2 *sourceData) (*sourceOverviewDiff, error) {
var (
body1, body2 string
err error
@ -148,10 +148,10 @@ func buildSourceSummaryDiff(cfg *Config, sd1, sd2 *sourceData) (*sourceDiff, err
return nil, errz.Err(err)
}
diff := &sourceDiff{
diff := &sourceOverviewDiff{
sd1: sd1,
sd2: sd2,
header: fmt.Sprintf("sq diff --summary %s %s", sd1.handle, sd2.handle),
header: fmt.Sprintf("sq diff --overview %s %s", sd1.handle, sd2.handle),
diff: unified,
}

View File

@ -162,7 +162,7 @@ const (
DiffSchemaUsage = "Compare schema structure"
DiffDBProps = "dbprops"
DiffDBPropsShort = "P"
DiffDBPropsShort = "B"
DiffDBPropsUsage = "Compare DB properties"
DiffRowCount = "count"

View File

@ -295,6 +295,8 @@ func getSourceMetadata(ctx context.Context, src *source.Source, db sqlz.DB) (*so
return nil, err
}
md.TableCount = int64(len(md.Tables))
return md, nil
}

View File

@ -273,6 +273,8 @@ current_setting('server_version'), version(), "current_user"()`
md.Tables = append(md.Tables, tblMetas[i])
}
}
md.TableCount = int64(len(tblNames))
return md, nil
}

View File

@ -815,7 +815,7 @@ func (d *database) TableMetadata(ctx context.Context, tblName string) (*source.T
func (d *database) SourceMetadata(ctx context.Context) (*source.Metadata, error) {
// https://stackoverflow.com/questions/9646353/how-to-find-sqlite-database-file-version
meta := &source.Metadata{Handle: d.src.Handle, Driver: Type, DBDriver: dbDrvr}
md := &source.Metadata{Handle: d.src.Handle, Driver: Type, DBDriver: dbDrvr}
dsn, err := PathFromLocation(d.src)
if err != nil {
@ -824,34 +824,36 @@ func (d *database) SourceMetadata(ctx context.Context) (*source.Metadata, error)
const q = "SELECT sqlite_version(), (SELECT name FROM pragma_database_list ORDER BY seq limit 1);"
err = d.db.QueryRowContext(ctx, q).Scan(&meta.DBVersion, &meta.Schema)
err = d.db.QueryRowContext(ctx, q).Scan(&md.DBVersion, &md.Schema)
if err != nil {
return nil, errw(err)
}
meta.DBProduct = "SQLite3 v" + meta.DBVersion
md.DBProduct = "SQLite3 v" + md.DBVersion
fi, err := os.Stat(dsn)
if err != nil {
return nil, errw(err)
}
meta.Size = fi.Size()
meta.Name = fi.Name()
meta.FQName = fi.Name() + "/" + meta.Schema
meta.Location = d.src.Location
md.Size = fi.Size()
md.Name = fi.Name()
md.FQName = fi.Name() + "/" + md.Schema
md.Location = d.src.Location
meta.Tables, err = getAllTblMeta(ctx, d.db)
md.Tables, err = getAllTblMeta(ctx, d.db)
if err != nil {
return nil, err
}
meta.DBProperties, err = getDBProperties(ctx, d.db)
md.TableCount = int64(len(md.Tables))
md.DBProperties, err = getDBProperties(ctx, d.db)
if err != nil {
return nil, err
}
return meta, nil
return md, nil
}
// Close implements driver.Database.

View File

@ -195,6 +195,8 @@ GROUP BY database_id) AS total_size_bytes`
md.Tables = append(md.Tables, tblMetas[i])
}
}
md.TableCount = int64(len(md.Tables))
return md, nil
}

View File

@ -47,10 +47,10 @@ type Metadata struct {
// Size is the physical size of the source in bytes, e.g. DB file size.
Size int64 `json:"size" yaml:"size"`
// TableCount is the number of talb
// TableCount is the count of tables (including views).
TableCount int64 `json:"table_count" yaml:"table_count"`
// Tables is the metadata for each table in the source.
// Tables is the metadata for each table/view in the source.
Tables []*TableMetadata `json:"tables"`
// DBProperties are name-value pairs from the DB.