diff --git a/.lnav.json b/.lnav.json deleted file mode 100644 index 45071d67..00000000 --- a/.lnav.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "sq_log": { - "title": "sq", - "url": "https://sq.io", - "description": "Log format for sq", - "json": true, - "hide-extra": false, - "file-pattern": "sq.log", - "multiline": true, - "line-format": [ - { - "field": "__timestamp__", - "timestamp-format": "%H:%M:%S.%L" - }, - "\t", - { - "field": "level", - "text-transform": "uppercase" - }, - "\t", - { - "field": "caller", - "max-width": 72, - "min-width": 72, - "overflow": "dot-dot" - }, - " ", - { - "field": "msg" - } - ], - "level-field": "level", - "level": { - "error": "error", - "debug": "debug", - "warning": "warn" - }, - "highlights": { - "caller": { - "pattern": "caller", - "underline": true - } - }, - "timestamp-field": "time", - "body-field": "msg", - "value": { - "time": { - "kind": "string", - "identifier": true - }, - "level": { - "kind": "string", - "identifier": true - }, - "caller": { - "kind": "string", - "identifier": true - }, - "msg": { - "kind": "quoted", - "identifier": false - } - } - } -} \ No newline at end of file diff --git a/README.md b/README.md index bbc1c823..081919a4 100644 --- a/README.md +++ b/README.md @@ -3,19 +3,25 @@ ![Main pipeline](https://github.com/neilotoole/sq/actions/workflows/main.yml/badge.svg) -# sq: data wrangler +# sq data wrangler `sq` is a command line tool that provides `jq`-style access to structured data sources: SQL databases, or document formats like CSV or Excel. ![sq](./splash.png) -`sq` can perform cross-source joins, -execute database-native SQL, and output to a multitude of formats including JSON, -Excel, CSV, HTML, Markdown and XML, or insert directly to a SQL database. -`sq` can also inspect sources to view metadata about the source structure (tables, -columns, size) and has commands for common database operations such as copying -or dropping tables. 
+`sq` executes jq-like [queries](https://sq.io/docs/query), or database-native [SQL](https://sq.io/docs/cmd/sql/). +It can perform cross-source [joins](https://sq.io/docs/query/#cross-source-joins). + +`sq` outputs to a multitude of formats including [JSON](https://sq.io/docs/output#json), +[Excel](https://sq.io/docs/output#xlsx), [CSV](https://sq.io/docs/output#csv), +[HTML](https://sq.io/docs/output#html), [Markdown](https://sq.io/docs/output#markdown) +and [XML](https://sq.io/docs/output#xml), and can [insert](https://sq.io/docs/output#insert) query +results directly to a SQL database. +`sq` can also [inspect](https://sq.io/docs/cmd/inspect) sources to view metadata about the source structure (tables, +columns, size) and has commands for common database operations to +[copy](https://sq.io/docs/cmd/tbl-copy), [truncate](https://sq.io/docs/cmd/tbl-truncate), +and [drop](https://sq.io/docs/cmd/tbl-drop) tables. Find out more at [sq.io](https://sq.io). @@ -49,64 +55,68 @@ go install github.com/neilotoole/sq See other [install options](https://sq.io/docs/install/). -## Quickstart +## Overview Use `sq help` to see command help. Docs are over at [sq.io](https://sq.io). -Read the [overview](https://sq.io/docs/overview/), and the +Read the [overview](https://sq.io/docs/overview/), and [tutorial](https://sq.io/docs/tutorial/). The [cookbook](https://sq.io/docs/cookbook/) has -recipes for common tasks. +recipes for common tasks, and the [query guide](https://sq.io/docs/query) covers `sq`'s query language. The major concept is: `sq` operates on data sources, which are treated as SQL databases (even if the source is really a CSV or XLSX file etc.). -In a nutshell, you `sq add` a source (giving it a `handle`), and then execute commands against the +In a nutshell, you [`sq add`](https://sq.io/docs/cmd/add) a source (giving it a [`handle`](https://sq.io/docs/concepts#handle)), and then execute commands against the source. ### Sources -Initially there are no sources. 
+Initially there are no [sources](https://sq.io/docs/source). ```shell $ sq ls ``` -Let's add a source. First we'll add a SQLite database, but this could also be Postgres, -SQL Server, Excel, etc. Download the sample DB, and `sq add` the source. We -use `-h` to specify a _handle_ to use. +Let's [add](https://sq.io/docs/cmd/add) a source. First we'll add a [SQLite](https://sq.io/docs/drivers/sqlite) +database, but this could also be [Postgres](https://sq.io/docs/drivers/postgres), +[SQL Server](https://sq.io/docs/drivers/sqlserver), [Excel](https://sq.io/docs/drivers/xlsx), etc. +Download the sample DB, and `sq add` the source. ```shell $ wget https://sq.io/testdata/sakila.db -$ sq add ./sakila.db -h @sakila_sl3 -@sakila_sl3 sqlite3 sakila.db +$ sq add ./sakila.db +@sakila sqlite3 sakila.db $ sq ls -v -HANDLE DRIVER LOCATION OPTIONS -@sakila_sl3* sqlite3 sqlite3:/root/sakila.db +HANDLE ACTIVE DRIVER LOCATION OPTIONS +@sakila active sqlite3 sqlite3:///Users/demo/sakila.db -$ sq ping @sakila_sl3 -@sakila_sl3 1ms pong +$ sq ping @sakila +@sakila 1ms pong $ sq src -@sakila_sl3 sqlite3 sakila.db +@sakila sqlite3 sakila.db ``` -The `sq ping` command simply pings the source to verify that it's available. +The [`sq ping`](https://sq.io/docs/cmd/ping) command simply pings the source +to verify that it's available. -`sq src` lists the _active source_, which in our case is `@sakila_sl3`. +[`sq src`](https://sq.io/docs/cmd/src) lists the [_active source_](https://sq.io/docs/source#active-source), which in our +case is `@sakila`. You can change the active source using `sq src @other_src`. When there's an active source specified, you can usually omit the handle from `sq` commands. Thus you could instead do: ```shell $ sq ping -@sakila_sl3 1ms pong +@sakila 1ms pong ``` ### Query -Fundamentally, `sq` is for querying data. Using our jq-style syntax: +Fundamentally, `sq` is for querying data. The jq-style syntax is covered in +detail in the [query guide](https://sq.io/docs/query). 
```shell $ sq '.actor | .actor_id < 100 | .[0:3]' @@ -117,7 +127,7 @@ actor_id first_name last_name last_update ``` The above query selected some rows from the `actor` table. You could also -use native SQL, e.g.: +use [native SQL](https://sq.io/docs/cmd/sql), e.g.: ```shell $ sq sql 'SELECT * FROM actor WHERE actor_id < 100 LIMIT 3' @@ -131,11 +141,12 @@ But we're flying a bit blind here: how did we know about the `actor` table? ### Inspect -`sq inspect` is your friend (output abbreviated): +[`sq inspect`](https://sq.io/docs/cmd/inspect) is your friend (output abbreviated): ```shell -HANDLE DRIVER NAME FQ NAME SIZE TABLES LOCATION -@sakila_sl3 sqlite3 sakila.db sakila.db/main 5.6MB 21 sqlite3:/Users/neilotoole/work/sq/sq/drivers/sqlite3/testdata/sakila.db +$ sq inspect +HANDLE DRIVER NAME FQ NAME SIZE TABLES LOCATION +@sakila sqlite3 sakila.db sakila.db/main 5.6MB 21 sqlite3:///Users/demo/sakila.db TABLE ROWS COL NAMES actor 200 actor_id, first_name, last_name, last_update @@ -143,36 +154,13 @@ address 603 address_id, address, address2, district, city_id, category 16 category_id, name, last_update ``` -Use the `--verbose` (`-v`) flag to see more detail. And use `--json` (`-j`) to output in JSON (output abbreviated): +Use [`sq inspect -v`](https://sq.io/docs/output#verbose) to see more detail. 
+Or use [`-j`](https://sq.io/docs/output#json) to get JSON output: -```shell -$ sq inspect -j -{ - "handle": "@sakila_sl3", - "name": "sakila.db", - "driver": "sqlite3", - "db_version": "3.31.1", - "location": "sqlite3:///root/sakila.db", - "size": 5828608, - "tables": [ - { - "name": "actor", - "table_type": "table", - "row_count": 200, - "columns": [ - { - "name": "actor_id", - "position": 0, - "primary_key": true, - "base_type": "numeric", - "column_type": "numeric", - "kind": "decimal", - "nullable": false - } -``` +![sq inspect -j](https://sq.io/images/sq_inspect_sakila_sqlite_json.png) -Combine `sq inspect` with [jq](https://stedolan.github.io/jq/) for some useful capabilities. Here's -how to [list](https://sq-web.netlify.app/docs/cookbook/#list-table-names) +Combine `sq inspect` with [jq](https://stedolan.github.io/jq/) for some useful capabilities. +Here's how to [list](https://sq.io/docs/cookbook/#list-table-names) all the table names in the active source: ```shell @@ -201,33 +189,35 @@ category.csv customer.csv film_actor.csv film_text.csv payment.csv sale Note that you can also inspect an individual table: ```shell -$ sq inspect -v @sakila_sl3.actor +$ sq inspect -v @sakila.actor TABLE ROWS TYPE SIZE NUM COLS COL NAMES COL TYPES actor 200 table - 4 actor_id, first_name, last_name, last_update numeric, VARCHAR(45), VARCHAR(45), TIMESTAMP - ``` -### Insert Output Into Database Source +### Insert query results -`sq` query results can be output in various formats (JSON, XML, CSV, etc), and can also be " -outputted" as an *insert* into database sources. +`sq` query results can be [output](https://sq.io/docs/output) in various formats +(JSON, XML, CSV, etc), and can also be "outputted" as an +[*insert*](https://sq.io/docs/output#insert) into database sources. -That is, you can use `sq` to insert results from a Postgres query into a MySQL table, or copy an -Excel worksheet into a SQLite table, or a push a CSV file into a SQL Server table etc. 
+That is, you can use `sq` to insert results from a Postgres query into a MySQL table, +or copy an Excel worksheet into a SQLite table, or push a CSV file into +a SQL Server table etc. -> **Note:** If you want to copy a table inside the same (database) source, use `sq tbl copy` -> instead, which uses the database's native table copy functionality. +> **Note:** If you want to copy a table inside the same (database) source, +> use [`sq tbl copy`](https://sq.io/docs/cmd/tbl-copy) instead, which uses the database's native table copy functionality. -For this example, we'll insert an Excel worksheet into our `@sakila_sl3` SQLite database. First, we +For this example, we'll insert an Excel worksheet into our `@sakila` +SQLite database. First, we download the XLSX file, and `sq add` it as a source. ```shell $ wget https://sq.io/testdata/xl_demo.xlsx -$ sq add ./xl_demo.xlsx --opts header=true -@xl_demo_xlsx xlsx xl_demo.xlsx +$ sq add ./xl_demo.xlsx --ingest.header=true +@xl_demo xlsx xl_demo.xlsx -$ sq @xl_demo_xlsx.person +$ sq @xl_demo.person uid username email address_id 1 neilotoole neilotoole@apache.org 1 2 ksoze kaiser@soze.org 2 [...] 
``` -Now, execute the same query, but this time `sq` inserts the results into a new table (`person`) -in `@sakila_sl3`: +Now, execute the same query, but this time `sq` inserts the results into a new +table (`person`) +in the SQLite `@sakila` source: ```shell -$ sq @xl_demo_xlsx.person --insert @sakila_sl3.person -Inserted 7 rows into @sakila_sl3.person +$ sq @xl_demo.person --insert @sakila.person +Inserted 7 rows into @sakila.person -$ sq inspect -v @sakila_sl3.person -TABLE ROWS TYPE SIZE NUM COLS COL NAMES COL TYPES -person 7 table - 4 uid, username, email, address_id INTEGER, TEXT, TEXT, INTEGER +$ sq inspect @sakila.person +TABLE ROWS COL NAMES +person 7 uid, username, email, address_id -$ sq @sakila_sl3.person +$ sq @sakila.person uid username email address_id 1 neilotoole neilotoole@apache.org 1 2 ksoze kaiser@soze.org 2 @@ -254,17 +245,11 @@ uid username email address_id [...] ``` -### Cross-Source Join +### Cross-source join -`sq` has rudimentary support for cross-source joins. That is, you can join an Excel worksheet with a +`sq` has rudimentary support for cross-source [joins](https://sq.io/docs/query#join). That is, you can join an Excel worksheet with a CSV file, or Postgres table, etc. -> **Note:** The current mechanism for these joins is highly naive: `sq` copies the joined table from -> each source to a "scratch database" (SQLite by default), and then performs the JOIN using the -> scratch database's SQL interface. Thus, performance is abysmal for larger tables. There are -> massive -> optimizations to be made, but none have been implemented yet. - See the [tutorial](https://sq.io/docs/tutorial/#join) for further details, but given an Excel source `@xl_demo` and a CSV source `@csv_demo`, you can do: @@ -278,26 +263,29 @@ augustus@caesar.org Ulan Bator plato@athens.gr Washington ``` -### Table Commands +### Table commands -`sq` provides several handy commands for working with tables. 
Note that these commands work directly +`sq` provides several handy commands for working with tables: +[`tbl copy`](/docs/cmd/tbl-copy), [`tbl truncate`](/docs/cmd/tbl-truncate) +and [`tbl drop`](/docs/cmd/tbl-drop). +Note that these commands work directly against SQL database sources, using their native SQL commands. ```shell $ sq tbl copy .actor .actor_copy -Copied table: @sakila_sl3.actor --> @sakila_sl3.actor_copy (200 rows copied) +Copied table: @sakila.actor --> @sakila.actor_copy (200 rows copied) $ sq tbl truncate .actor_copy -Truncated 200 rows from @sakila_sl3.actor_copy +Truncated 200 rows from @sakila.actor_copy $ sq tbl drop .actor_copy -Dropped table @sakila_sl3.actor_copy +Dropped table @sakila.actor_copy ``` -### UNIX Pipes +### UNIX pipes -For file-based sources (such as CSV or XLSX), you can `sq add` the source file, but you can also -pipe it: +For file-based sources (such as CSV or XLSX), you can `sq add` the source file, +but you can also pipe it: ```shell $ cat ./example.xlsx | sq .Sheet1 @@ -309,10 +297,10 @@ Similarly, you can inspect: $ cat ./example.xlsx | sq inspect ``` -## Data Source Drivers +## Drivers -`sq` knows how to deal with a data source type via a _driver_ implementation. To view the -installed/supported drivers: +`sq` knows how to deal with a data source type via a [driver](https://sq.io/docs/drivers) +implementation. 
To view the installed/supported drivers: ```shell $ sq driver ls @@ -329,22 +317,22 @@ jsonl JSON Lines: LF-delimited JSON objects xlsx Microsoft Excel XLSX ``` -## Output Formats +## Output formats -`sq` has many output formats: +`sq` has many [output formats](https://sq.io/docs/output): -- `--table`: Text/Table -- `--json`: JSON -- `--jsona`: JSON Array -- `--jsonl`: JSON Lines -- `--csv` / `--tsv` : CSV / TSV -- `--xlsx`: XLSX (Microsoft Excel) -- `--html`: HTML -- `--xml`: XML -- `--markdown`: Markdown -- `--raw`: Raw (bytes) +- `--text`: [Text](https://sq.io/docs/output#text) +- `--json`: [JSON](https://sq.io/docs/output#json) +- `--jsona`: [JSON Array](https://sq.io/docs/output#jsona) +- `--jsonl`: [JSON Lines](https://sq.io/docs/output#jsonl) +- `--csv` / `--tsv` : [CSV](https://sq.io/docs/output#csv) / [TSV](https://sq.io/docs/output#tsv) +- `--xlsx`: [XLSX](https://sq.io/docs/output#xlsx) (Microsoft Excel) +- `--html`: [HTML](https://sq.io/docs/output#html) +- `--xml`: [XML](https://sq.io/docs/output#xml) +- `--markdown`: [Markdown](https://sq.io/docs/output#markdown) +- `--raw`: [Raw](https://sq.io/docs/output#raw) (bytes) -## Changelog +## CHANGELOG See [CHANGELOG.md](./CHANGELOG.md). @@ -361,8 +349,9 @@ See [CHANGELOG.md](./CHANGELOG.md). - The [_Sakila_](https://dev.mysql.com/doc/sakila/en/) example databases were lifted from [jOOQ](https://github.com/jooq/jooq), which in turn owe their heritage to earlier work on Sakila. +- Date rendering via [`ncruces/go-strftime`](https://github.com/ncruces/go-strftime). -## Similar / Related / Noteworthy Projects +## Similar, related, or noteworthy projects - [usql](https://github.com/xo/usql) - [textql](https://github.com/dinedal/textql) diff --git a/cli/cli.go b/cli/cli.go index a70eb5fb..bb53c99c 100644 --- a/cli/cli.go +++ b/cli/cli.go @@ -278,6 +278,8 @@ func applyFlagAliases(f *pflag.FlagSet, name string) pflag.NormalizedName { case "table": // Legacy: flag --text was once named --table. 
name = flag.Text + case "md": + name = flag.Markdown default: } return pflag.NormalizedName(name) diff --git a/cli/cli_test.go b/cli/cli_test.go index 7848b15e..571bacb5 100644 --- a/cli/cli_test.go +++ b/cli/cli_test.go @@ -89,6 +89,7 @@ func TestCreateTblTestBytes(t *testing.T) { data := []any{fixt.GopherFilename, fBytes} require.Equal(t, int64(1), th.CreateTable(true, src, tblDef, data)) + t.Logf("%s", src.Location) th.DropTable(src, tblDef.Name) } diff --git a/cli/config/yamlstore/upgrades/v0.34.0/upgrade.go b/cli/config/yamlstore/upgrades/v0.34.0/upgrade.go index b92e585a..3e62b35b 100644 --- a/cli/config/yamlstore/upgrades/v0.34.0/upgrade.go +++ b/cli/config/yamlstore/upgrades/v0.34.0/upgrade.go @@ -40,6 +40,10 @@ func Upgrade(ctx context.Context, before []byte) (after []byte, err error) { m["options"] = m["defaults"] delete(m, "defaults") + if m["options"] == nil { + m["options"] = map[string]any{} + } + opts, ok := m["options"].(map[string]any) if !ok { return nil, errz.Errorf("corrupt config: invalid 'options' field") diff --git a/cli/flag/flag.go b/cli/flag/flag.go index 98f10692..507937c8 100644 --- a/cli/flag/flag.go +++ b/cli/flag/flag.go @@ -53,7 +53,7 @@ const ( JSONLShort = "l" JSONLUsage = "Output LF-delimited JSON objects" - Markdown = "md" + Markdown = "markdown" MarkdownUsage = "Output Markdown" AddActive = "active" diff --git a/cli/writers.go b/cli/writers.go index 2a1a274e..e9bc8d80 100644 --- a/cli/writers.go +++ b/cli/writers.go @@ -57,7 +57,7 @@ command, sq falls back to "text". Available formats: text, csv, tsv, xlsx, json, jsona, jsonl, - markdown, html, xml, yaml, raw`, + markdown, html, xml, yaml, raw`, ) OptVerbose = options.NewBool( diff --git a/splash.png b/splash.png index 97be955a..8365a670 100644 Binary files a/splash.png and b/splash.png differ