2020-08-06 20:58:47 +03:00
|
|
|
package cli
|
|
|
|
|
|
|
|
import (
|
2022-12-25 07:04:18 +03:00
|
|
|
"bytes"
|
|
|
|
"context"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"os"
|
2020-08-06 20:58:47 +03:00
|
|
|
"strings"
|
|
|
|
|
2023-05-03 15:36:10 +03:00
|
|
|
"github.com/neilotoole/sq/drivers/csv"
|
2023-04-26 18:16:42 +03:00
|
|
|
|
2023-04-19 08:28:09 +03:00
|
|
|
"github.com/neilotoole/sq/cli/flag"
|
|
|
|
|
2022-12-25 07:04:18 +03:00
|
|
|
"github.com/neilotoole/sq/cli/output"
|
2020-08-06 20:58:47 +03:00
|
|
|
"github.com/spf13/cobra"
|
2022-12-25 07:04:18 +03:00
|
|
|
"golang.org/x/term"
|
2020-08-06 20:58:47 +03:00
|
|
|
|
|
|
|
"github.com/neilotoole/sq/drivers/sqlite3"
|
2020-08-23 13:42:15 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/core/errz"
|
|
|
|
"github.com/neilotoole/sq/libsq/core/stringz"
|
2020-08-06 20:58:47 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/source"
|
|
|
|
)
|
|
|
|
|
2021-02-22 10:37:00 +03:00
|
|
|
func newSrcAddCmd() *cobra.Command {
|
2020-08-06 20:58:47 +03:00
|
|
|
cmd := &cobra.Command{
|
2023-04-16 01:28:51 +03:00
|
|
|
Use: "add [--handle @HANDLE] LOCATION",
|
|
|
|
RunE: execSrcAdd,
|
|
|
|
Args: cobra.ExactArgs(1),
|
2023-01-01 06:17:44 +03:00
|
|
|
Example: `
|
|
|
|
When adding a data source, LOCATION is the only required arg.
|
2020-08-06 20:58:47 +03:00
|
|
|
|
2023-04-16 01:28:51 +03:00
|
|
|
$ sq add ./actor.csv
|
|
|
|
@actor csv actor.csv
|
|
|
|
|
|
|
|
Note that sq generated the handle "@actor". But you can explicitly specify
|
|
|
|
a handle.
|
|
|
|
|
|
|
|
# Add a postgres source with handle "@sakila/pg"
|
2023-05-03 15:36:10 +03:00
|
|
|
$ sq add --handle @sakila/pg 'postgres://user:pass@localhost/sakila'
|
2020-08-06 20:58:47 +03:00
|
|
|
|
2023-04-16 01:28:51 +03:00
|
|
|
This handle format "@sakila/pg" includes a group, "sakila". Using a group
|
|
|
|
is entirely optional: it is a way to organize sources. For example:
|
|
|
|
|
2023-05-03 15:36:10 +03:00
|
|
|
$ sq add --handle @dev/pg 'postgres://user:pass@dev.db.example.com/sakila'
|
|
|
|
$ sq add --handle @prod/pg 'postgres://user:pass@prod.db.acme.com/sakila'
|
2023-04-16 01:28:51 +03:00
|
|
|
|
|
|
|
The format of LOCATION is driver-specific, but is generally a DB connection
|
2023-03-19 07:58:00 +03:00
|
|
|
string, a file path, or a URL.
|
2020-08-06 20:58:47 +03:00
|
|
|
|
|
|
|
DRIVER://USER:PASS@HOST:PORT/DBNAME
|
|
|
|
/path/to/local/file.ext
|
|
|
|
https://sq.io/data/test1.xlsx
|
|
|
|
|
|
|
|
If flag --handle is omitted, sq will generate a handle based
|
|
|
|
on LOCATION and the source driver type.
|
|
|
|
|
2022-12-25 07:04:18 +03:00
|
|
|
It's a security hazard to expose the data source password via
|
|
|
|
the LOCATION string. If flag --password (-p) is set, sq prompt the
|
|
|
|
user for the password:
|
|
|
|
|
|
|
|
$ sq add 'postgres://user@localhost/sakila' -p
|
|
|
|
Password: ****
|
|
|
|
|
|
|
|
However, if there's input on stdin, sq will read the password from
|
|
|
|
there instead of prompting the user:
|
|
|
|
|
|
|
|
# Add a source, but read password from an environment variable
|
2023-03-12 07:21:05 +03:00
|
|
|
$ export PASSWD='open:;"_Ses@me'
|
|
|
|
$ sq add 'postgres://user@localhost/sakila' -p <<< $PASSWD
|
2020-08-06 20:58:47 +03:00
|
|
|
|
2022-12-25 07:04:18 +03:00
|
|
|
# Same as above, but instead read password from file
|
|
|
|
$ echo 'open:;"_Ses@me' > password.txt
|
|
|
|
$ sq add 'postgres://user@localhost/sakila' -p < password.txt
|
|
|
|
|
2023-05-03 15:36:10 +03:00
|
|
|
There are various driver-specific options available. For example:
|
2020-08-06 20:58:47 +03:00
|
|
|
|
2023-05-03 15:36:10 +03:00
|
|
|
$ sq add actor.csv --ingest.header=false --driver.csv.delim=colon
|
2022-12-25 07:04:18 +03:00
|
|
|
|
|
|
|
If flag --driver is omitted, sq will attempt to determine the
|
|
|
|
type from LOCATION via file suffix, content type, etc.. If the result
|
|
|
|
is ambiguous, explicitly specify the driver type.
|
2023-04-19 08:28:09 +03:00
|
|
|
|
2022-12-25 07:04:18 +03:00
|
|
|
$ sq add --driver=tsv ./mystery.data
|
2020-08-06 20:58:47 +03:00
|
|
|
|
2023-03-19 07:58:00 +03:00
|
|
|
Available source driver types can be listed via "sq driver ls". At a
|
2022-12-25 07:04:18 +03:00
|
|
|
minimum, the following drivers are bundled:
|
2020-08-06 20:58:47 +03:00
|
|
|
|
2023-04-19 08:28:09 +03:00
|
|
|
sqlite3 SQLite
|
|
|
|
postgres PostgreSQL
|
|
|
|
sqlserver Microsoft SQL Server / Azure SQL Edge
|
|
|
|
mysql MySQL
|
|
|
|
csv Comma-Separated Values
|
|
|
|
tsv Tab-Separated Values
|
|
|
|
json JSON
|
|
|
|
jsona JSON Array: LF-delimited JSON arrays
|
2021-02-22 10:37:00 +03:00
|
|
|
jsonl JSON Lines: LF-delimited JSON objects
|
2023-04-19 08:28:09 +03:00
|
|
|
xlsx Microsoft Excel XLSX
|
2022-12-25 07:04:18 +03:00
|
|
|
|
2023-03-19 07:58:00 +03:00
|
|
|
If there isn't already an active source, the newly added source becomes the
|
2023-04-16 01:28:51 +03:00
|
|
|
active source (but the active group does not change). Otherwise you can
|
|
|
|
use flag --active to make the new source active.
|
2023-03-19 07:58:00 +03:00
|
|
|
|
2022-12-25 07:04:18 +03:00
|
|
|
More examples:
|
|
|
|
|
|
|
|
# Add a source, but prompt user for password
|
|
|
|
$ sq add 'postgres://user@localhost/sakila' -p
|
|
|
|
Password: ****
|
|
|
|
|
|
|
|
# Explicitly set flags
|
2023-05-03 15:36:10 +03:00
|
|
|
$ sq add --handle @sakila_pg --driver postgres 'postgres://user:pass@localhost/sakila'
|
2022-12-25 07:04:18 +03:00
|
|
|
|
|
|
|
# Same as above, but with short flags
|
2023-05-03 15:36:10 +03:00
|
|
|
$ sq add -n @sakila_pg -d postgres 'postgres://user:pass@localhost/sakila'
|
2022-12-25 07:04:18 +03:00
|
|
|
|
2023-05-03 15:36:10 +03:00
|
|
|
# Add a SQL Server source; will have generated handle @sakila
|
2023-04-19 08:28:09 +03:00
|
|
|
$ sq add 'sqlserver://user:pass@localhost?database=sakila'
|
2023-03-19 09:18:54 +03:00
|
|
|
|
|
|
|
# Add a sqlite db, and immediately make it the active source
|
2023-04-19 08:28:09 +03:00
|
|
|
$ sq add ./testdata/sqlite1.db --active
|
2022-12-25 07:04:18 +03:00
|
|
|
|
|
|
|
# Add an Excel spreadsheet, with options
|
2023-05-03 15:36:10 +03:00
|
|
|
$ sq add ./testdata/test1.xlsx --ingest.header=true
|
2023-04-19 08:28:09 +03:00
|
|
|
|
2022-12-25 07:04:18 +03:00
|
|
|
# Add a CSV source, with options
|
2023-05-03 15:36:10 +03:00
|
|
|
$ sq add ./testdata/person.csv --ingest.header=true
|
2022-12-25 07:04:18 +03:00
|
|
|
|
|
|
|
# Add a CSV source from a URL (will be downloaded)
|
2023-04-16 01:28:51 +03:00
|
|
|
$ sq add https://sq.io/testdata/actor.csv
|
|
|
|
|
|
|
|
# Add a source, and make it the active source (and group)
|
2023-05-03 15:36:10 +03:00
|
|
|
$ sq add ./actor.csv --handle @csv/actor
|
|
|
|
|
|
|
|
# Add a currently unreachable source
|
|
|
|
$ sq add 'postgres://user:pass@db.offline.com/sakila' --skip-verify`,
|
2023-04-16 01:28:51 +03:00
|
|
|
Short: "Add data source",
|
|
|
|
Long: `Add data source specified by LOCATION, optionally identified by @HANDLE.`,
|
2020-08-06 20:58:47 +03:00
|
|
|
}
|
|
|
|
|
2023-05-03 15:36:10 +03:00
|
|
|
cmd.Flags().BoolP(flag.JSON, flag.JSONShort, false, flag.JSONUsage)
|
2023-05-05 17:32:50 +03:00
|
|
|
cmd.Flags().Bool(flag.Pretty, true, flag.PrettyUsage)
|
|
|
|
cmd.Flags().BoolP(flag.YAML, flag.YAMLShort, false, flag.YAMLUsage)
|
2023-05-03 15:36:10 +03:00
|
|
|
|
|
|
|
cmd.Flags().StringP(flag.AddDriver, flag.AddDriverShort, "", flag.AddDriverUsage)
|
|
|
|
panicOn(cmd.RegisterFlagCompletionFunc(flag.AddDriver, completeDriverType))
|
|
|
|
|
2023-04-19 08:28:09 +03:00
|
|
|
cmd.Flags().StringP(flag.Handle, flag.HandleShort, "", flag.HandleUsage)
|
|
|
|
cmd.Flags().BoolP(flag.PasswordPrompt, flag.PasswordPromptShort, false, flag.PasswordPromptUsage)
|
|
|
|
cmd.Flags().Bool(flag.SkipVerify, false, flag.SkipVerifyUsage)
|
|
|
|
cmd.Flags().BoolP(flag.AddActive, flag.AddActiveShort, false, flag.AddActiveUsage)
|
2023-04-26 18:16:42 +03:00
|
|
|
|
2023-05-03 15:36:10 +03:00
|
|
|
cmd.Flags().Bool(flag.IngestHeader, false, flag.IngestHeaderUsage)
|
|
|
|
|
|
|
|
cmd.Flags().Bool(flag.CSVEmptyAsNull, true, flag.CSVEmptyAsNullUsage)
|
|
|
|
cmd.Flags().String(flag.CSVDelim, flag.CSVDelimDefault, flag.CSVDelimUsage)
|
|
|
|
panicOn(cmd.RegisterFlagCompletionFunc(flag.CSVDelim, completeStrings(1, csv.NamedDelims()...)))
|
|
|
|
|
2021-02-22 10:37:00 +03:00
|
|
|
return cmd
|
2020-08-06 20:58:47 +03:00
|
|
|
}
|
|
|
|
|
2021-02-22 10:37:00 +03:00
|
|
|
func execSrcAdd(cmd *cobra.Command, args []string) error {
|
|
|
|
rc := RunContextFrom(cmd.Context())
|
2020-08-06 20:58:47 +03:00
|
|
|
cfg := rc.Config
|
2023-01-01 06:17:44 +03:00
|
|
|
|
2020-08-06 20:58:47 +03:00
|
|
|
loc := source.AbsLocation(strings.TrimSpace(args[0]))
|
|
|
|
var err error
|
2023-04-22 06:36:32 +03:00
|
|
|
var typ source.DriverType
|
2020-10-20 18:05:43 +03:00
|
|
|
|
2023-05-03 15:36:10 +03:00
|
|
|
if cmdFlagChanged(cmd, flag.AddDriver) {
|
|
|
|
val, _ := cmd.Flags().GetString(flag.AddDriver)
|
2023-04-22 06:36:32 +03:00
|
|
|
typ = source.DriverType(strings.TrimSpace(val))
|
2020-08-06 20:58:47 +03:00
|
|
|
} else {
|
2023-04-22 06:36:32 +03:00
|
|
|
typ, err = rc.files.DriverType(cmd.Context(), loc)
|
2020-08-06 20:58:47 +03:00
|
|
|
if err != nil {
|
2020-10-20 18:05:43 +03:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
if typ == source.TypeNone {
|
2023-04-22 06:36:32 +03:00
|
|
|
return errz.Errorf("unable to determine driver type: use --driver flag")
|
2020-08-06 20:58:47 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-04-26 18:16:42 +03:00
|
|
|
if rc.driverReg.ProviderFor(typ) == nil {
|
2023-04-22 06:36:32 +03:00
|
|
|
return errz.Errorf("unsupported driver type {%s}", typ)
|
2020-10-20 18:05:43 +03:00
|
|
|
}
|
|
|
|
|
2020-08-06 20:58:47 +03:00
|
|
|
var handle string
|
2023-04-19 08:28:09 +03:00
|
|
|
if cmdFlagChanged(cmd, flag.Handle) {
|
|
|
|
handle, _ = cmd.Flags().GetString(flag.Handle)
|
2020-08-06 20:58:47 +03:00
|
|
|
} else {
|
2023-04-19 08:28:09 +03:00
|
|
|
handle, err = source.SuggestHandle(rc.Config.Collection, typ, loc)
|
2020-08-06 20:58:47 +03:00
|
|
|
if err != nil {
|
|
|
|
return errz.Wrap(err, "unable to suggest a handle: use --handle flag")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if stringz.InSlice(source.ReservedHandles(), handle) {
|
|
|
|
return errz.Errorf("handle reserved for system use: %s", handle)
|
|
|
|
}
|
|
|
|
|
2023-04-16 01:28:51 +03:00
|
|
|
if err = source.ValidHandle(handle); err != nil {
|
2020-08-06 20:58:47 +03:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2023-04-19 08:28:09 +03:00
|
|
|
if cfg.Collection.IsExistingSource(handle) {
|
2020-08-06 20:58:47 +03:00
|
|
|
return errz.Errorf("source handle already exists: %s", handle)
|
|
|
|
}
|
|
|
|
|
|
|
|
if typ == sqlite3.Type {
|
2023-01-01 06:17:44 +03:00
|
|
|
// Special handling for SQLite, because it's a file-based DB.
|
|
|
|
loc, err = sqlite3.MungeLocation(loc)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
2020-08-06 20:58:47 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-12-25 07:04:18 +03:00
|
|
|
// If the -p flag is set, sq looks for password input on stdin,
|
|
|
|
// or sq prompts the user.
|
2023-04-19 08:28:09 +03:00
|
|
|
if cmdFlagTrue(cmd, flag.PasswordPrompt) {
|
2022-12-25 07:09:20 +03:00
|
|
|
var passwd []byte
|
2023-05-03 15:36:10 +03:00
|
|
|
if passwd, err = readPassword(cmd.Context(), rc.Stdin, rc.Out, rc.writers.pr); err != nil {
|
2022-12-25 07:04:18 +03:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2023-05-03 15:36:10 +03:00
|
|
|
if loc, err = source.LocationWithPassword(loc, string(passwd)); err != nil {
|
2022-12-25 07:04:18 +03:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-05-03 15:36:10 +03:00
|
|
|
o, err := getSrcOptionsFromFlags(cmd.Flags(), rc.OptionsRegistry, typ)
|
2020-08-06 20:58:47 +03:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2023-05-03 15:36:10 +03:00
|
|
|
src, err := newSource(
|
|
|
|
cmd.Context(),
|
|
|
|
rc.driverReg,
|
|
|
|
typ,
|
|
|
|
handle,
|
|
|
|
loc,
|
|
|
|
o,
|
|
|
|
)
|
2020-08-06 20:58:47 +03:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2023-05-03 15:36:10 +03:00
|
|
|
if err = cfg.Collection.Add(src); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2023-04-19 08:28:09 +03:00
|
|
|
if cfg.Collection.Active() == nil || cmdFlagTrue(cmd, flag.AddActive) {
|
2023-03-19 09:18:54 +03:00
|
|
|
// If no current active data source, use this one, OR if
|
|
|
|
// flagAddActive is true.
|
2023-04-19 08:28:09 +03:00
|
|
|
if _, err = cfg.Collection.SetActive(src.Handle, false); err != nil {
|
2020-08-06 20:58:47 +03:00
|
|
|
return err
|
|
|
|
}
|
2023-04-16 01:28:51 +03:00
|
|
|
|
2023-05-03 15:36:10 +03:00
|
|
|
// However, we do not set the active group to be the new src's group.
|
2023-04-16 01:28:51 +03:00
|
|
|
// In UX testing, it led to confused users.
|
2020-08-06 20:58:47 +03:00
|
|
|
}
|
|
|
|
|
2023-04-26 18:16:42 +03:00
|
|
|
drvr, err := rc.driverReg.DriverFor(src.Type)
|
2020-08-06 20:58:47 +03:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2023-04-19 08:28:09 +03:00
|
|
|
if !cmdFlagTrue(cmd, flag.SkipVerify) {
|
2022-12-25 07:04:18 +03:00
|
|
|
// Typically we want to ping the source before adding it.
|
2023-05-03 15:36:10 +03:00
|
|
|
// But, sometimes not, for example if a source is temporarily offline.
|
2022-12-25 07:04:18 +03:00
|
|
|
if err = drvr.Ping(cmd.Context(), src); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-08-06 20:58:47 +03:00
|
|
|
}
|
|
|
|
|
2023-04-22 16:37:07 +03:00
|
|
|
if err = rc.ConfigStore.Save(cmd.Context(), rc.Config); err != nil {
|
2020-08-06 20:58:47 +03:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2023-05-03 15:36:10 +03:00
|
|
|
if src, err = rc.Config.Collection.Get(src.Handle); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2023-04-19 08:28:09 +03:00
|
|
|
return rc.writers.srcw.Source(rc.Config.Collection, src)
|
2020-08-06 20:58:47 +03:00
|
|
|
}
|
2022-12-25 07:04:18 +03:00
|
|
|
|
|
|
|
// readPassword reads a password from stdin pipe, or if nothing on stdin,
|
|
|
|
// it prints a prompt to stdout, and then accepts input (which must be
|
|
|
|
// followed by a return).
|
2023-04-22 06:36:32 +03:00
|
|
|
func readPassword(ctx context.Context, stdin *os.File, stdout io.Writer, pr *output.Printing) ([]byte, error) {
|
2022-12-25 07:09:20 +03:00
|
|
|
resultCh := make(chan []byte)
|
|
|
|
errCh := make(chan error)
|
2022-12-25 07:04:18 +03:00
|
|
|
|
|
|
|
// Check if there is something to read on STDIN.
|
2023-04-16 01:28:51 +03:00
|
|
|
stat, err := stdin.Stat()
|
|
|
|
if err != nil {
|
|
|
|
// Shouldn't happen
|
|
|
|
return nil, errz.Err(err)
|
|
|
|
}
|
2022-12-25 07:04:18 +03:00
|
|
|
if (stat.Mode() & os.ModeCharDevice) == 0 {
|
|
|
|
b, err := io.ReadAll(stdin)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
b = bytes.TrimSuffix(b, []byte("\n"))
|
|
|
|
return b, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Run this is a goroutine so that we can handle ctrl-c.
|
|
|
|
go func() {
|
|
|
|
buf := &bytes.Buffer{}
|
|
|
|
fmt.Fprint(buf, "Password: ")
|
2023-04-22 06:36:32 +03:00
|
|
|
pr.Faint.Fprint(buf, "[ENTER]")
|
2022-12-25 07:04:18 +03:00
|
|
|
fmt.Fprint(buf, " ")
|
|
|
|
stdout.Write(buf.Bytes())
|
|
|
|
|
|
|
|
b, err := term.ReadPassword(int(stdin.Fd()))
|
|
|
|
// Regardless of whether there's an error, we print
|
|
|
|
// newline for presentation.
|
|
|
|
fmt.Fprintln(stdout)
|
|
|
|
if err != nil {
|
|
|
|
errCh <- errz.Err(err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
resultCh <- b
|
|
|
|
}()
|
|
|
|
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
// Print newline so that cancel msg is printed on its own line.
|
|
|
|
fmt.Fprintln(stdout)
|
|
|
|
return nil, errz.Err(ctx.Err())
|
|
|
|
case err := <-errCh:
|
|
|
|
return nil, err
|
|
|
|
case b := <-resultCh:
|
|
|
|
return b, nil
|
|
|
|
}
|
|
|
|
}
|