2020-08-06 20:58:47 +03:00
|
|
|
// Package csv implements the sq driver for CSV/TSV et al.
|
|
|
|
package csv
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2020-08-23 13:42:15 +03:00
|
|
|
"database/sql"
|
2020-08-06 20:58:47 +03:00
|
|
|
|
2023-04-02 22:49:45 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/core/lg/lga"
|
|
|
|
|
|
|
|
"github.com/neilotoole/sq/libsq/core/lg/lgm"
|
|
|
|
|
|
|
|
"github.com/neilotoole/sq/libsq/core/lg"
|
|
|
|
|
|
|
|
"golang.org/x/exp/slog"
|
2020-08-06 20:58:47 +03:00
|
|
|
|
2020-08-23 13:42:15 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/core/errz"
|
2020-08-06 20:58:47 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/driver"
|
|
|
|
"github.com/neilotoole/sq/libsq/source"
|
|
|
|
)
|
|
|
|
|
|
|
|
const (
|
|
|
|
// TypeCSV is the CSV driver type.
|
2023-04-22 06:36:32 +03:00
|
|
|
TypeCSV = source.DriverType("csv")
|
2020-08-06 20:58:47 +03:00
|
|
|
|
|
|
|
// TypeTSV is the TSV driver type.
|
2023-04-22 06:36:32 +03:00
|
|
|
TypeTSV = source.DriverType("tsv")
|
2020-08-06 20:58:47 +03:00
|
|
|
)
|
|
|
|
|
|
|
|
// Provider implements driver.Provider.
|
|
|
|
type Provider struct {
|
2023-04-02 22:49:45 +03:00
|
|
|
Log *slog.Logger
|
2020-08-06 20:58:47 +03:00
|
|
|
Scratcher driver.ScratchDatabaseOpener
|
|
|
|
Files *source.Files
|
|
|
|
}
|
|
|
|
|
|
|
|
// DriverFor implements driver.Provider.
|
2023-04-22 06:36:32 +03:00
|
|
|
func (d *Provider) DriverFor(typ source.DriverType) (driver.Driver, error) {
|
2022-12-18 08:16:10 +03:00
|
|
|
switch typ { //nolint:exhaustive
|
2020-08-06 20:58:47 +03:00
|
|
|
case TypeCSV:
|
2020-08-23 13:42:15 +03:00
|
|
|
return &driveri{log: d.Log, typ: TypeCSV, scratcher: d.Scratcher, files: d.Files}, nil
|
2020-08-06 20:58:47 +03:00
|
|
|
case TypeTSV:
|
2020-08-23 13:42:15 +03:00
|
|
|
return &driveri{log: d.Log, typ: TypeTSV, scratcher: d.Scratcher, files: d.Files}, nil
|
2020-08-06 20:58:47 +03:00
|
|
|
}
|
|
|
|
|
2023-04-02 22:49:45 +03:00
|
|
|
return nil, errz.Errorf("unsupported driver type {%s}", typ)
|
2020-08-06 20:58:47 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// Driver implements driver.Driver.
|
2020-08-23 13:42:15 +03:00
|
|
|
type driveri struct {
|
2023-04-02 22:49:45 +03:00
|
|
|
log *slog.Logger
|
2023-04-22 06:36:32 +03:00
|
|
|
typ source.DriverType
|
2020-08-06 20:58:47 +03:00
|
|
|
scratcher driver.ScratchDatabaseOpener
|
|
|
|
files *source.Files
|
|
|
|
}
|
|
|
|
|
|
|
|
// DriverMetadata implements driver.Driver.
|
2020-08-23 13:42:15 +03:00
|
|
|
func (d *driveri) DriverMetadata() driver.Metadata {
|
2020-08-06 20:58:47 +03:00
|
|
|
md := driver.Metadata{Type: d.typ, Monotable: true}
|
|
|
|
if d.typ == TypeCSV {
|
|
|
|
md.Description = "Comma-Separated Values"
|
|
|
|
md.Doc = "https://en.wikipedia.org/wiki/Comma-separated_values"
|
|
|
|
} else {
|
|
|
|
md.Description = "Tab-Separated Values"
|
|
|
|
md.Doc = "https://en.wikipedia.org/wiki/Tab-separated_values"
|
|
|
|
}
|
|
|
|
return md
|
|
|
|
}
|
|
|
|
|
2023-04-08 21:09:27 +03:00
|
|
|
// Open implements driver.DatabaseOpener.
|
2020-08-23 13:42:15 +03:00
|
|
|
func (d *driveri) Open(ctx context.Context, src *source.Source) (driver.Database, error) {
|
2023-04-30 17:18:56 +03:00
|
|
|
lg.From(ctx).Debug(lgm.OpenSrc, lga.Src, src)
|
|
|
|
|
2020-12-30 21:57:58 +03:00
|
|
|
dbase := &database{
|
2022-12-18 09:07:38 +03:00
|
|
|
log: d.log,
|
|
|
|
src: src,
|
2020-12-30 21:57:58 +03:00
|
|
|
files: d.files,
|
2020-08-06 20:58:47 +03:00
|
|
|
}
|
|
|
|
|
2020-12-30 21:57:58 +03:00
|
|
|
var err error
|
2020-08-06 20:58:47 +03:00
|
|
|
dbase.impl, err = d.scratcher.OpenScratch(ctx, src.Handle)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2023-04-09 17:44:27 +03:00
|
|
|
if err = importCSV(ctx, src, d.files.OpenFunc(src), dbase.impl); err != nil {
|
2020-08-06 20:58:47 +03:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return dbase, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Truncate implements driver.Driver.
|
2023-04-01 11:38:32 +03:00
|
|
|
func (d *driveri) Truncate(_ context.Context, _ *source.Source, _ string, _ bool) (int64, error) {
|
2020-08-06 20:58:47 +03:00
|
|
|
return 0, errz.Errorf("truncate not supported for %s", d.DriverMetadata().Type)
|
|
|
|
}
|
|
|
|
|
|
|
|
// ValidateSource implements driver.Driver.
|
2020-08-23 13:42:15 +03:00
|
|
|
func (d *driveri) ValidateSource(src *source.Source) (*source.Source, error) {
|
2020-08-06 20:58:47 +03:00
|
|
|
if src.Type != d.typ {
|
2023-04-22 06:36:32 +03:00
|
|
|
return nil, errz.Errorf("expected driver type {%s} but got {%s}", d.typ, src.Type)
|
2020-08-06 20:58:47 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
if src.Options != nil || len(src.Options) > 0 {
|
2023-04-26 18:16:42 +03:00
|
|
|
d.log.Error("FIXME: need to validate source")
|
|
|
|
// FIXME: Validate source
|
|
|
|
|
|
|
|
// d.log.Debug("Validating source",
|
|
|
|
// lga.Src, src,
|
|
|
|
// lga.Opts, src.Options.Encode(),
|
|
|
|
// )
|
|
|
|
//
|
|
|
|
// key := "header"
|
|
|
|
// v := src.Options.Get(key)
|
|
|
|
//
|
|
|
|
// if v != "" {
|
|
|
|
// _, err := strconv.ParseBool(v)
|
|
|
|
// if err != nil {
|
|
|
|
// return nil, errz.Wrapf(err, "unable to parse option {%s} having value {%s}", key, v)
|
|
|
|
// }
|
|
|
|
// }
|
2020-08-06 20:58:47 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return src, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Ping implements driver.Driver.
|
2023-04-01 11:38:32 +03:00
|
|
|
func (d *driveri) Ping(_ context.Context, src *source.Source) error {
|
2020-08-23 13:42:15 +03:00
|
|
|
r, err := d.files.Open(src)
|
2020-08-06 20:58:47 +03:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2023-04-02 22:49:45 +03:00
|
|
|
defer lg.WarnIfCloseError(d.log, lgm.CloseFileReader, r)
|
2020-08-06 20:58:47 +03:00
|
|
|
|
2020-08-23 13:42:15 +03:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// database implements driver.Database.
|
|
|
|
type database struct {
|
2023-04-02 22:49:45 +03:00
|
|
|
log *slog.Logger
|
2022-12-18 09:07:38 +03:00
|
|
|
src *source.Source
|
|
|
|
impl driver.Database
|
2020-08-23 13:42:15 +03:00
|
|
|
files *source.Files
|
|
|
|
}
|
|
|
|
|
|
|
|
// DB implements driver.Database.
|
|
|
|
func (d *database) DB() *sql.DB {
|
|
|
|
return d.impl.DB()
|
|
|
|
}
|
|
|
|
|
|
|
|
// SQLDriver implements driver.Database.
|
|
|
|
func (d *database) SQLDriver() driver.SQLDriver {
|
|
|
|
return d.impl.SQLDriver()
|
|
|
|
}
|
|
|
|
|
|
|
|
// Source implements driver.Database.
|
|
|
|
func (d *database) Source() *source.Source {
|
|
|
|
return d.src
|
|
|
|
}
|
|
|
|
|
|
|
|
// TableMetadata implements driver.Database.
|
|
|
|
func (d *database) TableMetadata(ctx context.Context, tblName string) (*source.TableMetadata, error) {
|
|
|
|
if tblName != source.MonotableName {
|
|
|
|
return nil, errz.Errorf("table name should be %s for CSV/TSV etc., but got: %s",
|
|
|
|
source.MonotableName, tblName)
|
|
|
|
}
|
|
|
|
|
|
|
|
srcMeta, err := d.SourceMetadata(ctx)
|
2020-08-06 20:58:47 +03:00
|
|
|
if err != nil {
|
2020-08-23 13:42:15 +03:00
|
|
|
return nil, err
|
2020-08-06 20:58:47 +03:00
|
|
|
}
|
|
|
|
|
2020-08-23 13:42:15 +03:00
|
|
|
// There will only ever be one table for CSV.
|
|
|
|
return srcMeta.Tables[0], nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// SourceMetadata implements driver.Database.
|
|
|
|
func (d *database) SourceMetadata(ctx context.Context) (*source.Metadata, error) {
|
|
|
|
md, err := d.impl.SourceMetadata(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
md.Handle = d.src.Handle
|
|
|
|
md.Location = d.src.Location
|
|
|
|
md.SourceType = d.src.Type
|
|
|
|
|
|
|
|
md.Name, err = source.LocationFileName(d.src)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
md.Size, err = d.files.Size(d.src)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
md.FQName = md.Name
|
|
|
|
return md, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Close implements driver.Database.
|
|
|
|
func (d *database) Close() error {
|
2023-04-02 22:49:45 +03:00
|
|
|
d.log.Debug(lgm.CloseDB, lga.Src, d.src)
|
2020-08-23 13:42:15 +03:00
|
|
|
|
2020-12-30 21:57:58 +03:00
|
|
|
return errz.Err(d.impl.Close())
|
2020-08-23 13:42:15 +03:00
|
|
|
}
|