2020-08-23 13:42:15 +03:00
|
|
|
// Package json implements the sq driver for JSON. There are three
|
|
|
|
// supported types:
|
|
|
|
// - JSON: plain old JSON
|
|
|
|
// - JSONA: JSON Array, where each record is an array of JSON values on its own line.
|
|
|
|
// - JSONL: JSON Lines, where each record is a JSON object on its own line.
|
|
|
|
package json
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"database/sql"
|
2024-01-25 07:01:24 +03:00
|
|
|
"io"
|
2023-08-12 21:54:14 +03:00
|
|
|
"log/slog"
|
2020-08-23 13:42:15 +03:00
|
|
|
|
|
|
|
"github.com/neilotoole/sq/libsq/core/cleanup"
|
|
|
|
"github.com/neilotoole/sq/libsq/core/errz"
|
2023-11-20 04:06:36 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/core/lg"
|
|
|
|
"github.com/neilotoole/sq/libsq/core/lg/lga"
|
|
|
|
"github.com/neilotoole/sq/libsq/core/lg/lgm"
|
2024-01-15 04:45:34 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/core/options"
|
2020-08-23 13:42:15 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/driver"
|
2024-01-25 09:29:55 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/files"
|
2020-08-23 13:42:15 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/source"
|
2023-11-21 00:42:38 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/source/drivertype"
|
2024-01-25 09:29:55 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/source/location"
|
2023-11-21 00:42:38 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/source/metadata"
|
2020-08-23 13:42:15 +03:00
|
|
|
)
|
|
|
|
|
|
|
|
// Provider implements driver.Provider.
|
|
|
|
type Provider struct {
|
2024-01-15 04:45:34 +03:00
|
|
|
Log *slog.Logger
|
|
|
|
Ingester driver.GripOpenIngester
|
2024-01-25 09:29:55 +03:00
|
|
|
Files *files.Files
|
2020-08-23 13:42:15 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// DriverFor implements driver.Provider.
|
2023-11-21 00:42:38 +03:00
|
|
|
func (d *Provider) DriverFor(typ drivertype.Type) (driver.Driver, error) {
|
2024-01-15 04:45:34 +03:00
|
|
|
var ingestFn ingestFunc
|
2020-08-23 13:42:15 +03:00
|
|
|
|
2022-12-18 03:51:33 +03:00
|
|
|
switch typ { //nolint:exhaustive
|
2024-01-25 09:29:55 +03:00
|
|
|
case drivertype.JSON:
|
2024-01-15 04:45:34 +03:00
|
|
|
ingestFn = ingestJSON
|
2024-01-25 09:29:55 +03:00
|
|
|
case drivertype.JSONA:
|
2024-01-15 04:45:34 +03:00
|
|
|
ingestFn = ingestJSONA
|
2024-01-25 09:29:55 +03:00
|
|
|
case drivertype.JSONL:
|
2024-01-15 04:45:34 +03:00
|
|
|
ingestFn = ingestJSONL
|
2020-08-23 13:42:15 +03:00
|
|
|
default:
|
2023-04-02 22:49:45 +03:00
|
|
|
return nil, errz.Errorf("unsupported driver type {%s}", typ)
|
2020-08-23 13:42:15 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return &driveri{
|
2024-01-15 04:45:34 +03:00
|
|
|
typ: typ,
|
|
|
|
ingester: d.Ingester,
|
|
|
|
files: d.Files,
|
|
|
|
ingestFn: ingestFn,
|
2020-08-23 13:42:15 +03:00
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Driver implements driver.Driver.
|
|
|
|
type driveri struct {
|
2024-01-15 04:45:34 +03:00
|
|
|
ingester driver.GripOpenIngester
|
2024-01-27 10:11:24 +03:00
|
|
|
ingestFn ingestFunc
|
2024-01-25 09:29:55 +03:00
|
|
|
files *files.Files
|
2024-01-27 10:11:24 +03:00
|
|
|
typ drivertype.Type
|
2020-08-23 13:42:15 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// DriverMetadata implements driver.Driver.
|
|
|
|
func (d *driveri) DriverMetadata() driver.Metadata {
|
|
|
|
md := driver.Metadata{Type: d.typ, Monotable: true}
|
|
|
|
|
2022-12-18 03:51:33 +03:00
|
|
|
switch d.typ { //nolint:exhaustive
|
2024-01-25 09:29:55 +03:00
|
|
|
case drivertype.JSON:
|
2020-08-23 13:42:15 +03:00
|
|
|
md.Description = "JSON"
|
|
|
|
md.Doc = "https://en.wikipedia.org/wiki/JSON"
|
2024-01-25 09:29:55 +03:00
|
|
|
case drivertype.JSONA:
|
2020-08-23 13:42:15 +03:00
|
|
|
md.Description = "JSON Array: LF-delimited JSON arrays"
|
|
|
|
md.Doc = "https://en.wikipedia.org/wiki/JSON"
|
2024-01-25 09:29:55 +03:00
|
|
|
case drivertype.JSONL:
|
2020-08-23 13:42:15 +03:00
|
|
|
md.Description = "JSON Lines: LF-delimited JSON objects"
|
|
|
|
md.Doc = "https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON"
|
|
|
|
}
|
|
|
|
|
|
|
|
return md
|
|
|
|
}
|
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
// Open implements driver.Driver.
|
|
|
|
func (d *driveri) Open(ctx context.Context, src *source.Source) (driver.Grip, error) {
|
|
|
|
log := lg.FromContext(ctx)
|
|
|
|
log.Debug(lgm.OpenSrc, lga.Src, src)
|
2023-04-30 17:18:56 +03:00
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
g := &grip{
|
|
|
|
log: log,
|
|
|
|
src: src,
|
|
|
|
clnup: cleanup.New(),
|
|
|
|
files: d.files,
|
2020-08-23 13:42:15 +03:00
|
|
|
}
|
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
allowCache := driver.OptIngestCache.Get(options.FromContext(ctx))
|
2020-08-23 13:42:15 +03:00
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
ingestFn := func(ctx context.Context, destGrip driver.Grip) error {
|
2024-01-25 19:21:56 +03:00
|
|
|
job := &ingestJob{
|
2024-01-25 07:01:24 +03:00
|
|
|
fromSrc: src,
|
|
|
|
newRdrFn: func(ctx context.Context) (io.ReadCloser, error) {
|
|
|
|
log.Debug("JSON ingest job newRdrFn", lga.Src, src)
|
|
|
|
return d.files.NewReader(ctx, src, false)
|
|
|
|
},
|
2024-01-15 04:45:34 +03:00
|
|
|
destGrip: destGrip,
|
|
|
|
sampleSize: driver.OptIngestSampleSize.Get(src.Options),
|
|
|
|
flatten: true,
|
2024-01-25 19:21:56 +03:00
|
|
|
stmtCache: map[string]*driver.StmtExecer{},
|
2024-01-15 04:45:34 +03:00
|
|
|
}
|
2020-10-20 18:05:43 +03:00
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
return d.ingestFn(ctx, job)
|
2020-08-23 13:42:15 +03:00
|
|
|
}
|
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
var err error
|
|
|
|
if g.impl, err = d.ingester.OpenIngest(ctx, src, allowCache, ingestFn); err != nil {
|
2020-08-23 13:42:15 +03:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
return g, nil
|
2020-08-23 13:42:15 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// ValidateSource implements driver.Driver.
|
|
|
|
func (d *driveri) ValidateSource(src *source.Source) (*source.Source, error) {
|
|
|
|
if src.Type != d.typ {
|
2023-04-22 06:36:32 +03:00
|
|
|
return nil, errz.Errorf("expected driver type {%s} but got {%s}", d.typ, src.Type)
|
2020-08-23 13:42:15 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return src, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Ping implements driver.Driver.
|
2024-01-15 04:45:34 +03:00
|
|
|
func (d *driveri) Ping(ctx context.Context, src *source.Source) error {
|
|
|
|
return d.files.Ping(ctx, src)
|
2020-08-23 13:42:15 +03:00
|
|
|
}
|
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
// grip implements driver.Grip.
|
|
|
|
type grip struct {
|
2023-04-02 22:49:45 +03:00
|
|
|
log *slog.Logger
|
2020-08-23 13:42:15 +03:00
|
|
|
src *source.Source
|
2024-01-15 04:45:34 +03:00
|
|
|
impl driver.Grip
|
2020-08-23 13:42:15 +03:00
|
|
|
clnup *cleanup.Cleanup
|
2024-01-25 09:29:55 +03:00
|
|
|
files *files.Files
|
2020-08-23 13:42:15 +03:00
|
|
|
}
|
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
// DB implements driver.Grip.
|
|
|
|
func (g *grip) DB(ctx context.Context) (*sql.DB, error) {
|
|
|
|
return g.impl.DB(ctx)
|
2020-08-23 13:42:15 +03:00
|
|
|
}
|
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
// SQLDriver implements driver.Grip.
|
|
|
|
func (g *grip) SQLDriver() driver.SQLDriver {
|
|
|
|
return g.impl.SQLDriver()
|
2020-08-23 13:42:15 +03:00
|
|
|
}
|
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
// Source implements driver.Grip.
|
|
|
|
func (g *grip) Source() *source.Source {
|
|
|
|
return g.src
|
2020-08-23 13:42:15 +03:00
|
|
|
}
|
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
// TableMetadata implements driver.Grip.
|
|
|
|
func (g *grip) TableMetadata(ctx context.Context, tblName string) (*metadata.Table, error) {
|
2020-08-23 13:42:15 +03:00
|
|
|
if tblName != source.MonotableName {
|
|
|
|
return nil, errz.Errorf("table name should be %s for CSV/TSV etc., but got: %s",
|
|
|
|
source.MonotableName, tblName)
|
|
|
|
}
|
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
srcMeta, err := g.SourceMetadata(ctx, false)
|
2020-08-23 13:42:15 +03:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// There will only ever be one table for CSV.
|
|
|
|
return srcMeta.Tables[0], nil
|
|
|
|
}
|
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
// SourceMetadata implements driver.Grip.
|
|
|
|
func (g *grip) SourceMetadata(ctx context.Context, noSchema bool) (*metadata.Source, error) {
|
|
|
|
md, err := g.impl.SourceMetadata(ctx, noSchema)
|
2020-08-23 13:42:15 +03:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
md.Handle = g.src.Handle
|
|
|
|
md.Location = g.src.Location
|
|
|
|
md.Driver = g.src.Type
|
2020-08-23 13:42:15 +03:00
|
|
|
|
2024-01-25 09:29:55 +03:00
|
|
|
md.Name, err = location.Filename(g.src.Location)
|
2020-08-23 13:42:15 +03:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
md.Size, err = g.files.Filesize(ctx, g.src)
|
2020-08-23 13:42:15 +03:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
md.FQName = md.Name
|
|
|
|
return md, nil
|
|
|
|
}
|
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
// Close implements driver.Grip.
|
|
|
|
func (g *grip) Close() error {
|
|
|
|
g.log.Debug(lgm.CloseDB, lga.Handle, g.src.Handle)
|
2020-08-23 13:42:15 +03:00
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
return errz.Append(g.impl.Close(), g.clnup.Run())
|
2020-08-23 13:42:15 +03:00
|
|
|
}
|