mirror of
https://github.com/neilotoole/sq.git
synced 2024-12-18 13:41:49 +03:00
206 lines
5.0 KiB
Go
206 lines
5.0 KiB
Go
|
package json
|
||
|
|
||
|
import (
|
||
|
"bufio"
|
||
|
"context"
|
||
|
stdj "encoding/json"
|
||
|
"io"
|
||
|
"math"
|
||
|
|
||
|
"github.com/neilotoole/lg"
|
||
|
|
||
|
"github.com/neilotoole/sq/libsq/core/errz"
|
||
|
"github.com/neilotoole/sq/libsq/core/kind"
|
||
|
"github.com/neilotoole/sq/libsq/driver"
|
||
|
"github.com/neilotoole/sq/libsq/source"
|
||
|
)
|
||
|
|
||
|
type importFunc func(ctx context.Context, log lg.Log, src *source.Source, openFn source.FileOpenFunc, scratchDB driver.Database) error
|
||
|
|
||
|
var (
|
||
|
_ importFunc = importJSON
|
||
|
_ importFunc = importJSONA
|
||
|
_ importFunc = importJSONL
|
||
|
)
|
||
|
|
||
|
func importJSON(ctx context.Context, log lg.Log, src *source.Source, openFn source.FileOpenFunc, scratchDB driver.Database) error {
|
||
|
log.Warn("not implemented")
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
func importJSONA(ctx context.Context, log lg.Log, src *source.Source, openFn source.FileOpenFunc, scratchDB driver.Database) error {
|
||
|
log.Warn("not implemented")
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
func predictColKindsJSONA(ctx context.Context, r io.Reader) ([]kind.Kind, error) {
|
||
|
var (
|
||
|
err error
|
||
|
totalLineCount int
|
||
|
// jLineCount is the number of JSONA lines (totalLineCount minus empty lines)
|
||
|
jLineCount int
|
||
|
line []byte
|
||
|
kinds []kind.Kind
|
||
|
detectors []*kind.Detector
|
||
|
)
|
||
|
|
||
|
sc := bufio.NewScanner(r)
|
||
|
for sc.Scan() {
|
||
|
select {
|
||
|
case <-ctx.Done():
|
||
|
return nil, ctx.Err()
|
||
|
default:
|
||
|
}
|
||
|
|
||
|
if jLineCount > sampleSize {
|
||
|
break
|
||
|
}
|
||
|
|
||
|
if err = sc.Err(); err != nil {
|
||
|
return nil, errz.Err(err)
|
||
|
}
|
||
|
|
||
|
line = sc.Bytes()
|
||
|
totalLineCount++
|
||
|
if len(line) == 0 {
|
||
|
// Probably want to skip blank lines? Maybe
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
jLineCount++
|
||
|
|
||
|
// Each line of JSONA must open with left bracket
|
||
|
if line[0] != '[' {
|
||
|
return nil, errz.New("line does not begin with left bracket '['")
|
||
|
}
|
||
|
|
||
|
// If the line is JSONA, it should marshall into []interface{}
|
||
|
var vals []interface{}
|
||
|
err = stdj.Unmarshal(line, &vals)
|
||
|
if err != nil {
|
||
|
return nil, errz.Err(err)
|
||
|
}
|
||
|
|
||
|
if len(vals) == 0 {
|
||
|
return nil, errz.Errorf("zero field count at line %d", totalLineCount)
|
||
|
}
|
||
|
|
||
|
if kinds == nil {
|
||
|
kinds = make([]kind.Kind, len(vals))
|
||
|
detectors = make([]*kind.Detector, len(vals))
|
||
|
for i := range detectors {
|
||
|
detectors[i] = kind.NewDetector()
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if len(vals) != len(kinds) {
|
||
|
return nil, errz.Errorf("inconsistent field count: expected %d but got %d at line %d",
|
||
|
len(kinds), len(vals), totalLineCount)
|
||
|
}
|
||
|
|
||
|
for i, val := range vals {
|
||
|
// Special case: The decoder can decode an int into a float.
|
||
|
// If the float has a zero after the decimal point '.' (that
|
||
|
// is to say, it's really a round int), we convert the float
|
||
|
// to an int
|
||
|
fVal, ok := val.(float64)
|
||
|
if ok {
|
||
|
floor := math.Floor(fVal)
|
||
|
if fVal-floor == 0 {
|
||
|
val = int64(floor)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
detectors[i].Sample(val)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if jLineCount == 0 {
|
||
|
return nil, errz.New("empty JSONA input")
|
||
|
}
|
||
|
|
||
|
for i := range kinds {
|
||
|
kinds[i], _, err = detectors[i].Detect() // FIXME: deal with the mungeFn
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return kinds, nil
|
||
|
}
|
||
|
|
||
|
func importJSONL(ctx context.Context, log lg.Log, src *source.Source, openFn source.FileOpenFunc, scratchDB driver.Database) error {
|
||
|
return errz.New("not implemented")
|
||
|
|
||
|
//const optPredictKind bool = true
|
||
|
//
|
||
|
//var err error
|
||
|
//var r io.ReadCloser
|
||
|
//
|
||
|
//r, err = openFn()
|
||
|
//if err != nil {
|
||
|
// return err
|
||
|
//}
|
||
|
//
|
||
|
//// We add the CR filter reader to deal with CSV files exported
|
||
|
//// from Excel which can have the DOS-style \r EOL markers.
|
||
|
//cr := csv.NewReader(&crFilterReader{r: r})
|
||
|
//cr.Comma, err = getDelimiter(src)
|
||
|
//if err != nil {
|
||
|
// return err
|
||
|
//}
|
||
|
//
|
||
|
//// readAheadRecs temporarily holds records read from r for the purpose
|
||
|
//// of determining CSV metadata such as column headers, data kind etc.
|
||
|
//// These records will later be written to recordCh.
|
||
|
//readAheadRecs := make([][]string, 0, readAheadBufferSize)
|
||
|
//
|
||
|
//colNames, err := getColNames(cr, src, &readAheadRecs)
|
||
|
//if err != nil {
|
||
|
// return err
|
||
|
//}
|
||
|
//
|
||
|
//var expectFieldCount = len(colNames)
|
||
|
//
|
||
|
//var colKinds []kind.Kind
|
||
|
//if optPredictKind {
|
||
|
// colKinds, err = predictColKinds(expectFieldCount, cr, &readAheadRecs, readAheadBufferSize)
|
||
|
// if err != nil {
|
||
|
// return err
|
||
|
// }
|
||
|
//} else {
|
||
|
// // If we're not predicting col kind, then we use kind.Text.
|
||
|
// colKinds = make([]kind.Kind, expectFieldCount)
|
||
|
// for i := range colKinds {
|
||
|
// colKinds[i] = kind.Text
|
||
|
// }
|
||
|
//}
|
||
|
//
|
||
|
//// And now we need to create the dest table in scratchDB
|
||
|
//tblDef := createTblDef(source.MonotableName, colNames, colKinds)
|
||
|
//
|
||
|
//err = scratchDB.SQLDriver().CreateTable(ctx, scratchDB.DB(), tblDef)
|
||
|
//if err != nil {
|
||
|
// return core.errz.Wrap(err, "csv: failed to create dest scratch table")
|
||
|
//}
|
||
|
//
|
||
|
//recMeta, err := getRecMeta(ctx, scratchDB, tblDef)
|
||
|
//if err != nil {
|
||
|
// return err
|
||
|
//}
|
||
|
//
|
||
|
//insertWriter := libsq.NewDBWriter(log, scratchDB, tblDef.Name, insertChSize)
|
||
|
//err = execInsert(ctx, insertWriter, recMeta, readAheadRecs, cr)
|
||
|
//if err != nil {
|
||
|
// return err
|
||
|
//}
|
||
|
//
|
||
|
//inserted, err := insertWriter.Wait()
|
||
|
//if err != nil {
|
||
|
// return err
|
||
|
//}
|
||
|
//
|
||
|
//log.Debugf("Inserted %d rows to %s.%s", inserted, scratchDB.Source().Handle, tblDef.Name)
|
||
|
//return nil
|
||
|
}
|