Mirror of https://github.com/neilotoole/sq.git, synced 2024-11-28 03:53:07 +03:00
fixed some panics with oddly-shaped xlsx imports (#71)
This commit is contained in:
parent 93f0de8614
commit 06900f8c84
@@ -1,14 +1,14 @@
 // Package buildinfo hosts build info variables populated via ldflags.
 package buildinfo

-// DefaultVersion is the default value for Version if not
+// defaultVersion is the default value for Version if not
 // set via ldflags.
-const DefaultVersion = "v0.0.0-dev"
+const defaultVersion = "v0.0.0-dev"

 var (
     // Version is the build version. If not set at build time via
-    // ldflags, Version takes the value of DefaultVersion.
-    Version = DefaultVersion
+    // ldflags, Version takes the value of defaultVersion.
+    Version = defaultVersion

     // Commit is the commit hash.
     Commit string
@@ -9,6 +9,7 @@ import (
     "github.com/neilotoole/sq/drivers/csv"
     "github.com/neilotoole/sq/drivers/sqlite3"
     "github.com/neilotoole/sq/drivers/xlsx"
     "github.com/neilotoole/sq/libsq/source"
     "github.com/neilotoole/sq/testh"
     "github.com/neilotoole/sq/testh/proj"
@@ -16,6 +17,62 @@ import (
 )

+func TestCmdInspect(t *testing.T) {
+    testCases := []struct {
+        handle   string
+        wantErr  bool
+        wantType source.Type
+        wantTbls []string
+    }{
+        {
+            handle:   sakila.CSVActor,
+            wantType: csv.TypeCSV,
+            wantTbls: []string{source.MonotableName},
+        },
+        {
+            handle:   sakila.TSVActor,
+            wantType: csv.TypeTSV,
+            wantTbls: []string{source.MonotableName},
+        },
+        {
+            handle:   sakila.XLSX,
+            wantType: xlsx.Type,
+            wantTbls: sakila.AllTbls(),
+        },
+        {
+            handle:   sakila.SL3,
+            wantType: sqlite3.Type,
+            wantTbls: sakila.AllTblsViews(),
+        },
+    }
+
+    for _, tc := range testCases {
+        tc := tc
+
+        t.Run(tc.handle, func(t *testing.T) {
+            t.Parallel()
+
+            th := testh.New(t)
+            src := th.Source(tc.handle)
+
+            ru := newRun(t).add(*src)
+
+            err := ru.exec("inspect", "--json")
+            if tc.wantErr {
+                require.Error(t, err)
+                return
+            }
+
+            md := &source.Metadata{}
+            require.NoError(t, json.Unmarshal(ru.out.Bytes(), md))
+            require.Equal(t, tc.wantType, md.SourceType)
+            require.Equal(t, src.Handle, md.Handle)
+            require.Equal(t, src.Location, md.Location)
+            require.Equal(t, tc.wantTbls, md.TableNames())
+        })
+    }
+}
+
 func TestCmdInspectSmoke(t *testing.T) {
     th := testh.New(t)
     src := th.Source(sakila.SL3)
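The new TestCmdInspect runs its cases in parallel, which is why each iteration rebinds tc before calling t.Run. A standalone sketch (not part of this commit) of why that rebinding matters in Go versions prior to 1.22:

// Minimal demonstration of loop-variable capture in parallel subtests.
package demo

import "testing"

func TestLoopCapture(t *testing.T) {
	testCases := []string{"csv", "tsv", "xlsx", "sqlite3"}

	for _, tc := range testCases {
		tc := tc // rebind so the closure below sees this iteration's value
		t.Run(tc, func(t *testing.T) {
			t.Parallel()
			if tc == "" {
				t.Fatal("unexpected empty case")
			}
		})
	}
}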
@@ -91,7 +91,7 @@ func execSLQ(rc *RunContext, cmd *cobra.Command, args []string) error {
     return execSLQInsert(rc, destSrc, destTbl)
 }

-// execSQLInsert executes the SQL and inserts resulting records
+// execSQLInsert executes the SLQ and inserts resulting records
 // into destTbl in destSrc.
 func execSLQInsert(rc *RunContext, destSrc *source.Source, destTbl string) error {
     args, srcs, dbases := rc.Args, rc.Config.Sources, rc.databases
@@ -114,20 +114,21 @@ func execSLQInsert(rc *RunContext, destSrc *source.Source, destTbl string) error
     // stack.

     inserter := libsq.NewDBWriter(rc.Log, destDB, destTbl, driver.Tuning.RecordChSize)
-    err = libsq.ExecuteSLQ(ctx, rc.Log, rc.databases, rc.databases, srcs, slq, inserter)
-    if err != nil {
-        return errz.Wrapf(err, "insert %s.%s failed", destSrc.Handle, destTbl)
+    execErr := libsq.ExecuteSLQ(ctx, rc.Log, rc.databases, rc.databases, srcs, slq, inserter)
+    affected, waitErr := inserter.Wait() // Wait for the writer to finish processing
+    if execErr != nil {
+        return errz.Wrapf(execErr, "insert %s.%s failed", destSrc.Handle, destTbl)
     }

-    affected, err := inserter.Wait() // Wait for the writer to finish processing
-    if err != nil {
-        return errz.Wrapf(err, "insert %s.%s failed", destSrc.Handle, destTbl)
+    if waitErr != nil {
+        return errz.Wrapf(waitErr, "insert %s.%s failed", destSrc.Handle, destTbl)
     }

     fmt.Fprintf(rc.Out, stringz.Plu("Inserted %d row(s) into %s.%s\n", int(affected)), affected, destSrc.Handle, destTbl)
     return nil
 }

 // execSLQPrint executes the SLQ query, and prints output to writer.
 func execSLQPrint(rc *RunContext) error {
     slq, err := preprocessUserSLQ(rc, rc.Args)
     if err != nil {
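The reordering above ensures inserter.Wait() is always called, so the DB writer goroutine is drained even when ExecuteSLQ fails, while the execution error is still reported first. A standalone sketch of that pattern with hypothetical names (not the sq API):

// Always wait on the asynchronous writer, but give the exec error priority.
package main

import (
	"errors"
	"fmt"
)

type asyncWriter struct {
	done     chan struct{}
	affected int64
	err      error
}

// Wait blocks until the writer has finished and returns its result.
func (w *asyncWriter) Wait() (int64, error) {
	<-w.done
	return w.affected, w.err
}

func runAndWait(exec func(w *asyncWriter) error, w *asyncWriter) (int64, error) {
	execErr := exec(w)
	affected, waitErr := w.Wait() // always drain the writer
	if execErr != nil {
		return 0, fmt.Errorf("exec failed: %w", execErr)
	}
	if waitErr != nil {
		return 0, fmt.Errorf("writer failed: %w", waitErr)
	}
	return affected, nil
}

func main() {
	w := &asyncWriter{done: make(chan struct{})}
	exec := func(w *asyncWriter) error {
		w.affected = 3
		close(w.done) // the writer finishes regardless of the exec outcome
		return errors.New("boom")
	}
	if _, err := runAndWait(exec, w); err != nil {
		fmt.Println(err) // exec failed: boom
	}
}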
@@ -135,17 +136,13 @@ func execSLQPrint(rc *RunContext) error {
     }

     recw := output.NewRecordWriterAdapter(rc.writers.recordw)
-    err = libsq.ExecuteSLQ(rc.Context, rc.Log, rc.databases, rc.databases, rc.Config.Sources, slq, recw)
-    if err != nil {
-        return err
+    execErr := libsq.ExecuteSLQ(rc.Context, rc.Log, rc.databases, rc.databases, rc.Config.Sources, slq, recw)
+    _, waitErr := recw.Wait()
+    if execErr != nil {
+        return execErr
     }

-    _, err = recw.Wait()
-    if err != nil {
-        return err
-    }
-
-    return err
+    return waitErr
 }

 // preprocessUserSLQ does a bit of validation and munging on the
@@ -113,11 +113,13 @@ func TestCmdSQL_StdinQuery(t *testing.T) {
     testCases := []struct {
         fpath     string
         tbl       string
+        srcHeader bool
         wantCount int
         wantErr   bool
     }{
         {fpath: proj.Abs(sakila.PathCSVActorNoHeader), tbl: source.MonotableName, wantCount: sakila.TblActorCount},
         {fpath: proj.Abs(sakila.PathXLSXSubset), tbl: sakila.TblActor, wantCount: sakila.TblActorCount + 1}, // +1 is for the header row in the XLSX file
+        {fpath: proj.Abs(sakila.PathXLSXActorHeader), srcHeader: true, tbl: sakila.TblActor, wantCount: sakila.TblActorCount},
+        {fpath: proj.Abs(sakila.PathXLSXSubset), srcHeader: true, tbl: sakila.TblActor, wantCount: sakila.TblActorCount},
         {fpath: proj.Abs("README.md"), wantErr: true},
     }

@@ -133,7 +135,12 @@ func TestCmdSQL_StdinQuery(t *testing.T) {
     ru := newRun(t).hush()
     ru.rc.Stdin = f

-    err = ru.exec("sql", "--header=false", "SELECT * FROM "+tc.tbl)
+    args := []string{"sql", "--header=false", "SELECT * FROM " + tc.tbl}
+    if tc.srcHeader {
+        args = append(args, "--opts=header=true")
+    }
+
+    err = ru.exec(args...)
     if tc.wantErr {
         require.Error(t, err)
         return
@@ -155,15 +155,19 @@ func (p *processor) buildSchemaFlat() (*importSchema, error) {
     for _, field := range e.fieldNames {
         if detector, ok := e.detectors[field]; ok {
             // If it has a detector, it's a regular field
-            kind, mungeFn, err := detector.Detect()
+            k, mungeFn, err := detector.Detect()
             if err != nil {
                 return errz.Err(err)
             }

+            if k == kind.Null {
+                k = kind.Text
+            }
+
             colDef := &sqlmodel.ColDef{
                 Name:  p.calcColName(e, field),
                 Table: tblDef,
-                Kind:  kind,
+                Kind:  k,
             }

             colDefs = append(colDefs, colDef)
@@ -299,6 +299,9 @@ func detectColKindsJSONA(ctx context.Context, r io.Reader) ([]kind.Kind, []kind.
         if err != nil {
             return nil, nil, err
         }
+        if kinds[i] == kind.Null {
+            kinds[i] = kind.Text
+        }
     }

     return kinds, mungeFns, nil
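In both JSON import paths above, a detected kind of kind.Null (every sampled value was nil) now falls back to kind.Text, so that column DDL always gets a concrete type. A standalone sketch of that fallback using a local Kind type rather than the sq kind package:

// normalizeKind maps Null (no non-nil samples seen) to Text so that
// downstream column creation always has a concrete type.
package main

import "fmt"

type Kind int

const (
	KindNull Kind = iota
	KindText
	KindInt
)

func normalizeKind(k Kind) Kind {
	if k == KindNull {
		return KindText
	}
	return k
}

func main() {
	detected := []Kind{KindInt, KindNull, KindText}
	for i := range detected {
		detected[i] = normalizeKind(detected[i])
	}
	fmt.Println(detected) // [2 1 1] — the Null column became Text
}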
@@ -84,6 +84,10 @@ func importSheetToTable(ctx context.Context, log lg.Log, sheet *xlsx.Sheet, hasH
         continue
     }

+    if isEmptyRow(row) {
+        continue
+    }
+
     rec := rowToRecord(log, destColKinds, row, sheet.Name, i)
     err = bi.Munge(rec)
     if err != nil {
@@ -120,6 +124,22 @@ func importSheetToTable(ctx context.Context, log lg.Log, sheet *xlsx.Sheet, hasH
     return nil
 }

+// isEmptyRow returns true if row is nil or has zero cells, or if
+// every cell value is empty string.
+func isEmptyRow(row *xlsx.Row) bool {
+    if row == nil || len(row.Cells) == 0 {
+        return true
+    }
+
+    for i := range row.Cells {
+        if row.Cells[i].Value != "" {
+            return false
+        }
+    }
+
+    return true
+}
+
 // buildTblDefsForSheets returns a TableDef for each sheet.
 func buildTblDefsForSheets(ctx context.Context, log lg.Log, sheets []*xlsx.Sheet, hasHeader bool) ([]*sqlmodel.TableDef, error) {
     tblDefs := make([]*sqlmodel.TableDef, len(sheets))
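The isEmptyRow guard above is what lets the importer skip trailing blank rows rather than panicking on them. A simplified analogue using local types (not the xlsx library), with a quick check in main:

// Simplified stand-ins for xlsx.Cell and xlsx.Row, just to exercise the logic.
package main

import "fmt"

type cell struct{ Value string }
type row struct{ Cells []*cell }

// isEmptyRow mirrors the helper above: nil row, zero cells, or all-blank cells.
func isEmptyRow(r *row) bool {
	if r == nil || len(r.Cells) == 0 {
		return true
	}
	for i := range r.Cells {
		if r.Cells[i].Value != "" {
			return false
		}
	}
	return true
}

func main() {
	cases := []*row{
		nil,
		{},
		{Cells: []*cell{{Value: ""}, {Value: ""}}},
		{Cells: []*cell{{Value: ""}, {Value: "x"}}},
	}
	for i, r := range cases {
		fmt.Printf("case %d empty=%v\n", i, isEmptyRow(r)) // true, true, true, false
	}
}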
@@ -148,22 +168,21 @@ func buildTblDefsForSheets(ctx context.Context, log lg.Log, sheets []*xlsx.Sheet
 // buildTblDefForSheet creates a table for the given sheet, and returns
 // a model of the table, or an error.
 func buildTblDefForSheet(log lg.Log, sheet *xlsx.Sheet, hasHeader bool) (*sqlmodel.TableDef, error) {
-    numCells := getRowsMaxCellCount(sheet)
-    if numCells == 0 {
+    maxCols := getRowsMaxCellCount(sheet)
+    if maxCols == 0 {
         return nil, errz.Errorf("sheet %q has no columns", sheet.Name)
     }

-    colNames := make([]string, numCells)
-    colKinds := make([]kind.Kind, numCells)
+    colNames := make([]string, maxCols)
+    colKinds := make([]kind.Kind, maxCols)

     firstDataRow := 0

     if len(sheet.Rows) == 0 {
         // TODO: is this even reachable? That is, if sheet.Rows is empty,
         // then sheet.cols (checked for above) will also be empty?

         // sheet has no rows
-        for i := 0; i < numCells; i++ {
+        for i := 0; i < maxCols; i++ {
             colKinds[i] = kind.Text
             colNames[i] = stringz.GenerateAlphaColName(i, false)
         }
@@ -174,11 +193,11 @@ func buildTblDefForSheet(log lg.Log, sheet *xlsx.Sheet, hasHeader bool) (*sqlmod
     if hasHeader {
         firstDataRow = 1
         headerCells := sheet.Rows[0].Cells
-        for i := 0; i < numCells; i++ {
+        for i := 0; i < len(headerCells); i++ {
             colNames[i] = headerCells[i].Value
         }
     } else {
-        for i := 0; i < numCells; i++ {
+        for i := 0; i < maxCols; i++ {
             colNames[i] = stringz.GenerateAlphaColName(i, false)
         }
     }
@@ -186,16 +205,22 @@ func buildTblDefForSheet(log lg.Log, sheet *xlsx.Sheet, hasHeader bool) (*sqlmod
         // Set up the column types
         if firstDataRow >= len(sheet.Rows) {
             // the sheet contains only one row (the header row). Let's
-            // explicitly set the column type none the less
-            for i := 0; i < numCells; i++ {
+            // explicitly set the column type nonetheless
+            for i := 0; i < maxCols; i++ {
                 colKinds[i] = kind.Text
             }
         } else {
             // we have at least one data row, let's get the column types
-            colKinds = getKindsFromCells(sheet.Rows[firstDataRow].Cells)
+            var err error
+            colKinds, err = calcKindsForRows(firstDataRow, sheet.Rows)
+            if err != nil {
+                return nil, err
+            }
         }
     }

+    colNames, colKinds = syncColNamesKinds(colNames, colKinds)
+
     tblDef := &sqlmodel.TableDef{Name: sheet.Name}
     cols := make([]*sqlmodel.ColDef, len(colNames))
     for i, colName := range colNames {
@@ -207,9 +232,65 @@ func buildTblDefForSheet(log lg.Log, sheet *xlsx.Sheet, hasHeader bool) (*sqlmod
     return tblDef, nil
 }

+// syncColNamesKinds ensures that column names and kinds are in
+// a working state vis-a-vis each other. Notably if a colName is
+// empty and its equivalent kind is kind.Null, that element
+// is filtered out.
+func syncColNamesKinds(colNames []string, colKinds []kind.Kind) (names []string, kinds []kind.Kind) {
+    // Allow for the case of "phantom" columns. That is,
+    // columns with entirely empty data.
+    // Note: not sure if this scenario is now reachable
+    if len(colKinds) < len(colNames) {
+        colNames = colNames[0:len(colKinds)]
+    }
+
+    for i := range colNames {
+        // Filter out the case where the column name is empty
+        // and the kind is kind.Null or kind.Unknown.
+        if colNames[i] == "" && (colKinds[i] == kind.Null || colKinds[i] == kind.Unknown) {
+            continue
+        }
+
+        names = append(names, colNames[i])
+        kinds = append(kinds, colKinds[i])
+    }
+
+    colNames = names
+    colKinds = kinds
+
+    // Check that we don't have any unnamed columns (empty header)
+    for i := 0; i < len(colNames); i++ {
+        if colNames[i] == "" {
+            // Empty col name... possibly we should just throw
+            // an error, but instead we'll try to generate a col name.
+            colName := stringz.GenerateAlphaColName(i, false)
+            for stringz.InSlice(colNames[0:i], colName) {
+                // If colName already exists, just append an
+                // underscore and try again.
+                colName += "_"
+            }
+            colNames[i] = colName
+        }
+    }
+
+    for i := range colKinds {
+        if colKinds[i] == kind.Null || colKinds[i] == kind.Unknown {
+            colKinds[i] = kind.Text
+        }
+    }
+
+    return colNames, colKinds
+}
+
 func rowToRecord(log lg.Log, destColKinds []kind.Kind, row *xlsx.Row, sheetName string, rowIndex int) []interface{} {
-    vals := make([]interface{}, len(row.Cells))
+    vals := make([]interface{}, len(destColKinds))
     for j, cell := range row.Cells {
+        if j >= len(vals) {
+            log.Warnf("sheet %s[%d:%d]: skipping additional cells because there's more cells than expected (%d)",
+                sheetName, rowIndex, j, len(destColKinds))
+            continue
+        }
+
         typ := cell.Type()
         switch typ {
         case xlsx.CellTypeBool:
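rowToRecord now sizes the record from destColKinds and skips surplus cells, which is one of the panic fixes in this commit. A standalone sketch of that guard over plain slices (hypothetical simplified signature, not the sq function):

// rowToRecord sizes the record from the destination column count, and any
// surplus cells in an oddly-shaped row are skipped instead of indexing past
// the end of the slice (which previously panicked).
package main

import "fmt"

func rowToRecord(destCols int, cells []string) []interface{} {
	vals := make([]interface{}, destCols)
	for j, cell := range cells {
		if j >= len(vals) {
			fmt.Printf("skipping extra cell %d (want %d cols)\n", j, destCols)
			continue
		}
		vals[j] = cell
	}
	return vals
}

func main() {
	// The sheet declares 2 columns, but this row has 4 cells.
	rec := rowToRecord(2, []string{"a", "b", "c", "d"})
	fmt.Println(rec) // [a b]
}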
@@ -271,42 +352,100 @@ func rowToRecord(log lg.Log, destColKinds []kind.Kind, row *xlsx.Row, sheetName
     return vals
 }

-func getKindsFromCells(cells []*xlsx.Cell) []kind.Kind {
-    vals := make([]kind.Kind, len(cells))
-    for i, cell := range cells {
-        typ := cell.Type()
-        switch typ {
-        case xlsx.CellTypeBool:
-            vals[i] = kind.Bool
-        case xlsx.CellTypeNumeric:
-            if cell.IsTime() {
-                vals[i] = kind.Datetime
-                continue
-            }
-            _, err := cell.Int64()
-            if err == nil {
-                vals[i] = kind.Int
-                continue
-            }
-            _, err = cell.Float()
-            if err == nil {
-                vals[i] = kind.Float
-                continue
-            }
-            // it's not an int, it's not a float
-            vals[i] = kind.Decimal
-        case xlsx.CellTypeDate:
-            // TODO: support time values here?
-            vals[i] = kind.Datetime
-        default:
-            vals[i] = kind.Text
-        }
-    }
-
-    return vals
-}
+// readCellValue reads the value of a cell, returning a value of
+// type that most matches the sq kind.
+func readCellValue(cell *xlsx.Cell) interface{} {
+    if cell == nil || cell.Value == "" {
+        return nil
+    }
+
+    var val interface{}
+
+    switch cell.Type() {
+    case xlsx.CellTypeBool:
+        val = cell.Bool()
+        return val
+    case xlsx.CellTypeNumeric:
+        if cell.IsTime() {
+            t, err := cell.GetTime(false)
+            if err == nil {
+                return t
+            }
+
+            t, err = cell.GetTime(true)
+            if err == nil {
+                return t
+            }
+
+            // Otherwise we have an error, just return the value
+            val, _ = cell.FormattedValue()
+            return val
+        }
+
+        intVal, err := cell.Int64()
+        if err == nil {
+            val = intVal
+            return val
+        }
+
+        floatVal, err := cell.Float()
+        if err == nil {
+            val = floatVal
+            return val
+        }
+
+        val, _ = cell.FormattedValue()
+        return val
+
+    case xlsx.CellTypeString:
+        val = cell.String()
+    case xlsx.CellTypeDate:
+        // TODO: parse into a time.Time value here?
+        val, _ = cell.FormattedValue()
+    default:
+        val, _ = cell.FormattedValue()
+    }
+
+    return val
+}
+
+// calcKindsForRows calculates the lowest-common-denominator kind
+// for the cells of rows. The returned slice will have length
+// equal to the longest row.
+func calcKindsForRows(firstDataRow int, rows []*xlsx.Row) ([]kind.Kind, error) {
+    if firstDataRow > len(rows) {
+        return nil, errz.Errorf("rows are empty")
+    }
+
+    var detectors []*kind.Detector
+
+    for i := firstDataRow; i < len(rows); i++ {
+        if isEmptyRow(rows[i]) {
+            continue
+        }
+
+        for j := len(detectors); j < len(rows[i].Cells); j++ {
+            detectors = append(detectors, kind.NewDetector())
+        }
+
+        for j := range rows[i].Cells {
+            val := readCellValue(rows[i].Cells[j])
+            detectors[j].Sample(val)
+        }
+    }
+
+    kinds := make([]kind.Kind, len(detectors))
+
+    for j := range detectors {
+        knd, _, err := detectors[j].Detect()
+        if err != nil {
+            return nil, err
+        }
+
+        kinds[j] = knd
+    }
+
+    return kinds, nil
+}

 // getColNames returns column names for the sheet. If hasHeader is true and there's
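calcKindsForRows samples every non-empty row and grows its detector slice to the longest row, so ragged sheets no longer index past the end of the kind slice. A standalone, simplified analogue over [][]string (the real code delegates per-column detection to kind.Detector; here a tiny string/number discriminator stands in):

// Sample every cell of every row, widening the per-column state to the
// longest row, then settle each column on the loosest kind seen.
package main

import (
	"fmt"
	"strconv"
)

type colKind int

const (
	kindNull colKind = iota
	kindInt
	kindText
)

func sampleKind(val string) colKind {
	if val == "" {
		return kindNull
	}
	if _, err := strconv.Atoi(val); err == nil {
		return kindInt
	}
	return kindText
}

func calcKindsForRows(rows [][]string) []colKind {
	var kinds []colKind
	for _, row := range rows {
		// Widen to the longest row seen so far.
		for j := len(kinds); j < len(row); j++ {
			kinds = append(kinds, kindNull)
		}
		for j, cell := range row {
			if k := sampleKind(cell); k > kinds[j] {
				kinds[j] = k // loosen toward Text
			}
		}
	}
	return kinds
}

func main() {
	rows := [][]string{
		{"1", "alice"},
		{"2", "bob", "extra"}, // an oddly-shaped row widens the result
	}
	fmt.Println(calcKindsForRows(rows)) // [1 2 2]
}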
@@ -318,22 +457,23 @@ func getColNames(sheet *xlsx.Sheet, hasHeader bool) []string {

     if len(sheet.Rows) > 0 && hasHeader {
         row := sheet.Rows[0]
-        for i := 0; i < numCells; i++ {
-            colNames[i] = row.Cells[i].Value
+        for i := 0; i < len(row.Cells); i++ {
+            colNames[i] = row.Cells[i].String()
         }
         return colNames
     }

-    for i := 0; i < numCells; i++ {
-        colNames[i] = stringz.GenerateAlphaColName(i, false)
+    for i := range colNames {
+        if colNames[i] == "" {
+            colNames[i] = stringz.GenerateAlphaColName(i, false)
+        }
     }

     return colNames
 }

-// getColTypes returns the xlsx column types for the sheet, determined from
+// getCellColumnTypes returns the xlsx cell types for the sheet, determined from
 // the values of the first data row (after any header row).
-func getColTypes(sheet *xlsx.Sheet, hasHeader bool) []xlsx.CellType {
+func getCellColumnTypes(sheet *xlsx.Sheet, hasHeader bool) []xlsx.CellType {
     types := make([]*xlsx.CellType, getRowsMaxCellCount(sheet))
     firstDataRow := 0
     if hasHeader {
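With this change, getColNames iterates only over the header cells that actually exist and falls back to a generated name for blank entries. A standalone sketch of spreadsheet-style alpha column naming, as a stand-in for stringz.GenerateAlphaColName (the real helper's exact behavior may differ):

// alphaColName maps a zero-based column index to an Excel-style name:
// 0 -> "A", 25 -> "Z", 26 -> "AA", and so on.
package main

import "fmt"

func alphaColName(n int) string {
	name := ""
	for n >= 0 {
		name = string(rune('A'+n%26)) + name
		n = n/26 - 1
	}
	return name
}

func main() {
	for _, i := range []int{0, 1, 25, 26, 27, 701, 702} {
		fmt.Printf("%d -> %s\n", i, alphaColName(i))
	}
	// 0 -> A, 1 -> B, 25 -> Z, 26 -> AA, 27 -> AB, 701 -> ZZ, 702 -> AAA
}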
BIN  drivers/xlsx/testdata/sakila_actor_header.xlsx (vendored, new file; binary file not shown)
@@ -190,6 +190,11 @@ func (d *database) TableMetadata(ctx context.Context, tblName string) (*source.T
 }

 // SourceMetadata implements driver.Database.
+// TODO: the implementation of SourceMetadata is out
+// of sync with the way we import data. For example, empty
+// rows are filtered out during import, and empty columns
+// are discarded. Thus SourceMetadata needs an overhaul to
+// bring its reporting into line with import.
 func (d *database) SourceMetadata(ctx context.Context) (*source.Metadata, error) {
     meta := &source.Metadata{Handle: d.src.Handle}
@@ -231,7 +236,9 @@ func (d *database) SourceMetadata(ctx context.Context) (*source.Metadata, error)
     }

     colNames := getColNames(sheet, hasHeader)
-    colTypes := getColTypes(sheet, hasHeader)
+
+    // TODO: Should move over to using kind.Detector
+    colTypes := getCellColumnTypes(sheet, hasHeader)

     for i, colType := range colTypes {
         col := &source.ColMetadata{}
@@ -198,6 +198,7 @@ func (d *Detector) Sample(v interface{}) {
         return
     case time.Time:
         d.retain(Time, Date, Datetime)
         return
     case stdj.Number:
         // JSON number
         d.foundString = true
@@ -274,7 +275,7 @@ func (d *Detector) Sample(v interface{}) {
         return nil, nil
     }

-    s, ok := val.(string)
+    s, ok = val.(string)
     if !ok {
         return nil, errz.Errorf("expected %T to be string", val)
     }
@@ -283,7 +284,8 @@ func (d *Detector) Sample(v interface{}) {
         return nil, nil
     }

-    t, err := time.Parse(format, s)
+    var t time.Time
+    t, err = time.Parse(format, s)
     if err != nil {
         return nil, errz.Err(err)
     }
@@ -307,7 +309,7 @@ func (d *Detector) Sample(v interface{}) {
         return nil, nil
     }

-    s, ok := val.(string)
+    s, ok = val.(string)
     if !ok {
         return nil, errz.Errorf("expected %T to be string", val)
     }
@@ -316,7 +318,8 @@ func (d *Detector) Sample(v interface{}) {
         return nil, nil
     }

-    t, err := time.Parse(format, s)
+    var t time.Time
+    t, err = time.Parse(format, s)
     if err != nil {
         return nil, errz.Err(err)
     }
@@ -362,7 +365,8 @@ func (d *Detector) Sample(v interface{}) {
     }
 }

-// Detect returns the detected Kind. If ambiguous, Text is returned.
+// Detect returns the detected Kind. If ambiguous, Text is returned,
+// unless all sampled values were nil, in which case Null is returned.
 // If the returned mungeFn is non-nil, it can be used to convert input
 // values to their canonical form. For example, for Datetime the MungeFunc
 // would accept string "2020-06-11T02:50:54Z" and return a time.Time,
@@ -370,8 +374,12 @@ func (d *Detector) Sample(v interface{}) {
 // and always return a string in the canonicalized form "1970-01-01".
 func (d *Detector) Detect() (kind Kind, mungeFn MungeFunc, err error) {
     if !d.dirty {
-        // If we haven't filtered any kinds, default to Text.
-        return Text, nil, nil
+        if d.foundString {
+            return Text, nil, nil
+        }
+
+        return Null, nil, nil
     }

     switch len(d.kinds) {
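Detect now reports Null when nothing but nil values were sampled, and Text when a string (even an empty one) was seen; the earlier hunks add Null-to-Text fallbacks at the call sites for exactly this reason. A standalone sketch of that fallthrough using local stand-in types (not the sq kind package):

// If no concrete value was sampled ("not dirty"), the detector reports
// Null unless a string was seen, in which case Text wins.
package main

import "fmt"

type Kind string

const (
	Null Kind = "null"
	Text Kind = "text"
)

type detector struct {
	dirty       bool // true once a typed (non-nil, non-empty) value is sampled
	foundString bool // true once any string (including "") is sampled
}

func (d *detector) Sample(v interface{}) {
	switch v := v.(type) {
	case nil:
		// nil samples leave the detector clean
	case string:
		d.foundString = true
		if v != "" {
			d.dirty = true
		}
	default:
		d.dirty = true
	}
}

func (d *detector) Detect() Kind {
	if !d.dirty {
		if d.foundString {
			return Text
		}
		return Null
	}
	return Text // the full implementation narrows further; elided here
}

func main() {
	d1 := &detector{}
	d1.Sample(nil)
	d1.Sample(nil)
	fmt.Println(d1.Detect()) // null

	d2 := &detector{}
	d2.Sample(nil)
	d2.Sample("")
	fmt.Println(d2.Detect()) // text
}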
@@ -381,7 +389,15 @@ func (d *Detector) Detect() (kind Kind, mungeFn MungeFunc, err error) {
     for k := range d.kinds {
         return k, d.mungeFns[k], nil
     }
-    default:
     }

+    // NOTE: this logic below about detecting the remaining type
+    // is a bit sketchy. If you're debugging this code, it's
+    // probably the case that the code below is faulty.
+    if d.has(Time, Date, Datetime) {
+        // If all three time types are left, use the most
+        // general, i.e. Datetime.
+        return Datetime, d.mungeFns[Datetime], nil
+    }
+
+    if d.has(Time) {
@@ -88,10 +88,10 @@ func TestKindDetector(t *testing.T) {
     wantMunge bool
     wantErr   bool
 }{
-    {in: nil, want: kind.Text},
-    {in: []interface{}{}, want: kind.Text},
+    {in: nil, want: kind.Null},
+    {in: []interface{}{}, want: kind.Null},
     {in: []interface{}{""}, want: kind.Text},
-    {in: []interface{}{nil}, want: kind.Text},
+    {in: []interface{}{nil}, want: kind.Null},
     {in: []interface{}{nil, ""}, want: kind.Text},
     {in: []interface{}{int(1), int8(8), int16(16), int32(32), int64(64)}, want: kind.Int},
     {in: []interface{}{1, "2", "3"}, want: kind.Decimal},
@@ -100,7 +100,7 @@ func TestKindDetector(t *testing.T) {
     {in: []interface{}{1, "2", stdj.Number("1000")}, want: kind.Decimal},
     {in: []interface{}{1.0, "2.0"}, want: kind.Decimal},
     {in: []interface{}{1, float64(2.0), float32(7.7), int32(3)}, want: kind.Float},
-    {in: []interface{}{nil, nil, nil}, want: kind.Text},
+    {in: []interface{}{nil, nil, nil}, want: kind.Null},
     {in: []interface{}{"1.0", "2.0", "3.0", "4", nil, int64(6)}, want: kind.Decimal},
     {in: []interface{}{true, false, nil, "true", "false", "yes", "no", ""}, want: kind.Bool},
     {in: []interface{}{"0", "1"}, want: kind.Decimal},
@@ -145,7 +145,7 @@ func TestKindDetector(t *testing.T) {
     return
 }

-require.Equal(t, tc.want.String(), gotKind.String())
+require.Equal(t, tc.want.String(), gotKind.String(), tc.in)

 if !tc.wantMunge {
     require.Nil(t, gotMungeFn)
@@ -29,7 +29,7 @@ const (
     MS = MS17
 )

-// AllHandles returns all the sakila handles. It does not
+// AllHandles returns all the typical sakila handles. It does not
 // include monotable handles such as @sakila_csv_actor.
 func AllHandles() []string {
     return []string{SL3, Pg9, Pg10, Pg11, Pg12, My56, My57, My8, MS17, XLSX}
@@ -145,6 +145,7 @@ const (
     PathSL3              = "drivers/sqlite3/testdata/sakila.db"
     PathXLSX             = "drivers/xlsx/testdata/sakila.xlsx"
     PathXLSXSubset       = "drivers/xlsx/testdata/sakila_subset.xlsx"
+    PathXLSXActorHeader  = "drivers/xlsx/testdata/sakila_actor_header.xlsx"
     PathCSVActor         = "drivers/csv/testdata/sakila-csv/actor.csv"
     PathCSVActorNoHeader = "drivers/csv/testdata/sakila-csv-noheader/actor.csv"
     PathTSVActor         = "drivers/csv/testdata/sakila-tsv/actor.tsv"