fixed some panics with oddly-shaped xlsx imports (#71)

This commit is contained in:
Neil O'Toole 2020-11-02 10:40:29 -07:00 committed by GitHub
parent 93f0de8614
commit 06900f8c84
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 315 additions and 83 deletions

View File

@ -1,14 +1,14 @@
// Package buildinfo hosts build info variables populated via ldflags.
package buildinfo
// DefaultVersion is the default value for Version if not
// defaultVersion is the default value for Version if not
// set via ldflags.
const DefaultVersion = "v0.0.0-dev"
const defaultVersion = "v0.0.0-dev"
var (
// Version is the build version. If not set at build time via
// ldflags, Version takes the value of DefaultVersion.
Version = DefaultVersion
// ldflags, Version takes the value of defaultVersion.
Version = defaultVersion
// Commit is the commit hash.
Commit string

View File

@ -9,6 +9,7 @@ import (
"github.com/neilotoole/sq/drivers/csv"
"github.com/neilotoole/sq/drivers/sqlite3"
"github.com/neilotoole/sq/drivers/xlsx"
"github.com/neilotoole/sq/libsq/source"
"github.com/neilotoole/sq/testh"
"github.com/neilotoole/sq/testh/proj"
@ -16,6 +17,62 @@ import (
)
func TestCmdInspect(t *testing.T) {
testCases := []struct {
handle string
wantErr bool
wantType source.Type
wantTbls []string
}{
{
handle: sakila.CSVActor,
wantType: csv.TypeCSV,
wantTbls: []string{source.MonotableName},
},
{
handle: sakila.TSVActor,
wantType: csv.TypeTSV,
wantTbls: []string{source.MonotableName},
},
{
handle: sakila.XLSX,
wantType: xlsx.Type,
wantTbls: sakila.AllTbls(),
},
{
handle: sakila.SL3,
wantType: sqlite3.Type,
wantTbls: sakila.AllTblsViews(),
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.handle, func(t *testing.T) {
t.Parallel()
th := testh.New(t)
src := th.Source(tc.handle)
ru := newRun(t).add(*src)
err := ru.exec("inspect", "--json")
if tc.wantErr {
require.Error(t, err)
return
}
md := &source.Metadata{}
require.NoError(t, json.Unmarshal(ru.out.Bytes(), md))
require.Equal(t, tc.wantType, md.SourceType)
require.Equal(t, src.Handle, md.Handle)
require.Equal(t, src.Location, md.Location)
require.Equal(t, tc.wantTbls, md.TableNames())
})
}
}
func TestCmdInspectSmoke(t *testing.T) {
th := testh.New(t)
src := th.Source(sakila.SL3)

View File

@ -91,7 +91,7 @@ func execSLQ(rc *RunContext, cmd *cobra.Command, args []string) error {
return execSLQInsert(rc, destSrc, destTbl)
}
// execSQLInsert executes the SQL and inserts resulting records
// execSQLInsert executes the SLQ and inserts resulting records
// into destTbl in destSrc.
func execSLQInsert(rc *RunContext, destSrc *source.Source, destTbl string) error {
args, srcs, dbases := rc.Args, rc.Config.Sources, rc.databases
@ -114,20 +114,21 @@ func execSLQInsert(rc *RunContext, destSrc *source.Source, destTbl string) error
// stack.
inserter := libsq.NewDBWriter(rc.Log, destDB, destTbl, driver.Tuning.RecordChSize)
err = libsq.ExecuteSLQ(ctx, rc.Log, rc.databases, rc.databases, srcs, slq, inserter)
if err != nil {
return errz.Wrapf(err, "insert %s.%s failed", destSrc.Handle, destTbl)
execErr := libsq.ExecuteSLQ(ctx, rc.Log, rc.databases, rc.databases, srcs, slq, inserter)
affected, waitErr := inserter.Wait() // Wait for the writer to finish processing
if execErr != nil {
return errz.Wrapf(execErr, "insert %s.%s failed", destSrc.Handle, destTbl)
}
affected, err := inserter.Wait() // Wait for the writer to finish processing
if err != nil {
return errz.Wrapf(err, "insert %s.%s failed", destSrc.Handle, destTbl)
if waitErr != nil {
return errz.Wrapf(waitErr, "insert %s.%s failed", destSrc.Handle, destTbl)
}
fmt.Fprintf(rc.Out, stringz.Plu("Inserted %d row(s) into %s.%s\n", int(affected)), affected, destSrc.Handle, destTbl)
return nil
}
// execSLQPrint executes the SLQ query, and prints output to writer.
func execSLQPrint(rc *RunContext) error {
slq, err := preprocessUserSLQ(rc, rc.Args)
if err != nil {
@ -135,17 +136,13 @@ func execSLQPrint(rc *RunContext) error {
}
recw := output.NewRecordWriterAdapter(rc.writers.recordw)
err = libsq.ExecuteSLQ(rc.Context, rc.Log, rc.databases, rc.databases, rc.Config.Sources, slq, recw)
if err != nil {
return err
execErr := libsq.ExecuteSLQ(rc.Context, rc.Log, rc.databases, rc.databases, rc.Config.Sources, slq, recw)
_, waitErr := recw.Wait()
if execErr != nil {
return execErr
}
_, err = recw.Wait()
if err != nil {
return err
}
return err
return waitErr
}
// preprocessUserSLQ does a bit of validation and munging on the

View File

@ -113,11 +113,13 @@ func TestCmdSQL_StdinQuery(t *testing.T) {
testCases := []struct {
fpath string
tbl string
srcHeader bool
wantCount int
wantErr bool
}{
{fpath: proj.Abs(sakila.PathCSVActorNoHeader), tbl: source.MonotableName, wantCount: sakila.TblActorCount},
{fpath: proj.Abs(sakila.PathXLSXSubset), tbl: sakila.TblActor, wantCount: sakila.TblActorCount + 1}, // +1 is for the header row in the XLSX file
{fpath: proj.Abs(sakila.PathXLSXActorHeader), srcHeader: true, tbl: sakila.TblActor, wantCount: sakila.TblActorCount},
{fpath: proj.Abs(sakila.PathXLSXSubset), srcHeader: true, tbl: sakila.TblActor, wantCount: sakila.TblActorCount},
{fpath: proj.Abs("README.md"), wantErr: true},
}
@ -133,7 +135,12 @@ func TestCmdSQL_StdinQuery(t *testing.T) {
ru := newRun(t).hush()
ru.rc.Stdin = f
err = ru.exec("sql", "--header=false", "SELECT * FROM "+tc.tbl)
args := []string{"sql", "--header=false", "SELECT * FROM " + tc.tbl}
if tc.srcHeader {
args = append(args, "--opts=header=true")
}
err = ru.exec(args...)
if tc.wantErr {
require.Error(t, err)
return

View File

@ -155,15 +155,19 @@ func (p *processor) buildSchemaFlat() (*importSchema, error) {
for _, field := range e.fieldNames {
if detector, ok := e.detectors[field]; ok {
// If it has a detector, it's a regular field
kind, mungeFn, err := detector.Detect()
k, mungeFn, err := detector.Detect()
if err != nil {
return errz.Err(err)
}
if k == kind.Null {
k = kind.Text
}
colDef := &sqlmodel.ColDef{
Name: p.calcColName(e, field),
Table: tblDef,
Kind: kind,
Kind: k,
}
colDefs = append(colDefs, colDef)

View File

@ -299,6 +299,9 @@ func detectColKindsJSONA(ctx context.Context, r io.Reader) ([]kind.Kind, []kind.
if err != nil {
return nil, nil, err
}
if kinds[i] == kind.Null {
kinds[i] = kind.Text
}
}
return kinds, mungeFns, nil

View File

@ -84,6 +84,10 @@ func importSheetToTable(ctx context.Context, log lg.Log, sheet *xlsx.Sheet, hasH
continue
}
if isEmptyRow(row) {
continue
}
rec := rowToRecord(log, destColKinds, row, sheet.Name, i)
err = bi.Munge(rec)
if err != nil {
@ -120,6 +124,22 @@ func importSheetToTable(ctx context.Context, log lg.Log, sheet *xlsx.Sheet, hasH
return nil
}
// isEmptyRow reports whether row should be treated as empty:
// a nil row, a row with no cells, or a row in which every
// cell's value is the empty string.
func isEmptyRow(row *xlsx.Row) bool {
	if row == nil {
		return true
	}
	for _, cell := range row.Cells {
		if cell.Value != "" {
			return false
		}
	}
	// Zero cells, or all cells empty.
	return true
}
// buildTblDefsForSheets returns a TableDef for each sheet.
func buildTblDefsForSheets(ctx context.Context, log lg.Log, sheets []*xlsx.Sheet, hasHeader bool) ([]*sqlmodel.TableDef, error) {
tblDefs := make([]*sqlmodel.TableDef, len(sheets))
@ -148,22 +168,21 @@ func buildTblDefsForSheets(ctx context.Context, log lg.Log, sheets []*xlsx.Sheet
// buildTblDefForSheet creates a table for the given sheet, and returns
// a model of the table, or an error.
func buildTblDefForSheet(log lg.Log, sheet *xlsx.Sheet, hasHeader bool) (*sqlmodel.TableDef, error) {
numCells := getRowsMaxCellCount(sheet)
if numCells == 0 {
maxCols := getRowsMaxCellCount(sheet)
if maxCols == 0 {
return nil, errz.Errorf("sheet %q has no columns", sheet.Name)
}
colNames := make([]string, numCells)
colKinds := make([]kind.Kind, numCells)
colNames := make([]string, maxCols)
colKinds := make([]kind.Kind, maxCols)
firstDataRow := 0
if len(sheet.Rows) == 0 {
// TODO: is this even reachable? That is, if sheet.Rows is empty,
// then sheet.cols (checked for above) will also be empty?
// sheet has no rows
for i := 0; i < numCells; i++ {
for i := 0; i < maxCols; i++ {
colKinds[i] = kind.Text
colNames[i] = stringz.GenerateAlphaColName(i, false)
}
@ -174,11 +193,11 @@ func buildTblDefForSheet(log lg.Log, sheet *xlsx.Sheet, hasHeader bool) (*sqlmod
if hasHeader {
firstDataRow = 1
headerCells := sheet.Rows[0].Cells
for i := 0; i < numCells; i++ {
for i := 0; i < len(headerCells); i++ {
colNames[i] = headerCells[i].Value
}
} else {
for i := 0; i < numCells; i++ {
for i := 0; i < maxCols; i++ {
colNames[i] = stringz.GenerateAlphaColName(i, false)
}
}
@ -186,16 +205,22 @@ func buildTblDefForSheet(log lg.Log, sheet *xlsx.Sheet, hasHeader bool) (*sqlmod
// Set up the column types
if firstDataRow >= len(sheet.Rows) {
// the sheet contains only one row (the header row). Let's
// explicitly set the column type none the less
for i := 0; i < numCells; i++ {
// explicitly set the column type nonetheless
for i := 0; i < maxCols; i++ {
colKinds[i] = kind.Text
}
} else {
// we have at least one data row, let's get the column types
colKinds = getKindsFromCells(sheet.Rows[firstDataRow].Cells)
var err error
colKinds, err = calcKindsForRows(firstDataRow, sheet.Rows)
if err != nil {
return nil, err
}
}
}
colNames, colKinds = syncColNamesKinds(colNames, colKinds)
tblDef := &sqlmodel.TableDef{Name: sheet.Name}
cols := make([]*sqlmodel.ColDef, len(colNames))
for i, colName := range colNames {
@ -207,9 +232,65 @@ func buildTblDefForSheet(log lg.Log, sheet *xlsx.Sheet, hasHeader bool) (*sqlmod
return tblDef, nil
}
// syncColNamesKinds ensures that column names and kinds are in
// a working state vis-a-vis each other. Notably if a colName is
// empty and its equivalent kind is kind.Null, that element
// is filtered out.
func syncColNamesKinds(colNames []string, colKinds []kind.Kind) (names []string, kinds []kind.Kind) {
	// Allow for "phantom" columns (columns with entirely empty
	// data): trim any names that have no corresponding kind.
	// Note: not sure if this scenario is now reachable.
	if len(colNames) > len(colKinds) {
		colNames = colNames[:len(colKinds)]
	}

	// Drop any column whose name is empty and whose kind is
	// indeterminate (Null or Unknown).
	for i, name := range colNames {
		k := colKinds[i]
		if name == "" && (k == kind.Null || k == kind.Unknown) {
			continue
		}
		names = append(names, name)
		kinds = append(kinds, k)
	}

	// Any remaining unnamed column (empty header cell) gets a
	// generated name. Possibly we should just return an error,
	// but instead we try to synthesize a unique name.
	for i, name := range names {
		if name != "" {
			continue
		}
		generated := stringz.GenerateAlphaColName(i, false)
		for stringz.InSlice(names[:i], generated) {
			// Name collision: append an underscore and retry.
			generated += "_"
		}
		names[i] = generated
	}

	// Coerce any indeterminate kinds to Text.
	for i, k := range kinds {
		if k == kind.Null || k == kind.Unknown {
			kinds[i] = kind.Text
		}
	}

	return names, kinds
}
func rowToRecord(log lg.Log, destColKinds []kind.Kind, row *xlsx.Row, sheetName string, rowIndex int) []interface{} {
vals := make([]interface{}, len(row.Cells))
vals := make([]interface{}, len(destColKinds))
for j, cell := range row.Cells {
if j >= len(vals) {
log.Warnf("sheet %s[%d:%d]: skipping additional cells because there's more cells than expected (%d)",
sheetName, rowIndex, j, len(destColKinds))
continue
}
typ := cell.Type()
switch typ {
case xlsx.CellTypeBool:
@ -271,42 +352,100 @@ func rowToRecord(log lg.Log, destColKinds []kind.Kind, row *xlsx.Row, sheetName
return vals
}
func getKindsFromCells(cells []*xlsx.Cell) []kind.Kind {
vals := make([]kind.Kind, len(cells))
for i, cell := range cells {
typ := cell.Type()
switch typ {
case xlsx.CellTypeBool:
vals[i] = kind.Bool
case xlsx.CellTypeNumeric:
if cell.IsTime() {
vals[i] = kind.Datetime
continue
}
// readCellValue reads the value of a cell, returning a value of
// type that most matches the sq kind.
func readCellValue(cell *xlsx.Cell) interface{} {
if cell == nil || cell.Value == "" {
return nil
}
_, err := cell.Int64()
var val interface{}
switch cell.Type() {
case xlsx.CellTypeBool:
val = cell.Bool()
return val
case xlsx.CellTypeNumeric:
if cell.IsTime() {
t, err := cell.GetTime(false)
if err == nil {
vals[i] = kind.Int
continue
return t
}
_, err = cell.Float()
t, err = cell.GetTime(true)
if err == nil {
vals[i] = kind.Float
continue
return t
}
// it's not an int, it's not a float
vals[i] = kind.Decimal
case xlsx.CellTypeDate:
// TODO: support time values here?
vals[i] = kind.Datetime
// Otherwise we have an error, just return the value
val, _ = cell.FormattedValue()
return val
}
default:
vals[i] = kind.Text
intVal, err := cell.Int64()
if err == nil {
val = intVal
return val
}
floatVal, err := cell.Float()
if err == nil {
val = floatVal
return val
}
val, _ = cell.FormattedValue()
return val
case xlsx.CellTypeString:
val = cell.String()
case xlsx.CellTypeDate:
// TODO: parse into a time.Time value here?
val, _ = cell.FormattedValue()
default:
val, _ = cell.FormattedValue()
}
return val
}
// calcKindsForRows calculates the lowest-common-denominator kind
// for the cells of rows, sampling every non-empty row from
// firstDataRow onwards. The returned slice will have length
// equal to the longest row.
func calcKindsForRows(firstDataRow int, rows []*xlsx.Row) ([]kind.Kind, error) {
	if firstDataRow > len(rows) {
		return nil, errz.Errorf("rows are empty")
	}

	var detectors []*kind.Detector

	for i := firstDataRow; i < len(rows); i++ {
		// Empty rows contribute no samples; skip them so they
		// don't skew kind detection.
		if isEmptyRow(rows[i]) {
			continue
		}

		// Grow the detector slice to match the widest row seen so far.
		for j := len(detectors); j < len(rows[i].Cells); j++ {
			detectors = append(detectors, kind.NewDetector())
		}

		for j := range rows[i].Cells {
			val := readCellValue(rows[i].Cells[j])
			detectors[j].Sample(val)
		}
	}

	kinds := make([]kind.Kind, len(detectors))

	for j := range detectors {
		knd, _, err := detectors[j].Detect()
		if err != nil {
			return nil, err
		}
		kinds[j] = knd
	}

	return kinds, nil
}
// getColNames returns column names for the sheet. If hasHeader is true and there's
@ -318,22 +457,23 @@ func getColNames(sheet *xlsx.Sheet, hasHeader bool) []string {
if len(sheet.Rows) > 0 && hasHeader {
row := sheet.Rows[0]
for i := 0; i < numCells; i++ {
colNames[i] = row.Cells[i].Value
for i := 0; i < len(row.Cells); i++ {
colNames[i] = row.Cells[i].String()
}
return colNames
}
for i := 0; i < numCells; i++ {
colNames[i] = stringz.GenerateAlphaColName(i, false)
for i := range colNames {
if colNames[i] == "" {
colNames[i] = stringz.GenerateAlphaColName(i, false)
}
}
return colNames
}
// getColTypes returns the xlsx column types for the sheet, determined from
// getCellColumnTypes returns the xlsx cell types for the sheet, determined from
// the values of the first data row (after any header row).
func getColTypes(sheet *xlsx.Sheet, hasHeader bool) []xlsx.CellType {
func getCellColumnTypes(sheet *xlsx.Sheet, hasHeader bool) []xlsx.CellType {
types := make([]*xlsx.CellType, getRowsMaxCellCount(sheet))
firstDataRow := 0
if hasHeader {

Binary file not shown.

View File

@ -190,6 +190,11 @@ func (d *database) TableMetadata(ctx context.Context, tblName string) (*source.T
}
// SourceMetadata implements driver.Database.
// TODO: the implementation of SourceMetadata is out
// of sync with the way we import data. For example, empty
// rows are filtered out during import, and empty columns
// are discarded. Thus SourceMetadata needs an overhaul to
// bring its reporting into line with import.
func (d *database) SourceMetadata(ctx context.Context) (*source.Metadata, error) {
meta := &source.Metadata{Handle: d.src.Handle}
@ -231,7 +236,9 @@ func (d *database) SourceMetadata(ctx context.Context) (*source.Metadata, error)
}
colNames := getColNames(sheet, hasHeader)
colTypes := getColTypes(sheet, hasHeader)
// TODO: Should move over to using kind.Detector
colTypes := getCellColumnTypes(sheet, hasHeader)
for i, colType := range colTypes {
col := &source.ColMetadata{}

View File

@ -198,6 +198,7 @@ func (d *Detector) Sample(v interface{}) {
return
case time.Time:
d.retain(Time, Date, Datetime)
return
case stdj.Number:
// JSON number
d.foundString = true
@ -274,7 +275,7 @@ func (d *Detector) Sample(v interface{}) {
return nil, nil
}
s, ok := val.(string)
s, ok = val.(string)
if !ok {
return nil, errz.Errorf("expected %T to be string", val)
}
@ -283,7 +284,8 @@ func (d *Detector) Sample(v interface{}) {
return nil, nil
}
t, err := time.Parse(format, s)
var t time.Time
t, err = time.Parse(format, s)
if err != nil {
return nil, errz.Err(err)
}
@ -307,7 +309,7 @@ func (d *Detector) Sample(v interface{}) {
return nil, nil
}
s, ok := val.(string)
s, ok = val.(string)
if !ok {
return nil, errz.Errorf("expected %T to be string", val)
}
@ -316,7 +318,8 @@ func (d *Detector) Sample(v interface{}) {
return nil, nil
}
t, err := time.Parse(format, s)
var t time.Time
t, err = time.Parse(format, s)
if err != nil {
return nil, errz.Err(err)
}
@ -362,7 +365,8 @@ func (d *Detector) Sample(v interface{}) {
}
}
// Detect returns the detected Kind. If ambiguous, Text is returned.
// Detect returns the detected Kind. If ambiguous, Text is returned,
// unless all sampled values were nil, in which case Null is returned.
// If the returned mungeFn is non-nil, it can be used to convert input
// values to their canonical form. For example, for Datetime the MungeFunc
// would accept string "2020-06-11T02:50:54Z" and return a time.Time,
@ -370,8 +374,12 @@ func (d *Detector) Sample(v interface{}) {
// and always return a string in the canonicalized form "1970-01-01".
func (d *Detector) Detect() (kind Kind, mungeFn MungeFunc, err error) {
if !d.dirty {
if d.foundString {
return Text, nil, nil
}
// If we haven't filtered any kinds, default to Text.
return Text, nil, nil
return Null, nil, nil
}
switch len(d.kinds) {
@ -381,7 +389,15 @@ func (d *Detector) Detect() (kind Kind, mungeFn MungeFunc, err error) {
for k := range d.kinds {
return k, d.mungeFns[k], nil
}
default:
}
// NOTE: this logic below about detecting the remaining type
// is a bit sketchy. If you're debugging this code, it's
// probably the case that the code below is faulty.
if d.has(Time, Date, Datetime) {
// If all three time types are left, use the most
// general, i.e. Datetime.
return Datetime, d.mungeFns[Datetime], nil
}
if d.has(Time) {

View File

@ -88,10 +88,10 @@ func TestKindDetector(t *testing.T) {
wantMunge bool
wantErr bool
}{
{in: nil, want: kind.Text},
{in: []interface{}{}, want: kind.Text},
{in: nil, want: kind.Null},
{in: []interface{}{}, want: kind.Null},
{in: []interface{}{""}, want: kind.Text},
{in: []interface{}{nil}, want: kind.Text},
{in: []interface{}{nil}, want: kind.Null},
{in: []interface{}{nil, ""}, want: kind.Text},
{in: []interface{}{int(1), int8(8), int16(16), int32(32), int64(64)}, want: kind.Int},
{in: []interface{}{1, "2", "3"}, want: kind.Decimal},
@ -100,7 +100,7 @@ func TestKindDetector(t *testing.T) {
{in: []interface{}{1, "2", stdj.Number("1000")}, want: kind.Decimal},
{in: []interface{}{1.0, "2.0"}, want: kind.Decimal},
{in: []interface{}{1, float64(2.0), float32(7.7), int32(3)}, want: kind.Float},
{in: []interface{}{nil, nil, nil}, want: kind.Text},
{in: []interface{}{nil, nil, nil}, want: kind.Null},
{in: []interface{}{"1.0", "2.0", "3.0", "4", nil, int64(6)}, want: kind.Decimal},
{in: []interface{}{true, false, nil, "true", "false", "yes", "no", ""}, want: kind.Bool},
{in: []interface{}{"0", "1"}, want: kind.Decimal},
@ -145,7 +145,7 @@ func TestKindDetector(t *testing.T) {
return
}
require.Equal(t, tc.want.String(), gotKind.String())
require.Equal(t, tc.want.String(), gotKind.String(), tc.in)
if !tc.wantMunge {
require.Nil(t, gotMungeFn)

View File

@ -29,7 +29,7 @@ const (
MS = MS17
)
// AllHandles returns all the sakila handles. It does not
// AllHandles returns all the typical sakila handles. It does not
// include monotable handles such as @sakila_csv_actor.
func AllHandles() []string {
return []string{SL3, Pg9, Pg10, Pg11, Pg12, My56, My57, My8, MS17, XLSX}
@ -145,6 +145,7 @@ const (
PathSL3 = "drivers/sqlite3/testdata/sakila.db"
PathXLSX = "drivers/xlsx/testdata/sakila.xlsx"
PathXLSXSubset = "drivers/xlsx/testdata/sakila_subset.xlsx"
PathXLSXActorHeader = "drivers/xlsx/testdata/sakila_actor_header.xlsx"
PathCSVActor = "drivers/csv/testdata/sakila-csv/actor.csv"
PathCSVActorNoHeader = "drivers/csv/testdata/sakila-csv-noheader/actor.csv"
PathTSVActor = "drivers/csv/testdata/sakila-tsv/actor.tsv"