Test ingest Excel file format variants (#303)

* Tests for additional formats

* Tests for ingest Excel file format variants
This commit is contained in:
Neil O'Toole 2023-08-20 07:22:24 -06:00 committed by GitHub
parent 754d528ce9
commit 611ad30f60
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 88 additions and 29 deletions

View File

@ -4,7 +4,7 @@ import (
"fmt"
"strings"
"github.com/neilotoole/sq/cli/output/excelw"
"github.com/neilotoole/sq/cli/output/xlsxw"
"github.com/neilotoole/sq/cli/run"
@ -142,9 +142,9 @@ func RegisterDefaultOpts(reg *options.Registry) {
OptDateFormatAsNumber,
OptTimeFormat,
OptTimeFormatAsNumber,
excelw.OptDatetimeFormat,
excelw.OptDateFormat,
excelw.OptTimeFormat,
xlsxw.OptDatetimeFormat,
xlsxw.OptDateFormat,
xlsxw.OptTimeFormat,
driver.OptResultColRename,
OptVerbose,
OptPrintHeader,
@ -271,7 +271,7 @@ func addTimeFormatOptsFlags(cmd *cobra.Command) {
key = addOptionFlag(cmd.Flags(), OptTimeFormatAsNumber)
panicOn(cmd.RegisterFlagCompletionFunc(key, completeBool))
key = addOptionFlag(cmd.Flags(), excelw.OptDatetimeFormat)
key = addOptionFlag(cmd.Flags(), xlsxw.OptDatetimeFormat)
panicOn(cmd.RegisterFlagCompletionFunc(key, completeStrings(
-1,
"yyyy-mm-dd hh:mm",
@ -279,7 +279,7 @@ func addTimeFormatOptsFlags(cmd *cobra.Command) {
"dd-mmm-yy h:mm:ss AM/PM",
)))
key = addOptionFlag(cmd.Flags(), excelw.OptDateFormat)
key = addOptionFlag(cmd.Flags(), xlsxw.OptDateFormat)
panicOn(cmd.RegisterFlagCompletionFunc(key, completeStrings(
-1,
"yyyy-mm-dd",
@ -287,7 +287,7 @@ func addTimeFormatOptsFlags(cmd *cobra.Command) {
"dd-mmm-yy",
)))
key = addOptionFlag(cmd.Flags(), excelw.OptTimeFormat)
key = addOptionFlag(cmd.Flags(), xlsxw.OptTimeFormat)
panicOn(cmd.RegisterFlagCompletionFunc(key, completeStrings(
-1,
"hh:mm:ss",

View File

@ -6,9 +6,9 @@ import (
"os"
"strings"
"github.com/neilotoole/sq/libsq/core/errz"
"github.com/neilotoole/sq/cli/output/xlsxw"
"github.com/neilotoole/sq/cli/output/excelw"
"github.com/neilotoole/sq/libsq/core/errz"
"github.com/neilotoole/sq/libsq/core/stringz"
@ -329,8 +329,7 @@ func getRecordWriterFunc(f format.Format) output.NewRecordWriterFunc {
case format.XML:
return xmlw.NewRecordWriter
case format.XLSX:
// return xlsxw.NewRecordWriter
return excelw.NewRecordWriter
return xlsxw.NewRecordWriter
case format.YAML:
return yamlw.NewRecordWriter
case format.Raw:
@ -358,9 +357,9 @@ func getPrinting(cmd *cobra.Command, opts options.Options, out, errOut io.Writer
pr.FormatDate = timez.FormatFunc(OptDateFormat.Get(opts))
pr.FormatDateAsNumber = OptDateFormatAsNumber.Get(opts)
pr.ExcelDatetimeFormat = excelw.OptDatetimeFormat.Get(opts)
pr.ExcelDateFormat = excelw.OptDateFormat.Get(opts)
pr.ExcelTimeFormat = excelw.OptTimeFormat.Get(opts)
pr.ExcelDatetimeFormat = xlsxw.OptDatetimeFormat.Get(opts)
pr.ExcelDateFormat = xlsxw.OptDateFormat.Get(opts)
pr.ExcelTimeFormat = xlsxw.OptTimeFormat.Get(opts)
pr.Verbose = OptVerbose.Get(opts)
pr.FlushThreshold = OptTuningFlushThreshold.Get(opts)

View File

@ -1,4 +1,4 @@
package excelw
package xlsxw
import (
"math"

View File

@ -1,4 +1,4 @@
package excelw
package xlsxw
// The code below is lifted from the excelize package.

View File

@ -1,7 +1,7 @@
// Package excelw implements output writers for Microsoft Excel.
// Package xlsxw implements output writers for Microsoft Excel.
// It uses the https://github.com/qax-os/excelize library.
// See docs: https://xuri.me/excelize
package excelw
package xlsxw
import (
"encoding/base64"

View File

@ -1,4 +1,4 @@
package excelw_test
package xlsxw_test
import (
"bytes"
@ -9,6 +9,8 @@ import (
"os"
"testing"
"github.com/neilotoole/sq/cli/output/xlsxw"
"github.com/stretchr/testify/assert"
"github.com/neilotoole/sq/cli/testrun"
@ -26,8 +28,6 @@ import (
"github.com/neilotoole/sq/testh/sakila"
"github.com/neilotoole/sq/testh/testsrc"
"github.com/neilotoole/sq/cli/output/excelw"
"github.com/stretchr/testify/require"
)
@ -73,10 +73,10 @@ func TestRecordWriter(t *testing.T) {
buf := &bytes.Buffer{}
pr := output.NewPrinting()
pr.ExcelDatetimeFormat = excelw.OptDatetimeFormat.Default()
pr.ExcelDateFormat = excelw.OptDateFormat.Default()
pr.ExcelTimeFormat = excelw.OptTimeFormat.Default()
w := excelw.NewRecordWriter(buf, pr)
pr.ExcelDatetimeFormat = xlsxw.OptDatetimeFormat.Default()
pr.ExcelDateFormat = xlsxw.OptDateFormat.Default()
pr.ExcelTimeFormat = xlsxw.OptTimeFormat.Default()
w := xlsxw.NewRecordWriter(buf, pr)
require.NoError(t, w.Open(recMeta))
require.NoError(t, w.WriteRecords(recs))
@ -96,7 +96,7 @@ func TestBytesEncodedAsBase64(t *testing.T) {
buf := &bytes.Buffer{}
pr := output.NewPrinting()
w := excelw.NewRecordWriter(buf, pr)
w := xlsxw.NewRecordWriter(buf, pr)
require.NoError(t, w.Open(recMeta))
require.NoError(t, w.WriteRecords(recs))
@ -168,11 +168,11 @@ func TestOptDatetimeFormats(t *testing.T) {
src := th.Source(sakila.Pg)
tr := testrun.New(th.Context, t, nil).Hush().Add(*src)
require.NoError(t, tr.Exec("config", "set", excelw.OptDatetimeFormat.Key(), "yy/mm/dd - hh:mm:ss"))
require.NoError(t, tr.Exec("config", "set", xlsxw.OptDatetimeFormat.Key(), "yy/mm/dd - hh:mm:ss"))
tr = testrun.New(th.Context, t, tr)
require.NoError(t, tr.Exec("config", "set", excelw.OptDateFormat.Key(), "yy/mmm/dd"))
require.NoError(t, tr.Exec("config", "set", xlsxw.OptDateFormat.Key(), "yy/mmm/dd"))
tr = testrun.New(th.Context, t, tr)
require.NoError(t, tr.Exec("config", "set", excelw.OptTimeFormat.Key(), "h:mm am/pm"))
require.NoError(t, tr.Exec("config", "set", xlsxw.OptTimeFormat.Key(), "h:mm am/pm"))
tr = testrun.New(th.Context, t, tr)
require.NoError(t, tr.Exec("sql", "--xlsx", query))

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -139,6 +139,65 @@ func TestSakila_query_cmd(t *testing.T) {
}
}
// TestOpenFileFormats opens each Excel workbook variant found under
// testdata/file_formats as an xlsx source. For the supported variants it
// pings the DB and checks the contents of the "actor" table (column names,
// row count, column kinds, and the first record). For the variants flagged
// with wantErr, the source must still open without error, but obtaining
// the DB handle is required to fail.
func TestOpenFileFormats(t *testing.T) {
	t.Parallel()
	tutil.SkipWindows(t, "Skipping because of slow workflow perf on windows")
	tutil.SkipShort(t, true)

	type fileCase struct {
		filename string
		wantErr  bool
	}

	cases := []fileCase{
		{filename: "sakila.xlsx"},
		{filename: "sakila.xlam"},
		{filename: "sakila.xlsm"},
		{filename: "sakila.xltm"},
		{filename: "sakila.xltx"},
		{filename: "sakila.strict_openxml.xlsx"},
		// .xls and .xlsb aren't supported. Perhaps one day we'll incorporate
		// support via a library such as https://github.com/extrame/xls.
		{filename: "sakila.xls", wantErr: true},
		{filename: "sakila.xlsb", wantErr: true},
	}

	for _, c := range cases {
		// Per-iteration copy: the subtest closure runs in parallel and must
		// not observe a later iteration's value.
		c := c

		t.Run(c.filename, func(t *testing.T) {
			t.Parallel()

			th := testh.New(t, testh.OptLongOpen())
			loc := filepath.Join("testdata", "file_formats", c.filename)
			src := th.Add(&source.Source{
				Handle:   "@excel",
				Type:     xlsx.Type,
				Location: loc,
			})

			dbase, openErr := th.Databases().Open(th.Context, src)
			require.NoError(t, openErr)

			// For unsupported formats, the failure is expected here rather
			// than from Open above.
			db, dbErr := dbase.DB(th.Context)
			if c.wantErr {
				require.Error(t, dbErr)
				return
			}
			require.NoError(t, dbErr)
			require.NoError(t, db.PingContext(th.Context))

			sink, queryErr := th.QuerySQL(src, "SELECT * FROM actor")
			require.NoError(t, queryErr)

			meta := sink.RecMeta
			require.Equal(t, sakila.TblActorCols(), meta.MungedNames())
			require.Equal(t, sakila.TblActorCount, len(sink.Recs))
			require.Equal(t, sakila.TblActorColKinds(), meta.Kinds())

			lastUpdate := time.Date(2020, time.February, 15, 6, 59, 28, 0, time.UTC)
			firstActor := record.Record{int64(1), "PENELOPE", "GUINESS", lastUpdate}
			require.Equal(t, firstActor, sink.Recs[0])
		})
	}
}
func TestSakila_query(t *testing.T) {
t.Parallel()
tutil.SkipWindows(t, "Skipping because of slow workflow perf on windows")
@ -197,6 +256,7 @@ func TestSakila_query(t *testing.T) {
require.Equal(t, tc.wantCols, sink.RecMeta.MungedNames())
require.Equal(t, tc.wantCount, len(sink.Recs))
require.Equal(t, tc.wantKinds, sink.RecMeta.Kinds())
require.Equal(t, tc.wantRec0, sink.Recs[0])
})
}
}