Xlsx import handle empty sheets (#79)

* can now import XLSX with empty sheets

* renamed XLSX test article
This commit is contained in:
Neil O'Toole 2021-02-19 23:22:35 -07:00 committed by GitHub
parent 855494570b
commit a1cfd5d525
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 47 additions and 13 deletions

View File

@ -49,9 +49,6 @@ const (
flagMonochromeShort = "M"
flagMonochromeUsage = "Don't colorize output"
flagNotifierLabel = "label"
flagNotifierLabelUsage = "Optional label for the notification destination"
flagOutput = "output"
flagOutputShort = "o"
flagOutputUsage = "Write output to <file> instead of stdout"

View File

@ -36,6 +36,10 @@ func xlsxToScratch(ctx context.Context, log lg.Log, src *source.Source, xlFile *
}
for _, tblDef := range tblDefs {
if tblDef == nil {
// tblDef can be nil if its sheet is empty (has no data).
continue
}
err = scratchDB.SQLDriver().CreateTable(ctx, scratchDB.DB(), tblDef)
if err != nil {
return err
@ -45,15 +49,23 @@ func xlsxToScratch(ctx context.Context, log lg.Log, src *source.Source, xlFile *
log.Debugf("%d tables created (but not yet populated) in %s in %s",
len(tblDefs), scratchDB.Source().Handle, time.Since(start))
var imported, skipped int
for i := range xlFile.Sheets {
if tblDefs[i] == nil {
// tblDef can be nil if its sheet is empty (has no data).
skipped++
continue
}
err = importSheetToTable(ctx, log, xlFile.Sheets[i], hasHeader, scratchDB, tblDefs[i])
if err != nil {
return err
}
imported++
}
log.Debugf("%d sheets imported from %s to %s in %s",
len(xlFile.Sheets), src.Handle, scratchDB.Source().Handle, time.Since(start))
log.Debugf("%d sheets imported (%d sheets skipped) from %s to %s in %s",
imported, skipped, src.Handle, scratchDB.Source().Handle, time.Since(start))
return nil
}
@ -140,7 +152,8 @@ func isEmptyRow(row *xlsx.Row) bool {
return true
}
// buildTblDefsForSheets returns a TableDef for each sheet.
// buildTblDefsForSheets returns a TableDef for each sheet. If the
// sheet is empty (has no data), the TableDef for that sheet will be nil.
func buildTblDefsForSheets(ctx context.Context, log lg.Log, sheets []*xlsx.Sheet, hasHeader bool) ([]*sqlmodel.TableDef, error) {
tblDefs := make([]*sqlmodel.TableDef, len(sheets))
@ -166,11 +179,13 @@ func buildTblDefsForSheets(ctx context.Context, log lg.Log, sheets []*xlsx.Sheet
}
// buildTblDefForSheet creates a table for the given sheet, and returns
// a model of the table, or an error.
// a model of the table, or an error. If the sheet is empty, (nil,nil)
// is returned.
func buildTblDefForSheet(log lg.Log, sheet *xlsx.Sheet, hasHeader bool) (*sqlmodel.TableDef, error) {
maxCols := getRowsMaxCellCount(sheet)
if maxCols == 0 {
return nil, errz.Errorf("sheet %q has no columns", sheet.Name)
log.Warnf("sheet %q is empty: skipping", sheet.Name)
return nil, nil
}
colNames := make([]string, maxCols)

Binary file not shown.

View File

@ -38,7 +38,6 @@ func Test_Smoke_Full(t *testing.T) {
func Test_XLSX_BadDateRecognition(t *testing.T) {
t.Parallel()
// https://github.com/neilotoole/sq-preview/issues/2
th := testh.New(t)
src := &source.Source{
@ -57,3 +56,21 @@ func Test_XLSX_BadDateRecognition(t *testing.T) {
require.NoError(t, err)
require.Equal(t, 21, len(sink.Recs))
}
// TestHandleSomeEmptySheets checks that an XLSX workbook in which
// some of the sheets are empty can still be imported and queried:
// selecting everything from Sheet1 must succeed and yield two records.
func TestHandleSomeEmptySheets(t *testing.T) {
	t.Parallel()

	const (
		handle   = "@xlsx_empty_sheets"
		fixture  = "drivers/xlsx/testdata/test_with_some_empty_sheets.xlsx"
		query    = "SELECT * FROM Sheet1"
		wantRecs = 2
	)

	helper := testh.New(t)

	// Source pointing at the fixture workbook that has some empty sheets.
	workbook := &source.Source{
		Handle:   handle,
		Type:     xlsx.Type,
		Location: proj.Abs(fixture),
	}

	sink, err := helper.QuerySQL(workbook, query)
	require.NoError(t, err)
	require.Equal(t, wantRecs, len(sink.Recs))
}

View File

@ -1,16 +1,21 @@
# SLQ Grammar
The query language used by SQ is formally known as _SLQ_. The
The query language used by `sq` is formally known as _SLQ_. The
grammar is defined in `SLQ.g4`, which is an [ANTLR4](https://www.antlr.org/) grammar.
The `antlr4` tool generates the parser / lexer files from the grammar. Being that `antlr4` is Java-based, Java must be installed to regenerate from the grammar. This process is encapsulated in a `mage` target:
The `antlr4` tool generates the parser / lexer files from the grammar.
Because `antlr4` is Java-based, Java must be installed to regenerate
from the grammar. This process is encapsulated in a `mage` target:
```sh
# from SQ_PROJ_ROOT
mage generateparser
```
The generated .go files ultimately end up in package `libsq/slq`. Files in this directory should not be directly edited. The `libsq/ast.Parse` function takes a _SLQ_ input string and returns an `*ast.AST`. It is the `libsq.ExecuteSLQ` function that invokes `ast.Parse`.
The generated .go files ultimately end up in package `libsq/slq`. Files
in this directory should not be directly edited.
The `libsq/ast.Parse` function takes a `SLQ` input string and returns an `*ast.AST`.
It is the `libsq.ExecuteSLQ` function that invokes `ast.Parse`.