mirror of
https://github.com/neilotoole/sq.git
synced 2024-11-24 03:45:56 +03:00
CSV ingest improvements (#302)
* csv: additional datetime detect formats
This commit is contained in:
parent
edc933d897
commit
db092d5453
17
CHANGELOG.md
17
CHANGELOG.md
@ -40,16 +40,11 @@ discover anything strange, please [open an issue](https://github.com/neilotoole/
|
||||
effectively the ingest counterpart of the existing output option
|
||||
[`result.column.rename`](https://sq.io/docs/config/#resultcolumnrename).
|
||||
|
||||
- [#191]: The [XLSX](https://sq.io/docs/drivers/xlsx) driver now detects header rows, like
|
||||
- [#191]: The [XLSX](https://sq.io/docs/drivers/xlsx) driver now [detects](https://sq.io/docs/drivers/xlsx/#header-row) header rows, like
|
||||
the CSV driver already does. Thus, you now typically don't need to specify
|
||||
the `--ingest.header` flag for Excel files. However, the option remains available
|
||||
in case `sq` can't figure it out for a particular file.
|
||||
|
||||
- There's a new option `error.format` that controls error output format independent
|
||||
of the main [`format`](https://sq.io/docs/config/#format) option
|
||||
([docs](https://sq.io/docs/config/#errorformat)). The `error.format` value
|
||||
must be one of `text` or `json`.
|
||||
|
||||
- The Excel writer has three new config options for controlling date/time output.
|
||||
Note that these format strings are distinct from [`format.datetime`](https://sq.io/docs/config/#formatdatetime)
|
||||
and friends, because Excel has its own format string mechanism.
|
||||
@ -57,9 +52,17 @@ discover anything strange, please [open an issue](https://github.com/neilotoole/
|
||||
- [`format.excel.date`](https://sq.io/docs/config/#formatexceldate): Controls date-only format, e.g. `2023-08-03`.
|
||||
- [`format.excel.time`](https://sq.io/docs/config/#formatexceltime): Controls time-only format, e.g. `4:07 pm`.
|
||||
|
||||
- The ingest [kind detectors](https://sq.io/docs/detect/#kinds) (e.g. for `CSV` or `XLSX`)
|
||||
now detect more [date & time formats](https://sq.io/docs/detect/#datetime-formats) as `kind.Datetime`, `kind.Date`, and `kind.Time`.
|
||||
|
||||
- If an error occurs when the output format is `text`, a stack trace is printed
|
||||
to `stderr` when the command is executed with `--verbose` (`-v`).
|
||||
|
||||
- There's a new option `error.format` that controls error output format independent
|
||||
of the main [`format`](https://sq.io/docs/config/#format) option
|
||||
([docs](https://sq.io/docs/config/#errorformat)). The `error.format` value
|
||||
must be one of `text` or `json`.
|
||||
|
||||
## Changed
|
||||
|
||||
- ☢️ The default Excel date format has changed. Previously
|
||||
@ -94,8 +97,6 @@ discover anything strange, please [open an issue](https://github.com/neilotoole/
|
||||
- ☢️ The XLSX writer now outputs blob (`bytes`) cell data as a base64-encoded string,
|
||||
instead of raw bytes.
|
||||
|
||||
- The XLSX driver now is able to recognize more date and time formats.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed bug where source-specific config wasn't being propagated.
|
||||
|
@ -4,6 +4,17 @@ import (
|
||||
"context"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/neilotoole/sq/libsq/core/record"
|
||||
|
||||
"github.com/neilotoole/sq/drivers/csv"
|
||||
|
||||
"github.com/neilotoole/sq/libsq/core/kind"
|
||||
"github.com/neilotoole/sq/libsq/core/loz"
|
||||
"github.com/neilotoole/sq/libsq/source"
|
||||
"github.com/samber/lo"
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
"github.com/neilotoole/sq/libsq/driver"
|
||||
|
||||
@ -41,6 +52,83 @@ func TestSmoke(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestSakila_query(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
testCases := []struct {
|
||||
file string
|
||||
wantCols []string
|
||||
wantCount int
|
||||
wantKinds []kind.Kind
|
||||
wantRec0 record.Record
|
||||
}{
|
||||
{
|
||||
file: sakila.TblActor,
|
||||
wantCols: sakila.TblActorCols(),
|
||||
wantCount: sakila.TblActorCount,
|
||||
wantKinds: sakila.TblActorColKinds(),
|
||||
wantRec0: record.Record{
|
||||
int64(1), "PENELOPE", "GUINESS",
|
||||
time.Date(2020, time.February, 15, 6, 59, 28, 0, time.UTC),
|
||||
},
|
||||
},
|
||||
{
|
||||
file: sakila.TblFilmActor,
|
||||
wantCols: sakila.TblFilmActorCols(),
|
||||
wantCount: sakila.TblFilmActorCount,
|
||||
wantKinds: sakila.TblFilmActorColKinds(),
|
||||
wantRec0: record.Record{
|
||||
int64(1), int64(1),
|
||||
time.Date(2020, time.February, 15, 6, 59, 32, 0, time.UTC),
|
||||
},
|
||||
},
|
||||
{
|
||||
file: sakila.TblPayment,
|
||||
wantCols: sakila.TblPaymentCols(),
|
||||
wantCount: sakila.TblPaymentCount,
|
||||
wantKinds: sakila.TblPaymentColKinds(),
|
||||
wantRec0: record.Record{
|
||||
int64(1), int64(1), int64(1), int64(76), "2.99",
|
||||
time.Date(2005, time.May, 25, 11, 30, 37, 0, time.UTC),
|
||||
time.Date(2020, time.February, 15, 6, 59, 47, 0, time.UTC),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, driver := range []source.DriverType{csv.TypeCSV, csv.TypeTSV} {
|
||||
driver := driver
|
||||
|
||||
t.Run(driver.String(), func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
for _, tc := range testCases {
|
||||
tc := tc
|
||||
|
||||
t.Run(tc.file, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
th := testh.New(t, testh.OptLongOpen())
|
||||
src := th.Add(&source.Source{
|
||||
Handle: "@" + tc.file,
|
||||
Type: driver,
|
||||
Location: filepath.Join("testdata", "sakila-"+driver.String(), tc.file+"."+driver.String()),
|
||||
})
|
||||
|
||||
sink, err := th.QuerySLQ(src.Handle+".data", nil)
|
||||
require.NoError(t, err)
|
||||
gotCols, gotKinds := sink.RecMeta.MungedNames(), sink.RecMeta.Kinds()
|
||||
require.Equal(t, tc.wantCols, gotCols)
|
||||
assert.Equal(t, tc.wantKinds, gotKinds)
|
||||
assert.Equal(t, tc.wantCount, len(sink.Recs))
|
||||
if tc.wantRec0 != nil {
|
||||
require.EqualValues(t, tc.wantRec0, sink.Recs[0])
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestQuerySQL_Count(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
@ -132,3 +220,119 @@ func TestIngestDuplicateColumns(t *testing.T) {
|
||||
data = tr.BindCSV()
|
||||
require.Equal(t, wantHeaders, data[0])
|
||||
}
|
||||
|
||||
func TestDatetime(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
denver, err := time.LoadLocation("America/Denver")
|
||||
require.NoError(t, err)
|
||||
|
||||
wantDtNanoUTC := time.Date(1989, 11, 9, 15, 17, 59, 123456700, time.UTC)
|
||||
wantDtMilliUTC := wantDtNanoUTC.Truncate(time.Millisecond)
|
||||
wantDtSecUTC := wantDtNanoUTC.Truncate(time.Second)
|
||||
wantDtMinUTC := wantDtNanoUTC.Truncate(time.Minute)
|
||||
wantDtNanoMST := time.Date(1989, 11, 9, 15, 17, 59, 123456700, denver)
|
||||
wantDtMilliMST := wantDtNanoMST.Truncate(time.Millisecond)
|
||||
wantDtSecMST := wantDtNanoMST.Truncate(time.Second)
|
||||
wantDtMinMST := wantDtNanoMST.Truncate(time.Minute)
|
||||
|
||||
testCases := []struct {
|
||||
file string
|
||||
wantHeaders []string
|
||||
wantKinds []kind.Kind
|
||||
wantVals []any
|
||||
}{
|
||||
{
|
||||
file: "test_date",
|
||||
wantHeaders: []string{"Long", "Short", "d-mmm-yy", "mm-dd-yy", "mmmm d, yyyy"},
|
||||
wantKinds: loz.Make(5, kind.Date),
|
||||
wantVals: lo.ToAnySlice(loz.Make(5,
|
||||
time.Date(1989, time.November, 9, 0, 0, 0, 0, time.UTC))),
|
||||
},
|
||||
{
|
||||
file: "test_time",
|
||||
wantHeaders: []string{"time1", "time2", "time3", "time4", "time5", "time6"},
|
||||
wantKinds: loz.Make(6, kind.Time),
|
||||
wantVals: []any{"15:17:00", "15:17:00", "15:17:00", "15:17:00", "15:17:00", "15:17:59"},
|
||||
},
|
||||
{
|
||||
file: "test_datetime",
|
||||
wantHeaders: []string{
|
||||
"ANSIC",
|
||||
"DateHourMinute",
|
||||
"DateHourMinuteSecond",
|
||||
"ISO8601",
|
||||
"ISO8601Z",
|
||||
"RFC1123",
|
||||
"RFC1123Z",
|
||||
"RFC3339",
|
||||
"RFC3339Nano",
|
||||
"RFC3339NanoZ",
|
||||
"RFC3339Z",
|
||||
"RFC8222",
|
||||
"RFC8222Z",
|
||||
"RFC850",
|
||||
"RubyDate",
|
||||
"UnixDate",
|
||||
},
|
||||
wantKinds: loz.Make(20, kind.Datetime),
|
||||
wantVals: lo.ToAnySlice([]time.Time{
|
||||
wantDtSecUTC, // ANSIC
|
||||
wantDtMinUTC, // DateHourMinute
|
||||
wantDtSecUTC, // DateHourMinuteSecond
|
||||
wantDtMilliMST, // ISO8601
|
||||
wantDtMilliUTC, // ISO8601Z
|
||||
wantDtSecMST, // RFC1123
|
||||
wantDtSecMST, // RFC1123Z
|
||||
wantDtSecMST, // RFC3339
|
||||
wantDtNanoMST, // RFC3339Nano
|
||||
wantDtNanoUTC, // RFC3339NanoZ
|
||||
wantDtSecUTC, // RFC3339Z
|
||||
wantDtMinMST, // RFC8222
|
||||
wantDtMinMST, // RFC8222Z
|
||||
wantDtSecMST, // RFC850
|
||||
wantDtSecMST, // RubyDate
|
||||
wantDtSecMST, // UnixDate
|
||||
}),
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
tc := tc
|
||||
t.Run(tc.file, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
th := testh.New(t, testh.OptLongOpen())
|
||||
src := &source.Source{
|
||||
Handle: "@tsv/" + tc.file,
|
||||
Type: csv.TypeTSV,
|
||||
Location: filepath.Join("testdata", tc.file+".tsv"),
|
||||
}
|
||||
src = th.Add(src)
|
||||
|
||||
sink, err := th.QuerySLQ(src.Handle+".data", nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
assert.Equal(t, tc.wantHeaders, sink.RecMeta.MungedNames())
|
||||
require.Len(t, sink.Recs, 1)
|
||||
t.Log(sink.Recs[0])
|
||||
|
||||
for i, col := range sink.RecMeta.MungedNames() {
|
||||
i, col := i, col
|
||||
t.Run(col, func(t *testing.T) {
|
||||
assert.Equal(t, tc.wantKinds[i].String(), sink.RecMeta.Kinds()[i].String())
|
||||
if gotTime, ok := sink.Recs[0][i].(time.Time); ok {
|
||||
// REVISIT: If it's a time value, we want to compare UTC times.
|
||||
// This may actually be a bug.
|
||||
wantTime, ok := tc.wantVals[i].(time.Time)
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, wantTime.Unix(), gotTime.Unix())
|
||||
assert.Equal(t, wantTime.UTC(), gotTime.UTC())
|
||||
} else {
|
||||
assert.EqualValues(t, tc.wantVals[i], sink.Recs[0][i])
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -247,8 +247,11 @@ func getDelimFromOptions(opts options.Options) (r rune, ok bool, err error) {
|
||||
return 0, false, nil
|
||||
}
|
||||
|
||||
val := OptDelim.Get(opts)
|
||||
if !OptDelim.IsSet(opts) {
|
||||
return 0, false, nil
|
||||
}
|
||||
|
||||
val := OptDelim.Get(opts)
|
||||
if len(val) == 1 {
|
||||
r, _ = utf8.DecodeRuneInString(val)
|
||||
return r, true, nil
|
||||
|
2
drivers/csv/testdata/test_date.tsv
vendored
Normal file
2
drivers/csv/testdata/test_date.tsv
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
Long Short d-mmm-yy mm-dd-yy "mmmm d, yyyy"
|
||||
"Thursday, November 9, 1989" 1989-11-09 9-Nov-89 11-09-89 "November 9, 1989"
|
|
2
drivers/csv/testdata/test_datetime.tsv
vendored
Normal file
2
drivers/csv/testdata/test_datetime.tsv
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
ANSIC DateHourMinute DateHourMinuteSecond ISO8601 ISO8601Z RFC1123 RFC1123Z RFC3339 RFC3339Nano RFC3339NanoZ RFC3339Z RFC8222 RFC8222Z RFC850 RubyDate UnixDate
|
||||
Thu Nov 9 15:17:59 1989 1989-11-09 15:17 1989-11-09 15:17:59 1989-11-09T15:17:59.123-07:00 1989-11-09T15:17:59.123Z Thu, 09 Nov 1989 15:17:59 MST Thu, 09 Nov 1989 15:17:59 -0700 1989-11-09T15:17:59-07:00 1989-11-09T15:17:59.1234567-07:00 1989-11-09T15:17:59.1234567Z 1989-11-09T15:17:59Z 09 Nov 89 15:17 MST 09 Nov 89 15:17 -0700 Thursday, 09-Nov-89 15:17:59 MST Thu Nov 09 15:17:59 -0700 1989 Thu Nov 9 15:17:59 MST 1989
|
|
2
drivers/csv/testdata/test_time.tsv
vendored
Normal file
2
drivers/csv/testdata/test_time.tsv
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
time1 time2 time3 time4 time5 time6
|
||||
3:17 PM 3:17PM 3:17 PM 3:17pm 15:17 15:17:59
|
|
@ -135,6 +135,7 @@ func TestKindFromDBTypeName(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
//nolint:lll
|
||||
func TestRecordMetadata(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
@ -156,22 +157,10 @@ func TestRecordMetadata(t *testing.T) {
|
||||
sqlz.RTypeNullTime,
|
||||
},
|
||||
colsMeta: []*source.ColMetadata{
|
||||
{
|
||||
Name: "actor_id", Position: 0, PrimaryKey: true, BaseType: "INTEGER", ColumnType: "INTEGER",
|
||||
Kind: kind.Int, Nullable: false,
|
||||
},
|
||||
{
|
||||
Name: "first_name", Position: 1, BaseType: "VARCHAR(45)", ColumnType: "VARCHAR(45)", Kind: kind.Text,
|
||||
Nullable: false,
|
||||
},
|
||||
{
|
||||
Name: "last_name", Position: 2, BaseType: "VARCHAR(45)", ColumnType: "VARCHAR(45)", Kind: kind.Text,
|
||||
Nullable: false,
|
||||
},
|
||||
{
|
||||
Name: "last_update", Position: 3, BaseType: "TIMESTAMP", ColumnType: "TIMESTAMP", Kind: kind.Datetime,
|
||||
Nullable: false, DefaultValue: "CURRENT_TIMESTAMP",
|
||||
},
|
||||
{Name: "actor_id", Position: 0, PrimaryKey: true, BaseType: "INTEGER", ColumnType: "INTEGER", Kind: kind.Int, Nullable: false},
|
||||
{Name: "first_name", Position: 1, BaseType: "VARCHAR(45)", ColumnType: "VARCHAR(45)", Kind: kind.Text, Nullable: false},
|
||||
{Name: "last_name", Position: 2, BaseType: "VARCHAR(45)", ColumnType: "VARCHAR(45)", Kind: kind.Text, Nullable: false},
|
||||
{Name: "last_update", Position: 3, BaseType: "TIMESTAMP", ColumnType: "TIMESTAMP", Kind: kind.Datetime, Nullable: false, DefaultValue: "CURRENT_TIMESTAMP"},
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -181,18 +170,9 @@ func TestRecordMetadata(t *testing.T) {
|
||||
colKinds: []kind.Kind{kind.Int, kind.Int, kind.Datetime},
|
||||
scanTypes: []reflect.Type{sqlz.RTypeNullInt64, sqlz.RTypeNullInt64, sqlz.RTypeNullTime},
|
||||
colsMeta: []*source.ColMetadata{
|
||||
{
|
||||
Name: "actor_id", Position: 0, PrimaryKey: true, BaseType: "INT", ColumnType: "INT", Kind: kind.Int,
|
||||
Nullable: false,
|
||||
},
|
||||
{
|
||||
Name: "film_id", Position: 1, PrimaryKey: true, BaseType: "INT", ColumnType: "INT", Kind: kind.Int,
|
||||
Nullable: false,
|
||||
},
|
||||
{
|
||||
Name: "last_update", Position: 2, BaseType: "TIMESTAMP", ColumnType: "TIMESTAMP", Kind: kind.Datetime,
|
||||
Nullable: false,
|
||||
},
|
||||
{Name: "actor_id", Position: 0, PrimaryKey: true, BaseType: "INT", ColumnType: "INT", Kind: kind.Int, Nullable: false},
|
||||
{Name: "film_id", Position: 1, PrimaryKey: true, BaseType: "INT", ColumnType: "INT", Kind: kind.Int, Nullable: false},
|
||||
{Name: "last_update", Position: 2, BaseType: "TIMESTAMP", ColumnType: "TIMESTAMP", Kind: kind.Datetime, Nullable: false},
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -205,31 +185,13 @@ func TestRecordMetadata(t *testing.T) {
|
||||
sqlz.RTypeNullInt64, sqlz.RTypeNullString, sqlz.RTypeNullTime, sqlz.RTypeNullTime,
|
||||
},
|
||||
colsMeta: []*source.ColMetadata{
|
||||
{
|
||||
Name: "payment_id", Position: 0, PrimaryKey: true, BaseType: "INT", ColumnType: "INT", Kind: kind.Int,
|
||||
Nullable: false,
|
||||
},
|
||||
{Name: "payment_id", Position: 0, PrimaryKey: true, BaseType: "INT", ColumnType: "INT", Kind: kind.Int, Nullable: false},
|
||||
{Name: "customer_id", Position: 1, BaseType: "INT", ColumnType: "INT", Kind: kind.Int, Nullable: false},
|
||||
{
|
||||
Name: "staff_id", Position: 2, BaseType: "SMALLINT", ColumnType: "SMALLINT", Kind: kind.Int,
|
||||
Nullable: false,
|
||||
},
|
||||
{
|
||||
Name: "rental_id", Position: 3, BaseType: "INT", ColumnType: "INT", Kind: kind.Int, Nullable: true,
|
||||
DefaultValue: "NULL",
|
||||
},
|
||||
{
|
||||
Name: "amount", Position: 4, BaseType: "DECIMAL(5,2)", ColumnType: "DECIMAL(5,2)", Kind: kind.Decimal,
|
||||
Nullable: false,
|
||||
},
|
||||
{
|
||||
Name: "payment_date", Position: 5, BaseType: "TIMESTAMP", ColumnType: "TIMESTAMP", Kind: kind.Datetime,
|
||||
Nullable: false,
|
||||
},
|
||||
{
|
||||
Name: "last_update", Position: 6, BaseType: "TIMESTAMP", ColumnType: "TIMESTAMP", Kind: kind.Datetime,
|
||||
Nullable: false,
|
||||
},
|
||||
{Name: "staff_id", Position: 2, BaseType: "SMALLINT", ColumnType: "SMALLINT", Kind: kind.Int, Nullable: false},
|
||||
{Name: "rental_id", Position: 3, BaseType: "INT", ColumnType: "INT", Kind: kind.Int, Nullable: true, DefaultValue: "NULL"},
|
||||
{Name: "amount", Position: 4, BaseType: "DECIMAL(5,2)", ColumnType: "DECIMAL(5,2)", Kind: kind.Decimal, Nullable: false},
|
||||
{Name: "payment_date", Position: 5, BaseType: "TIMESTAMP", ColumnType: "TIMESTAMP", Kind: kind.Datetime, Nullable: false},
|
||||
{Name: "last_update", Position: 6, BaseType: "TIMESTAMP", ColumnType: "TIMESTAMP", Kind: kind.Datetime, Nullable: false},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
@ -7,6 +7,8 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/neilotoole/sq/libsq/core/record"
|
||||
|
||||
"golang.org/x/exp/maps"
|
||||
|
||||
"github.com/neilotoole/sq/libsq/core/timez"
|
||||
@ -147,24 +149,38 @@ func TestSakila_query(t *testing.T) {
|
||||
wantCols []string
|
||||
wantCount int
|
||||
wantKinds []kind.Kind
|
||||
wantRec0 record.Record
|
||||
}{
|
||||
{
|
||||
sheet: sakila.TblActor,
|
||||
wantCols: sakila.TblActorCols(),
|
||||
wantCount: sakila.TblActorCount,
|
||||
wantKinds: sakila.TblActorColKinds(),
|
||||
wantRec0: record.Record{
|
||||
int64(1), "PENELOPE", "GUINESS",
|
||||
time.Date(2020, time.February, 15, 6, 59, 28, 0, time.UTC),
|
||||
},
|
||||
},
|
||||
{
|
||||
sheet: sakila.TblFilmActor,
|
||||
wantCols: sakila.TblFilmActorCols(),
|
||||
wantCount: sakila.TblFilmActorCount,
|
||||
wantKinds: sakila.TblFilmActorColKinds(),
|
||||
wantRec0: record.Record{
|
||||
int64(1), int64(1),
|
||||
time.Date(2020, time.February, 15, 6, 59, 32, 0, time.UTC),
|
||||
},
|
||||
},
|
||||
{
|
||||
sheet: sakila.TblPayment,
|
||||
wantCols: sakila.TblPaymentCols(),
|
||||
wantCount: sakila.TblPaymentCount,
|
||||
wantKinds: sakila.TblPaymentColKinds(),
|
||||
wantRec0: record.Record{
|
||||
int64(1), int64(1), int64(1), int64(76), "2.99",
|
||||
time.Date(2005, time.May, 25, 11, 30, 37, 0, time.UTC),
|
||||
time.Date(2020, time.February, 15, 6, 59, 47, 0, time.UTC),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -391,15 +391,19 @@ func detectKindDate(s string) (ok bool, format string) {
|
||||
time.DateOnly,
|
||||
"02 Jan 2006",
|
||||
"2006/01/02",
|
||||
"2006-01-02",
|
||||
"01-02-06",
|
||||
"01-02-2006",
|
||||
"02-Jan-2006",
|
||||
"2-Jan-2006",
|
||||
"2-Jan-06",
|
||||
"Jan _2, 2006",
|
||||
"Jan 2, 2006",
|
||||
timez.ExcelLongDate,
|
||||
"Mon, January 2, 2006",
|
||||
"Mon, Jan 2, 2006",
|
||||
"January 2, 2006",
|
||||
"_2/Jan/06",
|
||||
"2/Jan/06",
|
||||
}
|
||||
var err error
|
||||
|
||||
@ -423,10 +427,6 @@ var datetimeFormats = []string{
|
||||
time.RFC850,
|
||||
time.RFC1123Z,
|
||||
time.RFC1123,
|
||||
time.StampNano,
|
||||
time.StampMicro,
|
||||
time.StampMilli,
|
||||
time.Stamp,
|
||||
timez.DateHourMinuteSecond,
|
||||
timez.DateHourMinute,
|
||||
timez.ExcelLongDate,
|
||||
|
@ -11,7 +11,7 @@ import (
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestDetectDatetime(t *testing.T) {
|
||||
func TestDetectKindDatetime(t *testing.T) {
|
||||
denver, err := time.LoadLocation("America/Denver")
|
||||
require.NoError(t, err)
|
||||
tm := time.Date(1989, 11, 9, 15, 17, 59, 123456700, denver)
|
||||
@ -30,3 +30,99 @@ func TestDetectDatetime(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectKindDate(t *testing.T) {
|
||||
valid := []string{
|
||||
"2006-01-02",
|
||||
"02 Jan 2006",
|
||||
"2006-01-02",
|
||||
"01-02-06",
|
||||
"01-02-2006",
|
||||
"02-Jan-2006",
|
||||
"2-Jan-2006",
|
||||
"2-Jan-06",
|
||||
"Jan 2, 2006",
|
||||
"Jan 2, 2006",
|
||||
"January 2, 2006",
|
||||
"02/Jan/06",
|
||||
"2/Jan/06",
|
||||
"Monday, January 2, 2006",
|
||||
"Mon, January 2, 2006",
|
||||
}
|
||||
|
||||
for i, input := range valid {
|
||||
input := input
|
||||
t.Run(tutil.Name("valid", i, input), func(t *testing.T) {
|
||||
t.Log(input)
|
||||
ok, gotF := detectKindDate(input)
|
||||
require.True(t, ok)
|
||||
|
||||
tm, err := time.Parse(gotF, input)
|
||||
require.NoError(t, err)
|
||||
t.Log(tm)
|
||||
})
|
||||
}
|
||||
|
||||
invalid := []string{
|
||||
"not a date",
|
||||
"",
|
||||
"2006",
|
||||
"January",
|
||||
"Monday",
|
||||
}
|
||||
|
||||
for i, input := range invalid {
|
||||
input := input
|
||||
t.Run(tutil.Name("invalid", i, input), func(t *testing.T) {
|
||||
t.Log(input)
|
||||
ok, gotF := detectKindDate(input)
|
||||
require.False(t, ok)
|
||||
require.Empty(t, gotF)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectKindTime(t *testing.T) {
|
||||
valid := []string{
|
||||
"3:17 PM",
|
||||
"3:17PM",
|
||||
"3:17 PM",
|
||||
"3:17pm",
|
||||
"15:17",
|
||||
"15:17:59",
|
||||
"3:17",
|
||||
"03:17",
|
||||
}
|
||||
|
||||
for i, input := range valid {
|
||||
input := input
|
||||
t.Run(tutil.Name("valid", i, input), func(t *testing.T) {
|
||||
t.Log(input)
|
||||
ok, gotF := detectKindTime(input)
|
||||
require.True(t, ok)
|
||||
|
||||
tm, err := time.Parse(gotF, input)
|
||||
require.NoError(t, err)
|
||||
t.Log(tm)
|
||||
})
|
||||
}
|
||||
|
||||
invalid := []string{
|
||||
"not a time",
|
||||
"",
|
||||
"3",
|
||||
"15",
|
||||
"15 17",
|
||||
"02-Jan-2006",
|
||||
}
|
||||
|
||||
for i, input := range invalid {
|
||||
input := input
|
||||
t.Run(tutil.Name("invalid", i, input), func(t *testing.T) {
|
||||
t.Log(input)
|
||||
ok, gotF := detectKindTime(input)
|
||||
require.False(t, ok)
|
||||
require.Empty(t, gotF)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -63,31 +63,27 @@ func TestKind(t *testing.T) {
|
||||
}
|
||||
|
||||
const (
|
||||
fixtTime1 = "00:00:00"
|
||||
fixtTime2 = "08:30:05"
|
||||
fixtTime3 = "15:30"
|
||||
fixtTime4 = "7:15PM"
|
||||
fixtTime5 = "7:15 PM"
|
||||
fixtTime6 = "7:15pm"
|
||||
fixtTime7 = "7:15 pm"
|
||||
fixtDate1 = "1970-01-01"
|
||||
fixtDate2 = "1989-11-09"
|
||||
fixtDate3 = "02 Jan 2006"
|
||||
fixtDate4 = "2006/01/02"
|
||||
fixtDate5 = "01-02-2006"
|
||||
fixtDate6 = "02-Jan-2006"
|
||||
fixtDate7 = "Jan 2, 2006"
|
||||
fixtDate8 = "Jan 20, 2006"
|
||||
fixtDateExcelLong = "Wednesday, March 1, 2023"
|
||||
fixtDatetime1 = "1970-01-01T00:00:00Z" // RFC3339Nano
|
||||
fixtDatetime2 = "1989-11-09T00:00:00Z"
|
||||
fixtDatetimeAnsic = "Mon Jan 2 15:04:05 2006"
|
||||
fixtDatetimeUnix = "Mon Jan 2 15:04:05 MST 2006"
|
||||
fixtDatetimeRFC3339 = "2002-10-02T10:00:00-05:00"
|
||||
fixtDatetimeStamp = "Jan 2 15:04:05"
|
||||
fixtDatetimeStampMilli = "Jan 2 15:04:05.000"
|
||||
fixtDatetimeStampMicro = "Jan 2 15:04:05.000000"
|
||||
fixtDatetimeStampNano = "Jan 2 15:04:05.000000000"
|
||||
fixtTime1 = "00:00:00"
|
||||
fixtTime2 = "08:30:05"
|
||||
fixtTime3 = "15:30"
|
||||
fixtTime4 = "7:15PM"
|
||||
fixtTime5 = "7:15 PM"
|
||||
fixtTime6 = "7:15pm"
|
||||
fixtTime7 = "7:15 pm"
|
||||
fixtDate1 = "1970-01-01"
|
||||
fixtDate2 = "1989-11-09"
|
||||
fixtDate3 = "02 Jan 2006"
|
||||
fixtDate4 = "2006/01/02"
|
||||
fixtDate5 = "01-02-2006"
|
||||
fixtDate6 = "02-Jan-2006"
|
||||
fixtDate7 = "Jan 2, 2006"
|
||||
fixtDate8 = "Jan 20, 2006"
|
||||
fixtDateExcelLong = "Wednesday, March 1, 2023"
|
||||
fixtDatetime1 = "1970-01-01T00:00:00Z" // RFC3339Nano
|
||||
fixtDatetime2 = "1989-11-09T00:00:00Z"
|
||||
fixtDatetimeAnsic = "Mon Jan 2 15:04:05 2006"
|
||||
fixtDatetimeUnix = "Mon Jan 2 15:04:05 MST 2006"
|
||||
fixtDatetimeRFC3339 = "2002-10-02T10:00:00-05:00"
|
||||
)
|
||||
|
||||
func TestDetector(t *testing.T) {
|
||||
@ -145,10 +141,6 @@ func TestDetector(t *testing.T) {
|
||||
{in: []any{time.RFC1123}, want: kind.Datetime, wantMunge: true},
|
||||
{in: []any{time.RFC1123Z}, want: kind.Datetime, wantMunge: true},
|
||||
{in: []any{fixtDatetimeRFC3339}, want: kind.Datetime, wantMunge: true},
|
||||
{in: []any{fixtDatetimeStamp}, want: kind.Datetime, wantMunge: true},
|
||||
{in: []any{fixtDatetimeStampMilli}, want: kind.Datetime, wantMunge: true},
|
||||
{in: []any{fixtDatetimeStampMicro}, want: kind.Datetime, wantMunge: true},
|
||||
{in: []any{fixtDatetimeStampNano}, want: kind.Datetime, wantMunge: true},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
|
Loading…
Reference in New Issue
Block a user