2020-08-23 13:42:15 +03:00
|
|
|
package json
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2020-08-24 05:32:59 +03:00
|
|
|
"fmt"
|
2020-10-20 18:05:43 +03:00
|
|
|
"io"
|
2020-08-23 13:42:15 +03:00
|
|
|
"os"
|
|
|
|
"testing"
|
|
|
|
|
|
|
|
"github.com/stretchr/testify/require"
|
|
|
|
|
2020-08-24 05:32:59 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/core/kind"
|
2020-10-20 18:05:43 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/driver"
|
2024-01-25 09:29:55 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/files"
|
2020-10-20 18:05:43 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/source"
|
2020-08-23 13:42:15 +03:00
|
|
|
"github.com/neilotoole/sq/testh/sakila"
|
|
|
|
)
|
|
|
|
|
2022-12-18 10:18:35 +03:00
|
|
|
// export for testing.
|
2020-10-20 18:05:43 +03:00
|
|
|
var (
|
2024-01-15 04:45:34 +03:00
|
|
|
IngestJSON = ingestJSON
|
|
|
|
IngestJSONA = ingestJSONA
|
|
|
|
IngestJSONL = ingestJSONL
|
2020-10-20 18:05:43 +03:00
|
|
|
ColumnOrderFlat = columnOrderFlat
|
2024-01-15 04:45:34 +03:00
|
|
|
NewIngestJob = newIngestJob
|
2020-10-20 18:05:43 +03:00
|
|
|
)
|
|
|
|
|
2024-01-15 04:45:34 +03:00
|
|
|
// newIngestJob is a constructor for the unexported ingestJob type.
|
2020-10-20 18:05:43 +03:00
|
|
|
// If sampleSize <= 0, a default value is used.
|
2024-01-25 09:29:55 +03:00
|
|
|
func newIngestJob(fromSrc *source.Source, newRdrFn files.NewReaderFunc, destGrip driver.Grip, sampleSize int,
|
2022-12-18 11:35:59 +03:00
|
|
|
flatten bool,
|
2024-01-25 19:21:56 +03:00
|
|
|
) *ingestJob {
|
2020-10-20 18:05:43 +03:00
|
|
|
if sampleSize <= 0 {
|
2023-07-04 20:31:47 +03:00
|
|
|
sampleSize = driver.OptIngestSampleSize.Get(fromSrc.Options)
|
2020-10-20 18:05:43 +03:00
|
|
|
}
|
|
|
|
|
2024-01-25 19:21:56 +03:00
|
|
|
return &ingestJob{
|
2020-10-20 18:05:43 +03:00
|
|
|
fromSrc: fromSrc,
|
2024-01-25 07:01:24 +03:00
|
|
|
newRdrFn: newRdrFn,
|
2024-01-15 04:45:34 +03:00
|
|
|
destGrip: destGrip,
|
2020-10-20 18:05:43 +03:00
|
|
|
sampleSize: sampleSize,
|
|
|
|
flatten: flatten,
|
2024-01-25 19:21:56 +03:00
|
|
|
stmtCache: map[string]*driver.StmtExecer{},
|
2020-10-20 18:05:43 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestDetectColKindsJSONA(t *testing.T) {
|
2024-01-15 04:45:34 +03:00
|
|
|
t.Parallel()
|
|
|
|
|
2020-08-24 05:32:59 +03:00
|
|
|
testCases := []struct {
|
|
|
|
tbl string
|
|
|
|
wantKinds []kind.Kind
|
|
|
|
}{
|
|
|
|
{tbl: sakila.TblActor, wantKinds: sakila.TblActorColKinds()},
|
|
|
|
{tbl: sakila.TblFilmActor, wantKinds: sakila.TblFilmActorColKinds()},
|
|
|
|
{tbl: sakila.TblPayment, wantKinds: sakila.TblPaymentColKinds()},
|
|
|
|
}
|
2020-08-23 13:42:15 +03:00
|
|
|
|
2020-08-24 05:32:59 +03:00
|
|
|
for _, tc := range testCases {
|
|
|
|
tc := tc
|
|
|
|
|
|
|
|
t.Run(tc.tbl, func(t *testing.T) {
|
2024-01-15 04:45:34 +03:00
|
|
|
t.Parallel()
|
|
|
|
|
2020-08-24 05:32:59 +03:00
|
|
|
f, err := os.Open(fmt.Sprintf("testdata/%s.jsona", tc.tbl))
|
|
|
|
require.NoError(t, err)
|
|
|
|
t.Cleanup(func() { require.NoError(t, f.Close()) })
|
|
|
|
|
2023-05-03 15:36:10 +03:00
|
|
|
kinds, _, err := detectColKindsJSONA(context.Background(), f, 1000)
|
2020-08-24 05:32:59 +03:00
|
|
|
require.NoError(t, err)
|
|
|
|
require.Equal(t, tc.wantKinds, kinds)
|
|
|
|
})
|
|
|
|
}
|
2020-08-23 13:42:15 +03:00
|
|
|
}
|
2020-10-20 18:05:43 +03:00
|
|
|
|
|
|
|
// ScanObjectsInArray is a convenience function
|
|
|
|
// for objectsInArrayScanner.
|
2022-12-17 02:34:33 +03:00
|
|
|
func ScanObjectsInArray(r io.Reader) (objs []map[string]any, chunks [][]byte, err error) {
|
2020-10-20 18:05:43 +03:00
|
|
|
sc := newObjectInArrayScanner(r)
|
|
|
|
|
|
|
|
for {
|
2022-12-17 02:34:33 +03:00
|
|
|
var obj map[string]any
|
2020-10-20 18:05:43 +03:00
|
|
|
var chunk []byte
|
|
|
|
|
|
|
|
obj, chunk, err = sc.next()
|
|
|
|
if err != nil {
|
|
|
|
return nil, nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
if obj == nil {
|
|
|
|
// No more objects to be scanned
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
objs = append(objs, obj)
|
|
|
|
chunks = append(chunks, chunk)
|
|
|
|
}
|
|
|
|
|
|
|
|
return objs, chunks, nil
|
|
|
|
}
|