sq/drivers/csv/internal_test.go
Neil O'Toole ad23d5c116
Auto-detect CSV headers (#188)
* wip: initial header detection

* wip: refactored csv driver

* wip: Fixes to kind.Detector

* typo

* Refactor pkg csv files

* Update to changelog

* workflow: now running codacy and codeql on cron, not on push/pr
2023-04-09 08:44:27 -06:00

130 lines
2.8 KiB
Go

package csv
import (
"bytes"
"context"
"encoding/csv"
"fmt"
"io"
"strings"
"testing"
"github.com/neilotoole/sq/libsq/core/stringz"
"github.com/neilotoole/sq/testh/tutil"
"github.com/stretchr/testify/require"
"github.com/neilotoole/sq/libsq/core/kind"
)
func Test_isCSV(t *testing.T) {
t.Parallel()
const (
comma = ','
tab = '\t'
)
testCases := []struct {
delim rune
want float32
input string
}{
{delim: comma, input: "", want: scoreNo},
{delim: comma, input: "a", want: scoreMaybe},
{delim: comma, input: "a,b", want: scoreMaybe},
{delim: comma, input: "a,b\n", want: scoreMaybe},
{delim: comma, input: "a,b\na,b", want: scoreProbably},
{delim: comma, input: "a,b,c\na,b,c\na,b,c", want: scoreYes},
{delim: comma, input: "a,b\na,b,c", want: scoreNo}, // Fields per record not equal
{delim: tab, input: "", want: scoreNo},
{delim: tab, input: "a", want: scoreMaybe},
{delim: tab, input: "a\tb", want: scoreMaybe},
{delim: tab, input: "a\tb\n", want: scoreMaybe},
{delim: tab, input: "a\tb\na\tb", want: scoreProbably},
{delim: tab, input: "a\tb\tc\na\tb\tc\na\tb\tc", want: scoreYes},
{delim: tab, input: "a\tb\na\tb\tc", want: scoreNo}, // Fields per record not equal
}
for i, tc := range testCases {
tc := tc
t.Run(fmt.Sprintf("%d %s", i, tc.input), func(t *testing.T) {
t.Parallel()
cr := csv.NewReader(&crFilterReader{r: strings.NewReader(tc.input)})
cr.Comma = tc.delim
got := isCSV(context.Background(), cr)
require.Equal(t, tc.want, got)
})
}
}
func Test_detectColKinds(t *testing.T) {
testCases := []struct {
name string
recs [][]string
wantKinds []kind.Kind
wantErr bool
}{
{
name: "empty",
recs: [][]string{},
wantErr: true,
},
{
name: "basic",
recs: [][]string{
{"1", "true", "hello", "0.0"},
{"2", "false", "world", "1"},
{"3", "true", "", "7.7"},
{"", "", "", ""},
},
wantKinds: []kind.Kind{kind.Int, kind.Bool, kind.Text, kind.Decimal},
},
}
for i, tc := range testCases {
tc := tc
t.Run(tutil.Name(i, tc.name), func(t *testing.T) {
gotKinds, _, gotErr := detectColKinds(tc.recs)
if tc.wantErr {
require.Error(t, gotErr)
return
}
require.NoError(t, gotErr)
require.Equal(t, stringz.Strings(tc.wantKinds), stringz.Strings(gotKinds))
})
}
}
func TestCRFilterReader(t *testing.T) {
t.Parallel()
testCases := []struct {
in string
want string
}{
{"", ""},
{"\r", "\n"},
{"\r\n", "\r\n"},
{"\r\r\n", "\n\r\n"},
{"a\rb\rc", "a\nb\nc"},
{" \r ", " \n "},
{" \r\n\n", " \r\n\n"},
{"\r \n", "\n \n"},
{"abc\r", "abc\n"},
{"abc\r\n\r", "abc\r\n\n"},
}
for _, tc := range testCases {
filter := &crFilterReader{r: bytes.NewReader([]byte(tc.in))}
actual, err := io.ReadAll(filter)
require.Nil(t, err)
require.Equal(t, tc.want, string(actual))
}
}