2020-08-06 20:58:47 +03:00
|
|
|
package csv
|
|
|
|
|
|
|
|
import (
|
2020-08-23 13:42:15 +03:00
|
|
|
"bytes"
|
2020-08-06 20:58:47 +03:00
|
|
|
"context"
|
|
|
|
"encoding/csv"
|
|
|
|
"fmt"
|
2022-12-18 02:11:33 +03:00
|
|
|
"io"
|
2020-08-06 20:58:47 +03:00
|
|
|
"strings"
|
|
|
|
"testing"
|
|
|
|
|
|
|
|
"github.com/stretchr/testify/require"
|
|
|
|
|
2020-08-23 13:42:15 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/core/kind"
|
2023-11-20 04:06:36 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/core/stringz"
|
2024-01-15 04:45:34 +03:00
|
|
|
"github.com/neilotoole/sq/testh/tu"
|
2020-08-06 20:58:47 +03:00
|
|
|
)
|
|
|
|
|
|
|
|
func Test_isCSV(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
|
|
|
|
const (
|
|
|
|
comma = ','
|
|
|
|
tab = '\t'
|
|
|
|
)
|
|
|
|
|
|
|
|
testCases := []struct {
|
|
|
|
delim rune
|
|
|
|
want float32
|
|
|
|
input string
|
|
|
|
}{
|
|
|
|
{delim: comma, input: "", want: scoreNo},
|
|
|
|
{delim: comma, input: "a", want: scoreMaybe},
|
|
|
|
{delim: comma, input: "a,b", want: scoreMaybe},
|
|
|
|
{delim: comma, input: "a,b\n", want: scoreMaybe},
|
|
|
|
{delim: comma, input: "a,b\na,b", want: scoreProbably},
|
|
|
|
{delim: comma, input: "a,b,c\na,b,c\na,b,c", want: scoreYes},
|
|
|
|
{delim: comma, input: "a,b\na,b,c", want: scoreNo}, // Fields per record not equal
|
|
|
|
{delim: tab, input: "", want: scoreNo},
|
|
|
|
{delim: tab, input: "a", want: scoreMaybe},
|
|
|
|
{delim: tab, input: "a\tb", want: scoreMaybe},
|
|
|
|
{delim: tab, input: "a\tb\n", want: scoreMaybe},
|
|
|
|
{delim: tab, input: "a\tb\na\tb", want: scoreProbably},
|
|
|
|
{delim: tab, input: "a\tb\tc\na\tb\tc\na\tb\tc", want: scoreYes},
|
|
|
|
{delim: tab, input: "a\tb\na\tb\tc", want: scoreNo}, // Fields per record not equal
|
|
|
|
}
|
|
|
|
|
|
|
|
for i, tc := range testCases {
|
|
|
|
tc := tc
|
|
|
|
|
|
|
|
t.Run(fmt.Sprintf("%d %s", i, tc.input), func(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
|
|
|
|
cr := csv.NewReader(&crFilterReader{r: strings.NewReader(tc.input)})
|
|
|
|
cr.Comma = tc.delim
|
|
|
|
|
|
|
|
got := isCSV(context.Background(), cr)
|
|
|
|
require.Equal(t, tc.want, got)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-04-09 17:44:27 +03:00
|
|
|
func Test_detectColKinds(t *testing.T) {
|
2020-08-06 20:58:47 +03:00
|
|
|
testCases := []struct {
|
2023-04-09 17:44:27 +03:00
|
|
|
name string
|
|
|
|
recs [][]string
|
|
|
|
wantKinds []kind.Kind
|
|
|
|
wantErr bool
|
2020-08-06 20:58:47 +03:00
|
|
|
}{
|
|
|
|
{
|
2023-04-09 17:44:27 +03:00
|
|
|
name: "empty",
|
|
|
|
recs: [][]string{},
|
|
|
|
wantErr: true,
|
2020-08-06 20:58:47 +03:00
|
|
|
},
|
|
|
|
{
|
2023-04-09 17:44:27 +03:00
|
|
|
name: "basic",
|
|
|
|
recs: [][]string{
|
2020-08-06 20:58:47 +03:00
|
|
|
{"1", "true", "hello", "0.0"},
|
|
|
|
{"2", "false", "world", "1"},
|
|
|
|
{"3", "true", "", "7.7"},
|
|
|
|
{"", "", "", ""},
|
|
|
|
},
|
2020-08-23 13:42:15 +03:00
|
|
|
wantKinds: []kind.Kind{kind.Int, kind.Bool, kind.Text, kind.Decimal},
|
2020-08-06 20:58:47 +03:00
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
for i, tc := range testCases {
|
|
|
|
tc := tc
|
2024-01-15 04:45:34 +03:00
|
|
|
t.Run(tu.Name(i, tc.name), func(t *testing.T) {
|
2023-04-09 17:44:27 +03:00
|
|
|
gotKinds, _, gotErr := detectColKinds(tc.recs)
|
|
|
|
if tc.wantErr {
|
|
|
|
require.Error(t, gotErr)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
require.NoError(t, gotErr)
|
|
|
|
require.Equal(t, stringz.Strings(tc.wantKinds), stringz.Strings(gotKinds))
|
2020-08-06 20:58:47 +03:00
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
2020-08-23 13:42:15 +03:00
|
|
|
|
|
|
|
func TestCRFilterReader(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
|
|
|
|
testCases := []struct {
|
|
|
|
in string
|
|
|
|
want string
|
|
|
|
}{
|
|
|
|
{"", ""},
|
|
|
|
{"\r", "\n"},
|
|
|
|
{"\r\n", "\r\n"},
|
|
|
|
{"\r\r\n", "\n\r\n"},
|
|
|
|
{"a\rb\rc", "a\nb\nc"},
|
|
|
|
{" \r ", " \n "},
|
|
|
|
{" \r\n\n", " \r\n\n"},
|
|
|
|
{"\r \n", "\n \n"},
|
|
|
|
{"abc\r", "abc\n"},
|
|
|
|
{"abc\r\n\r", "abc\r\n\n"},
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, tc := range testCases {
|
|
|
|
filter := &crFilterReader{r: bytes.NewReader([]byte(tc.in))}
|
2022-12-18 02:11:33 +03:00
|
|
|
actual, err := io.ReadAll(filter)
|
2020-08-23 13:42:15 +03:00
|
|
|
require.Nil(t, err)
|
|
|
|
require.Equal(t, tc.want, string(actual))
|
|
|
|
}
|
|
|
|
}
|