2020-08-06 20:58:47 +03:00
|
|
|
|
package xmlud_test
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"bytes"
|
|
|
|
|
"testing"
|
|
|
|
|
|
2023-04-26 18:16:42 +03:00
|
|
|
|
"github.com/neilotoole/sq/libsq/core/ioz"
|
|
|
|
|
|
2023-04-09 17:44:27 +03:00
|
|
|
|
"github.com/neilotoole/sq/libsq/core/stringz"
|
2022-12-18 10:18:35 +03:00
|
|
|
|
|
2021-01-02 04:57:07 +03:00
|
|
|
|
"github.com/stretchr/testify/assert"
|
2020-08-06 20:58:47 +03:00
|
|
|
|
"github.com/stretchr/testify/require"
|
|
|
|
|
|
|
|
|
|
"github.com/neilotoole/sq/cli/config"
|
|
|
|
|
"github.com/neilotoole/sq/drivers/userdriver/xmlud"
|
|
|
|
|
"github.com/neilotoole/sq/testh"
|
|
|
|
|
"github.com/neilotoole/sq/testh/proj"
|
|
|
|
|
"github.com/neilotoole/sq/testh/testsrc"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// Names of the user driver definitions that the tests in this file expect
// to find in the corresponding driver definition files.
const (
	// driverRSS is the name TestImport_RSS expects the loaded driver
	// definition to have.
	driverRSS = "rss"

	// driverPpl is the name TestImport_Ppl expects the loaded driver
	// definition to have.
	driverPpl = "ppl"
)
|
|
|
|
|
|
|
|
|
|
func TestImport_Ppl(t *testing.T) {
|
|
|
|
|
th := testh.New(t)
|
|
|
|
|
|
|
|
|
|
ext := &config.Ext{}
|
2023-04-26 18:16:42 +03:00
|
|
|
|
require.NoError(t, ioz.UnmarshallYAML(proj.ReadFile(testsrc.PathDriverDefPpl), ext))
|
2020-08-06 20:58:47 +03:00
|
|
|
|
require.Equal(t, 1, len(ext.UserDrivers))
|
|
|
|
|
udDef := ext.UserDrivers[0]
|
|
|
|
|
require.Equal(t, driverPpl, udDef.Name)
|
|
|
|
|
require.Equal(t, xmlud.Genre, udDef.Genre)
|
|
|
|
|
|
|
|
|
|
scratchDB, err := th.Databases().OpenScratch(th.Context, "ppl")
|
|
|
|
|
require.NoError(t, err)
|
2021-01-02 04:57:07 +03:00
|
|
|
|
t.Cleanup(func() {
|
|
|
|
|
assert.NoError(t, scratchDB.Close())
|
|
|
|
|
})
|
2020-08-06 20:58:47 +03:00
|
|
|
|
|
|
|
|
|
data := proj.ReadFile("drivers/userdriver/xmlud/testdata/people.xml")
|
2023-04-02 22:49:45 +03:00
|
|
|
|
err = xmlud.Import(th.Context, udDef, bytes.NewReader(data), scratchDB)
|
2020-08-06 20:58:47 +03:00
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
|
|
srcMeta, err := scratchDB.SourceMetadata(th.Context)
|
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
require.Equal(t, 2, len(srcMeta.Tables))
|
|
|
|
|
require.Equal(t, "person", srcMeta.Tables[0].Name)
|
|
|
|
|
require.Equal(t, "skill", srcMeta.Tables[1].Name)
|
|
|
|
|
|
|
|
|
|
sink, err := th.QuerySQL(scratchDB.Source(), "SELECT * FROM person")
|
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
require.Equal(t, 3, len(sink.Recs))
|
2023-04-09 17:44:27 +03:00
|
|
|
|
require.Equal(t, "Nikola", stringz.Val(sink.Recs[0][1]))
|
2020-08-06 20:58:47 +03:00
|
|
|
|
for i, rec := range sink.Recs {
|
|
|
|
|
// Verify that the primary id cols are sequential
|
2023-04-09 17:44:27 +03:00
|
|
|
|
require.Equal(t, int64(i+1), stringz.Val(rec[0]))
|
2020-08-06 20:58:47 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sink, err = th.QuerySQL(scratchDB.Source(), "SELECT * FROM skill")
|
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
require.Equal(t, 6, len(sink.Recs))
|
2023-04-09 17:44:27 +03:00
|
|
|
|
require.Equal(t, "Electrifying", stringz.Val(sink.Recs[0][2]))
|
2020-08-06 20:58:47 +03:00
|
|
|
|
for i, rec := range sink.Recs {
|
|
|
|
|
// Verify that the primary id cols are sequential
|
2023-04-09 17:44:27 +03:00
|
|
|
|
require.Equal(t, int64(i+1), stringz.Val(rec[0]))
|
2020-08-06 20:58:47 +03:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func TestImport_RSS(t *testing.T) {
|
|
|
|
|
th := testh.New(t)
|
|
|
|
|
|
|
|
|
|
ext := &config.Ext{}
|
2023-04-26 18:16:42 +03:00
|
|
|
|
require.NoError(t, ioz.UnmarshallYAML(proj.ReadFile(testsrc.PathDriverDefRSS), ext))
|
2020-08-06 20:58:47 +03:00
|
|
|
|
require.Equal(t, 1, len(ext.UserDrivers))
|
|
|
|
|
udDef := ext.UserDrivers[0]
|
|
|
|
|
require.Equal(t, driverRSS, udDef.Name)
|
|
|
|
|
require.Equal(t, xmlud.Genre, udDef.Genre)
|
|
|
|
|
|
|
|
|
|
scratchDB, err := th.Databases().OpenScratch(th.Context, "rss")
|
|
|
|
|
require.NoError(t, err)
|
2021-01-02 04:57:07 +03:00
|
|
|
|
t.Cleanup(func() {
|
|
|
|
|
assert.NoError(t, scratchDB.Close())
|
|
|
|
|
})
|
2020-08-06 20:58:47 +03:00
|
|
|
|
|
|
|
|
|
data := proj.ReadFile("drivers/userdriver/xmlud/testdata/nytimes_local.rss.xml")
|
2023-04-02 22:49:45 +03:00
|
|
|
|
err = xmlud.Import(th.Context, udDef, bytes.NewReader(data), scratchDB)
|
2020-08-06 20:58:47 +03:00
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
|
|
srcMeta, err := scratchDB.SourceMetadata(th.Context)
|
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
require.Equal(t, 3, len(srcMeta.Tables))
|
|
|
|
|
require.Equal(t, "category", srcMeta.Tables[0].Name)
|
|
|
|
|
require.Equal(t, "channel", srcMeta.Tables[1].Name)
|
|
|
|
|
require.Equal(t, "item", srcMeta.Tables[2].Name)
|
|
|
|
|
|
|
|
|
|
sink, err := th.QuerySQL(scratchDB.Source(), "SELECT * FROM channel")
|
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
require.Equal(t, 1, len(sink.Recs))
|
2023-04-09 17:44:27 +03:00
|
|
|
|
require.Equal(t, "NYT > World", stringz.Val(sink.Recs[0][1]))
|
2020-08-06 20:58:47 +03:00
|
|
|
|
for i, rec := range sink.Recs {
|
|
|
|
|
// Verify that the primary id cols are sequential
|
2023-04-09 17:44:27 +03:00
|
|
|
|
require.Equal(t, int64(i+1), stringz.Val(rec[0]))
|
2020-08-06 20:58:47 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sink, err = th.QuerySQL(scratchDB.Source(), "SELECT * FROM category")
|
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
require.Equal(t, 251, len(sink.Recs))
|
2023-04-09 17:44:27 +03:00
|
|
|
|
require.EqualValues(t, "Extradition", stringz.Val(sink.Recs[0][2]))
|
2020-08-06 20:58:47 +03:00
|
|
|
|
for i, rec := range sink.Recs {
|
|
|
|
|
// Verify that the primary id cols are sequential
|
2023-04-09 17:44:27 +03:00
|
|
|
|
require.Equal(t, int64(i+1), stringz.Val(rec[0]))
|
2020-08-06 20:58:47 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sink, err = th.QuerySQL(scratchDB.Source(), "SELECT * FROM item")
|
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
require.Equal(t, 45, len(sink.Recs))
|
2022-12-18 10:18:35 +03:00
|
|
|
|
require.EqualValues(t, "Trilobites: Fishing for Clues to Solve Namibia’s Fairy Circle Mystery",
|
2023-04-09 17:44:27 +03:00
|
|
|
|
stringz.Val(sink.Recs[17][4]))
|
2020-08-06 20:58:47 +03:00
|
|
|
|
for i, rec := range sink.Recs {
|
|
|
|
|
// Verify that the primary id cols are sequential
|
2023-04-09 17:44:27 +03:00
|
|
|
|
require.Equal(t, int64(i+1), stringz.Val(rec[0]))
|
2020-08-06 20:58:47 +03:00
|
|
|
|
}
|
|
|
|
|
}
|