mirror of
https://github.com/hasura/graphql-engine.git
synced 2025-01-05 22:34:22 +03:00
Export Chinook dataset to JSON lines for Athena consumption
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6489 GitOrigin-RevId: 36a068179d595f3d838995fb057b8ee590d1fd02
This commit is contained in:
parent
ef386ccee1
commit
792ef3117d
@ -102,7 +102,10 @@ test-suite tests-dc-api
|
||||
data-fix,
|
||||
deepseq,
|
||||
dc-api,
|
||||
directory,
|
||||
file-embed,
|
||||
filepath,
|
||||
graphql-parser,
|
||||
hashable,
|
||||
hspec,
|
||||
hspec-core,
|
||||
@ -119,6 +122,7 @@ test-suite tests-dc-api
|
||||
servant-client-core,
|
||||
servant-openapi3,
|
||||
text,
|
||||
time,
|
||||
unordered-containers,
|
||||
vector,
|
||||
xml-conduit,
|
||||
@ -134,6 +138,7 @@ test-suite tests-dc-api
|
||||
Command
|
||||
Paths_dc_api
|
||||
Test.Data
|
||||
Test.DataExport
|
||||
Test.CapabilitiesSpec
|
||||
Test.Expectations
|
||||
Test.HealthSpec
|
||||
|
@ -3,6 +3,8 @@ module Command
|
||||
TestConfig (..),
|
||||
NameCasing (..),
|
||||
TestOptions (..),
|
||||
ExportDataConfig (..),
|
||||
ExportFormat (..),
|
||||
parseCommandLine,
|
||||
)
|
||||
where
|
||||
@ -22,6 +24,7 @@ import Prelude
|
||||
-- | The top-level commands accepted on the command line.
data Command
  = -- | Test command, carrying its 'TestOptions'.
    Test TestOptions
  | -- | Export the OpenAPI specification.
    ExportOpenAPISpec
  | -- | Export the dataset, as described by the given 'ExportDataConfig'.
    ExportData ExportDataConfig
|
||||
|
||||
data TestConfig = TestConfig
|
||||
{ _tcTableNamePrefix :: [Text],
|
||||
@ -45,6 +48,17 @@ data TestOptions = TestOptions
|
||||
_toExportMatchStrings :: Bool
|
||||
}
|
||||
|
||||
-- | Options accepted by the @export-data@ command.
data ExportDataConfig = ExportDataConfig
  { -- | Directory the data files are exported into.
    _edcDirectory :: FilePath,
    -- | File format to export.
    _edcFormat :: ExportFormat,
    -- | Optional format string used when formatting DateTime columns.
    _edcDateTimeFormat :: Maybe String
  }
|
||||
|
||||
-- | The file formats that data can be exported in.
--
-- The derived 'Read' instance is what the @--format@ option parser relies
-- on, so the constructor names are also the values accepted on the command
-- line.
data ExportFormat
  = -- | Plain JSON.
    JSON
  | -- | JSON Lines.
    JSONLines
  deriving (Eq, Show, Read)
|
||||
|
||||
parseCommandLine :: IO Command
|
||||
parseCommandLine =
|
||||
execParser $
|
||||
@ -69,7 +83,7 @@ version =
|
||||
commandParser :: Parser Command
|
||||
commandParser =
|
||||
subparser
|
||||
(testCommand <> exportOpenApiSpecCommand)
|
||||
(testCommand <> exportOpenApiSpecCommand <> exportData)
|
||||
where
|
||||
testCommand =
|
||||
command
|
||||
@ -85,6 +99,13 @@ commandParser =
|
||||
(helper <*> pure ExportOpenAPISpec)
|
||||
(progDesc "Exports the OpenAPI specification of the Data Connector API that agents must implement")
|
||||
)
|
||||
exportData =
|
||||
command
|
||||
"export-data"
|
||||
( info
|
||||
(helper <*> (ExportData <$> exportDataConfigParser))
|
||||
(progDesc "Exports the Chinook dataset to files in the specified directory")
|
||||
)
|
||||
|
||||
testConfigParser :: Parser TestConfig
|
||||
testConfigParser =
|
||||
@ -167,6 +188,30 @@ testOptionsParser =
|
||||
-- | Parses 'TestOptions' and wraps the result in the 'Test' command.
testCommandParser :: Parser Command
testCommandParser = fmap Test testOptionsParser
|
||||
|
||||
-- | Command-line parser for the options of the @export-data@ command.
--
-- @--directory@ and @--format@ are mandatory; @--datetime-format@ may be
-- omitted, in which case '_edcDateTimeFormat' is 'Nothing'.
exportDataConfigParser :: Parser ExportDataConfig
exportDataConfigParser =
  ExportDataConfig
    <$> directoryParser
    <*> formatParser
    <*> optional dateTimeFormatParser
  where
    -- Destination directory for the exported files.
    directoryParser =
      strOption $
        long "directory"
          <> short 'd'
          <> metavar "DIR"
          <> help "The directory to export the data files into"

    -- Read with 'auto', i.e. through the 'Read' instance of 'ExportFormat'.
    formatParser =
      option auto $
        long "format"
          <> short 'f'
          <> metavar "FORMAT"
          <> help "The format to export (JSON or JSONLines)"

    -- Wrapped in 'optional' by the caller above.
    dateTimeFormatParser =
      strOption $
        long "datetime-format"
          <> metavar "FORMAT"
          <> help "Format string to use when formatting DateTime columns (use format syntax from https://hackage.haskell.org/package/time-1.12.2/docs/Data-Time-Format.html#v:formatTime)"
|
||||
|
||||
-- | Option reader for a 'BaseUrl'. The input is handed to 'parseBaseUrl' and
-- a failure is rendered with 'show' to produce the reader's error message.
baseUrl :: ReadM BaseUrl
baseUrl = eitherReader (\input -> left show (parseBaseUrl input))
|
||||
|
||||
|
@ -18,6 +18,7 @@ import Servant.API (NamedRoutes)
|
||||
import Servant.Client (Client, ClientError, hoistClient, mkClientEnv, runClientM)
|
||||
import Test.CapabilitiesSpec qualified
|
||||
import Test.Data (TestData, guardedCapabilities, mkTestData)
|
||||
import Test.DataExport (exportData)
|
||||
import Test.ErrorSpec qualified
|
||||
import Test.ExplainSpec qualified
|
||||
import Test.HealthSpec qualified
|
||||
@ -62,6 +63,8 @@ main = do
|
||||
traverse_ ((traverse_ putStrLn) . (foldPaths . extractLabels)) tree
|
||||
ExportOpenAPISpec ->
|
||||
Text.putStrLn $ encodeToLazyText openApiSchema
|
||||
ExportData config ->
|
||||
exportData config
|
||||
|
||||
pure ()
|
||||
|
||||
|
@ -3,7 +3,7 @@ This test suite provides a set of tests that is able to test any Data Connector
|
||||
|
||||
Not all tests will be appropriate for all agents. Agents self-describe their capabilities and only the tests appropriate for those capabilities will be run.
|
||||
|
||||
The executable also has the ability to export the OpenAPI spec of the Data Connector agent API so that customers can use that to ensure their agent complies with the API format.
|
||||
The executable also has the ability to export the OpenAPI spec of the Data Connector agent API so that customers can use that to ensure their agent complies with the API format. In addition, the Chinook data set can be exported to files on disk in various formats.
|
||||
|
||||
## How to Use
|
||||
First, start your Data Connector agent and ensure it is populated with the [Chinook data set](https://github.com/lerocha/chinook-database/). For example, you could start the Reference Agent by following the instructions in [its README](../../dc-agents/reference/README.md).
|
||||
@ -29,3 +29,14 @@ To export the OpenAPI spec, you can run this command, and the spec will be writt
|
||||
```
|
||||
> cabal run test:tests-dc-api -- export-openapi-spec
|
||||
```
|
||||
|
||||
To export the Chinook data set, you can run this command:
|
||||
```
|
||||
> cabal run test:tests-dc-api -- export-data -d /tmp/chinook-data -f JSONLines
|
||||
```
|
||||
|
||||
This will export the data into the directory specified by `-d` in the `JSONLines` format (`-f`), which writes one JSON object per row, separated by newlines. Each table's data will be exported into a separate file.
|
||||
|
||||
If you need to customize the format of any DateTime columns, you can use the `--datetime-format` option and specify a format string using the syntax specified [here](https://hackage.haskell.org/package/time-1.12.2/docs/Data-Time-Format.html#v:formatTime). By default the DateTime columns are exported in ISO8601 format.
|
||||
|
||||
The other format supported by `-f` is `JSON`, which results in each file being a JSON array of table rows as JSON objects.
|
||||
|
@ -5,6 +5,8 @@ module Test.Data
|
||||
( -- = Test Data
|
||||
TestData (..),
|
||||
mkTestData,
|
||||
schemaTables,
|
||||
allTableRows,
|
||||
-- = Utilities
|
||||
emptyQuery,
|
||||
sortBy,
|
||||
@ -274,6 +276,34 @@ genresTableRelationships =
|
||||
]
|
||||
)
|
||||
|
||||
-- | Name of the @Playlist@ table, built with 'mkTableName'.
playlistsTableName :: API.TableName
playlistsTableName =
  mkTableName "Playlist"
|
||||
|
||||
-- | Rows produced by 'readTableFromXmlIntoRows' for 'playlistsTableName',
-- sorted on the @PlaylistId@ field.
playlistsRows :: [HashMap API.FieldName API.FieldValue]
playlistsRows =
  sortBy (API.FieldName "PlaylistId") (readTableFromXmlIntoRows playlistsTableName)
|
||||
|
||||
-- | Name of the @PlaylistTrack@ table, built with 'mkTableName'.
playlistTracksTableName :: API.TableName
playlistTracksTableName =
  mkTableName "PlaylistTrack"
|
||||
|
||||
-- | Rows produced by 'readTableFromXmlIntoRows' for
-- 'playlistTracksTableName', sorted on the pair of @PlaylistId@ and
-- @TrackId@ field values.
playlistTracksRows :: [HashMap API.FieldName API.FieldValue]
playlistTracksRows =
  sortOn compositeKey (readTableFromXmlIntoRows playlistTracksTableName)
  where
    -- Each component is a 'Maybe', since the field is looked up with '^?'.
    compositeKey row =
      ( row ^? ix (API.FieldName "PlaylistId"),
        row ^? ix (API.FieldName "TrackId")
      )
|
||||
|
||||
-- | Lookup from table name to that table's rows, for every table listed
-- below.
allTableRows :: HashMap API.TableName ([HashMap API.FieldName API.FieldValue])
allTableRows = HashMap.fromList tablesWithRows
  where
    tablesWithRows =
      [ (artistsTableName, artistsRows),
        (albumsTableName, albumsRows),
        (customersTableName, customersRows),
        (employeesTableName, employeesRows),
        (genresTableName, genresRows),
        (invoicesTableName, invoicesRows),
        (invoiceLinesTableName, invoiceLinesRows),
        (mediaTypesTableName, mediaTypesRows),
        (playlistsTableName, playlistsRows),
        (playlistTracksTableName, playlistTracksRows),
        (tracksTableName, tracksRows)
      ]
|
||||
|
||||
data TestData = TestData
|
||||
{ -- = Schema
|
||||
_tdSchemaTables :: [API.TableInfo],
|
||||
|
72
server/lib/dc-api/test/Test/DataExport.hs
Normal file
72
server/lib/dc-api/test/Test/DataExport.hs
Normal file
@ -0,0 +1,72 @@
|
||||
module Test.DataExport
|
||||
( exportData,
|
||||
)
|
||||
where
|
||||
|
||||
import Command (ExportDataConfig (..), ExportFormat (..))
|
||||
import Control.Lens ((&))
|
||||
import Data.Aeson qualified as J
|
||||
import Data.ByteString.Lazy qualified as BSL
|
||||
import Data.HashMap.Strict (HashMap)
|
||||
import Data.HashMap.Strict qualified as HashMap
|
||||
import Data.List.NonEmpty qualified as NonEmpty
|
||||
import Data.Maybe (fromMaybe)
|
||||
import Data.Text qualified as Text
|
||||
import Data.Time (ZonedTime, defaultTimeLocale)
|
||||
import Data.Time.Format (formatTime)
|
||||
import Data.Time.Format.ISO8601 (iso8601ParseM)
|
||||
import Hasura.Backends.DataConnector.API
|
||||
import System.Directory qualified as Directory
|
||||
import System.FilePath ((<.>), (</>))
|
||||
import Test.Data qualified as Data
|
||||
|
||||
-- | Exports every table in 'Data.schemaTables' to its own file inside the
-- configured directory, creating the directory (and any missing parents)
-- first. Progress is reported on stdout.
exportData :: ExportDataConfig -> IO ()
exportData ExportDataConfig {..} = do
  targetDirectory <- Directory.makeAbsolute _edcDirectory
  Directory.createDirectoryIfMissing True targetDirectory
  putStrLn $ "Exporting to " <> targetDirectory
  mapM_ (exportTable targetDirectory) Data.schemaTables
  where
    -- Serialiser for the requested format. Both formats are written with
    -- the same ".json" extension.
    serialiseTable = case _edcFormat of
      JSON -> convertTable ".json" convertTableToJSON
      JSONLines -> convertTable ".json" convertTableToJSONLines

    -- A table with no entry in 'Data.allTableRows' is exported with no rows.
    rowsFor tableName =
      fromMaybe [] (HashMap.lookup tableName Data.allTableRows)

    -- DateTime columns are only rewritten when a format string was supplied.
    reformatDates tableInfo rows = case _edcDateTimeFormat of
      Nothing -> rows
      Just formatString -> map (formatDateColumnsInRow formatString tableInfo) rows

    exportTable directory tableInfo@TableInfo {..} = do
      let tableRows = reformatDates tableInfo (rowsFor _tiName)
          (filename, contents) = serialiseTable _tiName tableRows
      BSL.writeFile (directory </> filename) contents
      putStrLn $ "Exported " <> filename
|
||||
|
||||
-- | Pairs the output file name for a table with its serialised contents.
--
-- The file name is the last component of the table name with the given
-- extension added; the contents are the rows run through the supplied
-- serialiser.
convertTable :: String -> ([HashMap FieldName FieldValue] -> BSL.ByteString) -> TableName -> [HashMap FieldName FieldValue] -> (FilePath, BSL.ByteString)
convertTable extension convertRows tableName rows =
  let baseName = Text.unpack (NonEmpty.last (unTableName tableName))
   in (baseName <.> extension, convertRows rows)
|
||||
|
||||
-- | Serialises all rows as one JSON document: the list of rows is converted
-- to a JSON value with 'J.toJSON' and then encoded.
convertTableToJSON :: [HashMap FieldName FieldValue] -> BSL.ByteString
convertTableToJSON = J.encode . J.toJSON
|
||||
|
||||
-- | Serialises each row as its own JSON document and joins the documents
-- with a single newline. No newline is written after the final row.
convertTableToJSONLines :: [HashMap FieldName FieldValue] -> BSL.ByteString
convertTableToJSONLines rows =
  BSL.intercalate "\n" (map encodeRow rows)
  where
    encodeRow row = J.encode (J.toJSON row)
|
||||
|
||||
-- | Re-renders the DateTime columns of a row with the given format string.
--
-- A field is treated as a DateTime column when the table's column of the
-- same name has the custom scalar type @DateTime@. Such a field is replaced
-- only if it holds a JSON string that parses as an ISO 8601 'ZonedTime';
-- every other field, and any value that fails to parse, is left untouched.
formatDateColumnsInRow :: String -> TableInfo -> HashMap FieldName FieldValue -> HashMap FieldName FieldValue
formatDateColumnsInRow dateTimeFormatString TableInfo {..} row =
  row & HashMap.mapWithKey reformatIfDateField
  where
    dateTimeScalarType = CustomTy "DateTime"

    -- Field names of all columns declared with the DateTime scalar type.
    dateFields =
      [ FieldName (unColumnName _ciName)
        | ColumnInfo {..} <- _tiColumns,
          _ciType == dateTimeScalarType
      ]

    reformatIfDateField fieldName fieldValue
      | fieldName `elem` dateFields = fromMaybe fieldValue (reformat fieldValue)
      | otherwise = fieldValue

    -- 'Nothing' when the value is not a string or is not a valid ISO 8601
    -- zoned time; the caller then keeps the original value.
    reformat fieldValue = case deserializeAsColumnFieldValue fieldValue of
      J.String value -> do
        (zonedTime :: ZonedTime) <- iso8601ParseM (Text.unpack value)
        let rendered = formatTime defaultTimeLocale dateTimeFormatString zonedTime
        Just (mkColumnFieldValue (J.String (Text.pack rendered)))
      _ -> Nothing
|
Loading…
Reference in New Issue
Block a user