2021-04-12 13:18:29 +03:00
|
|
|
{-# LANGUAGE DuplicateRecordFields #-}
|
2021-09-24 01:56:37 +03:00
|
|
|
{-# LANGUAGE ExtendedDefaultRules #-}
|
|
|
|
{-# OPTIONS_GHC -fno-warn-type-defaults #-}
|
2021-04-12 13:18:29 +03:00
|
|
|
|
2021-06-28 16:29:48 +03:00
|
|
|
-- | Execute a Select query against the BigQuery REST API.
|
|
|
|
module Hasura.Backends.BigQuery.Execute
|
2021-09-24 01:56:37 +03:00
|
|
|
( executeSelect,
|
|
|
|
runExecute,
|
|
|
|
streamBigQuery,
|
2022-02-09 18:26:14 +03:00
|
|
|
executeBigQuery,
|
2021-09-24 01:56:37 +03:00
|
|
|
BigQuery (..),
|
|
|
|
OutputValue (..),
|
|
|
|
RecordSet (..),
|
|
|
|
Execute,
|
|
|
|
Value (..),
|
|
|
|
FieldNameText (..),
|
|
|
|
)
|
|
|
|
where
|
|
|
|
|
|
|
|
import Control.Applicative
|
|
|
|
import Control.Concurrent
|
|
|
|
import Control.Exception.Safe
|
|
|
|
import Control.Monad.Except
|
|
|
|
import Control.Monad.Reader
|
2021-10-01 21:29:03 +03:00
|
|
|
import Data.Aeson ((.!=), (.:), (.:?), (.=))
|
2021-09-24 01:56:37 +03:00
|
|
|
import Data.Aeson qualified as Aeson
|
|
|
|
import Data.Aeson.Types qualified as Aeson
|
|
|
|
import Data.ByteString.Lazy qualified as L
|
|
|
|
import Data.Foldable
|
|
|
|
import Data.HashMap.Strict.InsOrd qualified as OMap
|
|
|
|
import Data.Maybe
|
|
|
|
import Data.Text qualified as T
|
|
|
|
import Data.Text.Lazy qualified as LT
|
|
|
|
import Data.Text.Lazy.Builder qualified as LT
|
|
|
|
import Data.Vector (Vector)
|
|
|
|
import Data.Vector qualified as V
|
|
|
|
import GHC.Generics
|
|
|
|
import Hasura.Backends.BigQuery.Connection
|
|
|
|
import Hasura.Backends.BigQuery.Source
|
|
|
|
import Hasura.Backends.BigQuery.ToQuery qualified as ToQuery
|
|
|
|
import Hasura.Backends.BigQuery.Types as BigQuery
|
|
|
|
import Hasura.Prelude hiding (head, state, tail)
|
|
|
|
import Network.HTTP.Simple
|
|
|
|
import Network.HTTP.Types
|
2021-04-12 13:18:29 +03:00
|
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- Types
|
|
|
|
|
|
|
|
-- | A set of records produced by the database. These are joined
|
|
|
|
-- together. There are all sorts of optimizations possible here, from
|
|
|
|
-- using a matrix/flat vector, unboxed sums for Value, etc. Presently
|
|
|
|
-- we choose a naive implementation in the interest of getting other
|
|
|
|
-- work done.
|
|
|
|
data RecordSet = RecordSet
|
2021-09-24 01:56:37 +03:00
|
|
|
{ rows :: !(Vector (InsOrdHashMap FieldNameText OutputValue)),
|
|
|
|
wantedFields :: !(Maybe [Text])
|
|
|
|
}
|
|
|
|
deriving (Show)
|
2021-04-12 13:18:29 +03:00
|
|
|
|
2021-06-28 16:29:48 +03:00
|
|
|
-- | As opposed to BigQuery.FieldName which is a qualified name, this
|
|
|
|
-- is just the unqualified text name itself.
|
2021-09-24 01:56:37 +03:00
|
|
|
newtype FieldNameText
|
|
|
|
= FieldNameText Text
|
2021-06-28 16:29:48 +03:00
|
|
|
deriving (Show, Ord, Eq, Hashable, Aeson.FromJSON, Aeson.ToJSONKey, IsString)
|
|
|
|
|
2021-04-12 13:18:29 +03:00
|
|
|
data OutputValue
|
|
|
|
= DecimalOutputValue !Decimal
|
|
|
|
| BigDecimalOutputValue !BigDecimal
|
|
|
|
| IntegerOutputValue !Int64
|
|
|
|
| FloatOutputValue !Float64
|
|
|
|
| GeographyOutputValue !Geography
|
|
|
|
| TextOutputValue !Text
|
|
|
|
| TimestampOutputValue !Timestamp
|
|
|
|
| DateOutputValue !Date
|
|
|
|
| TimeOutputValue !Time
|
|
|
|
| DatetimeOutputValue !Datetime
|
|
|
|
| BytesOutputValue !Base64
|
|
|
|
| BoolOutputValue !Bool
|
|
|
|
| ArrayOutputValue !(Vector OutputValue)
|
2021-06-28 16:29:48 +03:00
|
|
|
| RecordOutputValue !(InsOrdHashMap FieldNameText OutputValue)
|
2021-04-12 13:18:29 +03:00
|
|
|
| NullOutputValue -- TODO: Consider implications.
|
|
|
|
deriving (Show, Eq, Generic)
|
2021-09-24 01:56:37 +03:00
|
|
|
|
2021-04-12 13:18:29 +03:00
|
|
|
instance Hashable OutputValue
|
2021-09-24 01:56:37 +03:00
|
|
|
|
2021-04-12 13:18:29 +03:00
|
|
|
instance Aeson.ToJSON OutputValue where
|
|
|
|
toJSON = \case
|
2021-09-24 01:56:37 +03:00
|
|
|
NullOutputValue -> Aeson.toJSON Aeson.Null
|
|
|
|
DecimalOutputValue !i -> Aeson.toJSON i
|
|
|
|
BigDecimalOutputValue !i -> Aeson.toJSON i
|
|
|
|
FloatOutputValue !i -> Aeson.toJSON i
|
|
|
|
TextOutputValue !i -> Aeson.toJSON i
|
|
|
|
BytesOutputValue !i -> Aeson.toJSON i
|
|
|
|
DateOutputValue !i -> Aeson.toJSON i
|
|
|
|
TimestampOutputValue !i -> Aeson.toJSON i
|
|
|
|
TimeOutputValue !i -> Aeson.toJSON i
|
|
|
|
DatetimeOutputValue !i -> Aeson.toJSON i
|
|
|
|
GeographyOutputValue !i -> Aeson.toJSON i
|
|
|
|
BoolOutputValue !i -> Aeson.toJSON i
|
|
|
|
IntegerOutputValue !i -> Aeson.toJSON i
|
|
|
|
ArrayOutputValue !vector -> Aeson.toJSON vector
|
2021-04-12 13:18:29 +03:00
|
|
|
RecordOutputValue !record -> Aeson.toJSON record
|
|
|
|
|
|
|
|
data ExecuteReader = ExecuteReader
|
2022-02-09 18:26:14 +03:00
|
|
|
{ sourceConfig :: !BigQuerySourceConfig
|
2021-04-12 13:18:29 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
data ExecuteProblem
|
|
|
|
= GetJobDecodeProblem String
|
|
|
|
| CreateQueryJobDecodeProblem String
|
|
|
|
| ErrorResponseFromServer Status L.ByteString
|
|
|
|
| GetJobResultsProblem SomeException
|
|
|
|
| RESTRequestNonOK Status Text
|
|
|
|
| CreateQueryJobProblem SomeException
|
|
|
|
| ExecuteRunBigQueryProblem BigQueryProblem
|
|
|
|
deriving (Show)
|
|
|
|
|
|
|
|
-- | Execute monad; as queries are performed, the record sets are
|
|
|
|
-- stored in the map.
|
|
|
|
newtype Execute a = Execute
|
|
|
|
{ unExecute :: ReaderT ExecuteReader (ExceptT ExecuteProblem IO) a
|
2021-09-24 01:56:37 +03:00
|
|
|
}
|
|
|
|
deriving
|
|
|
|
( Functor,
|
|
|
|
Applicative,
|
|
|
|
Monad,
|
|
|
|
MonadReader ExecuteReader,
|
|
|
|
MonadIO,
|
|
|
|
MonadError ExecuteProblem
|
|
|
|
)
|
2021-04-12 13:18:29 +03:00
|
|
|
|
|
|
|
-- | Big query parameters must be accompanied by an explicit type
|
|
|
|
-- signature.
|
|
|
|
data BigQueryType
|
|
|
|
= DECIMAL
|
|
|
|
| INTEGER
|
|
|
|
| FLOAT
|
|
|
|
| BYTES
|
|
|
|
| STRING
|
|
|
|
| BOOL
|
|
|
|
| ARRAY BigQueryType
|
|
|
|
| GEOGRAPHY
|
|
|
|
| DATE
|
|
|
|
| TIMESTAMP
|
|
|
|
| DATETIME
|
|
|
|
| TIME
|
|
|
|
| BIGDECIMAL
|
|
|
|
deriving (Show, Eq)
|
|
|
|
|
|
|
|
data BigQuery = BigQuery
|
2021-09-24 01:56:37 +03:00
|
|
|
{ query :: !LT.Text,
|
2022-02-09 18:26:14 +03:00
|
|
|
parameters :: !(InsOrdHashMap ParameterName Parameter)
|
2021-09-24 01:56:37 +03:00
|
|
|
}
|
|
|
|
deriving (Show)
|
2021-04-12 13:18:29 +03:00
|
|
|
|
|
|
|
data Parameter = Parameter
|
2021-09-24 01:56:37 +03:00
|
|
|
{ typ :: !BigQueryType,
|
|
|
|
value :: !Value
|
|
|
|
}
|
|
|
|
deriving (Show)
|
2021-04-12 13:18:29 +03:00
|
|
|
|
2021-09-24 01:56:37 +03:00
|
|
|
newtype ParameterName
|
|
|
|
= ParameterName LT.Text
|
|
|
|
deriving (Show, Aeson.ToJSON, Ord, Eq, Hashable)
|
2021-04-12 13:18:29 +03:00
|
|
|
|
|
|
|
data BigQueryField = BigQueryField
|
2021-09-24 01:56:37 +03:00
|
|
|
{ name :: !FieldNameText,
|
|
|
|
typ :: !BigQueryFieldType,
|
|
|
|
mode :: !Mode
|
|
|
|
}
|
|
|
|
deriving (Show)
|
2021-04-12 13:18:29 +03:00
|
|
|
|
|
|
|
data BigQueryFieldType
|
|
|
|
= FieldSTRING
|
|
|
|
| FieldBYTES
|
|
|
|
| FieldINTEGER
|
|
|
|
| FieldFLOAT
|
|
|
|
| FieldBOOL
|
|
|
|
| FieldTIMESTAMP
|
|
|
|
| FieldDATE
|
|
|
|
| FieldTIME
|
|
|
|
| FieldDATETIME
|
|
|
|
| FieldGEOGRAPHY
|
|
|
|
| FieldDECIMAL
|
|
|
|
| FieldBIGDECIMAL
|
|
|
|
| FieldSTRUCT (Vector BigQueryField)
|
|
|
|
deriving (Show)
|
|
|
|
|
|
|
|
data Mode
|
|
|
|
= Nullable
|
|
|
|
| NotNullable
|
|
|
|
| Repeated
|
|
|
|
deriving (Show)
|
|
|
|
|
|
|
|
data IsNullable
|
|
|
|
= IsNullable
|
|
|
|
| IsRequired
|
|
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- Constants
|
|
|
|
|
|
|
|
-- | Delay between attempts to get job results if the job is incomplete.
|
|
|
|
streamDelaySeconds :: Int
|
|
|
|
streamDelaySeconds = 1
|
|
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- Executing the planned actions forest
|
|
|
|
|
|
|
|
runExecute ::
|
2021-09-24 01:56:37 +03:00
|
|
|
MonadIO m =>
|
|
|
|
BigQuerySourceConfig ->
|
|
|
|
Execute RecordSet ->
|
|
|
|
m (Either ExecuteProblem RecordSet)
|
2022-02-09 18:26:14 +03:00
|
|
|
runExecute sourceConfig m =
|
2021-04-12 13:18:29 +03:00
|
|
|
liftIO
|
2021-09-24 01:56:37 +03:00
|
|
|
( runExceptT
|
|
|
|
( runReaderT
|
|
|
|
(unExecute (m >>= getFinalRecordSet))
|
2022-02-09 18:26:14 +03:00
|
|
|
(ExecuteReader {sourceConfig})
|
2021-09-24 01:56:37 +03:00
|
|
|
)
|
|
|
|
)
|
2021-06-28 16:29:48 +03:00
|
|
|
|
|
|
|
executeSelect :: Select -> Execute RecordSet
|
|
|
|
executeSelect select = do
|
2022-02-09 18:26:14 +03:00
|
|
|
conn <- asks (_scConnection . sourceConfig)
|
2021-06-28 16:29:48 +03:00
|
|
|
recordSet <-
|
2022-02-09 18:26:14 +03:00
|
|
|
streamBigQuery conn (selectToBigQuery select) >>= liftEither
|
2021-06-28 16:29:48 +03:00
|
|
|
pure recordSet {wantedFields = selectFinalWantedFields select}
|
|
|
|
|
|
|
|
-- | This is needed to strip out unneeded fields (join keys) in the
|
|
|
|
-- final query. This is a relic of the data loader approach. A later
|
|
|
|
-- improvement would be to update the FromIr code to explicitly
|
|
|
|
-- reselect the query. But the purpose of this commit is to drop the
|
|
|
|
-- dataloader code and not modify the from IR code which is more
|
|
|
|
-- delicate.
|
|
|
|
getFinalRecordSet :: RecordSet -> Execute RecordSet
|
|
|
|
getFinalRecordSet recordSet =
|
2021-04-12 13:18:29 +03:00
|
|
|
pure
|
2021-06-28 16:29:48 +03:00
|
|
|
recordSet
|
2021-04-12 13:18:29 +03:00
|
|
|
{ rows =
|
|
|
|
fmap
|
2021-10-12 20:58:46 +03:00
|
|
|
( OMap.filterWithKey
|
|
|
|
( \(FieldNameText k) _ ->
|
|
|
|
maybe True (elem k) (wantedFields recordSet)
|
|
|
|
)
|
2021-09-24 01:56:37 +03:00
|
|
|
)
|
2021-06-28 16:29:48 +03:00
|
|
|
(rows recordSet)
|
2021-04-12 13:18:29 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- Make a big query from a select
|
|
|
|
|
2021-06-28 16:29:48 +03:00
|
|
|
selectToBigQuery :: Select -> BigQuery
|
2021-04-12 13:18:29 +03:00
|
|
|
selectToBigQuery select =
|
|
|
|
BigQuery
|
2021-09-24 01:56:37 +03:00
|
|
|
{ query = LT.toLazyText query,
|
|
|
|
parameters =
|
2021-04-12 13:18:29 +03:00
|
|
|
OMap.fromList
|
2021-09-24 01:56:37 +03:00
|
|
|
( map
|
|
|
|
( \(int, value) ->
|
|
|
|
( ParameterName (LT.toLazyText (ToQuery.paramName int)),
|
|
|
|
Parameter {typ = valueType value, value}
|
|
|
|
)
|
|
|
|
)
|
|
|
|
(OMap.toList params)
|
2022-02-09 18:26:14 +03:00
|
|
|
)
|
2021-04-12 13:18:29 +03:00
|
|
|
}
|
|
|
|
where
|
|
|
|
(query, params) =
|
2021-06-28 16:29:48 +03:00
|
|
|
ToQuery.renderBuilderPretty (ToQuery.fromSelect select)
|
2021-04-12 13:18:29 +03:00
|
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- Type system
|
|
|
|
|
|
|
|
-- | Make a BigQuery type for the given value.
|
|
|
|
valueType :: Value -> BigQueryType
|
|
|
|
valueType =
|
|
|
|
\case
|
|
|
|
DecimalValue {} -> DECIMAL
|
|
|
|
BigDecimalValue {} -> BIGDECIMAL
|
|
|
|
IntegerValue {} -> INTEGER
|
|
|
|
FloatValue {} -> FLOAT
|
|
|
|
GeographyValue {} -> GEOGRAPHY
|
|
|
|
StringValue {} -> STRING
|
|
|
|
BytesValue {} -> BYTES
|
|
|
|
BoolValue {} -> BOOL
|
|
|
|
DatetimeValue {} -> DATETIME
|
|
|
|
TimeValue {} -> TIME
|
|
|
|
DateValue {} -> DATE
|
|
|
|
TimestampValue {} -> TIMESTAMP
|
|
|
|
ArrayValue values ->
|
|
|
|
ARRAY
|
2021-10-12 20:58:46 +03:00
|
|
|
( maybe
|
|
|
|
STRING
|
|
|
|
-- Above: If the array is null, it doesn't matter what type
|
|
|
|
-- the element is. So we put STRING.
|
|
|
|
valueType
|
|
|
|
(values V.!? 0)
|
2021-09-24 01:56:37 +03:00
|
|
|
-- Above: We base the type from the first element. Later,
|
|
|
|
-- we could add some kind of sanity check that they are all
|
|
|
|
-- the same type.
|
|
|
|
)
|
2021-04-12 13:18:29 +03:00
|
|
|
NullValue -> STRING
|
2021-09-24 01:56:37 +03:00
|
|
|
|
|
|
|
-- Above: If the value is null, it doesn't matter what type
|
|
|
|
-- the element is. So we put STRING.
|
2021-04-12 13:18:29 +03:00
|
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- JSON serialization
|
|
|
|
|
|
|
|
-- | Make a JSON representation of the type of the given value.
|
|
|
|
valueToBigQueryJson :: Value -> Aeson.Value
|
|
|
|
valueToBigQueryJson = go
|
|
|
|
where
|
|
|
|
go =
|
|
|
|
\case
|
|
|
|
NullValue -> Aeson.Null -- TODO: I haven't tested whether BigQuery is happy with this null value.
|
|
|
|
DecimalValue i -> Aeson.object ["value" .= i]
|
|
|
|
BigDecimalValue i -> Aeson.object ["value" .= i]
|
|
|
|
IntegerValue i -> Aeson.object ["value" .= i]
|
|
|
|
FloatValue i -> Aeson.object ["value" .= i]
|
|
|
|
TimestampValue i -> Aeson.object ["value" .= i]
|
|
|
|
DateValue (Date i) -> Aeson.object ["value" .= i]
|
|
|
|
TimeValue (Time i) -> Aeson.object ["value" .= i]
|
|
|
|
DatetimeValue (Datetime i) -> Aeson.object ["value" .= i]
|
|
|
|
GeographyValue (Geography i) -> Aeson.object ["value" .= i]
|
|
|
|
StringValue i -> Aeson.object ["value" .= Aeson.String i]
|
|
|
|
BytesValue i -> Aeson.object ["value" .= i]
|
|
|
|
BoolValue i ->
|
|
|
|
Aeson.object
|
2021-09-24 01:56:37 +03:00
|
|
|
[ "value"
|
|
|
|
.= Aeson.String
|
|
|
|
( if i
|
|
|
|
then "true"
|
|
|
|
else "false"
|
|
|
|
)
|
2021-04-12 13:18:29 +03:00
|
|
|
]
|
|
|
|
ArrayValue vs ->
|
|
|
|
Aeson.object ["array_values" .= Aeson.Array (fmap go vs)]
|
|
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- Execute a query as a job and stream the results into a record set
|
|
|
|
|
|
|
|
-- | TODO: WARNING: This function hasn't been tested on Big Data(tm),
|
|
|
|
-- and therefore I was unable to get BigQuery to produce paginated
|
|
|
|
-- results that would contain the 'pageToken' field in the JSON
|
|
|
|
-- response. Until that test has been done, we should consider this a
|
|
|
|
-- preliminary implementation.
|
|
|
|
streamBigQuery ::
|
2022-02-09 18:26:14 +03:00
|
|
|
MonadIO m => BigQueryConnection -> BigQuery -> m (Either ExecuteProblem RecordSet)
|
|
|
|
streamBigQuery conn bigquery = do
|
|
|
|
jobResult <- createQueryJob conn bigquery
|
2021-04-12 13:18:29 +03:00
|
|
|
case jobResult of
|
2021-10-01 21:29:03 +03:00
|
|
|
Right job -> loop Nothing Nothing
|
2021-09-24 01:56:37 +03:00
|
|
|
where
|
|
|
|
loop pageToken mrecordSet = do
|
2022-02-09 18:26:14 +03:00
|
|
|
results <- getJobResults conn job Fetch {pageToken}
|
2021-09-24 01:56:37 +03:00
|
|
|
case results of
|
|
|
|
Left problem -> pure (Left problem)
|
|
|
|
Right
|
|
|
|
( JobComplete
|
|
|
|
JobResults
|
|
|
|
{ pageToken = mpageToken',
|
|
|
|
recordSet = recordSet'@RecordSet {rows = rows'}
|
|
|
|
}
|
|
|
|
) -> do
|
|
|
|
let extendedRecordSet =
|
|
|
|
case mrecordSet of
|
|
|
|
Nothing -> recordSet'
|
|
|
|
Just recordSet@RecordSet {rows} ->
|
|
|
|
(recordSet {rows = rows <> rows'})
|
|
|
|
case mpageToken' of
|
|
|
|
Nothing -> pure (Right extendedRecordSet)
|
|
|
|
Just pageToken' ->
|
|
|
|
loop (pure pageToken') (pure extendedRecordSet)
|
|
|
|
Right JobIncomplete {} -> do
|
|
|
|
liftIO (threadDelay (1000 * 1000 * streamDelaySeconds))
|
|
|
|
loop pageToken mrecordSet
|
2021-04-12 13:18:29 +03:00
|
|
|
Left e -> pure (Left e)
|
|
|
|
|
2022-02-09 18:26:14 +03:00
|
|
|
-- | Execute a query without expecting any output (e.g. CREATE TABLE or INSERT)
|
|
|
|
executeBigQuery :: MonadIO m => BigQueryConnection -> BigQuery -> m (Either ExecuteProblem ())
|
|
|
|
executeBigQuery conn bigquery = do
|
|
|
|
jobResult <- createQueryJob conn bigquery
|
|
|
|
case jobResult of
|
|
|
|
Right job -> loop Nothing
|
|
|
|
where
|
|
|
|
loop mrecordSet = do
|
|
|
|
results <- getJobResults conn job Fetch {pageToken = Nothing}
|
|
|
|
case results of
|
|
|
|
Left problem -> pure (Left problem)
|
|
|
|
Right (JobComplete _) -> pure (Right ())
|
|
|
|
Right JobIncomplete {} -> do
|
|
|
|
liftIO (threadDelay (1000 * 1000 * streamDelaySeconds))
|
|
|
|
loop mrecordSet
|
|
|
|
Left e -> pure (Left e)
|
|
|
|
|
2021-04-12 13:18:29 +03:00
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- Querying results from a job
|
|
|
|
|
|
|
|
data JobResults = JobResults
|
2021-09-24 01:56:37 +03:00
|
|
|
{ pageToken :: Maybe Text,
|
|
|
|
recordSet :: RecordSet
|
|
|
|
}
|
|
|
|
deriving (Show)
|
2021-04-12 13:18:29 +03:00
|
|
|
|
|
|
|
instance Aeson.FromJSON JobResults where
|
|
|
|
parseJSON =
|
|
|
|
Aeson.withObject
|
|
|
|
"JobResults"
|
2021-09-24 01:56:37 +03:00
|
|
|
( \o -> do
|
|
|
|
recordSet <- parseRecordSetPayload o
|
|
|
|
pageToken <-
|
|
|
|
fmap
|
|
|
|
( \mtoken -> do
|
|
|
|
token <- mtoken
|
|
|
|
guard (not (T.null token))
|
|
|
|
pure token
|
|
|
|
)
|
|
|
|
(o .:? "pageToken")
|
|
|
|
pure JobResults {..}
|
|
|
|
)
|
2021-04-12 13:18:29 +03:00
|
|
|
|
|
|
|
data JobResultsResponse
|
|
|
|
= JobIncomplete
|
|
|
|
| JobComplete JobResults
|
|
|
|
deriving (Show)
|
|
|
|
|
|
|
|
instance Aeson.FromJSON JobResultsResponse where
|
|
|
|
parseJSON j =
|
|
|
|
Aeson.withObject
|
|
|
|
"JobResultsResponse"
|
2021-09-24 01:56:37 +03:00
|
|
|
( \o -> do
|
|
|
|
kind <- o .: "kind"
|
|
|
|
if kind == ("bigquery#getQueryResultsResponse" :: Text)
|
|
|
|
then do
|
|
|
|
complete <- o .: "jobComplete"
|
|
|
|
if complete
|
|
|
|
then fmap JobComplete (Aeson.parseJSON j)
|
|
|
|
else pure JobIncomplete
|
|
|
|
else fail ("Invalid kind: " <> show kind)
|
|
|
|
)
|
2021-04-12 13:18:29 +03:00
|
|
|
j
|
|
|
|
|
|
|
|
data Fetch = Fetch
|
|
|
|
{ pageToken :: Maybe Text
|
2021-09-24 01:56:37 +03:00
|
|
|
}
|
|
|
|
deriving (Show)
|
2021-04-12 13:18:29 +03:00
|
|
|
|
|
|
|
-- | Get results of a job.
|
|
|
|
getJobResults ::
|
2021-09-24 01:56:37 +03:00
|
|
|
MonadIO m =>
|
2022-02-09 18:26:14 +03:00
|
|
|
BigQueryConnection ->
|
2021-09-24 01:56:37 +03:00
|
|
|
Job ->
|
|
|
|
Fetch ->
|
|
|
|
m (Either ExecuteProblem JobResultsResponse)
|
2022-02-09 18:26:14 +03:00
|
|
|
getJobResults conn Job {jobId, location} Fetch {pageToken} =
|
2021-04-12 13:18:29 +03:00
|
|
|
liftIO (catchAny run (pure . Left . GetJobResultsProblem))
|
|
|
|
where
|
2021-07-22 17:59:23 +03:00
|
|
|
-- https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get#query-parameters
|
2021-04-12 13:18:29 +03:00
|
|
|
url =
|
2021-09-24 01:56:37 +03:00
|
|
|
"GET https://bigquery.googleapis.com/bigquery/v2/projects/"
|
2022-02-09 18:26:14 +03:00
|
|
|
<> T.unpack (_bqProjectId conn)
|
2021-09-24 01:56:37 +03:00
|
|
|
<> "/queries/"
|
|
|
|
<> T.unpack jobId
|
2022-01-04 13:56:19 +03:00
|
|
|
<> "?alt=json&prettyPrint=false"
|
2021-09-24 01:56:37 +03:00
|
|
|
<> "&location="
|
|
|
|
<> T.unpack location
|
|
|
|
<> "&"
|
|
|
|
<> T.unpack (encodeParams extraParameters)
|
2021-04-12 13:18:29 +03:00
|
|
|
run = do
|
2021-09-24 01:56:37 +03:00
|
|
|
let req =
|
|
|
|
setRequestHeader "Content-Type" ["application/json"] $
|
|
|
|
parseRequest_ url
|
2022-02-09 18:26:14 +03:00
|
|
|
eResp <- runBigQuery conn req
|
2021-04-12 13:18:29 +03:00
|
|
|
case eResp of
|
|
|
|
Left e -> pure (Left (ExecuteRunBigQueryProblem e))
|
|
|
|
Right resp ->
|
|
|
|
case getResponseStatusCode resp of
|
|
|
|
200 -> case Aeson.eitherDecode (getResponseBody resp) of
|
2021-09-24 01:56:37 +03:00
|
|
|
Left e -> pure (Left (GetJobDecodeProblem e))
|
|
|
|
Right results -> pure (Right results)
|
|
|
|
_ -> do
|
|
|
|
pure $ Left $ RESTRequestNonOK (getResponseStatus resp) $ lbsToTxt $ getResponseBody resp
|
2021-04-12 13:18:29 +03:00
|
|
|
extraParameters = pageTokenParam
|
|
|
|
where
|
|
|
|
pageTokenParam =
|
|
|
|
case pageToken of
|
2021-09-24 01:56:37 +03:00
|
|
|
Nothing -> []
|
2021-04-12 13:18:29 +03:00
|
|
|
Just token -> [("pageToken", token)]
|
|
|
|
encodeParams = T.intercalate "&" . map (\(k, v) -> k <> "=" <> v)
|
|
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- Creating jobs
|
|
|
|
|
|
|
|
data Job = Job
|
2021-09-24 01:56:37 +03:00
|
|
|
{ state :: !Text,
|
|
|
|
jobId :: !Text,
|
|
|
|
location :: !Text
|
|
|
|
}
|
|
|
|
deriving (Show)
|
2021-04-12 13:18:29 +03:00
|
|
|
|
|
|
|
instance Aeson.FromJSON Job where
|
|
|
|
parseJSON =
|
|
|
|
Aeson.withObject
|
|
|
|
"Job"
|
2021-09-24 01:56:37 +03:00
|
|
|
( \o -> do
|
|
|
|
kind <- o .: "kind"
|
|
|
|
if kind == ("bigquery#job" :: Text)
|
|
|
|
then do
|
|
|
|
state <- do
|
|
|
|
status <- o .: "status"
|
|
|
|
status .: "state"
|
|
|
|
(jobId, location) <- do
|
|
|
|
ref <- o .: "jobReference"
|
|
|
|
-- 'location' is needed in addition to 'jobId' to query a job's
|
|
|
|
-- status
|
|
|
|
(,) <$> ref .: "jobId" <*> ref .: "location"
|
|
|
|
pure Job {state, jobId, location}
|
|
|
|
else fail ("Invalid kind: " <> show kind)
|
|
|
|
)
|
2021-04-12 13:18:29 +03:00
|
|
|
|
|
|
|
-- | Create a job asynchronously.
|
2022-02-09 18:26:14 +03:00
|
|
|
createQueryJob :: MonadIO m => BigQueryConnection -> BigQuery -> m (Either ExecuteProblem Job)
|
|
|
|
createQueryJob conn BigQuery {..} =
|
2021-09-24 01:56:37 +03:00
|
|
|
liftIO
|
|
|
|
( do
|
|
|
|
-- putStrLn (LT.unpack query)
|
|
|
|
catchAny run (pure . Left . CreateQueryJobProblem)
|
|
|
|
)
|
2021-04-12 13:18:29 +03:00
|
|
|
where
|
|
|
|
run = do
|
2021-09-24 01:56:37 +03:00
|
|
|
let url =
|
|
|
|
"POST https://content-bigquery.googleapis.com/bigquery/v2/projects/"
|
2022-02-09 18:26:14 +03:00
|
|
|
<> T.unpack (_bqProjectId conn)
|
2022-01-04 13:56:19 +03:00
|
|
|
<> "/jobs?alt=json&prettyPrint=false"
|
2021-09-24 01:56:37 +03:00
|
|
|
let req =
|
|
|
|
setRequestHeader "Content-Type" ["application/json"] $
|
|
|
|
setRequestBodyLBS body $
|
|
|
|
parseRequest_ url
|
2022-02-09 18:26:14 +03:00
|
|
|
eResp <- runBigQuery conn req
|
2021-04-12 13:18:29 +03:00
|
|
|
case eResp of
|
|
|
|
Left e -> pure (Left (ExecuteRunBigQueryProblem e))
|
|
|
|
Right resp ->
|
|
|
|
case getResponseStatusCode resp of
|
|
|
|
200 ->
|
|
|
|
case Aeson.eitherDecode (getResponseBody resp) of
|
2021-09-24 01:56:37 +03:00
|
|
|
Left e -> pure (Left (CreateQueryJobDecodeProblem e))
|
2021-04-12 13:18:29 +03:00
|
|
|
Right job -> pure (Right job)
|
|
|
|
_ -> do
|
2021-09-24 01:56:37 +03:00
|
|
|
pure $ Left $ RESTRequestNonOK (getResponseStatus resp) $ lbsToTxt $ getResponseBody resp
|
2021-04-12 13:18:29 +03:00
|
|
|
body =
|
|
|
|
Aeson.encode
|
2021-09-24 01:56:37 +03:00
|
|
|
( Aeson.object
|
|
|
|
[ "configuration"
|
|
|
|
.= Aeson.object
|
|
|
|
[ "jobType" .= "QUERY",
|
|
|
|
"query"
|
|
|
|
.= Aeson.object
|
|
|
|
[ "query" .= query,
|
|
|
|
"useLegacySql" .= False, -- Important, it makes `quotes` work properly.
|
|
|
|
"parameterMode" .= "NAMED",
|
|
|
|
"queryParameters"
|
|
|
|
.= map
|
|
|
|
( \(name, Parameter {..}) ->
|
|
|
|
Aeson.object
|
|
|
|
[ "name" .= Aeson.toJSON name,
|
|
|
|
"parameterType" .= Aeson.toJSON typ,
|
|
|
|
"parameterValue" .= valueToBigQueryJson value
|
|
|
|
]
|
|
|
|
)
|
|
|
|
(OMap.toList parameters)
|
|
|
|
]
|
|
|
|
]
|
|
|
|
]
|
|
|
|
)
|
2021-04-12 13:18:29 +03:00
|
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- Consuming recordset from big query
|
|
|
|
|
2021-06-28 16:29:48 +03:00
|
|
|
parseRecordSetPayload :: Aeson.Object -> Aeson.Parser RecordSet
|
|
|
|
parseRecordSetPayload resp = do
|
2021-10-01 21:29:03 +03:00
|
|
|
mSchema <- resp .:? "schema"
|
2021-10-12 20:58:46 +03:00
|
|
|
columns <- maybe (pure V.empty) (.: "fields") mSchema :: Aeson.Parser (Vector BigQueryField)
|
|
|
|
rowsJSON <- fmap (fromMaybe V.empty) (resp .:? "rows" :: Aeson.Parser (Maybe (Vector Aeson.Value)))
|
2021-04-12 13:18:29 +03:00
|
|
|
rows <-
|
|
|
|
V.imapM
|
|
|
|
(\i row -> parseRow columns row Aeson.<?> Aeson.Index i)
|
2021-09-24 01:56:37 +03:00
|
|
|
rowsJSON
|
|
|
|
Aeson.<?> Aeson.Key "rows"
|
2021-06-28 16:29:48 +03:00
|
|
|
pure RecordSet {wantedFields = Nothing, rows}
|
2021-04-12 13:18:29 +03:00
|
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- Schema-driven JSON deserialization
|
|
|
|
|
2021-06-28 16:29:48 +03:00
|
|
|
parseRow :: Vector BigQueryField -> Aeson.Value -> Aeson.Parser (InsOrdHashMap FieldNameText OutputValue)
|
2021-04-12 13:18:29 +03:00
|
|
|
parseRow columnTypes value = do
|
|
|
|
result <- parseBigQueryRow columnTypes value
|
|
|
|
case result of
|
|
|
|
RecordOutputValue row -> pure row
|
2021-09-24 01:56:37 +03:00
|
|
|
_ -> fail ("Expected a record when parsing a top-level row: " ++ show value)
|
2021-04-12 13:18:29 +03:00
|
|
|
|
|
|
|
-- | Parse a row, which at the top-level of the "rows" output has no
|
|
|
|
-- {"v":..} wrapper. But when appearing nestedly, does have the
|
|
|
|
-- wrapper. See 'parseBigQueryValue'.
|
|
|
|
parseBigQueryRow :: Vector BigQueryField -> Aeson.Value -> Aeson.Parser OutputValue
|
|
|
|
parseBigQueryRow columnTypes =
|
|
|
|
Aeson.withObject
|
|
|
|
"RECORD"
|
2021-09-24 01:56:37 +03:00
|
|
|
( \o -> do
|
|
|
|
fields <- o .: "f" Aeson.<?> Aeson.Key "RECORD"
|
|
|
|
values <-
|
|
|
|
sequence
|
|
|
|
( V.izipWith
|
|
|
|
( \i typ field ->
|
|
|
|
parseBigQueryField typ field Aeson.<?> Aeson.Index i
|
|
|
|
)
|
|
|
|
columnTypes
|
|
|
|
fields
|
|
|
|
)
|
|
|
|
Aeson.<?> Aeson.Key "f"
|
|
|
|
pure (RecordOutputValue (OMap.fromList (V.toList values)))
|
|
|
|
)
|
2021-04-12 13:18:29 +03:00
|
|
|
|
|
|
|
parseBigQueryValue :: IsNullable -> BigQueryFieldType -> Aeson.Value -> Aeson.Parser OutputValue
|
|
|
|
parseBigQueryValue isNullable fieldType object =
|
|
|
|
case fieldType of
|
|
|
|
FieldSTRUCT types ->
|
|
|
|
has_v isNullable (parseBigQueryRow types) object Aeson.<?> Aeson.Key "RECORD"
|
|
|
|
FieldDECIMAL ->
|
2021-09-24 01:56:37 +03:00
|
|
|
has_v isNullable (fmap DecimalOutputValue . Aeson.parseJSON) object
|
|
|
|
Aeson.<?> Aeson.Key "DECIMAL"
|
2021-04-12 13:18:29 +03:00
|
|
|
FieldBIGDECIMAL ->
|
2021-09-24 01:56:37 +03:00
|
|
|
has_v isNullable (fmap BigDecimalOutputValue . Aeson.parseJSON) object
|
|
|
|
Aeson.<?> Aeson.Key "BIGDECIMAL"
|
2021-04-12 13:18:29 +03:00
|
|
|
FieldINTEGER ->
|
2021-09-24 01:56:37 +03:00
|
|
|
has_v isNullable (fmap IntegerOutputValue . Aeson.parseJSON) object
|
|
|
|
Aeson.<?> Aeson.Key "INTEGER"
|
2021-04-12 13:18:29 +03:00
|
|
|
FieldDATE ->
|
2021-09-24 01:56:37 +03:00
|
|
|
has_v isNullable (fmap DateOutputValue . Aeson.parseJSON) object
|
|
|
|
Aeson.<?> Aeson.Key "DATE"
|
2021-04-12 13:18:29 +03:00
|
|
|
FieldTIME ->
|
2021-09-24 01:56:37 +03:00
|
|
|
has_v isNullable (fmap TimeOutputValue . Aeson.parseJSON) object
|
|
|
|
Aeson.<?> Aeson.Key "TIME"
|
2021-04-12 13:18:29 +03:00
|
|
|
FieldDATETIME ->
|
2021-09-24 01:56:37 +03:00
|
|
|
has_v isNullable (fmap DatetimeOutputValue . Aeson.parseJSON) object
|
|
|
|
Aeson.<?> Aeson.Key "DATETIME"
|
2021-04-12 13:18:29 +03:00
|
|
|
FieldTIMESTAMP ->
|
2021-09-24 01:56:37 +03:00
|
|
|
has_v isNullable (fmap (TimestampOutputValue . Timestamp . utctimeToISO8601Text) . Aeson.withText "FieldTIMESTAMP" textToUTCTime) object
|
|
|
|
Aeson.<?> Aeson.Key "TIMESTAMP"
|
2021-04-12 13:18:29 +03:00
|
|
|
FieldGEOGRAPHY ->
|
2021-09-24 01:56:37 +03:00
|
|
|
has_v isNullable (fmap GeographyOutputValue . Aeson.parseJSON) object
|
|
|
|
Aeson.<?> Aeson.Key "GEOGRAPHY"
|
2021-04-12 13:18:29 +03:00
|
|
|
FieldFLOAT ->
|
2021-09-24 01:56:37 +03:00
|
|
|
has_v isNullable (fmap FloatOutputValue . Aeson.parseJSON) object
|
|
|
|
Aeson.<?> Aeson.Key "FLOAT"
|
2021-04-12 13:18:29 +03:00
|
|
|
FieldBOOL ->
|
2021-09-24 01:56:37 +03:00
|
|
|
has_v isNullable (fmap (BoolOutputValue . (== "true")) . Aeson.parseJSON) object
|
|
|
|
Aeson.<?> Aeson.Key "BOOL"
|
2021-04-12 13:18:29 +03:00
|
|
|
FieldSTRING ->
|
2021-09-24 01:56:37 +03:00
|
|
|
has_v isNullable (fmap TextOutputValue . Aeson.parseJSON) object
|
|
|
|
Aeson.<?> Aeson.Key "STRING"
|
2021-04-12 13:18:29 +03:00
|
|
|
FieldBYTES ->
|
2021-09-24 01:56:37 +03:00
|
|
|
has_v isNullable (fmap BytesOutputValue . Aeson.parseJSON) object
|
|
|
|
Aeson.<?> Aeson.Key "BYTES"
|
2021-04-12 13:18:29 +03:00
|
|
|
|
2021-06-28 16:29:48 +03:00
|
|
|
parseBigQueryField :: BigQueryField -> Aeson.Value -> Aeson.Parser (FieldNameText, OutputValue)
|
2021-04-12 13:18:29 +03:00
|
|
|
parseBigQueryField BigQueryField {name, typ, mode} value1 =
|
|
|
|
case mode of
|
|
|
|
Repeated ->
|
2021-09-24 01:56:37 +03:00
|
|
|
( do
|
|
|
|
values <- has_v_generic Aeson.parseJSON value1
|
2021-04-12 13:18:29 +03:00
|
|
|
outputs <-
|
|
|
|
V.imapM
|
2021-09-24 01:56:37 +03:00
|
|
|
( \i value2 ->
|
|
|
|
parseBigQueryValue IsRequired typ value2
|
|
|
|
Aeson.<?> Aeson.Index i
|
|
|
|
)
|
2021-04-12 13:18:29 +03:00
|
|
|
values
|
2021-09-24 01:56:37 +03:00
|
|
|
pure (name, ArrayOutputValue outputs)
|
|
|
|
)
|
|
|
|
Aeson.<?> Aeson.Key "REPEATED"
|
2021-04-12 13:18:29 +03:00
|
|
|
Nullable -> do
|
|
|
|
output <-
|
|
|
|
parseBigQueryValue IsNullable typ value1 Aeson.<?> Aeson.Key "NULLABLE"
|
|
|
|
pure (name, output)
|
|
|
|
NotNullable -> do
|
|
|
|
output <-
|
|
|
|
parseBigQueryValue IsRequired typ value1 Aeson.<?> Aeson.Key "REQUIRED"
|
|
|
|
pure (name, output)
|
|
|
|
|
|
|
|
-- Every value, after the top-level row, is wrapped in this.
|
|
|
|
has_v ::
|
2021-09-24 01:56:37 +03:00
|
|
|
IsNullable ->
|
|
|
|
(Aeson.Value -> Aeson.Parser OutputValue) ->
|
|
|
|
Aeson.Value ->
|
|
|
|
Aeson.Parser OutputValue
|
2021-04-12 13:18:29 +03:00
|
|
|
has_v isNullable f =
|
|
|
|
Aeson.withObject
|
|
|
|
"HAS_V"
|
2021-09-24 01:56:37 +03:00
|
|
|
( \o ->
|
|
|
|
o .: "v" >>= \v ->
|
|
|
|
case v of
|
|
|
|
Aeson.Null
|
|
|
|
| IsNullable <- isNullable -> pure NullOutputValue
|
|
|
|
_ -> f v Aeson.<?> Aeson.Key "v"
|
|
|
|
)
|
2021-04-12 13:18:29 +03:00
|
|
|
|
|
|
|
-- Every value, after the top-level row, is wrapped in this.
|
|
|
|
has_v_generic ::
|
2021-09-24 01:56:37 +03:00
|
|
|
(Aeson.Value -> Aeson.Parser a) ->
|
|
|
|
Aeson.Value ->
|
|
|
|
Aeson.Parser a
|
2021-04-12 13:18:29 +03:00
|
|
|
has_v_generic f =
|
|
|
|
Aeson.withObject
|
|
|
|
"HAS_V"
|
|
|
|
(\o -> o .: "v" >>= \v -> (f v Aeson.<?> Aeson.Key "v"))
|
|
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- Generic JSON deserialization
|
|
|
|
|
|
|
|
instance Aeson.ToJSON BigQueryType where
|
|
|
|
toJSON =
|
|
|
|
\case
|
2021-09-24 01:56:37 +03:00
|
|
|
ARRAY t -> Aeson.object ["type" .= ("ARRAY" :: Text), "arrayType" .= t]
|
|
|
|
DECIMAL -> atomic "NUMERIC"
|
2021-04-12 13:18:29 +03:00
|
|
|
BIGDECIMAL -> atomic "BIGNUMERIC"
|
2021-09-24 01:56:37 +03:00
|
|
|
INTEGER -> atomic "INTEGER"
|
|
|
|
DATE -> atomic "DATE"
|
|
|
|
TIME -> atomic "TIME"
|
|
|
|
DATETIME -> atomic "DATETIME"
|
|
|
|
TIMESTAMP -> atomic "TIMESTAMP"
|
|
|
|
FLOAT -> atomic "FLOAT"
|
|
|
|
GEOGRAPHY -> atomic "GEOGRAPHY"
|
|
|
|
STRING -> atomic "STRING"
|
|
|
|
BYTES -> atomic "BYTES"
|
|
|
|
BOOL -> atomic "BOOL"
|
2021-04-12 13:18:29 +03:00
|
|
|
where
|
|
|
|
atomic ty = Aeson.object ["type" .= (ty :: Text)]
|
|
|
|
|
|
|
|
instance Aeson.FromJSON BigQueryField where
|
|
|
|
parseJSON =
|
|
|
|
Aeson.withObject
|
|
|
|
"BigQueryField"
|
2021-09-24 01:56:37 +03:00
|
|
|
( \o -> do
|
|
|
|
name <- o .: "name"
|
|
|
|
typ <-
|
|
|
|
do
|
|
|
|
flag :: Text <- o .: "type"
|
|
|
|
if
|
|
|
|
| flag == "NUMERIC" || flag == "DECIMAL" -> pure FieldDECIMAL
|
|
|
|
| flag == "BIGNUMERIC" || flag == "BIGDECIMAL" ->
|
|
|
|
pure FieldBIGDECIMAL
|
|
|
|
| flag == "INT64" || flag == "INTEGER" -> pure FieldINTEGER
|
|
|
|
| flag == "FLOAT64" || flag == "FLOAT" -> pure FieldFLOAT
|
|
|
|
| flag == "BOOLEAN" || flag == "BOOL" -> pure FieldBOOL
|
|
|
|
| flag == "STRING" -> pure FieldSTRING
|
|
|
|
| flag == "DATE" -> pure FieldDATE
|
|
|
|
| flag == "TIME" -> pure FieldTIME
|
|
|
|
| flag == "DATETIME" -> pure FieldDATETIME
|
|
|
|
| flag == "TIMESTAMP" -> pure FieldTIMESTAMP
|
|
|
|
| flag == "GEOGRAPHY" -> pure FieldGEOGRAPHY
|
|
|
|
| flag == "BYTES" -> pure FieldBYTES
|
|
|
|
| flag == "RECORD" || flag == "STRUCT" ->
|
|
|
|
do
|
|
|
|
fields <- o .: "fields"
|
2021-04-12 13:18:29 +03:00
|
|
|
pure (FieldSTRUCT fields)
|
2021-09-24 01:56:37 +03:00
|
|
|
| otherwise -> fail ("Unsupported field type: " ++ show flag)
|
2021-10-01 21:29:03 +03:00
|
|
|
mode <- o .:? "mode" .!= Nullable
|
2021-09-24 01:56:37 +03:00
|
|
|
pure BigQueryField {..}
|
|
|
|
)
|
2021-04-12 13:18:29 +03:00
|
|
|
|
|
|
|
instance Aeson.FromJSON Mode where
|
|
|
|
parseJSON j = do
|
|
|
|
s <- Aeson.parseJSON j
|
|
|
|
case s :: Text of
|
|
|
|
"NULLABLE" -> pure Nullable
|
|
|
|
"REPEATED" -> pure Repeated
|
2021-09-24 01:56:37 +03:00
|
|
|
_ -> pure NotNullable
|