graphql-engine/server/src-lib/Hasura/Backends/BigQuery/ToQuery.hs

574 lines
19 KiB
Haskell
Raw Normal View History

{-# OPTIONS_GHC -fno-warn-orphans #-}
-- | Convert the simple BigQuery AST to an SQL query: a 'Printer' tree that
-- can be rendered to query text together with a map of numbered parameters.
module Hasura.Backends.BigQuery.ToQuery
( fromSelect
, fromReselect
, fromExpression
, toBuilderFlat
, toBuilderPretty
, toTextFlat
, toTextPretty
, Printer(..)
, renderBuilderFlat
, renderBuilderPretty
, paramName
) where
import Hasura.Prelude hiding (second)
import qualified Data.HashMap.Strict.InsOrd as OMap
import qualified Data.List.NonEmpty as NE
import qualified Data.Text as T
import qualified Data.Text.Lazy as LT
import qualified Data.Text.Lazy.Builder as LT
import qualified Data.Vector as V
import Data.Aeson (ToJSON (..))
import Data.Bifunctor
import Data.Containers.ListUtils
import Data.List (intersperse)
import Data.String
import Data.Text.Lazy.Builder (Builder)
import Data.Tuple
import Hasura.Backends.BigQuery.Types
--------------------------------------------------------------------------------
-- Types
-- | A small document tree describing how a query is laid out. It is turned
-- into text by 'runBuilderFlat' and 'runBuilderPretty'.
data Printer
  = SeqPrinter [Printer]
    -- ^ Render the children one after another; children that render to
    -- nothing are dropped.
  | SepByPrinter Printer [Printer]
    -- ^ Render the children with the first printer placed between each
    -- pair; children that render to nothing are dropped beforehand.
  | NewlinePrinter
    -- ^ A line break plus indentation when pretty-printing, a single space
    -- when printing flat.
  | UnsafeTextPrinter Text
    -- ^ Text emitted verbatim, with no quoting or escaping.
  | IndentPrinter Int Printer
    -- ^ Render the child with the indentation level raised by the given
    -- amount.
  | ValuePrinter Value
    -- ^ A value rendered as a named parameter placeholder.
  deriving (Show, Eq)
-- | String literals are taken as raw, unescaped query text.
instance IsString Printer where
  fromString literal = UnsafeTextPrinter (T.pack literal)
-- | Place two printers directly next to each other.
(<+>) :: Printer -> Printer -> Printer
left <+> right = SeqPrinter [left, right]
--------------------------------------------------------------------------------
-- Instances
-- A debugging aid: the JSON form of an expression is simply its
-- pretty-printed SQL text. The instance is defined here (as an orphan)
-- because it avoids a circular dependency between this module and Types.hs.
instance ToJSON Expression where
  toJSON expression = toJSON (toTextPretty (fromExpression expression))
--------------------------------------------------------------------------------
-- Printer generators
-- | Render an 'Expression' as SQL.
--
-- Operands of prefix and infix operators are always wrapped in
-- parentheses, so the output never depends on SQL operator precedence.
-- An empty conjunction falls back to 'trueExpression' and an empty
-- disjunction to 'falseExpression'.
fromExpression :: Expression -> Printer
fromExpression =
  \case
    CastExpression e scalarType ->
      "CAST(" <+> fromExpression e <+> " AS " <+> fromScalarType scalarType <+> ")"
    InExpression e value ->
      "(" <+> fromExpression e <+> ") IN UNNEST(" <+> fromValue value <+> ")"
    JsonQueryExpression e -> "JSON_QUERY(" <+> fromExpression e <+> ")"
    JsonValueExpression e path ->
      "JSON_VALUE(" <+> fromExpression e <+> fromPath path <+> ")"
    ValueExpression value -> fromValue value
    AndExpression xs ->
      SepByPrinter
        (NewlinePrinter <+> "AND ")
        (parenthesizedOperands trueExpression xs)
    OrExpression xs ->
      -- The leading space keeps " OR " aligned with "AND " when pretty-printed.
      SepByPrinter
        (NewlinePrinter <+> " OR ")
        (parenthesizedOperands falseExpression xs)
    NotExpression expression ->
      -- The operand must be parenthesised: NOT binds tighter than AND/OR,
      -- so "NOT (a) AND (b)" would negate only the first operand.
      "NOT (" <+> fromExpression expression <+> ")"
    ExistsExpression select ->
      "EXISTS (" <+> IndentPrinter 9 (fromSelect select) <+> ")"
    IsNullExpression expression ->
      "(" <+> fromExpression expression <+> ") IS NULL"
    IsNotNullExpression expression ->
      "(" <+> fromExpression expression <+> ") IS NOT NULL"
    ColumnExpression fieldName -> fromFieldName fieldName
    EqualExpression x y ->
      "(" <+> fromExpression x <+> ") = (" <+> fromExpression y <+> ")"
    NotEqualExpression x y ->
      "(" <+> fromExpression x <+> ") != (" <+> fromExpression y <+> ")"
    ToStringExpression e -> "CONCAT(" <+> fromExpression e <+> ", '')"
    SelectExpression s -> "(" <+> IndentPrinter 1 (fromSelect s) <+> ")"
    OpExpression op x y ->
      "(" <+>
      fromExpression x <+>
      ") " <+> fromOp op <+> " (" <+> fromExpression y <+> ")"
    ConditionalProjection expression fieldName ->
      "(CASE WHEN(" <+> fromExpression expression <+>
      ") THEN " <+> fromFieldName fieldName <+>
      " ELSE NULL END)"
  where
    -- Render every operand inside parentheses; an empty operand list is
    -- replaced by the given neutral element.
    parenthesizedOperands :: Expression -> [Expression] -> [Printer]
    parenthesizedOperands neutral operands =
      map
        (\x -> "(" <+> fromExpression x <+> ")")
        (if null operands then [neutral] else operands)
-- | The SQL spelling of a scalar type, as used in @CAST@.
fromScalarType :: ScalarType -> Printer
fromScalarType scalarType =
  case scalarType of
    StringScalarType     -> "STRING"
    BytesScalarType      -> "BYTES"
    IntegerScalarType    -> "INT64"
    FloatScalarType      -> "FLOAT64"
    BoolScalarType       -> "BOOL"
    TimestampScalarType  -> "TIMESTAMP"
    DateScalarType       -> "DATE"
    TimeScalarType       -> "TIME"
    DatetimeScalarType   -> "DATETIME"
    GeographyScalarType  -> "GEOGRAPHY"
    StructScalarType     -> "STRUCT"
    DecimalScalarType    -> "DECIMAL"
    BigDecimalScalarType -> "BIGDECIMAL"
-- | The SQL symbol for a comparison operator.
fromOp :: Op -> Printer
fromOp op =
  case op of
    LessOp        -> "<"
    MoreOp        -> ">"
    MoreOrEqualOp -> ">="
    LessOrEqualOp -> "<="
-- | Render a JSON path as a trailing function argument: a comma followed
-- by the path passed as a string value (e.g. @$.field[0]@).
fromPath :: JsonPath -> Printer
fromPath path =
  ", " <+> fromExpression (ValueExpression (StringValue pathText))
  where
    pathText = LT.toStrict (LT.toLazyText (render path))

    -- Build the textual path from the root outwards.
    render :: JsonPath -> Builder
    render jsonPath =
      case jsonPath of
        RootPath -> "$"
        IndexPath parent index ->
          render parent <> "[" <> LT.fromString (show index) <> "]"
        FieldPath parent field ->
          render parent <> "." <> LT.fromText field
-- | Render a field as @`entity`.`field`@.
fromFieldName :: FieldName -> Printer
fromFieldName FieldName {fieldName, fieldNameEntity} =
  fromNameText fieldNameEntity <+> "." <+> fromNameText fieldName
-- | Render a complete @SELECT@ statement.
--
-- Clauses are emitted in the order SQL requires: @SELECT@, @FROM@, joins,
-- @WHERE@, @GROUP BY@, then @ORDER BY@ / @LIMIT@ / @OFFSET@. Clauses that
-- render to nothing are dropped by the printer engines.
fromSelect :: Select -> Printer
fromSelect Select {..} =
  SepByPrinter
    NewlinePrinter
    [ "SELECT " <+> IndentPrinter 7 projections
    , "FROM " <+> IndentPrinter 5 (fromFrom selectFrom)
    , SepByPrinter NewlinePrinter (map fromJoin selectJoins)
    , fromWhere selectWhere
    -- GROUP BY has to come before ORDER BY / LIMIT.
    , groupBy
    , fromOrderBys selectTop selectOffset selectOrderBy
    ]
  where
    -- The projection list, deduplicated by alias.
    projections =
      SepByPrinter
        ("," <+> NewlinePrinter)
        (map fromProjection (toList (cleanProjections selectProjections)))

    -- A single @LEFT OUTER JOIN ... AS ... ON (...)@ clause.
    fromJoin Join {..} =
      SeqPrinter
        [ "LEFT OUTER JOIN " <+>
          IndentPrinter 16 (fromJoinSource joinSource)
        , NewlinePrinter
        , "AS " <+> fromJoinAlias joinAlias
        , NewlinePrinter
        , "ON (" <+>
          IndentPrinter
            4
            -- Several join conditions must all hold, so they are combined
            -- with AND; a comma-separated list would not be a boolean.
            (SepByPrinter
               (NewlinePrinter <+> "AND ")
               (map fromOn joinOn)) <+>
          ")"
        ]

    -- The GROUP BY clause, or nothing when there are no grouping fields.
    groupBy =
      case selectGroupBy of
        [] -> ""
        fieldNames ->
          "GROUP BY " <+> SepByPrinter ", " (map fromFieldName fieldNames)
-- | Render one join condition as an equality between two fields.
fromOn :: (FieldName, FieldName) -> Printer
fromOn (leftField, rightField) =
  fromFieldName leftField <+> " = " <+> fromFieldName rightField
-- | Render the right-hand side of a join: a parenthesised sub-select.
fromJoinSource :: JoinSource -> Printer
fromJoinSource (JoinSelect select) = parens (fromSelect select)
-- We're not using existingJoins at the moment, which was used to
-- avoid re-joining on the same table twice.
-- fromJoinSource (JoinReselect reselect) = "(" <+> fromReselect reselect <+> ")"
-- | Render a 'Reselect': a @SELECT@ list followed by an optional @WHERE@
-- clause, with no @FROM@.
fromReselect :: Reselect -> Printer
fromReselect Reselect {..} =
  SepByPrinter NewlinePrinter [selectClause, fromWhere reselectWhere]
  where
    selectClause = "SELECT " <+> IndentPrinter 7 projectionList
    projectionList =
      SepByPrinter
        ("," <+> NewlinePrinter)
        (fromProjection <$> toList (cleanProjections reselectProjections))
-- | Render the @ORDER BY@, @LIMIT@ and @OFFSET@ part of a query.
--
-- Nothing at all is rendered when there is no limit, no offset and no
-- ordering, since an ORDER BY is wasteful if not needed.
fromOrderBys ::
     Top -> Maybe Expression -> Maybe (NonEmpty OrderBy) -> Printer
fromOrderBys top moffset morderBys
  | NoTop <- top, Nothing <- moffset, Nothing <- morderBys = ""
  | otherwise = SepByPrinter NewlinePrinter [orderByClause, limitClause]
  where
    orderByClause =
      case morderBys of
        Nothing -> ""
        Just orderBys ->
          SeqPrinter
            [ "ORDER BY "
            , SepByPrinter
                ("," <+> NewlinePrinter)
                (fromOrderBy <$> toList orderBys)
            ]

    limitClause =
      case (top, moffset) of
        (NoTop, Nothing) -> ""
        (NoTop, Just offset) ->
          -- OFFSET is not supported without a LIMIT, therefore we set
          -- LIMIT to the maximum integer value. Such a large number of
          -- rows (9 quintillion) would not be possible to service: 9223
          -- petabytes. No machine has such capacity at present.
          "LIMIT 9223372036854775807 /* Maximum */" <+>
          " OFFSET " <+> fromExpression offset
        (Top n, Nothing) -> limit n
        (Top n, Just offset) ->
          limit n <+> " OFFSET " <+> fromExpression offset

    limit n = "LIMIT " <+> fromValue (IntegerValue (intToInt64 n))
-- | Render a single ordering term: the parenthesised field, its direction
-- and, when specified, where nulls are placed.
fromOrderBy :: OrderBy -> Printer
fromOrderBy OrderBy {orderByFieldName, orderByOrder, orderByNullsOrder} =
  "(" <+>
  fromFieldName orderByFieldName <+>
  ") " <+>
  fromOrder orderByOrder <+>
  fromNullsOrder orderByNullsOrder
-- | The SQL keyword for a sort direction.
fromOrder :: Order -> Printer
fromOrder AscOrder  = "ASC"
fromOrder DescOrder = "DESC"
-- | The @NULLS FIRST@ / @NULLS LAST@ suffix (with its leading space), or
-- nothing when no particular placement was requested.
fromNullsOrder :: NullsOrder -> Printer
fromNullsOrder NullsAnyOrder = ""
fromNullsOrder NullsFirst    = " NULLS FIRST"
fromNullsOrder NullsLast     = " NULLS LAST"
-- | Render an entity alias as a quoted name.
fromJoinAlias :: EntityAlias -> Printer
fromJoinAlias alias =
  case alias of
    EntityAlias {entityAliasText = aliasText} -> fromNameText aliasText
-- | Render one item of a @SELECT@ list. Every projection except @*@ is
-- followed by its alias, which is appended by 'fromAliased'.
fromProjection :: Projection -> Printer
fromProjection projection =
  case projection of
    StarProjection -> "*"
    WindowProjection aliased ->
      fromAliased (fromWindowFunction <$> aliased)
    ExpressionProjection aliased ->
      fromAliased (fromExpression <$> aliased)
    FieldNameProjection aliased ->
      fromAliased (fromFieldName <$> aliased)
    AggregateProjection aliased ->
      fromAliased (fromAggregate <$> aliased)
    AggregateProjections aliased ->
      fromAliased (aggregatesStruct <$> aliased)
    ArrayAggProjection aliased ->
      fromAliased (fromArrayAgg <$> aliased)
    EntityProjection aliased ->
      fromAliased (entityStruct <$> aliased)
    ArrayEntityProjection entityAlias aliased ->
      fromAliased (arrayEntity entityAlias <$> aliased)
  where
    -- Several aliased aggregates bundled into one STRUCT.
    aggregatesStruct aggs =
      "STRUCT(" <+>
      IndentPrinter
        7
        (SepByPrinter
           ", "
           (fmap (fromAliased . fmap fromAggregate) (toList aggs))) <+>
      ")"

    -- Example:
    --   STRUCT(
    --     IFNULL(
    --       `aa_articles1`.`aggregate`,
    --       STRUCT(0 as count, struct(null as id) as sum)
    --     ) as aggregate
    --   ) AS `articles_aggregate`
    --
    -- The trailing (AS `articles_aggregate`) is not produced here; it comes
    -- from the 'fromAliased' call in the corresponding case branch.
    entityStruct :: [(FieldName, FieldOrigin)] -> Printer
    entityStruct fields =
      "STRUCT(" <+> (SepByPrinter ", " (map entityField fields)) <+> ")"

    -- One struct member: the field, defaulting to a value derived from its
    -- origin when it is NULL.
    entityField :: (FieldName, FieldOrigin) -> Printer
    entityField (fName@FieldName {fieldName}, fieldOrigin) =
      "IFNULL(" <+> fromFieldName fName <+> ", " <+> fromFieldOrigin fieldOrigin <+>
      ") AS " <+> fromNameText fieldName

    -- An array of structs selected from the @agg@ field of the joined entity.
    arrayEntity entityAlias fieldNames =
      "ARRAY(SELECT AS STRUCT " <+>
      IndentPrinter
        7
        (SepByPrinter ", " (fmap fromFieldNameNaked (toList fieldNames))) <+>
      " FROM " <+> fromJoinAlias entityAlias <+> ".agg)"

    -- A field name without its entity qualifier.
    fromFieldNameNaked :: FieldName -> Printer
    fromFieldNameNaked FieldName {fieldName} = fromNameText fieldName
-- | Render the fallback value for a field, based on where it came from:
-- @NULL@ for a field with no origin, or a struct of "empty" aggregate
-- results for an aggregate.
fromFieldOrigin :: FieldOrigin -> Printer
fromFieldOrigin origin =
  case origin of
    NoOrigin -> "NULL"
    AggregateOrigin aliasedAggregates ->
      -- Example: "0 AS count, STRUCT(NULL AS id) AS sum"
      "STRUCT(" <+>
      SepByPrinter
        ", "
        [ fromAliased (fromNullAggregate <$> aliasedAggregate)
        | aliasedAggregate <- aliasedAggregates
        ] <+>
      ")"
-- | Render a @ROW_NUMBER()@ window function partitioned by the given
-- fields, with an optional ordering inside the window.
fromWindowFunction :: WindowFunction -> Printer
fromWindowFunction (RowNumberOverPartitionBy partitionFields morderBys) =
  "ROW_NUMBER() OVER(PARTITION BY " <+>
  SepByPrinter ", " (fromFieldName <$> toList partitionFields) <+>
  ordering <+>
  ")"
  where
    -- The ordering is separated from the partition list by a space, and is
    -- omitted entirely when no ordering was given.
    ordering =
      case morderBys of
        Nothing -> ""
        Just _ -> " " <+> fromOrderBys NoTop Nothing morderBys
-- | Render an @ARRAY_AGG(STRUCT(...) ...)@ aggregation, including its
-- ordering and limit.
fromArrayAgg :: ArrayAgg -> Printer
fromArrayAgg ArrayAgg {..} =
  SeqPrinter ["ARRAY_AGG(", IndentPrinter 10 body, ")"]
  where
    body = SepByPrinter " " [struct, ordering]

    struct = "STRUCT(" <+> IndentPrinter 7 projections <+> ")"

    projections =
      SepByPrinter
        ("," <+> NewlinePrinter)
        (fromProjection <$> toList (cleanProjections arrayAggProjections))

    ordering =
      fromOrderBys
        arrayAggTop
        Nothing
        (fmap (fmap withoutNullsOrder) arrayAggOrderBy)

    -- The nulls placement is dropped because BigQuery reports:
    -- > NULLS FIRST not supported with descending sort order in aggregate functions
    -- And the same error with 'ascending'.
    withoutNullsOrder orderBy = orderBy {orderByNullsOrder = NullsAnyOrder}
-- | Render the value an aggregate takes when there is nothing to
-- aggregate: @0@ for a count, @NULL@ otherwise (a struct of @NULL@s for a
-- group of operations).
fromNullAggregate :: Aggregate -> Printer
fromNullAggregate aggregate =
  case aggregate of
    CountAggregate _ -> "0"
    OpAggregate _ _ -> "NULL"
    TextAggregate _ -> "NULL"
    OpAggregates _ exps ->
      "STRUCT(" <+> SepByPrinter ", " (map nullField (toList exps)) <+> ")"
  where
    nullField (alias, _) = "NULL AS " <+> fromNameText alias
-- | Render an aggregate: a @COUNT@, a single operation applied to an
-- expression, a struct of aliased applications of one operation, or a
-- plain text value.
fromAggregate :: Aggregate -> Printer
fromAggregate aggregate =
  case aggregate of
    CountAggregate countable ->
      "COUNT(" <+> fromCountable countable <+> ")"
    OpAggregate text arg ->
      UnsafeTextPrinter text <+> "(" <+> fromExpression arg <+> ")"
    OpAggregates text args ->
      "STRUCT(" <+>
      IndentPrinter 7 (SepByPrinter ", " (aliasedOp text <$> toList args)) <+>
      ")"
    TextAggregate text ->
      fromExpression (ValueExpression (StringValue text))
  where
    -- The operation applied to one argument, followed by its alias.
    aliasedOp text (alias, arg) =
      UnsafeTextPrinter text <+>
      "(" <+> fromExpression arg <+> ") AS " <+> fromNameText alias
-- | Render the argument of a @COUNT(...)@: @*@, a list of fields, or a
-- @DISTINCT@ list of fields.
fromCountable :: Countable FieldName -> Printer
fromCountable countable =
  case countable of
    StarCountable -> "*"
    NonNullFieldCountable fields -> fieldList fields
    DistinctCountable fields -> "DISTINCT " <+> fieldList fields
  where
    fieldList fields = SepByPrinter ", " (fromFieldName <$> toList fields)
-- | Render a @WHERE@ clause as the conjunction of its expressions.
--
-- Expressions that collapse to the literal true are left out, and the
-- clause is omitted entirely when nothing remains.
fromWhere :: Where -> Printer
fromWhere (Where expressions) =
  case filter isInformative expressions of
    [] -> ""
    kept ->
      "WHERE " <+> IndentPrinter 6 (fromExpression (AndExpression kept))
  where
    isInformative expression = collapse expression /= trueExpression

    -- Strip single-element AND/OR wrappers and treat an empty AND as true.
    collapse expression =
      case expression of
        AndExpression [x] -> collapse x
        AndExpression [] -> trueExpression
        OrExpression [x] -> collapse x
        _ -> expression
-- | Render the source of a @FROM@ clause together with its alias: either a
-- qualified table name or a parenthesised sub-select.
fromFrom :: From -> Printer
fromFrom source =
  case source of
    FromQualifiedTable aliasedTable ->
      fromAliased (fromTableName <$> aliasedTable)
    FromSelect aliasedSelect ->
      fromAliased ((\select -> parens (fromSelect select)) <$> aliasedSelect)
-- | Render a table as @`schema`.`table`@.
fromTableName :: TableName -> Printer
fromTableName TableName {tableName = table, tableNameSchema = schema} =
  fromNameText schema <+> "." <+> fromNameText table
-- | Append @ AS `alias`@ to an already-rendered thing.
fromAliased :: Aliased Printer -> Printer
fromAliased Aliased {aliasedThing, aliasedAlias} =
  aliasedThing <+> (" AS " <+> fromNameText aliasedAlias)
-- | Render a name as a backtick-quoted identifier.
--
-- Backslashes and backticks inside the name are backslash-escaped, because
-- quoted identifiers use the same escape sequences as string literals.
-- Without this a backtick in the name would end the identifier early and
-- the remainder of the name would be read as SQL.
fromNameText :: Text -> Printer
fromNameText name = UnsafeTextPrinter ("`" <> escape name <> "`")
  where
    -- Backslashes are escaped first so that the backslashes introduced for
    -- backticks are not themselves doubled.
    escape = T.replace "`" "\\`" . T.replace "\\" "\\\\"
-- | The boolean literal true, as a value expression.
trueExpression :: Expression
trueExpression = ValueExpression $ BoolValue True
-- | The boolean literal false, as a value expression.
falseExpression :: Expression
falseExpression = ValueExpression $ BoolValue False
-- | Embed a value in the printer tree; the printer engines decide how it
-- is rendered.
fromValue :: Value -> Printer
fromValue value = ValuePrinter value
-- | Wrap a printer in parentheses, indenting its contents by one column to
-- account for the opening parenthesis.
parens :: Printer -> Printer
parens inner = "(" <+> IndentPrinter 1 inner <+> ")"
--------------------------------------------------------------------------------
-- Quick and easy query printer
-- | Render on a single line, discarding the collected parameter map.
toBuilderFlat :: Printer -> Builder
toBuilderFlat printer = evalState (runBuilderFlat printer) mempty
-- | Render with line breaks and indentation, discarding the collected
-- parameter map.
toBuilderPretty :: Printer -> Builder
toBuilderPretty printer = evalState (runBuilderPretty printer) mempty
-- | 'toBuilderPretty', forced into strict 'Text'.
toTextPretty :: Printer -> Text
toTextPretty printer = LT.toStrict (LT.toLazyText (toBuilderPretty printer))
-- | 'toBuilderFlat', forced into strict 'Text'.
toTextFlat :: Printer -> Text
toTextFlat printer = LT.toStrict (LT.toLazyText (toBuilderFlat printer))
--------------------------------------------------------------------------------
-- Printer ready for consumption
-- | Produces a single-line query with holes (named parameters), and a
-- mapping from each hole's number to the value that fills it.
renderBuilderFlat :: Printer -> (Builder, InsOrdHashMap Int Value)
renderBuilderFlat printer = (query, invert valueToIndex)
  where
    (query, valueToIndex) = runState (runBuilderFlat printer) mempty
    -- The engine tracks value -> index; callers want index -> value.
    invert = OMap.fromList . map swap . OMap.toList
-- | Produces a pretty-printed query with holes (named parameters), and a
-- mapping from each hole's number to the value that fills it.
renderBuilderPretty :: Printer -> (Builder, InsOrdHashMap Int Value)
renderBuilderPretty printer = (query, invert valueToIndex)
  where
    (query, valueToIndex) = runState (runBuilderPretty printer) mempty
    -- The engine tracks value -> index; callers want index -> value.
    invert = OMap.fromList . map swap . OMap.toList
--------------------------------------------------------------------------------
-- Real printer engines
-- | The name of the numbered query parameter, e.g. @param0@ (without the
-- leading @\@@ used when it is referenced in the query text).
paramName :: Int -> Builder
paramName next = fromString ("param" <> show next)
-- | Render a printer on a single line: newlines become single spaces and
-- indentation is ignored.
--
-- Values are replaced by @\@paramN@ placeholders. The state maps each value
-- seen so far to its parameter number, so equal values share a placeholder.
-- An empty array is rendered inline as @[]@ rather than as a parameter.
runBuilderFlat :: Printer -> State (InsOrdHashMap Value Int) Builder
runBuilderFlat = render
  where
    render :: Printer -> State (InsOrdHashMap Value Int) Builder
    render printer =
      case printer of
        UnsafeTextPrinter raw -> pure (LT.fromText raw)
        SeqPrinter parts -> mconcat <$> renderAll parts
        SepByPrinter separator parts -> do
          sep <- render separator
          mconcat . intersperse sep <$> renderAll parts
        NewlinePrinter -> pure " "
        IndentPrinter _ inner -> render inner
        ValuePrinter (ArrayValue elems)
          | V.null elems -> pure "[]"
        ValuePrinter value -> do
          known <- gets (OMap.lookup value)
          index <-
            case known of
              Just existing -> pure existing
              Nothing -> do
                -- Parameters are numbered in order of first appearance.
                fresh <- gets OMap.size
                modify (OMap.insert value fresh)
                pure fresh
          pure ("@" <> paramName index)

    -- Render each part in order, dropping the ones that produce no output.
    renderAll parts = filter (/= mempty) <$> traverse render parts
-- | Render a printer with line breaks: each newline is followed by as many
-- spaces as the current indentation level.
--
-- Values are replaced by @\@paramN@ placeholders. The state maps each value
-- seen so far to its parameter number, so equal values share a placeholder.
-- An empty array is rendered inline as @[]@ rather than as a parameter.
runBuilderPretty :: Printer -> State (InsOrdHashMap Value Int) Builder
runBuilderPretty = render 0
  where
    render :: Int -> Printer -> State (InsOrdHashMap Value Int) Builder
    render depth printer =
      case printer of
        UnsafeTextPrinter raw -> pure (LT.fromText raw)
        SeqPrinter parts -> mconcat <$> renderAll depth parts
        SepByPrinter separator parts -> do
          sep <- render depth separator
          mconcat . intersperse sep <$> renderAll depth parts
        NewlinePrinter -> pure ("\n" <> indentation depth)
        IndentPrinter extra inner -> render (depth + extra) inner
        ValuePrinter (ArrayValue elems)
          | V.null elems -> pure "[]"
        ValuePrinter value -> do
          known <- gets (OMap.lookup value)
          index <-
            case known of
              Just existing -> pure existing
              Nothing -> do
                -- Parameters are numbered in order of first appearance.
                fresh <- gets OMap.size
                modify (OMap.insert value fresh)
                pure fresh
          pure ("@" <> paramName index)

    -- Render each part in order, dropping the ones that produce no output.
    renderAll depth parts =
      filter (/= mempty) <$> traverse (render depth) parts

    indentation width = LT.fromText (T.replicate width " ")
--------------------------------------------------------------------------------
-- Projection cleanup
-- | Drop projections whose alias has already been seen.
--
-- TODO: For now, we're littering this around where projections are
-- built. I'd prefer to use ordered set, or else a newtype wrapper to
-- prove it's been cleaned. But that would interrupt code
-- elsewhere. For now, this is an acceptable solution.
-- Plus, a warning issued about duplicates might be useful.
cleanProjections :: NonEmpty Projection -> NonEmpty Projection
cleanProjections projections =
  -- De-duplicating a non-empty list cannot leave it empty, so converting
  -- back with 'NE.fromList' does not fail here.
  NE.fromList (nubOrdOn projectionAlias (NE.toList projections))