Chris Done 67a9045328 Bigquery/cleanups
A pull request for cleaning up small issues, bugs, redundancies and missing things in the BigQuery backend.


1. Remove duplicate projection fields - BigQuery rejects these.
2. Add order_by to the test suite cases, as it was returning inconsistent results.
3. Add lots of comments in FromIr explaining how the dataloader approach is supported.
4. Produce the correct output structure for aggregates:
   a. Should be a singleton object for a top-level aggregate query.
   b. Should have appropriate aggregate{} and nodes{} labels.
   c. **Support for nodes** (via array_agg).
5. Smooth over support of array aggregates by removing the fields used for joining with an explicit projection of each wanted field.

Co-authored-by: Vamshi Surabhi <>
GitOrigin-RevId: cd3899f4667770a27055f94988ef2a6d5808f1f5
2021-06-15 08:59:11 +00:00

561 lines
19 KiB

-- | Convert the simple BigQuery AST to an SQL query, ready to be passed
-- to the odbc package's query/exec functions.
module Hasura.Backends.BigQuery.ToQuery
( fromSelect
, fromReselect
, fromExpression
, toBuilderFlat
, toBuilderPretty
, toTextFlat
, toTextPretty
, Printer(..)
, renderBuilderFlat
, renderBuilderPretty
, paramName
) where
import Control.Monad.State.Strict
import Data.Bifunctor
import Data.Containers.ListUtils
import Data.Foldable
import qualified Data.HashMap.Strict.InsOrd as OMap
import Data.List (intersperse)
import qualified Data.List.NonEmpty as NE
import Data.Maybe
import Data.String
import qualified Data.Text as T
import qualified Data.Text.Lazy as LT
import Data.Text.Lazy.Builder (Builder)
import qualified Data.Text.Lazy.Builder as LT
import Data.Tuple
import qualified Data.Vector as V
import Hasura.Backends.BigQuery.Types
import Hasura.Prelude hiding (second)
-- Types
-- | A small document type from which the SQL text is produced.
--
-- The same 'Printer' can be rendered on a single line ('runBuilderFlat') or
-- with line breaks and indentation ('runBuilderPretty').
data Printer
  = SeqPrinter [Printer]
    -- ^ Render every child in order and concatenate the results.
  | SepByPrinter Printer [Printer]
    -- ^ Render the children with the first argument placed between them.
  | NewlinePrinter
    -- ^ A line break; rendered as a single space in flat mode.
  | UnsafeTextPrinter Text
    -- ^ Emit the text verbatim, with no quoting or escaping.
  | IndentPrinter Int Printer
    -- ^ Render the child with the indentation level raised by the given
    -- amount.
  | ValuePrinter Value
    -- ^ Emit a named-parameter placeholder for the value (an empty array is
    -- written inline as @[]@ instead).
  deriving (Show, Eq)
-- | A string literal used as a 'Printer' is emitted verbatim, exactly like
-- 'UnsafeTextPrinter'.
instance IsString Printer where
  fromString literal = UnsafeTextPrinter (fromString literal)
-- | Put two printers next to each other by wrapping them in a two-element
-- 'SeqPrinter'.
(<+>) :: Printer -> Printer -> Printer
left <+> right = SeqPrinter [left, right]
-- Printer generators
-- | Render an 'Expression' as SQL text.
--
-- Sub-expressions of operators are wrapped in parentheses so that the
-- rendered text does not rely on SQL operator precedence.
fromExpression :: Expression -> Printer
fromExpression =
  \case
    CastExpression e scalarType ->
      "CAST(" <+> fromExpression e <+> " AS " <+> fromScalarType scalarType <+> ")"
    InExpression e value ->
      "(" <+> fromExpression e <+> ") IN UNNEST(" <+> fromValue value <+> ")"
    JsonQueryExpression e -> "JSON_QUERY(" <+> fromExpression e <+> ")"
    JsonValueExpression e path ->
      "JSON_VALUE(" <+> fromExpression e <+> fromPath path <+> ")"
    ValueExpression value -> fromValue value
    AndExpression xs ->
      SepByPrinter
        (NewlinePrinter <+> "AND ")
        (toList
           (fmap
              (\x -> "(" <+> fromExpression x <+> ")")
              -- An empty conjunction is rendered as the constant TRUE.
              (fromMaybe (pure trueExpression) (NE.nonEmpty xs))))
    OrExpression xs ->
      SepByPrinter
        (NewlinePrinter <+> " OR ")
        (toList
           (fmap
              (\x -> "(" <+> fromExpression x <+> ")")
              -- An empty disjunction is rendered as the constant FALSE.
              (fromMaybe (pure falseExpression) (NE.nonEmpty xs))))
    -- The operand is parenthesised: an unparenthesised conjunction would
    -- otherwise render as @NOT (a) AND (b)@, which negates only @a@.
    NotExpression expression -> "NOT (" <+> fromExpression expression <+> ")"
    ExistsExpression select ->
      "EXISTS (" <+> IndentPrinter 9 (fromSelect select) <+> ")"
    IsNullExpression expression ->
      "(" <+> fromExpression expression <+> ") IS NULL"
    IsNotNullExpression expression ->
      "(" <+> fromExpression expression <+> ") IS NOT NULL"
    ColumnExpression fieldName -> fromFieldName fieldName
    EqualExpression x y ->
      "(" <+> fromExpression x <+> ") = (" <+> fromExpression y <+> ")"
    NotEqualExpression x y ->
      "(" <+> fromExpression x <+> ") != (" <+> fromExpression y <+> ")"
    -- Concatenating with the empty string converts the operand to a string.
    ToStringExpression e -> "CONCAT(" <+> fromExpression e <+> ", '')"
    SelectExpression s -> "(" <+> IndentPrinter 1 (fromSelect s) <+> ")"
    OpExpression op x y ->
      "(" <+>
      fromExpression x <+>
      ") " <+> fromOp op <+> " (" <+> fromExpression y <+> ")"
    -- Yields the field when the condition holds and NULL otherwise.
    ConditionalProjection expression fieldName ->
      "(CASE WHEN(" <+> fromExpression expression <+>
      ") THEN " <+> fromFieldName fieldName <+>
      " ELSE NULL END)"
-- | The SQL type name used for a 'ScalarType', e.g. as the target of a
-- @CAST@ in 'fromExpression'.
fromScalarType :: ScalarType -> Printer
fromScalarType =
  \case
    StringScalarType -> "STRING"
    BytesScalarType -> "BYTES"
    IntegerScalarType -> "INT64"
    FloatScalarType -> "FLOAT64"
    BoolScalarType -> "BOOL"
    TimestampScalarType -> "TIMESTAMP"
    DateScalarType -> "DATE"
    TimeScalarType -> "TIME"
    DatetimeScalarType -> "DATETIME"
    GeographyScalarType -> "GEOGRAPHY"
    StructScalarType -> "STRUCT"
    DecimalScalarType -> "DECIMAL"
    BigDecimalScalarType -> "BIGDECIMAL"
-- | The SQL symbol for a comparison operator.
fromOp :: Op -> Printer
fromOp =
  \case
    LessOp -> "<"
    MoreOp -> ">"
    MoreOrEqualOp -> ">="
    LessOrEqualOp -> "<="
-- | Render a JSON path as a trailing function argument: a comma followed by
-- the path passed as a string value.
--
-- The path text is built from @$@ for the root, @[i]@ for an index step and
-- @.f@ for a field step.
fromPath :: JsonPath -> Printer
fromPath path =
  ", " <+> string path
  where
    -- The path goes through 'fromExpression' as a string value rather than
    -- being spliced into the SQL text.
    string =
      fromExpression .
      ValueExpression . StringValue . LT.toStrict . LT.toLazyText . go
    go =
      \case
        RootPath -> "$"
        IndexPath r i -> go r <> "[" <> LT.fromString (show i) <> "]"
        FieldPath r f -> go r <> "." <> LT.fromText f
-- | Render a field as @`entity`.`field`@, both parts quoted by
-- 'fromNameText'.
fromFieldName :: FieldName -> Printer
fromFieldName FieldName {fieldName, fieldNameEntity} =
  entity <+> "." <+> column
  where
    entity = fromNameText fieldNameEntity
    column = fromNameText fieldName
-- | Render a 'Select' as a SQL @SELECT@ statement.
--
-- The clauses are listed in this order: the projections, @FROM@, the joins,
-- @WHERE@, the ordering/limit part produced by 'fromOrderBys', and finally
-- @GROUP BY@ (only when 'selectGroupBy' is non-empty).
--
-- NOTE(review): several lines of this definition are not visible here (the
-- @where@ keyword, the combinators wrapping the lists, the join keyword and
-- the closing brackets) -- confirm against the upstream module before
-- editing.
--
-- NOTE(review): @GROUP BY@ is listed after the ordering/limit part and the
-- join conditions are separated by a comma; SQL normally expects @GROUP BY@
-- before @ORDER BY@ and conditions combined with @AND@ -- confirm this is
-- intended.
fromSelect :: Select -> Printer
fromSelect Select {..} = finalExpression
finalExpression = inner
-- Projections are de-duplicated by 'cleanProjections' and separated by a
-- comma and a line break.
projections =
("," <+> NewlinePrinter)
(map fromProjection (toList (cleanProjections selectProjections)))
inner =
[ "SELECT " <+> IndentPrinter 7 projections
, "FROM " <+> IndentPrinter 5 (fromFrom selectFrom)
-- One entry per join: the join source, its alias, then the ON conditions.
, SepByPrinter
(\Join {..} ->
IndentPrinter 16 (fromJoinSource joinSource)
, NewlinePrinter
, "AS " <+> fromJoinAlias joinAlias
, NewlinePrinter
, "ON (" <+>
(", " <+> NewlinePrinter)
(map fromOn joinOn)) <+>
, fromWhere selectWhere
, fromOrderBys selectTop selectOffset selectOrderBy
, case selectGroupBy of
-- No grouping fields: contribute nothing to the output.
[] -> ""
fieldNames ->
"GROUP BY " <+> SepByPrinter ", " (map fromFieldName fieldNames)
-- | Render one join condition as an equality between two fields.
fromOn :: (FieldName, FieldName) -> Printer
fromOn (leftField, rightField) =
  fromFieldName leftField <+> " = " <+> fromFieldName rightField
-- | Render the right-hand side of a join: a parenthesised sub-select.
fromJoinSource :: JoinSource -> Printer
fromJoinSource =
  \case
    JoinSelect select -> "(" <+> IndentPrinter 1 (fromSelect select) <+> ")"
    -- We're not using existingJoins at the moment, which was used to
    -- avoid re-joining on the same table twice.
    -- JoinReselect reselect -> "(" <+> fromReselect reselect <+> ")"
-- | Render a 'Reselect': a @SELECT@ of its projections followed by its
-- @WHERE@ clause; no @FROM@ part is emitted here.
--
-- NOTE(review): the combinator wrapping the clause list, its closing bracket
-- and the @where@ keyword are not visible here -- confirm against the
-- upstream module before editing.
fromReselect :: Reselect -> Printer
fromReselect Reselect {..} =
[ "SELECT " <+>
IndentPrinter 7 projections
, fromWhere reselectWhere
-- Projections are de-duplicated by 'cleanProjections' and separated by a
-- comma and a line break.
projections =
("," <+> NewlinePrinter)
(map fromProjection (toList (cleanProjections reselectProjections)))
-- | Render the ordering and row-limiting part of a query from a 'Top', an
-- optional offset expression and an optional list of orderings.
--
-- Nothing is emitted when all three are absent. When an offset is given
-- without a top, a @LIMIT@ of the largest 64-bit signed integer is emitted
-- in front of it.
--
-- NOTE(review): some lines of this definition are not visible here (the
-- wrapper around the list, the @ORDER BY@ keyword text and the closing
-- brackets) -- confirm against the upstream module before editing.
fromOrderBys ::
Top -> Maybe Expression -> Maybe (NonEmpty OrderBy) -> Printer
fromOrderBys NoTop Nothing Nothing = "" -- An ORDER BY is wasteful if not needed.
fromOrderBys top moffset morderBys =
[ case morderBys of
Nothing -> ""
Just orderBys ->
-- One rendered entry per ordering, separated by a comma and a line break.
, SepByPrinter
("," <+> NewlinePrinter)
(map fromOrderBy (toList orderBys))
, case (top, moffset) of
(NoTop, Nothing) -> ""
(NoTop, Just offset) ->
"LIMIT 9223372036854775807 /* Maximum */"
-- Above: OFFSET is not supported without a LIMIT, therefore
-- we set LIMIT to the maximum integer value. Such a large
-- number of rows (9 quintillion) would not be possible to
-- service: 9223 petabytes. No machine has such capacity at
-- present.
" OFFSET " <+> fromExpression offset
-- The limit is passed through 'fromValue' as an integer value rather than
-- written into the SQL text.
(Top n, Nothing) -> "LIMIT " <+> fromValue (IntegerValue (intToInt64 n))
(Top n, Just offset) ->
"LIMIT " <+>
fromValue (IntegerValue (intToInt64 n)) <+>
" OFFSET " <+> fromExpression offset
-- | Render one ordering: the parenthesised field, its direction, and the
-- nulls-ordering suffix produced by 'fromNullsOrder'.
fromOrderBy :: OrderBy -> Printer
fromOrderBy OrderBy {orderByFieldName, orderByOrder, orderByNullsOrder} =
  "(" <+> field <+> ") " <+> direction <+> nullsSuffix
  where
    field = fromFieldName orderByFieldName
    direction = fromOrder orderByOrder
    nullsSuffix = fromNullsOrder orderByNullsOrder
-- | The SQL keyword for a sort direction.
fromOrder :: Order -> Printer
fromOrder =
  \case
    AscOrder -> "ASC"
    DescOrder -> "DESC"
-- | The suffix that places NULLs in an ordering. It carries its own leading
-- space, and is empty when no particular placement is requested.
fromNullsOrder :: NullsOrder -> Printer
fromNullsOrder =
  \case
    NullsAnyOrder -> ""
    NullsFirst -> " NULLS FIRST"
    NullsLast -> " NULLS LAST"
-- | Render an entity alias as a back-quoted name.
fromJoinAlias :: EntityAlias -> Printer
fromJoinAlias EntityAlias {entityAliasText = aliasName} =
  fromNameText aliasName
-- | Render one projection of a @SELECT@ list.
--
-- Most alternatives render the wrapped thing and then attach its alias via
-- 'fromAliased'; 'StarProjection' renders as a bare @*@.
--
-- NOTE(review): many lines of this definition are not visible here (the
-- case header, the wrappers applied around the lambdas, and several closing
-- brackets and string literals) -- confirm against the upstream module
-- before editing.
fromProjection :: Projection -> Printer
fromProjection =
WindowProjection aliasedWindowFunction ->
fromAliased (fmap fromWindowFunction aliasedWindowFunction)
ExpressionProjection aliasedExpression ->
fromAliased (fmap fromExpression aliasedExpression)
FieldNameProjection aliasedFieldName ->
fromAliased (fmap fromFieldName aliasedFieldName)
AggregateProjection aliasedAggregate ->
fromAliased (fmap fromAggregate aliasedAggregate)
-- Several aggregates are gathered into a single STRUCT, each with its alias.
AggregateProjections aliasedAggregates ->
(\aggs ->
"STRUCT(" <+>
", "
(fmap (fromAliased . fmap fromAggregate) (toList aggs))) <+>
StarProjection -> "*"
ArrayAggProjection aliasedAgg -> fromAliased (fmap fromArrayAgg aliasedAgg)
EntityProjection aliasedEntity ->
(\(fields :: [(FieldName, FieldOrigin)]) ->
-- Example:
-- `aa_articles1`.`aggregate`,
-- STRUCT(0 as count, struct(null as id) as sum)
-- ) as aggregate
-- ) AS `articles_aggregate`
-- The (AS `articles_aggregate`) part at the end is rendered by 'fromAliased' evaluating
-- at the root of this branch, and not by anything below
"STRUCT(" <+>
(SepByPrinter ", "
(fields <&>
\(fName@FieldName{..}, fieldOrigin :: FieldOrigin) ->
-- Each field falls back to the value given by its origin when it is NULL.
"IFNULL(" <+> fromFieldName fName <+> ", " <+> fromFieldOrigin fieldOrigin <+>
") AS " <+> fromNameText fieldName
) <+>
ArrayEntityProjection entityAlias aliasedEntity ->
(\aggs ->
-- The listed fields are read from the @agg@ column of the joined entity.
(SepByPrinter ", " (fmap fromFieldNameNaked (toList aggs))) <+>
" FROM " <+> fromJoinAlias entityAlias <+> ".agg)")
-- Renders only the field's own name, without the entity qualifier.
where fromFieldNameNaked :: FieldName -> Printer
fromFieldNameNaked (FieldName {..}) =
fromNameText fieldName
-- | Render the fallback value for a field, as used in the second argument
-- of @IFNULL@ by 'fromProjection'.
--
-- A field with no origin falls back to @NULL@; a field that comes from
-- aggregates falls back to a @STRUCT@ holding the null value of each
-- aggregate (see 'fromNullAggregate').
fromFieldOrigin :: FieldOrigin -> Printer
fromFieldOrigin = \case
  NoOrigin -> "NULL"
  AggregateOrigin aliasedAggregates ->
    "STRUCT(" <+>
    -- Example: "0 AS count, STRUCT(NULL AS id) AS sum"
    SepByPrinter ", " (fromAliased . fmap fromNullAggregate <$> aliasedAggregates) <+>
    ")"
-- | Render a window function: the partitioning fields separated by commas,
-- followed by the ordering part when one is given.
--
-- NOTE(review): the opening SQL text in front of the field list and the
-- closing text after the trailing @<+>@ are not visible here -- confirm
-- against the upstream module before editing.
fromWindowFunction :: WindowFunction -> Printer
fromWindowFunction (RowNumberOverPartitionBy fieldNames morderBys) =
SepByPrinter ", " (fmap fromFieldName (toList fieldNames)) <+>
(case morderBys of
-- Only the orderings are passed on; no top and no offset.
Just {} -> " " <+> fromOrderBys NoTop Nothing morderBys
Nothing -> "") <+>
-- | Render an 'ArrayAgg': a @STRUCT@ of the projections, followed by the
-- ordering part produced by 'fromOrderBys', closed by a parenthesis.
--
-- NOTE(review): a large part of this definition is not visible here (the
-- opening SQL text, the list wrappers, the arguments given to
-- 'fromOrderBys' and the @where@ keyword) -- confirm against the upstream
-- module before editing.
fromArrayAgg :: ArrayAgg -> Printer
fromArrayAgg ArrayAgg {..} =
, IndentPrinter 10 $
" "
[ "STRUCT(" <+> IndentPrinter 7 projections <+> ")"
, fromOrderBys
-- Each ordering has its nulls placement reset, for the reason given below.
(\orderBy ->
{ orderByNullsOrder = NullsAnyOrder
-- Because BigQuery reports:
-- > NULLS FIRST not supported with descending sort order in aggregate functions
-- And the same error with 'ascending'.
, ")"
-- Projections are de-duplicated by 'cleanProjections' and separated by a
-- comma and a line break.
projections =
("," <+> NewlinePrinter)
(map fromProjection (toList (cleanProjections arrayAggProjections)))
-- | The value an aggregate takes when there is nothing to aggregate: @0@ for
-- a count, @NULL@ for a single operation or a text aggregate, and a
-- @STRUCT@ of aliased @NULL@s for a group of operations.
fromNullAggregate :: Aggregate -> Printer
fromNullAggregate aggregate =
  case aggregate of
    CountAggregate _ -> "0"
    OpAggregate _ _ -> nullLiteral
    TextAggregate _ -> nullLiteral
    OpAggregates _ fields ->
      "STRUCT(" <+> SepByPrinter ", " (map nullField (toList fields)) <+> ")"
  where
    nullLiteral = "NULL"
    -- Only the alias matters here; the aggregated expression is ignored.
    nullField (alias, _) = "NULL AS " <+> fromNameText alias
-- | Render an aggregate: @COUNT(...)@ for a count, @op(arg)@ for a single
-- operation, a @STRUCT@ of aliased @op(arg)@ entries for a group of
-- operations, and a string value for a text aggregate.
--
-- NOTE(review): the case header, the wrappers around the entry list in the
-- 'OpAggregates' branch and its closing text are not visible here --
-- confirm against the upstream module before editing.
fromAggregate :: Aggregate -> Printer
fromAggregate =
CountAggregate countable -> "COUNT(" <+> fromCountable countable <+> ")"
OpAggregate text arg ->
-- The operation name is written into the SQL text verbatim.
UnsafeTextPrinter text <+> "(" <+> fromExpression arg <+> ")"
OpAggregates text args ->
"STRUCT(" <+>
", "
(\(alias, arg) ->
UnsafeTextPrinter text <+>
"(" <+> fromExpression arg <+> ") AS " <+> fromNameText alias)
(toList args))) <+>
TextAggregate text -> fromExpression (ValueExpression (StringValue text))
-- | Render the argument of @COUNT(...)@: @*@, a list of fields, or a list
-- of fields preceded by @DISTINCT@.
fromCountable :: Countable FieldName -> Printer
fromCountable =
  \case
    StarCountable -> "*"
    NonNullFieldCountable fields ->
      SepByPrinter ", " (map fromFieldName (toList fields))
    -- The keyword is what separates this case from the one above; without it
    -- a distinct count would silently become a plain count.
    DistinctCountable fields ->
      "DISTINCT " <+>
      SepByPrinter ", " (map fromFieldName (toList fields))
-- | Render a @WHERE@ clause as the conjunction of the given expressions.
--
-- Expressions that collapse to the constant TRUE are dropped, and nothing
-- is emitted when no expression remains.
fromWhere :: Where -> Printer
fromWhere =
  \case
    Where expressions ->
      -- 'collapse' is only used to decide what to drop; the expressions that
      -- are kept are rendered in their original form.
      case filter ((/= trueExpression) . collapse) expressions of
        [] -> ""
        collapsedExpressions ->
          "WHERE " <+>
          IndentPrinter 6 (fromExpression (AndExpression collapsedExpressions))
  where
    -- Unwrap single-element conjunctions/disjunctions and turn an empty
    -- conjunction into TRUE.
    collapse (AndExpression [x]) = collapse x
    collapse (AndExpression []) = trueExpression
    collapse (OrExpression [x]) = collapse x
    collapse x = x
-- | Render the source of a @FROM@ clause together with its alias: either a
-- qualified table name or a parenthesised sub-select.
fromFrom :: From -> Printer
fromFrom =
  \case
    FromQualifiedTable aliasedQualifiedTableName ->
      fromAliased (fmap fromTableName aliasedQualifiedTableName)
    FromSelect select -> fromAliased (fmap (parens . fromSelect) select)
-- | Render a table as @`schema`.`table`@, both parts quoted by
-- 'fromNameText'.
fromTableName :: TableName -> Printer
fromTableName TableName {..} = schema <+> "." <+> table
  where
    schema = fromNameText tableNameSchema
    table = fromNameText tableName
-- | Append @ AS `alias`@ to an already-rendered thing.
fromAliased :: Aliased Printer -> Printer
fromAliased Aliased {aliasedThing, aliasedAlias} =
  aliasedThing <+> (" AS " <+> fromNameText aliasedAlias)
-- | Wrap a name in back-quotes.
--
-- NOTE(review): the name is inserted as-is; a back-quote inside it is not
-- escaped -- confirm callers only pass names that cannot contain one.
fromNameText :: Text -> Printer
fromNameText name = UnsafeTextPrinter (mconcat [tick, name, tick])
  where
    tick = "`"
-- | The boolean constant TRUE as an expression.
trueExpression :: Expression
trueExpression = ValueExpression $ BoolValue True
-- | The boolean constant FALSE as an expression.
falseExpression :: Expression
falseExpression = ValueExpression $ BoolValue False
-- | Render a value through 'ValuePrinter', so that it is handled by the
-- renderer instead of being written into the SQL text here.
fromValue :: Value -> Printer
fromValue value = ValuePrinter value
-- | Surround a printer with parentheses, indenting its contents by one
-- column so continuation lines align after the opening bracket.
parens :: Printer -> Printer
parens inner = "(" <+> IndentPrinter 1 inner <+> ")"
-- Quick and easy query printer
-- | Render on a single line, discarding the collected value mapping.
toBuilderFlat :: Printer -> Builder
toBuilderFlat printer = evalState (runBuilderFlat printer) mempty
-- | Render with line breaks and indentation, discarding the collected value
-- mapping.
toBuilderPretty :: Printer -> Builder
toBuilderPretty printer = evalState (runBuilderPretty printer) mempty
-- | 'toBuilderPretty', converted to strict 'Text'.
toTextPretty :: Printer -> Text
toTextPretty printer = LT.toStrict (LT.toLazyText (toBuilderPretty printer))
-- | 'toBuilderFlat', converted to strict 'Text'.
toTextFlat :: Printer -> Text
toTextFlat printer = LT.toStrict (LT.toLazyText (toBuilderFlat printer))
-- Printer ready for consumption
-- | Produces a single-line query with holes, together with a mapping from
-- the index of each hole to the value that belongs in it.
--
-- The renderer collects the mapping keyed by value; it is inverted here so
-- that it is keyed by parameter index (see 'paramName').
renderBuilderFlat :: Printer -> (Builder, InsOrdHashMap Int Value)
renderBuilderFlat =
  second (OMap.fromList . map swap . OMap.toList) . flip runState mempty .
  runBuilderFlat
-- | Produces a query with holes, laid out with line breaks and indentation,
-- together with a mapping from the index of each hole to the value that
-- belongs in it.
--
-- The renderer collects the mapping keyed by value; it is inverted here so
-- that it is keyed by parameter index (see 'paramName').
renderBuilderPretty :: Printer -> (Builder, InsOrdHashMap Int Value)
renderBuilderPretty =
  second (OMap.fromList . map swap . OMap.toList) . flip runState mempty .
  runBuilderPretty
-- Real printer engines
-- | The name of the parameter with the given index: @param0@, @param1@, ...
paramName :: Int -> Builder
paramName = ("param" <>) . fromString . show
-- | Render a 'Printer' on a single line.
--
-- The state maps every value seen so far to its parameter index, so equal
-- values share one placeholder. Line breaks become single spaces; the
-- indentation level is tracked but has no effect on the output.
runBuilderFlat :: Printer -> State (InsOrdHashMap Value Int) Builder
runBuilderFlat = go 0
  where
    go level =
      \case
        UnsafeTextPrinter q -> pure (LT.fromText q)
        -- Children that render to nothing are dropped.
        SeqPrinter xs -> fmap (mconcat . filter notEmpty) (mapM (go level) xs)
        -- Empty children are dropped before interspersing, so they do not
        -- leave a stray separator behind.
        SepByPrinter x xs -> do
          i <- go level x
          fmap (mconcat . intersperse i . filter notEmpty) (mapM (go level) xs)
        NewlinePrinter -> pure " "
        IndentPrinter n p -> go (level + n) p
        -- An empty array is written inline rather than as a parameter.
        ValuePrinter (ArrayValue x) | V.null x -> pure "[]"
        ValuePrinter v -> do
          themap <- get
          next <- case OMap.lookup v themap of
            Just next -> pure next
            -- First occurrence: the next free index is the current map size.
            Nothing -> do
              next <- gets OMap.size
              modify (OMap.insert v next)
              pure next
          pure ("@" <> paramName next)
    notEmpty = (/= mempty)
-- | Render a 'Printer' with line breaks and indentation.
--
-- The state maps every value seen so far to its parameter index, so equal
-- values share one placeholder. Every line break is followed by as many
-- spaces as the current indentation level.
runBuilderPretty :: Printer -> State (InsOrdHashMap Value Int) Builder
runBuilderPretty = go 0
  where
    go level =
      \case
        UnsafeTextPrinter q -> pure (LT.fromText q)
        -- Children that render to nothing are dropped.
        SeqPrinter xs -> fmap (mconcat . filter notEmpty) (mapM (go level) xs)
        -- Empty children are dropped before interspersing, so they do not
        -- leave a stray separator behind.
        SepByPrinter x xs -> do
          i <- go level x
          fmap (mconcat . intersperse i . filter notEmpty) (mapM (go level) xs)
        NewlinePrinter -> pure ("\n" <> indentation level)
        IndentPrinter n p -> go (level + n) p
        -- An empty array is written inline rather than as a parameter.
        ValuePrinter (ArrayValue x)
          | V.null x -> pure "[]"
        ValuePrinter v -> do
          themap <- get
          next <-
            case OMap.lookup v themap of
              Just next -> pure next
              -- First occurrence: the next free index is the current map
              -- size.
              Nothing -> do
                next <- gets OMap.size
                modify (OMap.insert v next)
                pure next
          pure ("@" <> paramName next)
    indentation n = LT.fromText (T.replicate n " ")
    notEmpty = (/= mempty)
-- Projection cleanup
-- | Drop projections whose 'projectionAlias' has already been seen, keeping
-- the first occurrence of each and the original order.
--
-- TODO: For now this is called at each place where projections are
-- rendered. An ordered set, or a newtype wrapper proving the clean-up has
-- been done, would be preferable, but would require changes to code
-- elsewhere. A warning about duplicates might also be useful.
cleanProjections :: NonEmpty Projection -> NonEmpty Projection
cleanProjections projections =
  -- De-duplicating a non-empty list always keeps its first element, so the
  -- fallback to the input is never taken; it only keeps this total.
  fromMaybe projections (NE.nonEmpty deduplicated)
  where
    deduplicated = nubOrdOn projectionAlias (NE.toList projections)