BigQuery: fix limit/offset for array aggregates

Blocked on https://github.com/hasura/graphql-engine-mono/pull/1640.

While fiddling with BigQuery I noticed a severe issue with offset/limit for array-aggregates. I've fixed it now.

The basic problem was that I was using a query like this:

```graphql
query MyQuery {
  hasura_Artist(order_by: {artist_self_id: asc}) {
    artist_self_id
    albums_aggregate(order_by: {album_self_id: asc}, limit: 2) {
      nodes {
        album_self_id
      }
      aggregate {
        count
      }
    }
  }
}
```

Producing this SQL:

```sql
SELECT `t_Artist1`.`artist_self_id` AS `artist_self_id`,
       STRUCT(IFNULL(`aa_albums1`.`nodes`, NULL) AS `nodes`, IFNULL(`aa_albums1`.`aggregate`, STRUCT(0 AS `count`)) AS `aggregate`) AS `albums_aggregate`
FROM `hasura`.`Artist` AS `t_Artist1`
LEFT OUTER JOIN (SELECT ARRAY_AGG(STRUCT(`t_Album1`.`album_self_id` AS `album_self_id`) ORDER BY (`t_Album1`.`album_self_id`) ASC) AS `nodes`,
                        STRUCT(COUNT(*) AS `count`) AS `aggregate`,
                        `t_Album1`.`artist_other_id` AS `artist_other_id`
                 FROM (SELECT *
                       FROM `hasura`.`Album` AS `t_Album1`
                       ORDER BY (`t_Album1`.`album_self_id`) ASC NULLS FIRST
                       -- PROBLEM HERE
                       LIMIT @param0) AS `t_Album1`
                 GROUP BY `t_Album1`.`artist_other_id`)
AS `aa_albums1`
ON (`aa_albums1`.`artist_other_id` = `t_Artist1`.`artist_self_id`)
ORDER BY (`t_Artist1`.`artist_self_id`) ASC NULLS FIRST
```

Note the `LIMIT @param0` -- that is incorrect because we want to limit
per artist. Instead, we want:

```sql
SELECT `t_Artist1`.`artist_self_id` AS `artist_self_id`,
       STRUCT(IFNULL(`aa_albums1`.`nodes`, NULL) AS `nodes`, IFNULL(`aa_albums1`.`aggregate`, STRUCT(0 AS `count`)) AS `aggregate`) AS `albums_aggregate`
FROM `hasura`.`Artist` AS `t_Artist1`
LEFT OUTER JOIN (SELECT ARRAY_AGG(STRUCT(`t_Album1`.`album_self_id` AS `album_self_id`) ORDER BY (`t_Album1`.`album_self_id`) ASC) AS `nodes`,
                        STRUCT(COUNT(*) AS `count`) AS `aggregate`,
                        `t_Album1`.`artist_other_id` AS `artist_other_id`
                 FROM (SELECT *,
                            -- ADDED: the ORDER BY inside the window ensures the
                            -- row numbering is deterministic per partition
                            ROW_NUMBER() OVER(PARTITION BY artist_other_id ORDER BY album_self_id ASC) artist_album_index
                       FROM `hasura`.`Album` AS `t_Album1`
                       ORDER BY (`t_Album1`.`album_self_id`) ASC NULLS FIRST
                       ) AS `t_Album1`
                 -- CHANGED
                 WHERE artist_album_index <= @param0
                 GROUP BY `t_Album1`.`artist_other_id`)
AS `aa_albums1`
ON (`aa_albums1`.`artist_other_id` = `t_Artist1`.`artist_self_id`)
ORDER BY (`t_Artist1`.`artist_self_id`) ASC NULLS FIRST
```

This serves both the LIMIT and the OFFSET function via the WHERE clause on
the per-partition row index. With that in place, both the ARRAY_AGG and the
COUNT are correct per artist.

I've updated my Haskell test suite to add regression tests for this. I'll push a commit for Python tests shortly. The existing Python tests still pass.

This just fixes a case that we hadn't noticed.

https://github.com/hasura/graphql-engine-mono/pull/1641

GitOrigin-RevId: 49933fa5e09a9306c89565743ecccf2cb54eaa80
This commit is contained in:
Chris Done 2021-07-06 09:28:42 +01:00 committed by hasura-bot
parent 6ed800abaa
commit 614c0dab80
4 changed files with 279 additions and 9 deletions

View File

@ -14,7 +14,7 @@ module Hasura.Backends.BigQuery.FromIr
, Top(..) -- Re-export for FromIrConfig.
) where
import Hasura.Backends.BigQuery.Source (BigQuerySourceConfig(..))
import Hasura.Backends.BigQuery.Source (BigQuerySourceConfig (..))
import Hasura.Prelude
import qualified Data.HashMap.Strict as HM
@ -159,7 +159,7 @@ fromRootField =
\case
(Ir.QDBSingleRow s) -> mkSQLSelect Rql.JASSingleObject s
(Ir.QDBMultipleRows s) -> mkSQLSelect Rql.JASMultipleRows s
(Ir.QDBAggregation s) -> fromSelectAggregate s
(Ir.QDBAggregation s) -> fromSelectAggregate Nothing s
(Ir.QDBConnection _) -> refute $ pure ConnectionsNotSupported
--------------------------------------------------------------------------------
@ -219,9 +219,10 @@ fromSelectRows annSelectG = do
else LeaveNumbersAlone
fromSelectAggregate ::
Ir.AnnSelectG 'BigQuery (Const Void) [(Rql.FieldName, Ir.TableAggregateFieldG 'BigQuery (Const Void) Expression)] Expression
Maybe (EntityAlias, HashMap ColumnName ColumnName)
-> Ir.AnnSelectG 'BigQuery (Const Void) [( Rql.FieldName , Ir.TableAggregateFieldG 'BigQuery (Const Void) Expression)] Expression
-> FromIr BigQuery.Select
fromSelectAggregate annSelectG = do
fromSelectAggregate minnerJoinFields annSelectG = do
selectFrom <-
case from of
Ir.FromTable qualifiedObject -> fromQualifiedTable qualifiedObject
@ -230,6 +231,11 @@ fromSelectAggregate annSelectG = do
runReaderT (fromSelectArgsG args) (fromAlias selectFrom)
filterExpression <-
runReaderT (fromAnnBoolExp permFilter) (fromAlias selectFrom)
mforeignKeyConditions <-
for minnerJoinFields $ \(entityAlias, mapping) ->
runReaderT
(fromMappingFieldNames (fromAlias selectFrom) mapping)
entityAlias
fieldSources <-
runReaderT
(traverse
@ -244,6 +250,7 @@ fromSelectAggregate annSelectG = do
case NE.nonEmpty (concatMap (toList . fieldSourceProjections) fieldSources) of
Nothing -> refute (pure NoProjectionFields)
Just ne -> pure ne
indexAlias <- generateEntityAlias IndexTemplate
pure
Select
{ selectCardinality = One
@ -256,7 +263,36 @@ fromSelectAggregate annSelectG = do
(Aliased
{ aliasedThing =
Select
{ selectProjections = pure StarProjection
{ selectProjections =
case mforeignKeyConditions of
Nothing -> pure StarProjection
Just innerJoinFields ->
pure StarProjection <>
-- We setup an index over every row in
-- the sub select. Then if you look at
-- the outer Select, you can see we apply
-- a WHERE that uses this index for
-- LIMIT/OFFSET.
pure
(WindowProjection
(Aliased
{ aliasedAlias = unEntityAlias indexAlias
, aliasedThing =
RowNumberOverPartitionBy
-- The row numbers start from 1.
(NE.fromList
(map
(\(fieldName', _) -> fieldName')
(innerJoinFields)))
argsOrderBy
-- Above: Having the order by
-- in here ensures that the
-- row numbers are ordered by
-- this ordering. Below, we
-- order again for the
-- general row order. Both
-- are needed!
}))
, selectFrom
, selectJoins =
argsJoins <> mapMaybe fieldSourceJoin fieldSources
@ -269,14 +305,53 @@ fromSelectAggregate annSelectG = do
-- putting this elsewhere.
, selectFinalWantedFields = Nothing
, selectCardinality = Many
, selectTop = argsTop
, selectOffset = argsOffset
, selectTop = maybe argsTop (const NoTop) mforeignKeyConditions
, selectOffset = maybe argsOffset (const Nothing) mforeignKeyConditions
, selectGroupBy = mempty
}
, aliasedAlias = entityAliasText (fromAlias selectFrom)
})
, selectJoins = mempty
, selectWhere = mempty
, selectWhere =
case mforeignKeyConditions of
Nothing -> mempty
Just {} ->
let offset =
case argsOffset of
Nothing -> mempty
Just offset' ->
Where
-- Apply an offset using the row_number from above.
[ OpExpression
MoreOp
(ColumnExpression
FieldName
{ fieldNameEntity =
coerce (fromAlias selectFrom)
, fieldName = unEntityAlias indexAlias
})
offset'
]
limit =
case argsTop of
NoTop -> mempty
Top limit' ->
Where
-- Apply a limit using the row_number from above.
[ OpExpression
LessOp
(ColumnExpression
FieldName
{ fieldNameEntity =
coerce (fromAlias selectFrom)
, fieldName = unEntityAlias indexAlias
})
(ValueExpression . IntegerValue . Int64 . tshow $
limit' + 1 -- Because the row_number() indexing starts at 1.
-- So idx<l+1 means idx<2 where l = 1 i.e. "limit to 1 row".
)
]
in offset <> limit
, selectOrderBy = Nothing
, selectOffset = Nothing
}
@ -976,7 +1051,9 @@ fromArrayAggregateSelectG ::
-> ReaderT EntityAlias FromIr Join
fromArrayAggregateSelectG annRelationSelectG = do
joinFieldName <- lift (fromRelName aarRelationshipName)
select <- lift (fromSelectAggregate annSelectG)
select <- do
lhsEntityAlias <- ask
lift (fromSelectAggregate (pure (lhsEntityAlias, mapping)) annSelectG)
alias <- lift (generateEntityAlias (ArrayAggregateTemplate joinFieldName))
joinOn <- fromMappingFieldNames alias mapping
innerJoinFields <-

View File

@ -20,6 +20,7 @@ import qualified Hasura.Base.Error as E
import qualified Hasura.SQL.AnyBackend as AB
import qualified Hasura.Tracing as Tracing
import Hasura.Backends.BigQuery.Plan
import Hasura.Base.Error
import Hasura.EncJSON

View File

@ -0,0 +1,189 @@
- description: Aggregate with a limit using nodes, count
url: /v1/graphql
status: 200
response:
data:
hasura_Artist:
- artist_self_id: '1000'
albums_aggregate:
nodes:
- album_self_id: '2002'
aggregate:
count: '1'
- artist_self_id: '1001'
albums_aggregate:
nodes:
- album_self_id: '2000'
aggregate:
count: '1'
- artist_self_id: '1002'
albums_aggregate:
nodes:
- album_self_id: '2003'
aggregate:
count: '1'
query:
query: |
query MyQuery {
hasura_Artist(order_by: {artist_self_id: asc}) {
artist_self_id
albums_aggregate(order_by: {album_self_id: asc}, limit: 1) {
nodes {
album_self_id
}
aggregate {
count
}
}
}
}
- description: Aggregate with order by using nodes, count, sum
url: /v1/graphql
status: 200
response:
data:
hasura_Artist:
- artist_self_id: '1001'
albums_aggregate:
nodes:
- album_self_id: '2000'
- album_self_id: '2001'
aggregate:
count: '2'
sum:
album_self_id: '4001'
- artist_self_id: '1002'
albums_aggregate:
nodes:
- album_self_id: '2003'
- album_self_id: '2004'
aggregate:
count: '2'
sum:
album_self_id: '4007'
- artist_self_id: '1000'
albums_aggregate:
nodes:
- album_self_id: '2002'
- album_self_id: '2005'
aggregate:
count: '2'
sum:
album_self_id: '4007'
query:
query: |
query MyQuery {
hasura_Artist {
artist_self_id
albums_aggregate(order_by: {album_self_id: asc}) {
nodes {
album_self_id
}
aggregate {
count
sum {
album_self_id
}
}
}
}
}
- description: Aggregate with limit using sum, nodes
url: /v1/graphql
status: 200
response:
data:
hasura_Artist:
- artist_self_id: '1001'
albums_aggregate:
nodes:
- album_self_id: '2000'
aggregate:
count: '1'
sum:
album_self_id: '2000'
- artist_self_id: '1002'
albums_aggregate:
nodes:
- album_self_id: '2003'
aggregate:
count: '1'
sum:
album_self_id: '2003'
- artist_self_id: '1000'
albums_aggregate:
nodes:
- album_self_id: '2002'
aggregate:
count: '1'
sum:
album_self_id: '2002'
query:
query: |
query MyQuery {
hasura_Artist {
artist_self_id
albums_aggregate(order_by: {album_self_id: asc}, limit: 1) {
nodes {
album_self_id
}
aggregate {
count
sum {
album_self_id
}
}
}
}
}
- description: Aggregate with offset using sum, nodes
url: /v1/graphql
status: 200
response:
data:
hasura_Artist:
- artist_self_id: '1001'
albums_aggregate:
nodes:
- album_self_id: '2001'
aggregate:
count: '1'
sum:
album_self_id: '2001'
- artist_self_id: '1002'
albums_aggregate:
nodes:
- album_self_id: '2004'
aggregate:
count: '1'
sum:
album_self_id: '2004'
- artist_self_id: '1000'
albums_aggregate:
nodes:
- album_self_id: '2005'
aggregate:
count: '1'
sum:
album_self_id: '2005'
query:
query: |
query MyQuery {
hasura_Artist {
artist_self_id
albums_aggregate(order_by: {album_self_id: asc}, offset: 1) {
nodes {
album_self_id
}
aggregate {
count
sum {
album_self_id
}
}
}
}
}

View File

@ -21,6 +21,9 @@ class TestGraphQLQueryBasicBigquery:
def test_global_limit(self, hge_ctx, transport):
check_query_f(hge_ctx, self.dir() + "/global_limit.yaml", transport)
def test_offset_regression(self, hge_ctx, transport):
check_query_f(hge_ctx, self.dir() + "/offset_regression.yaml", transport)
def test_user_perms(self, hge_ctx, transport):
check_query_f(hge_ctx, self.dir() + "/user_perms.yaml", transport)