Simplify eval to run directly inside the Query monad

@tomjaguarpaw at ZuriHac questioned whether the `Evaluation` monad was really unnecessary.

And yes, it turns out that the `Evaluation` monad wasn't actually really adding any value. The real issue was Postgres's unspecified evaluation order (which in practice behaved like the broken `ListT` from transformers).

We now maintain a stack of bindings from previous subselects in the `Query` monad, which future queries can reference. So for `evalulation`, to ensure that Postgres doesn't try to run a function once where we expect it to be run multiple times, we modify the expression to contain a bunch of superfluous lateral references to the previous queries. This ensures that it gets run every time.
This commit is contained in:
Shane O'Brien 2021-06-19 11:43:30 +01:00
parent 294543e530
commit d63063940d
No known key found for this signature in database
GPG Key ID: 35A00ED1B695C1A1
6 changed files with 286 additions and 66 deletions

View File

@ -267,9 +267,6 @@ module Rel8
-- ** Sequences
, nextval
, Evaluate
, eval
, evaluate
-- * Implementation details

View File

@ -1,5 +1,3 @@
{-# language DerivingVia #-}
{-# language GeneralizedNewtypeDeriving #-}
{-# language StandaloneKindSignatures #-}
module Rel8.Query
@ -8,14 +6,22 @@ module Rel8.Query
where
-- base
import Control.Applicative ( liftA2 )
import Control.Monad ( liftM2 )
import Data.Kind ( Type )
import Data.Monoid ( Any( Any ) )
import Prelude
-- opaleye
import qualified Opaleye.Select as Opaleye
import qualified Opaleye.Internal.HaskellDB.PrimQuery as Opaleye
import qualified Opaleye.Internal.PackMap as Opaleye
import qualified Opaleye.Internal.PrimQuery as Opaleye
import qualified Opaleye.Internal.QueryArr as Opaleye
import qualified Opaleye.Internal.Tag as Opaleye
-- rel8
import Rel8.Query.Set ( unionAll )
import Rel8.Query.Opaleye ( fromOpaleye )
import Rel8.Query.Values ( values )
import Rel8.Table.Alternative
( AltTable, (<|>:)
@ -23,22 +29,172 @@ import Rel8.Table.Alternative
)
-- semigroupoids
import Data.Functor.Apply ( Apply, WrappedApplicative(..) )
import Data.Functor.Apply ( Apply, (<.>) )
import Data.Functor.Bind ( Bind, (>>-) )
-- | The @Query@ monad allows you to compose a @SELECT@ query. This monad has
-- semantics similar to the list (@[]@) monad.
type Query :: Type -> Type
newtype Query a = Query (Opaleye.Select a)
deriving newtype (Functor, Applicative, Monad)
deriving Apply via (WrappedApplicative Opaleye.Select)
newtype Query a =
Query (
-- This is based on Opaleye's Select monad, but with two addtions. We
-- maintain a stack of PrimExprs from parent previous subselects. In
-- practice, these are always the results of dummy calls to random().
--
-- We also return a Bool that indicates to the parent subselect whether
-- or not that stack of PrimExprs were used at any point. If they weren't,
-- then the call to random() is never added to the query.
--
-- This is all needed to implement evaluate. Consider the following code:
--
-- do
-- x <- values [lit 'a', lit 'b', lit 'c']
-- y <- evaluate $ nextval "user_id_seq"
-- pure (x, y)
--
-- If we just used Opaleye's Select monad directly, the SQL would come out
-- like this:
--
-- SELECT
-- a, b
-- FROM
-- (VALUES ('a'), ('b'), ('c')) Q1(a),
-- LATERAL (SELECT nextval('user_id_seq')) Q2(b);
--
-- From the Haskell code, you would intuitively expect to get back the
-- results of three different calls to nextval(), but from Postgres' point
-- of view, because the Q2 subquery doesn't reference anything from the Q1
-- query, it thinks it only needs to call nextval() once. This is actually
-- exactly the same problem you get with the deprecated ListT IO monad from
-- the transformers package — *> behaves differently to >>=, so
-- using ApplicativeDo can change the results of a program. ApplicativeDo
-- is exactly the optimisation Postgres does on a "LATERAL" query that
-- doesn't make any references to previous subselects.
--
-- Rel8's solution is generate the following SQL instead:
--
-- SELECT
-- a, b
-- FROM
-- (SELECT
-- random() AS dummy,
-- *
-- FROM
-- (VALUES ('a'), ('b'), ('c')) Q1(a)) Q1,
-- LATERAL (SELECT
-- CASE
-- WHEN dummy IS NOT NULL
-- THEN nextval('user_id_seq')
-- END) Q2(b);
--
-- We use random() here as the dummy value (and not some constant) because
-- Postgres will again optimize if it sees that a value is constant
-- (and thus only call nextval() once), but because random() is marked as
-- VOLATILE, this inhibits Postgres from doing that optimisation.
--
-- Why not just reference the a column from the previous query directly
-- instead of adding a dummy value? Basically, even if we extract out all
-- the bindings introduced in a PrimQuery, we can't always be sure which
-- ones refer to constant values, so if we end up laterally referencing a
-- constant value, then all of this would be for nothing.
--
-- Why not just add the call to the previous subselect directly, like so:
--
-- SELECT
-- a, b
-- FROM
-- (SELECT
-- nextval('user_id_seq') AS eval,
-- *
-- FROM
-- (VALUES ('a'), ('b'), ('c')) Q1(a)) Q1,
-- LATERAL (SELECT eval) Q2(b);
--
-- That would work in this case. But consider the following Rel8 code:
--
-- do
-- x <- values [lit 'a', lit 'b', lit 'c']
-- y <- values [lit 'd', lit 'e', lit 'f']
-- z <- evaluate $ nextval "user_id_seq"
-- pure (x, y, z)
--
-- How many calls to nextval should there be? Our Haskell intuition says
-- nine. But that's not what you would get if you used the above
-- technique. The problem is, which VALUES query should the nextval be
-- added to? You can choose one or the other to get three calls to
-- nextval, but you still need to make a superfluous LATERAL references to
-- the other if you want nine calls. So for the above Rel8 code we generate
-- the following SQL:
--
-- SELECT
-- a, b, c
-- FROM
-- (SELECT
-- random() AS dummy,
-- *
-- FROM
-- (VALUES ('a'), ('b'), ('c')) Q1(a)) Q1,
-- (SELECT
-- random() AS dummy,
-- *
-- FROM
-- (VALUES ('d'), ('e'), ('f')) Q2(b)) Q2,
-- LATERAL (SELECT
-- CASE
-- WHEN Q1.dummy IS NOT NULL AND Q2.dummy IS NOT NULL
-- THEN nextval('user_id_seq')
-- END) Q3(c);
--
-- This gives nine calls to nextval() as we would expect.
[Opaleye.PrimExpr] -> Opaleye.Select (Any, a)
)
instance Functor Query where
fmap f (Query a) = Query (fmap (fmap (fmap f)) a)
instance Apply Query where
(<.>) = (<*>)
instance Applicative Query where
pure = fromOpaleye . pure
liftA2 = liftM2
instance Bind Query where
(>>-) = (>>=)
instance Monad Query where
Query q >>= f = Query $ \dummies -> Opaleye.QueryArr $ \(_, query, tag) ->
let
Opaleye.QueryArr qa = q dummies
((m, a), query', tag') = qa ((), query, tag)
Query q' = f a
(dummies', lquery, tag'') =
( dummy : dummies
, Opaleye.Rebind True bindings query'
, Opaleye.next tag'
)
where
(dummy, bindings) = Opaleye.run $ name random
where
random = Opaleye.FunExpr "random" []
name = Opaleye.extractAttr "dummy" tag'
Opaleye.QueryArr qa' = Opaleye.lateral $ \_ -> q' dummies'
((m'@(Any needsDummies), b), rquery, tag''') = qa' ((), Opaleye.Unit, tag'')
lquery'
| needsDummies = lquery
| otherwise = query'
query'''' = Opaleye.times lquery' rquery
m'' = m <> m'
in
((m'', b), query'''', tag''')
-- | '<|>:' = 'unionAll'.
instance AltTable Query where
(<|>:) = unionAll

View File

@ -7,11 +7,13 @@ where
-- base
import Data.Kind ( Type )
import Data.Monoid ( Any )
import Prelude ()
-- opaleye
import qualified Opaleye.Internal.HaskellDB.PrimQuery as Opaleye
import qualified Opaleye.Select as Opaleye
type Query :: Type -> Type
newtype Query a = Query (Opaleye.Select a)
newtype Query a = Query ([Opaleye.PrimExpr] -> Opaleye.Select (Any, a))

View File

@ -1,20 +1,18 @@
{-# language DerivingStrategies #-}
{-# language FlexibleContexts #-}
{-# language GeneralizedNewtypeDeriving #-}
{-# language NamedFieldPuns #-}
{-# language StandaloneKindSignatures #-}
{-# language TupleSections #-}
module Rel8.Query.Evaluate
( Evaluate
, eval
, evaluate
( evaluate
, rebind
)
where
-- base
import Data.Kind ( Type )
import Data.Monoid ( Endo ( Endo ), appEndo )
import Prelude
import Control.Monad ( (>=>) )
import Data.Foldable ( foldl' )
import Data.List.NonEmpty ( NonEmpty( (:|) ), nonEmpty )
import Data.Monoid ( Any( Any ) )
import Prelude hiding ( undefined )
-- opaleye
import qualified Opaleye.Internal.HaskellDB.PrimQuery as Opaleye
@ -26,57 +24,45 @@ import qualified Opaleye.Internal.Unpackspec as Opaleye
-- rel8
import Rel8.Expr ( Expr )
import Rel8.Expr.Bool ( (&&.) )
import Rel8.Expr.Opaleye ( fromPrimExpr )
import Rel8.Query ( Query( Query ) )
import Rel8.Table ( Table )
import Rel8.Table.Bool ( case_ )
import Rel8.Table.Opaleye ( unpackspec )
-- semigroupoids
import Data.Functor.Apply ( Apply )
import Data.Functor.Bind ( Bind, (>>-) )
-- transformers
import Control.Monad.Trans.State.Strict ( State, get, put, runState )
import Rel8.Table.Undefined
type Evaluations :: Type
data Evaluations = Evaluations
{ tag :: !Opaleye.Tag
, bindings :: !(Endo (Opaleye.Bindings Opaleye.PrimExpr))
}
-- | 'evaluate' takes expressions that could potentially have side effects and
-- \"runs\" them in the 'Query' monad. The returned expressions have no side
-- effects and can safely be reused.
evaluate :: Table Expr a => a -> Query a
evaluate = laterally >=> rebind
-- | Some PostgreSQL functions, such as 'Rel8.nextval', have side effects,
-- breaking the referential transparency we would otherwise enjoy.
--
-- To try to recover our ability to reason about such expressions, 'Evaluate'
-- allows us to control the evaluation order of side-effects by sequencing
-- them monadically.
type Evaluate :: Type -> Type
newtype Evaluate a = Evaluate (State Evaluations a)
deriving newtype (Functor, Apply, Applicative, Monad)
laterally :: Table Expr a => a -> Query a
laterally a = Query $ \bindings -> pure $ (Any True,) $
case nonEmpty bindings of
Nothing -> a
Just bindings' -> case_ [(condition, a)] undefined
where
condition = foldl1' (&&.) (fmap go bindings')
where
go = fromPrimExpr . Opaleye.UnExpr Opaleye.OpIsNotNull
instance Bind Evaluate where
(>>-) = (>>=)
-- | 'eval' takes expressions that could potentially have side effects and
-- \"runs\" them in the 'Evaluate' monad. The returned expressions have no
-- side effetcs and can safely be reused.
eval :: Table Expr a => a -> Evaluate a
eval a = Evaluate $ do
Evaluations {tag, bindings} <- get
-- | 'rebind' takes some expressions, and binds each of them to a new
-- variable in the SQL. The @a@ returned consists only of these
-- variables. It's essentially a @let@ binding for Postgres expressions.
rebind :: Table Expr a => a -> Query a
rebind a = Query $ \_ -> Opaleye.QueryArr $ \(_, query, tag) ->
let
tag' = Opaleye.next tag
(a', bindings') = Opaleye.run $
(a', bindings) = Opaleye.run $
Opaleye.runUnpackspec unpackspec (Opaleye.extractAttr "eval" tag') a
put Evaluations {tag = tag', bindings = bindings <> Endo (bindings' ++)}
pure a'
in
((mempty, a'), Opaleye.Rebind True bindings query, tag')
-- | 'evaluate' runs an 'Evaluate' inside the 'Query' monad.
evaluate :: Evaluate a -> Query a
evaluate (Evaluate m) = Query $ Opaleye.QueryArr $ \(_, query, tag) ->
case runState m (Evaluations tag mempty) of
(a, Evaluations {tag = tag', bindings}) ->
(a, Opaleye.Rebind True (appEndo bindings mempty) query, tag')
foldl1' :: (a -> a -> a) -> NonEmpty a -> a
foldl1' f (a :| as) = foldl' f a as

View File

@ -1,3 +1,5 @@
{-# language TupleSections #-}
module Rel8.Query.Opaleye
( fromOpaleye
, toOpaleye
@ -7,28 +9,54 @@ module Rel8.Query.Opaleye
where
-- base
import Control.Applicative ( liftA2 )
import Prelude
-- opaleye
import qualified Opaleye.Select as Opaleye
import qualified Opaleye.Internal.QueryArr as Opaleye
-- rel8
import {-# SOURCE #-} Rel8.Query ( Query( Query ) )
fromOpaleye :: Opaleye.Select a -> Query a
fromOpaleye = Query
fromOpaleye = Query . pure . fmap pure
toOpaleye :: Query a -> Opaleye.Select a
toOpaleye (Query a) = a
toOpaleye (Query a) = snd <$> a mempty
mapOpaleye :: (Opaleye.Select a -> Opaleye.Select b) -> Query a -> Query b
mapOpaleye f = fromOpaleye . f . toOpaleye
mapOpaleye f (Query a) = Query (fmap (mapping f) a)
zipOpaleyeWith :: ()
=> (Opaleye.Select a -> Opaleye.Select b -> Opaleye.Select c)
-> Query a -> Query b -> Query c
zipOpaleyeWith f a b = fromOpaleye $ f (toOpaleye a) (toOpaleye b)
zipOpaleyeWith f (Query a) (Query b) = Query $ liftA2 (zipping f) a b
mapping :: ()
=> (Opaleye.Select a -> Opaleye.Select b)
-> Opaleye.Select (m, a) -> Opaleye.Select (m, b)
mapping f q@(Opaleye.QueryArr qa) = Opaleye.QueryArr $ \(_, query, tag) ->
let
((m, _), _, _) = qa ((), query, tag)
Opaleye.QueryArr qa' = (m,) <$> f (snd <$> q)
in
qa' ((), query, tag)
zipping :: Semigroup m
=> (Opaleye.Select a -> Opaleye.Select b -> Opaleye.Select c)
-> Opaleye.Select (m, a) -> Opaleye.Select (m, b) -> Opaleye.Select (m, c)
zipping f q@(Opaleye.QueryArr qa) q'@(Opaleye.QueryArr qa') =
Opaleye.QueryArr $ \(_, query, tag) ->
let
((m, _), _, _) = qa ((), query, tag)
((m', _), _, _) = qa' ((), query, tag)
m'' = m <> m'
Opaleye.QueryArr qa'' = (m'',) <$> f (snd <$> q) (snd <$> q')
in
qa'' ((), query, tag)

View File

@ -121,6 +121,7 @@ tests =
, testSelectNestedPairs getTestDatabase
, testSelectArray getTestDatabase
, testNestedMaybeTable getTestDatabase
, testEvaluate getTestDatabase
]
where
@ -132,6 +133,7 @@ tests =
flip run conn do
sql "CREATE EXTENSION citext"
sql "CREATE TABLE test_table ( column1 text not null, column2 bool not null )"
sql "CREATE SEQUENCE test_seq"
return db
@ -728,3 +730,52 @@ testNestedMaybeTable = databasePropertyTest "Can nest MaybeTable within other ta
pure $ Rel8.maybeTable (Rel8.lit False) (\_ -> Rel8.lit True) (nmt2 x)
selected === [True]
testEvaluate :: IO TmpPostgres.DB -> TestTree
testEvaluate = databasePropertyTest "evaluate has the evaluation order we expect" \transaction -> do
transaction \connection -> do
selected <- liftIO $ Rel8.select connection do
x <- Rel8.values (Rel8.lit <$> ['a', 'b', 'c'])
y <- Rel8.evaluate (Rel8.nextval "test_seq")
pure (x, (y, y))
normalize selected ===
[ ('a', (0, 0))
, ('b', (1, 1))
, ('c', (2, 2))
]
selected' <- liftIO $ Rel8.select connection do
x <- Rel8.values (Rel8.lit <$> ['a', 'b', 'c'])
y <- Rel8.values (Rel8.lit <$> ['d', 'e', 'f'])
z <- Rel8.evaluate (Rel8.nextval "test_seq")
pure ((x, y), (z, z))
normalize selected' ===
[ (('a', 'd'), (0, 0))
, (('b', 'd'), (1, 1))
, (('c', 'd'), (2, 2))
, (('a', 'e'), (3, 3))
, (('b', 'e'), (4, 4))
, (('c', 'e'), (5, 5))
, (('a', 'f'), (6, 6))
, (('b', 'f'), (7, 7))
, (('c', 'f'), (8, 8))
]
where
normalize :: [(x, (Int64, Int64))] -> [(x, (Int64, Int64))]
normalize [] = []
normalize xs@((_, (i, _)) : _) = map (fmap (\(a, b) -> (a - i, b - i))) xs
{-
selected <- liftIO $ Rel8.select connection do
x <- Rel8.values (Rel8.lit <$> ['a', 'b', 'c'])
x <- Rel8.values (Rel8.lit <$> ['d', 'e', 'f'])
y <- Rel8.evaluate (Rel8.nextval "test_seq")
pure (x, y, y)
selected === [('a', 1, 1), ('b', 2, 2), ('c', 3, 3)]
-}