refactor syncInternal to use transaction tech

Mitchell Rosen 2022-04-07 22:19:10 -04:00
parent af9be6743c
commit 186b76530a
6 changed files with 109 additions and 126 deletions

View File

@@ -10,8 +10,6 @@
 module U.Codebase.Sqlite.Sync22 where

 import Control.Monad.Except (MonadError (throwError))
-import Control.Monad.RWS (MonadReader)
-import qualified Control.Monad.Reader as Reader
 import Control.Monad.Validate (ValidateT, runValidateT)
 import qualified Control.Monad.Validate as Validate
 import Data.Bifunctor (bimap)
@@ -39,7 +37,7 @@ import qualified U.Codebase.WatchKind as WK
 import U.Util.Cache (Cache)
 import qualified U.Util.Cache as Cache
 import Unison.Prelude
-import Unison.Sqlite (Connection, Transaction, unsafeUnTransaction)
+import Unison.Sqlite (Transaction)

 data Entity
   = O ObjectId
@@ -47,8 +45,6 @@ data Entity
   | W WK.WatchKind Sqlite.Reference.IdH
   deriving (Eq, Ord, Show)

-data DbTag = SrcDb | DestDb
-
 data DecodeError
   = ErrTermComponent
   | ErrDeclComponent
@@ -67,41 +63,50 @@ data Error
   | SourceDbNotExist
   deriving (Show)

-data Env = Env
-  { srcDB :: Connection,
-    destDB :: Connection,
+data Env m = Env
+  { runSrc :: forall a. Transaction a -> m a,
+    runDest :: forall a. Transaction a -> m a,
     -- | there are three caches of this size
     idCacheSize :: Word
   }

+mapEnv :: (forall x. m x -> n x) -> Env m -> Env n
+mapEnv f Env {runSrc, runDest, idCacheSize} =
+  Env
+    { runSrc = f . runSrc,
+      runDest = f . runDest,
+      idCacheSize
+    }
+
 debug :: Bool
 debug = False

 -- data Mappings

 sync22 ::
   ( MonadIO m,
-    MonadError Error m,
-    MonadReader Env m
+    MonadError Error m
   ) =>
+  Env m ->
   m (Sync m Entity)
-sync22 = do
-  size <- Reader.reader idCacheSize
+sync22 Env {runSrc, runDest, idCacheSize = size} = do
   tCache <- Cache.semispaceCache size
   hCache <- Cache.semispaceCache size
   oCache <- Cache.semispaceCache size
   cCache <- Cache.semispaceCache size
-  pure $ Sync (trySync tCache hCache oCache cCache)
+  pure $ Sync (trySync runSrc runDest tCache hCache oCache cCache)

 trySync ::
   forall m.
-  (MonadIO m, MonadError Error m, MonadReader Env m) =>
+  (MonadIO m, MonadError Error m) =>
+  (forall a. Transaction a -> m a) ->
+  (forall a. Transaction a -> m a) ->
   Cache TextId TextId ->
   Cache HashId HashId ->
   Cache ObjectId ObjectId ->
   Cache CausalHashId CausalHashId ->
   Entity ->
   m (TrySyncResult Entity)
-trySync tCache hCache oCache cCache = \case
+trySync runSrc runDest tCache hCache oCache cCache = \case
   -- for causals, we need to get the value_hash_id of the thingo
   -- - maybe enqueue their parents
   -- - enqueue the self_ and value_ hashes
@@ -111,14 +116,14 @@ trySync tCache hCache oCache cCache = \case
       Just {} -> pure Sync.PreviouslyDone
       Nothing -> do
         result <- runValidateT @(Set Entity) @m @() do
-          bhId <- runSrc $ Q.expectCausalValueHashId chId
-          mayBoId <- runSrc . Q.loadObjectIdForAnyHashId $ unBranchHashId bhId
+          bhId <- lift . runSrc $ Q.expectCausalValueHashId chId
+          mayBoId <- lift . runSrc . Q.loadObjectIdForAnyHashId $ unBranchHashId bhId
           traverse_ syncLocalObjectId mayBoId

           parents' :: [CausalHashId] <- findParents' chId
           bhId' <- lift $ syncBranchHashId bhId
           chId' <- lift $ syncCausalHashId chId
-          runDest do
+          (lift . runDest) do
             Q.saveCausal chId' bhId'
             Q.saveCausalParents chId' parents'
@@ -157,7 +162,7 @@ trySync tCache hCache oCache cCache = \case
           let bytes' =
                 runPutS $
                   putWord8 fmt >> S.recomposeComponent (zip localIds' bytes)
-          oId' <- runDest $ Q.saveObject hId' objType bytes'
+          oId' <- lift . runDest $ Q.saveObject hId' objType bytes'
           lift do
             -- copy reference-specific stuff
             for_ [0 .. length localIds - 1] \(fromIntegral -> idx) -> do
@@ -188,7 +193,7 @@ trySync tCache hCache oCache cCache = \case
                 runPutS $
                   putWord8 fmt
                     >> S.recomposeComponent (zip localIds' declBytes)
-          oId' <- runDest $ Q.saveObject hId' objType bytes'
+          oId' <- lift . runDest $ Q.saveObject hId' objType bytes'
           lift do
             -- copy per-element-of-the-component stuff
             for_ [0 .. length localIds - 1] \(fromIntegral -> idx) -> do
@@ -202,26 +207,26 @@ trySync tCache hCache oCache cCache = \case
           Right (BL.SyncFull ids body) -> do
             ids' <- syncBranchLocalIds ids
             let bytes' = runPutS $ S.recomposeBranchFormat (BL.SyncFull ids' body)
-            oId' <- runDest $ Q.saveObject hId' objType bytes'
+            oId' <- lift . runDest $ Q.saveObject hId' objType bytes'
             pure oId'
           Right (BL.SyncDiff boId ids body) -> do
             boId' <- syncBranchObjectId boId
             ids' <- syncBranchLocalIds ids
             let bytes' = runPutS $ S.recomposeBranchFormat (BL.SyncDiff boId' ids' body)
-            oId' <- runDest $ Q.saveObject hId' objType bytes'
+            oId' <- lift . runDest $ Q.saveObject hId' objType bytes'
             pure oId'
           Left s -> throwError $ DecodeError ErrBranchFormat bytes s
         OT.Patch -> case flip runGetS bytes S.decomposePatchFormat of
           Right (PL.SyncFull ids body) -> do
             ids' <- syncPatchLocalIds ids
             let bytes' = runPutS $ S.recomposePatchFormat (PL.SyncFull ids' body)
-            oId' <- runDest $ Q.saveObject hId' objType bytes'
+            oId' <- lift . runDest $ Q.saveObject hId' objType bytes'
             pure oId'
           Right (PL.SyncDiff poId ids body) -> do
             poId' <- syncPatchObjectId poId
             ids' <- syncPatchLocalIds ids
             let bytes' = runPutS $ S.recomposePatchFormat (PL.SyncDiff poId' ids' body)
-            oId' <- runDest $ Q.saveObject hId' objType bytes'
+            oId' <- lift . runDest $ Q.saveObject hId' objType bytes'
             pure oId'
           Left s -> throwError $ DecodeError ErrPatchFormat bytes s
     case result of
@@ -273,7 +278,7 @@ trySync tCache hCache oCache cCache = \case
       -- workaround for requiring components to compute component lengths for references.
      -- this line requires objects in the destination for any hashes referenced in the source,
      -- (making those objects dependencies of this patch). See Sync21.filter{Term,Type}Edit
-      traverse_ syncLocalObjectId =<< traverse (runSrc . Q.expectObjectIdForAnyHashId) hIds
+      traverse_ syncLocalObjectId =<< traverse (lift . runSrc . Q.expectObjectIdForAnyHashId) hIds

      pure $ PL.LocalIds tIds' hIds' oIds'
@@ -355,7 +360,7 @@ trySync tCache hCache oCache cCache = \case
     findParents' :: CausalHashId -> ValidateT (Set Entity) m [CausalHashId]
     findParents' chId = do
-      srcParents <- runSrc $ Q.loadCausalParents chId
+      srcParents <- lift . runSrc $ Q.loadCausalParents chId
       traverse syncCausal srcParents

     -- Sync any watches of the given kinds to the dest if and only if watches of those kinds
@@ -409,11 +414,3 @@ trySync tCache hCache oCache cCache = \case
         (runDest $ Q.isCausalHash hId')
         (pure . Just $ CausalHashId hId')
         (pure Nothing)
-
-runSrc,
-  runDest ::
-    (MonadIO m, MonadReader Env m) =>
-    Transaction a ->
-    m a
-runSrc ma = Reader.reader srcDB >>= liftIO . unsafeUnTransaction ma
-runDest ma = Reader.reader destDB >>= liftIO . unsafeUnTransaction ma
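
Note on the shape of this change: Env no longer carries raw Connections; it carries two runner functions of type forall a. Transaction a -> m a, and mapEnv post-composes a monad morphism onto both so an Env built over one monad can be reused from a transformer stack on top of it. A rough, self-contained sketch of the pattern follows; the simplified Transaction stand-in, module name, and main are illustrative only, not the real Unison.Sqlite types.

{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE RankNTypes #-}

module EnvSketch where

import Control.Monad.Trans.Class (lift)
import Control.Monad.Trans.Except (ExceptT, runExceptT)

-- Stand-in for a Transaction: here just a wrapped IO action.
newtype Transaction a = Transaction {runTransactionIO :: IO a}

-- An environment of transaction runners, parameterized by the monad the
-- caller wants results in (mirrors the new Sync22.Env).
data Env m = Env
  { runSrc :: forall a. Transaction a -> m a,
    runDest :: forall a. Transaction a -> m a
  }

-- Post-compose a monad morphism onto both runners (mirrors mapEnv).
mapEnv :: (forall x. m x -> n x) -> Env m -> Env n
mapEnv f Env {runSrc, runDest} =
  Env {runSrc = f . runSrc, runDest = f . runDest}

-- An Env over IO, then the same Env lifted into ExceptT String IO.
ioEnv :: Env IO
ioEnv = Env {runSrc = runTransactionIO, runDest = runTransactionIO}

exceptEnv :: Env (ExceptT String IO)
exceptEnv = mapEnv lift ioEnv

main :: IO ()
main = do
  r <- runExceptT (runSrc exceptEnv (Transaction (pure (2 + 2))))
  print r -- Right 4

This is the same move syncInternal makes below: build the Env against concrete runners and then push it into ExceptT with Sync22.mapEnv lift.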

View File

@@ -19,6 +19,10 @@ module Unison.Sqlite
     Transaction,
     runTransaction,
     runTransactionWithAbort,
+    runReadOnlyTransaction,
+    runReadOnlyTransactionIO,
+    runWriteTransaction,
+    runWriteTransactionIO,
     unsafeUnTransaction,
     savepoint,
     idempotentIO,

View File

@@ -163,6 +163,7 @@ default-extensions:
   - ApplicativeDo
   - BangPatterns
   - BlockArguments
+  - DeriveAnyClass
   - DeriveFunctor
   - DeriveGeneric
   - DeriveTraversable

View File

@@ -529,95 +529,75 @@ syncInternal ::
 syncInternal progress srcConn destConn b = time "syncInternal" do
   UnliftIO runInIO <- askUnliftIO
-  -- We start a savepoint on the src connection because it seemed to speed things up.
-  -- Mitchell says: that doesn't sound right... why would that be the case?
-  -- TODO: look into this; this connection should be used only for reads.
-  liftIO (Sqlite.Connection.savepoint srcConn "sync")
-  liftIO (Sqlite.Connection.savepoint destConn "sync")
-  -- FIXME don't savepoint above, instead BEGIN
-  result <- runExceptT do
-    let syncEnv = Sync22.Env srcConn destConn (16 * 1024 * 1024)
-    -- we want to use sync22 wherever possible
-    -- so for each source branch, we'll check if it exists in the destination codebase
-    -- or if it exists in the source codebase, then we can sync22 it
-    -- if it doesn't exist in the dest or source branch,
-    -- then just use putBranch to the dest
-    let se :: forall m a. Functor m => (ExceptT Sync22.Error m a -> ExceptT SyncEphemeral.Error m a)
-        se = Except.withExceptT SyncEphemeral.Sync22Error
-    let r :: forall m a. (ReaderT Sync22.Env m a -> m a)
-        r = flip runReaderT syncEnv
-        processBranches ::
-          Sync.Sync (ReaderT Sync22.Env (ExceptT Sync22.Error m)) Sync22.Entity ->
-          Sync.Progress (ReaderT Sync22.Env (ExceptT Sync22.Error m)) Sync22.Entity ->
-          [Entity m] ->
-          ExceptT Sync22.Error m ()
-        processBranches _ _ [] = pure ()
-        processBranches sync progress (b0@(B h mb) : rest) = do
-          when debugProcessBranches do
-            traceM $ "processBranches " ++ show b0
-            traceM $ " queue: " ++ show rest
-          ifM @(ExceptT Sync22.Error m)
-            (liftIO (Sqlite.unsafeUnTransaction (Ops2.isCausalHash h) destConn))
-            do
-              when debugProcessBranches $ traceM $ " " ++ show b0 ++ " already exists in dest db"
-              processBranches sync progress rest
-            do
-              when debugProcessBranches $ traceM $ " " ++ show b0 ++ " doesn't exist in dest db"
-              let h2 = CausalHash . Cv.hash1to2 $ Causal.unRawHash h
-              liftIO (Sqlite.unsafeUnTransaction (Q.loadCausalHashIdByCausalHash h2) srcConn) >>= \case
-                Just chId -> do
-                  when debugProcessBranches $ traceM $ " " ++ show b0 ++ " exists in source db, so delegating to direct sync"
-                  r $ Sync.sync' sync progress [Sync22.C chId]
-                  processBranches sync progress rest
-                Nothing -> do
-                  lift mb >>= \b -> do
-                    when debugProcessBranches $ traceM $ " " ++ show b0 ++ " doesn't exist in either db, so delegating to Codebase.putBranch"
-                    let (branchDeps, BD.to' -> BD.Dependencies' es ts ds) = BD.fromBranch b
-                    when debugProcessBranches do
-                      traceM $ " branchDeps: " ++ show (fst <$> branchDeps)
-                      traceM $ " terms: " ++ show ts
-                      traceM $ " decls: " ++ show ds
-                      traceM $ " edits: " ++ show es
-                    (cs, es, ts, ds) <- liftIO $ flip Sqlite.unsafeUnTransaction destConn do
-                      cs <- filterM (fmap not . Ops2.isCausalHash . fst) branchDeps
-                      es <- filterM (fmap not . Ops2.patchExists) es
-                      ts <- filterM (fmap not . Ops2.termExists) ts
-                      ds <- filterM (fmap not . Ops2.declExists) ds
-                      pure (cs, es, ts, ds)
-                    if null cs && null es && null ts && null ds
-                      then do
-                        liftIO (Sqlite.unsafeUnTransaction (Ops2.putBranch (Branch.transform (Sqlite.idempotentIO . runInIO) b)) destConn)
-                        processBranches sync progress rest
-                      else do
-                        let bs = map (uncurry B) cs
-                            os = map O (es <> ts <> ds)
-                        processBranches sync progress (os ++ bs ++ b0 : rest)
-        processBranches sync progress (O h : rest) = do
-          when debugProcessBranches $ traceM $ "processBranches O " ++ take 10 (show h)
-          oId <-
-            liftIO do
-              Sqlite.unsafeUnTransaction (Q.expectHashIdByHash (Cv.hash1to2 h) >>= Q.expectObjectIdForAnyHashId) srcConn
-          r $ Sync.sync' sync progress [Sync22.O oId]
-          processBranches sync progress rest
-    sync <- se . r $ Sync22.sync22
-    let progress' = Sync.transformProgress (lift . lift) progress
-        bHash = Branch.headHash b
-    se $ time "SyncInternal.processBranches" $ processBranches sync progress' [B bHash (pure b)]
-  -- FIXME COMMIT/ROLLBACK here, no savepoint so no release
-  let onSuccess a = do
-        liftIO (Sqlite.Connection.release destConn "sync")
-        pure a
-      onFailure e = liftIO do
-        if debugCommitFailedTransaction
-          then Sqlite.Connection.release destConn "sync"
-          else do
-            Sqlite.Connection.rollbackTo destConn "sync"
-            Sqlite.Connection.release destConn "sync"
-            error (show e)
-  -- (we don't write to the src anyway)
-  liftIO (Sqlite.Connection.rollbackTo srcConn "sync")
-  liftIO (Sqlite.Connection.release srcConn "sync")
-  either onFailure onSuccess result
+  Sqlite.runReadOnlyTransactionIO srcConn \runSrc -> do
+    Sqlite.runWriteTransactionIO destConn \runDest -> do
+      throwExceptT do
+        let syncEnv = Sync22.Env runSrc runDest (16 * 1024 * 1024)
+        -- we want to use sync22 wherever possible
+        -- so for each source branch, we'll check if it exists in the destination codebase
+        -- or if it exists in the source codebase, then we can sync22 it
+        -- if it doesn't exist in the dest or source branch,
+        -- then just use putBranch to the dest
+        let se :: forall m a. Functor m => (ExceptT Sync22.Error m a -> ExceptT SyncEphemeral.Error m a)
+            se = Except.withExceptT SyncEphemeral.Sync22Error
+        let r :: forall a. (ReaderT (Sync22.Env m) m a -> m a)
+            r = flip runReaderT syncEnv
+            processBranches ::
+              Sync.Sync (ExceptT Sync22.Error m) Sync22.Entity ->
+              Sync.Progress (ExceptT Sync22.Error m) Sync22.Entity ->
+              [Entity m] ->
+              ExceptT Sync22.Error m ()
+            processBranches _ _ [] = pure ()
+            processBranches sync progress (b0@(B h mb) : rest) = do
+              when debugProcessBranches do
+                traceM $ "processBranches " ++ show b0
+                traceM $ " queue: " ++ show rest
+              ifM @(ExceptT Sync22.Error m)
+                (lift (runDest (Ops2.isCausalHash h)))
+                do
+                  when debugProcessBranches $ traceM $ " " ++ show b0 ++ " already exists in dest db"
+                  processBranches sync progress rest
+                do
+                  when debugProcessBranches $ traceM $ " " ++ show b0 ++ " doesn't exist in dest db"
+                  let h2 = CausalHash . Cv.hash1to2 $ Causal.unRawHash h
+                  lift (runSrc (Q.loadCausalHashIdByCausalHash h2)) >>= \case
+                    Just chId -> do
+                      when debugProcessBranches $ traceM $ " " ++ show b0 ++ " exists in source db, so delegating to direct sync"
+                      Sync.sync' sync progress [Sync22.C chId]
+                      processBranches sync progress rest
+                    Nothing ->
+                      lift mb >>= \b -> do
+                        when debugProcessBranches $ traceM $ " " ++ show b0 ++ " doesn't exist in either db, so delegating to Codebase.putBranch"
+                        let (branchDeps, BD.to' -> BD.Dependencies' es ts ds) = BD.fromBranch b
+                        when debugProcessBranches do
+                          traceM $ " branchDeps: " ++ show (fst <$> branchDeps)
+                          traceM $ " terms: " ++ show ts
+                          traceM $ " decls: " ++ show ds
+                          traceM $ " edits: " ++ show es
+                        (cs, es, ts, ds) <-
+                          (lift . runDest) do
+                            cs <- filterM (fmap not . Ops2.isCausalHash . fst) branchDeps
+                            es <- filterM (fmap not . Ops2.patchExists) es
+                            ts <- filterM (fmap not . Ops2.termExists) ts
+                            ds <- filterM (fmap not . Ops2.declExists) ds
+                            pure (cs, es, ts, ds)
+                        if null cs && null es && null ts && null ds
+                          then do
+                            lift (runDest (Ops2.putBranch (Branch.transform (Sqlite.idempotentIO . runInIO) b)))
+                            processBranches sync progress rest
+                          else do
+                            let bs = map (uncurry B) cs
+                                os = map O (es <> ts <> ds)
+                            processBranches sync progress (os ++ bs ++ b0 : rest)
+            processBranches sync progress (O h : rest) = do
+              when debugProcessBranches $ traceM $ "processBranches O " ++ take 10 (show h)
+              oId <- lift (runSrc (Q.expectHashIdByHash (Cv.hash1to2 h) >>= Q.expectObjectIdForAnyHashId))
+              Sync.sync' sync progress [Sync22.O oId]
+              processBranches sync progress rest
+        sync <- se (Sync22.sync22 (Sync22.mapEnv lift syncEnv))
+        let progress' = Sync.transformProgress lift progress
+            bHash = Branch.headHash b
+        se $ time "SyncInternal.processBranches" $ processBranches sync progress' [B bHash (pure b)]

 data Entity m
   = B Branch.Hash (m (Branch m))
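
Note: the rewrite above trades hand-rolled savepoint/rollbackTo/release bookkeeping for bracket-style helpers that hand the caller a scoped runner. A rough, self-contained sketch of that continuation-passing shape follows; Conn, withWriteTransaction, and the begin/commit/rollback actions are simplified stand-ins for illustration, not the actual Unison.Sqlite implementation.

{-# LANGUAGE BlockArguments #-}
{-# LANGUAGE RankNTypes #-}

module RunnerSketch where

import Control.Exception (bracketOnError)

-- Simplified stand-ins for a connection and a transaction.
newtype Conn = Conn String

newtype Transaction a = Transaction {unTransaction :: Conn -> IO a}

begin, commit, rollback :: Conn -> IO ()
begin (Conn n) = putStrLn ("BEGIN on " ++ n)
commit (Conn n) = putStrLn ("COMMIT on " ++ n)
rollback (Conn n) = putStrLn ("ROLLBACK on " ++ n)

-- Bracket-style helper: open a transaction, hand the continuation a runner
-- scoped to it, commit when the continuation finishes, roll back if it
-- throws.  This mirrors the shape in which runWriteTransactionIO is used
-- above, not its actual implementation.
withWriteTransaction :: Conn -> ((forall x. Transaction x -> IO x) -> IO r) -> IO r
withWriteTransaction conn body =
  bracketOnError
    (begin conn)
    (\_ -> rollback conn)
    ( \_ -> do
        r <- body (\tx -> unTransaction tx conn)
        commit conn
        pure r
    )

main :: IO ()
main =
  withWriteTransaction (Conn "dest") \runDest -> do
    n <- runDest (Transaction (\_ -> pure (40 + 2)))
    print n

The caller never sees BEGIN/COMMIT/ROLLBACK; it only sees runDest, which is exactly the runner type that Sync22.Env now stores.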

View File

@@ -1,12 +1,10 @@
-{-# LANGUAGE ScopedTypeVariables #-}
-
 module Unison.Codebase.SqliteCodebase.SyncEphemeral where

-import Data.Set (Set)
 import U.Codebase.HashTags (CausalHash)
 import U.Codebase.Sqlite.DbId (SchemaVersion)
 import qualified U.Codebase.Sqlite.Sync22 as Sync22
 import Unison.Hash (Hash)
+import Unison.Prelude

 data Dependencies = Dependencies
   { definitions :: Set Hash,
@@ -18,4 +16,5 @@ data Error
   | SrcWrongSchema SchemaVersion
   | DestWrongSchema SchemaVersion
   | DisappearingBranch CausalHash
-  deriving (Show)
+  deriving stock (Show)
+  deriving anyclass (Exception)
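
The DeriveAnyClass entries added to package.yaml above and to the generated .cabal file below support this split deriving: Show via the stock strategy and Exception via the anyclass strategy, presumably so a SyncEphemeral.Error can be thrown as an exception by throwExceptT in the SqliteCodebase hunk above. A minimal standalone example of the same idiom, with a hypothetical toy error type:

{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DerivingStrategies #-}

module DerivingSketch where

import Control.Exception (Exception, throwIO, try)

-- Toy error type using the same deriving strategies as SyncEphemeral.Error.
data SyncError
  = WrongSchema Int
  | MissingBranch String
  deriving stock (Show)
  deriving anyclass (Exception)

main :: IO ()
main = do
  result <- try (throwIO (WrongSchema 3)) :: IO (Either SyncError ())
  print result -- Left (WrongSchema 3)

The anyclass strategy works here because Exception's methods all have defaults once Show and Typeable are available.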

View File

@@ -187,6 +187,7 @@ library
       ApplicativeDo
       BangPatterns
       BlockArguments
+      DeriveAnyClass
       DeriveFunctor
       DeriveGeneric
       DeriveTraversable
@@ -353,6 +354,7 @@ executable tests
       ApplicativeDo
       BangPatterns
       BlockArguments
+      DeriveAnyClass
       DeriveFunctor
       DeriveGeneric
       DeriveTraversable