first group commit

This commit is contained in:
Arya Irani 2021-10-19 11:01:26 -04:00
parent 3c8d2ac83f
commit 6d5ac667a5
4 changed files with 296 additions and 6 deletions

View File

@ -6,12 +6,18 @@ import U.Codebase.Sqlite.Branch.Full (LocalBranch)
import U.Codebase.Sqlite.DbId (CausalHashId, BranchObjectId, ObjectId, PatchObjectId, TextId)
import Data.ByteString (ByteString)
-- | A 'BranchFormat' is a deserialized namespace object (@object.bytes@).
--
-- you can use the exact same `BranchLocalIds` when converting between `Full` and `Diff`
data BranchFormat
= Full BranchLocalIds LocalBranch
| Diff BranchObjectId BranchLocalIds LocalDiff
deriving Show
-- | A 'BranchLocalIds' is a mapping between local ids (local to this object) encoded as offsets, and actual database ids.
--
-- For example, a @branchTextLookup@ vector of @[50, 74]@ means "local id 0 corresponds to database text id 50, and
-- local id 1 corresponds to database text id 74".
data BranchLocalIds = LocalIds
{ branchTextLookup :: Vector TextId,
branchDefnLookup :: Vector ObjectId,
@ -23,3 +29,85 @@ data BranchLocalIds = LocalIds
data SyncBranchFormat
= SyncFull BranchLocalIds ByteString
| SyncDiff BranchObjectId BranchLocalIds ByteString
{-
projects.arya.message = "hello, world" -> <text constant> -> #abc
projects.arya.program = printLine message -> printLine #abc -> #def
projects.arya {
terms = { "message" -> #abc
, "program" -> #def
}
}
text table =
{ 1 -> "hello, world"
, 2 -> "message"
, 3 -> "program"
}
hash table =
{ 10 -> "abc"
, 11 -> "def"
}
object table =
{ ...
}
projects.arya {
terms = { TextId 2 -> Reference { builtin = null, object = ObjectId 20, position = 0 }
, TextId 3 -> Reference { builtin = null, object = ObjectId 21, position = 0 }
}
}
stored in original codebase:
projects.arya = BranchFormat.Full {
localIds = {
text = [2, 3]
hash = [10, 11]
object = [20, 21]
}
localBranch = {
terms = { LocalTextId 0 -> Reference { builtin = null, object = LocalObjectId 0, position = 0 }
, LocalTextId 1 -> Reference { builtin = null, object = LocalObjectId 1, position = 0 }
}
...
}
}
write to dest codebase:
text table =
{ ...
, 901 -> "hello, world"
, 902 -> "message"
, 903 -> "program"
}
hash table =
{ ...
, 500 -> "abc"
, 501 -> "def"
}
projects.arya {
-- updated copy of original localIds, with new mapping
localIds = {
text = [902, 903]
hash = [500, 501]
object = [300, 301]
}
-- copy unmodified from original
localBranch = {
terms = { LocalTextId 0 -> Reference { builtin = null, object = LocalObjectId 0, position = 0 }
, LocalTextId 1 -> Reference { builtin = null, object = LocalObjectId 1, position = 0 }
}
...
}
}
-}
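-- The example above amounts to rewriting the lookup vectors while leaving the
-- 'LocalBranch' body untouched. A minimal sketch of that rewrite (an
-- illustration, not part of this commit; `remapBranchLocalIds` and its two
-- lookup arguments are hypothetical, and only the two fields shown above are
-- remapped):
remapBranchLocalIds ::
  (TextId -> TextId) ->
  (ObjectId -> ObjectId) ->
  BranchLocalIds ->
  BranchLocalIds
remapBranchLocalIds lookupDestText lookupDestObject ids =
  ids
    { branchTextLookup = fmap lookupDestText (branchTextLookup ids),
      branchDefnLookup = fmap lookupDestObject (branchDefnLookup ids)
    }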

View File

@ -276,8 +276,8 @@ sqliteCodebase debugName root = do
termCache <- Cache.semispaceCache 8192 -- pure Cache.nullCache -- to disable
typeOfTermCache <- Cache.semispaceCache 8192
declCache <- Cache.semispaceCache 1024
let
startCodebase = do
rootBranchCache <- newTVarIO Nothing
-- The v1 codebase interface has operations to read and write individual definitions
-- whereas the v2 codebase writes them as complete components. These two fields buffer
@ -799,6 +799,9 @@ sqliteCodebase debugName root = do
(Just \l r -> runDB conn $ fromJust <$> before l r)
in code
)
runReaderT Q.schemaVersion conn >>= \case
SchemaVersion 2 -> startCodebase
SchemaVersion 1 -> _migrate12 conn >> startCodebase
v -> shutdownConnection conn $> Left v
-- well one or the other. :zany_face: the thinking being that they wouldn't hash-collide
@ -833,14 +836,16 @@ syncInternal ::
Branch m ->
m ()
syncInternal progress srcConn destConn b = time "syncInternal" do
-- We start a savepoint on the src connection because it seemed to speed things up.
-- Mitchell says: that doesn't sound right... why would that be the case?
-- TODO: look into this; this connection should be used only for reads.
runDB srcConn $ Q.savepoint "sync"
runDB destConn $ Q.savepoint "sync"
result <- runExceptT do
let syncEnv = Sync22.Env srcConn destConn (16 * 1024 * 1024)
-- we want to use sync22 wherever possible
-- so for each source branch, we'll check if it exists in the destination codebase
-- or if it exists in the source codebase, then we can sync22 it
-- if it doesn't exist in the dest or source branch,
-- then just use putBranch to the dest
let se :: forall m a. Functor m => (ExceptT Sync22.Error m a -> ExceptT SyncEphemeral.Error m a)
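-- A rough sketch of the per-branch decision described in the comments above
-- (an assumption, not this commit's code; existsInDest, existsInSrc, and
-- putBranchToDest are hypothetical helpers):
--
-- syncOneBranch b =
--   existsInDest b >>= \case
--     True -> pure () -- already present; nothing to do
--     False ->
--       existsInSrc b >>= \case
--         True -> sync22 b -- fast path: copy objects, remapping ids
--         False -> putBranchToDest b -- fallback: serialize from memory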

View File

@ -0,0 +1,196 @@
{-# LANGUAGE PartialTypeSignatures #-}
{-# LANGUAGE PatternSynonyms #-}
module Unison.Codebase.SqliteCodebase.MigrateSchema12 where
import Control.Monad.Reader (MonadReader)
import Control.Monad.State (MonadState)
import U.Codebase.Sqlite.Connection (Connection)
import U.Codebase.Sqlite.DbId (CausalHashId, HashId, ObjectId)
import U.Codebase.Sqlite.ObjectType (ObjectType)
import qualified U.Codebase.Sqlite.ObjectType as OT
import qualified U.Codebase.Sqlite.Queries as Q
import qualified U.Codebase.Sqlite.Reference as S.Reference
import U.Codebase.Sync (Sync (Sync))
import qualified U.Codebase.Sync as Sync
import qualified U.Codebase.WatchKind as WK
import Unison.Prelude (ByteString, Map, MonadIO)
import Unison.Reference (Pos)
import Unison.Referent (ConstructorId)
-- lookupCtor :: ConstructorMapping -> ObjectId -> Pos -> ConstructorId -> Maybe (Pos, ConstructorId)
-- lookupCtor (ConstructorMapping cm) oid pos cid =
-- Map.lookup oid cm >>= (Vector.!? fromIntegral pos) >>= (Vector.!? cid)
-- lookupTermRef :: TermLookup -> S.Reference -> Maybe S.Reference
-- lookupTermRef _tl (ReferenceBuiltin t) = Just (ReferenceBuiltin t)
-- lookupTermRef tl (ReferenceDerived id) = ReferenceDerived <$> lookupTermRefId tl id
-- lookupTermRefId :: TermLookup -> S.Reference.Id -> Maybe S.Reference.Id
-- lookupTermRefId tl (Id oid pos) = Id oid <$> lookupTermPos tl oid pos
-- lookupTermPos :: TermLookup -> ObjectId -> Pos -> Maybe Pos
-- lookupTermPos (TermLookup tl) oid pos = Map.lookup oid tl >>= (Vector.!? fromIntegral pos)
-- newtype ConstructorMapping = ConstructorMapping (Map ObjectId (Vector (Vector (Pos, ConstructorId))))
-- newtype TermLookup = TermLookup (Map ObjectId (Vector Pos))
data MigrationState = MigrationState
{ declLookup :: Map ObjectId (Map Pos Pos),
ctorLookup :: Map (ObjectId, Pos) (Map ConstructorId ConstructorId),
termLookup :: Map ObjectId (Map Pos Pos)
}
{-
* Load entire codebase as a list
* Pick a term from the codebase
* Look up the references inside the term
* If any haven't been processed, add them to the "to process" stack, push the term you were working on back onto that stack
* Rebuild & rehash the term, store that
* For any data constructor terms inside,
* Store a map from old ConstructorId to new, based on the old and new reference hashes
* After rebuilding a cycle, map old Pos to new
-}
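-- A minimal sketch (an assumption, not part of this commit) of how these maps
-- would be consulted once populated: object ids stay the same, so only the
-- position within a component is remapped.
--
-- remapTermPos :: MigrationState -> ObjectId -> Pos -> Maybe Pos
-- remapTermPos ms oid pos = Map.lookup oid (termLookup ms) >>= Map.lookup pos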
-- Q: can we plan to hold the whole mapping in memory? ✅
-- Q: a) update database in-place? or b) write to separate database and then overwrite? leaning (b).
-- note: we do need to rebuild namespaces, although we don't need to rehash them.
-- cycle position index `Pos`
-- constructor index `ConstructorId`
{-
data Maybe a = (Just Bar | Nothing X)
-- changes due to missing size from ref(Y)
data X = MkX Y
-- know old hash and old cycle positions
data Y = MkY Int
-}
data Entity
= O ObjectId
| C CausalHashId
| W WK.WatchKind S.Reference.IdH
deriving (Eq, Ord, Show)
data Env = Env {db :: Connection}
-- -> m (TrySyncResult h)
migrationSync ::
(MonadIO m, MonadState MigrationState m, MonadReader Env m) =>
Sync m Entity
migrationSync = Sync \case
-- To sync an object,
-- * If we have already synced it, we are done.
-- * Otherwise, read the object from the database and switch on its object type.
-- * See next steps below v
--
-- To sync a term component object,
-- * If we have not already synced all dependencies, push syncing them onto the front of the work queue.
-- * Otherwise, ???
--
-- To sync a decl component object,
-- * If we have not already synced all dependencies, push syncing them onto the front of the work queue.
-- * Otherwise, ???
--
-- To sync a namespace object,
-- * Deserialize it and compute its dependencies (terms, types, patches, children).
-- * If we have not already synced all of its dependencies, push syncing them onto the front of the work queue.
-- * To sync a 'BranchFull',
-- * We need to make a new 'BranchFull' in memory, then insert it into the database under a new object id.
-- * Wait, we need to preserve the ordering of the types/terms, either by not changing them (but the orderings of the
-- reference ids used in keys are definitely not preserved by this migration), or by permuting the local id vectors,
-- but we may be at a level too low or high for us to care?
-- * Its 'LocalBranch' must have all references changed in-place per the (old (object id, pos) => new (object id, pos)) mapping.
-- The position of any term/decl within its component may have changed, so we need to adjust each referent's component position.
-- * We need to recompute the References contained in the Branch body since the pos within the reference may have changed.
-- We can look up the position changes in our MigrationState; they must have been added when computing the objects pointed to by the reference.
-- * The normalized object IDs within the body _likely_ don't need to change.
-- * Its 'BranchLocalIds' must be translated from the old codebase object IDs to the new object IDs,
-- we can use our MigrationState to look these up, since they must have already been migrated.
-- * To sync a 'BranchDiff',
-- * ???
--
-- To sync a patch object, ???
--
-- To sync a Causal
--- * If we haven't yet synced its parents, push them onto the work queue
--- * If we haven't yet synced the causal's value (namespace), push it onto the work queue.
--- * Rehash the Causal's valueHash AND CausalHash, and add the new causal, its hashes, and hash objects to the codebase under a fresh object ID
O objId -> do
let alreadySynced :: m Bool
alreadySynced = undefined
alreadySynced >>= \case
False -> do
(hId, objType, bytes) <- runSrc $ Q.loadObjectWithHashIdAndTypeById objId
migrateObject objType hId bytes
True -> pure Sync.PreviouslyDone
-- result <- runValidateT @(Set Entity) @m @ObjectId case objType of
-- To sync a causal,
-- 1. ???
-- 2. Synced
C causalHashID -> _
-- To sync a watch result,
-- 1. ???
-- 2. Synced
W watchKind idH -> _
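-- The dependency-first pattern sketched in the plan above would presumably use
-- the other 'TrySyncResult' constructors from U.Codebase.Sync, roughly (a
-- sketch with hypothetical helpers, not this commit's code):
--
-- case missingDependencies of
--   [] -> doMigration >> pure Sync.Done
--   deps -> pure (Sync.Missing deps)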
-- data ObjectType
-- = TermComponent -- 0
-- | DeclComponent -- 1
-- | Namespace -- 2
-- | Patch -- 3
migrateObject :: ObjectType -> HashId -> ByteString -> m _
migrateObject objType hash bytes = case objType of
OT.TermComponent -> migrateTermComponent hash bytes
OT.DeclComponent -> migrateDeclComponent hash bytes
OT.Namespace -> migrateNamespace hash bytes
OT.Patch -> migratePatch hash bytes
migratePatch :: HashId -> ByteString -> m _
migratePatch = error "not implemented"
migrateNamespace :: HashId -> ByteString -> m _
migrateNamespace = error "not implemented"
migrateTermComponent :: HashId -> ByteString -> m _
migrateTermComponent = error "not implemented"
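-- assumed counterpart for decl components, referenced from migrateObject above
migrateDeclComponent :: HashId -> ByteString -> m _
migrateDeclComponent = error "not implemented"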
-- | Migrate a sqlite codebase from version 1 to 2; returns False and rolls back on failure.
migrateSchema12 :: Applicative m => Connection -> m Bool
migrateSchema12 db = do
-- todo: drop and recreate corrected type/mentions index schema
-- do we want to garbage collect at this time? ✅
-- or just convert everything without going in dependency order? ✅
error "todo: go through "
-- todo: double-hash all the types and produce a constructor mapping
-- object ids will stay the same
-- todo: rehash all the terms using the new constructor mapping
-- and adding the type to the term
-- do we want to diff namespaces at this time? ❌
-- do we want to look at supporting multiple simultaneous representations of objects at this time?
pure "todo: migrate12"
pure True
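-- The "rolls back on failure" behavior promised in the haddock above maps onto
-- SQLite savepoints: open with SAVEPOINT migrate12, then RELEASE migrate12 on
-- success, or ROLLBACK TO migrate12 followed by RELEASE migrate12 on failure.
-- A sketch (the Q.release and Q.rollbackTo helpers are assumptions):
--
-- do Q.savepoint "migrate12"
--    ok <- runMigration db -- hypothetical
--    if ok
--      then Q.release "migrate12"
--      else Q.rollbackTo "migrate12" >> Q.release "migrate12"
--    pure ok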
-- -- remember that the component order might be different
-- rehashDeclComponent :: [Decl v a] -> (Hash, ConstructorMappings)
-- rehashDeclComponent decls = fmap decls <&> \case
--
-- --
-- error "todo: rehashDeclComponent"
-- rewriteDeclComponent :: DeclFormat.LocallyIndexedComponent -> (Hash, DeclFormat.LocallyIndexedComponent, ConstructorMappings)
-- rewriteDeclComponent =
-- --
-- error "todo: rehashDeclComponent"
-- rehashDeclComponent :: [Decl v a] -> (Hash, DeclFormat.LocallyIndexedComponent, ConstructorMappings)
-- rehashTermComponent :: ConstructorMappings -> TermFormat.LocallyIndexedComponent -> (Hash, TermFormat.LocallyIndexedComponent)
-- rehashTermComponent = error "todo: rehashTermComponent"
-- -- getConstructor :: ConstructorMappings -> ObjectId -> Pos -> ConstructorId
-- -- getConstructor cm

View File

@ -73,6 +73,7 @@ library
Unison.Codebase.SqliteCodebase.Branch.Dependencies
Unison.Codebase.SqliteCodebase.Conversions
Unison.Codebase.SqliteCodebase.GitError
Unison.Codebase.SqliteCodebase.MigrateSchema12
Unison.Codebase.SqliteCodebase.SyncEphemeral
Unison.Codebase.SyncMode
Unison.Codebase.TermEdit