mirror of
https://github.com/unisonweb/unison.git
synced 2024-09-20 14:57:41 +03:00
first group commit
This commit is contained in:
parent
3c8d2ac83f
commit
6d5ac667a5
@ -6,12 +6,18 @@ import U.Codebase.Sqlite.Branch.Full (LocalBranch)
|
||||
import U.Codebase.Sqlite.DbId (CausalHashId, BranchObjectId, ObjectId, PatchObjectId, TextId)
|
||||
import Data.ByteString (ByteString)
|
||||
|
||||
-- |you can use the exact same `BranchLocalIds` when converting between `Full` and `Diff`
|
||||
-- | A 'BranchFormat' is a deserialized namespace object (@object.bytes@).
|
||||
--
|
||||
-- you can use the exact same `BranchLocalIds` when converting between `Full` and `Diff`
|
||||
data BranchFormat
|
||||
= Full BranchLocalIds LocalBranch
|
||||
| Diff BranchObjectId BranchLocalIds LocalDiff
|
||||
deriving Show
|
||||
|
||||
-- | A 'BranchLocalIds' is a mapping between local ids (local to this object) encoded as offsets, and actual database ids.
|
||||
--
|
||||
-- For example, a @branchTextLookup@ vector of @[50, 74]@ means "local id 0 corresponds to database text id 50, and
|
||||
-- local id 1 corresponds to database text id 74".
|
||||
data BranchLocalIds = LocalIds
|
||||
{ branchTextLookup :: Vector TextId,
|
||||
branchDefnLookup :: Vector ObjectId,
|
||||
@ -23,3 +29,85 @@ data BranchLocalIds = LocalIds
|
||||
data SyncBranchFormat
|
||||
= SyncFull BranchLocalIds ByteString
|
||||
| SyncDiff BranchObjectId BranchLocalIds ByteString
|
||||
|
||||
{-
|
||||
projects.arya.message = "hello, world" -> <text constant> -> #abc
|
||||
projects.arya.program = printLine message -> printLine #abc -> #def
|
||||
|
||||
projects.arya {
|
||||
terms = { "message" -> #abc
|
||||
, "program" -> #def
|
||||
}
|
||||
}
|
||||
|
||||
text table =
|
||||
{ 1 -> "hello, world"
|
||||
, 2 -> "message"
|
||||
, 3 -> "program"
|
||||
}
|
||||
|
||||
hash table =
|
||||
{ 10 -> "abc"
|
||||
, 11 -> "def"
|
||||
}
|
||||
|
||||
object table =
|
||||
{ ...
|
||||
}
|
||||
|
||||
projects.arya {
|
||||
terms = { TextId 2 -> Reference { builtin = null, object = ObjectId 20, position = 0 }
|
||||
, TextId 3 -> Reference { builtin = null, object = ObjectId 21, position = 0 }
|
||||
}
|
||||
}
|
||||
|
||||
stored in original codebase:
|
||||
projects.arya = BranchFormat.Full {
|
||||
localIds = {
|
||||
text = [2, 3]
|
||||
hash = [10, 11]
|
||||
object = [20, 21]
|
||||
}
|
||||
localBranch = {
|
||||
terms = { LocalTextId 0 -> Reference { builtin = null, object = LocalObjectId 0, position = 0 }
|
||||
, LocalTextId 1 -> Reference { builtin = null, object = LocalObjectId 1, position = 0 }
|
||||
}
|
||||
...
|
||||
}
|
||||
}
|
||||
|
||||
write to dest codebase:
|
||||
text table =
|
||||
{ ...
|
||||
, 901 -> "hello, world"
|
||||
, 902 -> "message"
|
||||
, 903 -> "program"
|
||||
}
|
||||
|
||||
hash table =
|
||||
{ ...
|
||||
, 500 -> "abc"
|
||||
, 501 -> "def"
|
||||
}
|
||||
|
||||
projects.arya {
|
||||
|
||||
-- updated copy of original localIds, with new mapping
|
||||
localIds = {
|
||||
text = [902, 903]
|
||||
hash = [500, 501]
|
||||
object = [300, 301]
|
||||
}
|
||||
|
||||
-- copy unmodified from original
|
||||
localBranch = {
|
||||
terms = { LocalTextId 0 -> Reference { builtin = null, object = LocalObjectId 0, position = 0 }
|
||||
, LocalTextId 1 -> Reference { builtin = null, object = LocalObjectId 1, position = 0 }
|
||||
}
|
||||
...
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
-}
|
||||
|
@ -276,8 +276,8 @@ sqliteCodebase debugName root = do
|
||||
termCache <- Cache.semispaceCache 8192 -- pure Cache.nullCache -- to disable
|
||||
typeOfTermCache <- Cache.semispaceCache 8192
|
||||
declCache <- Cache.semispaceCache 1024
|
||||
runReaderT Q.schemaVersion conn >>= \case
|
||||
SchemaVersion 1 -> do
|
||||
let
|
||||
startCodebase = do
|
||||
rootBranchCache <- newTVarIO Nothing
|
||||
-- The v1 codebase interface has operations to read and write individual definitions
|
||||
-- whereas the v2 codebase writes them as complete components. These two fields buffer
|
||||
@ -799,6 +799,9 @@ sqliteCodebase debugName root = do
|
||||
(Just \l r -> runDB conn $ fromJust <$> before l r)
|
||||
in code
|
||||
)
|
||||
runReaderT Q.schemaVersion conn >>= \case
|
||||
SchemaVersion 2 -> startCodebase
|
||||
SchemaVersion 1 -> _migrate12 conn >> startCodebase
|
||||
v -> shutdownConnection conn $> Left v
|
||||
|
||||
-- well one or the other. :zany_face: the thinking being that they wouldn't hash-collide
|
||||
@ -833,14 +836,16 @@ syncInternal ::
|
||||
Branch m ->
|
||||
m ()
|
||||
syncInternal progress srcConn destConn b = time "syncInternal" do
|
||||
-- We start a savepoint on the src connection because it seemed to speed things up.
|
||||
-- Mitchell says: that doesn't sound right... why would that be the case?
|
||||
-- TODO: look into this; this connection should be used only for reads.
|
||||
runDB srcConn $ Q.savepoint "sync"
|
||||
runDB destConn $ Q.savepoint "sync"
|
||||
result <- runExceptT do
|
||||
let syncEnv = Sync22.Env srcConn destConn (16 * 1024 * 1024)
|
||||
-- we want to use sync22 wherever possible
|
||||
-- so for each branch, we'll check if it exists in the destination branch
|
||||
-- or if it exists in the source branch, then we can sync22 it
|
||||
-- oh god but we have to figure out the dbid
|
||||
-- so for each source branch, we'll check if it exists in the destination codebase
|
||||
-- or if it exists in the source codebase, then we can sync22 it
|
||||
-- if it doesn't exist in the dest or source branch,
|
||||
-- then just use putBranch to the dest
|
||||
let se :: forall m a. Functor m => (ExceptT Sync22.Error m a -> ExceptT SyncEphemeral.Error m a)
|
||||
|
@ -0,0 +1,196 @@
|
||||
{-# LANGUAGE PartialTypeSignatures #-}
|
||||
{-# LANGUAGE PatternSynonyms #-}
|
||||
|
||||
module Unison.Codebase.SqliteCodebase.MigrateSchema12 where
|
||||
|
||||
import Control.Monad.Reader (MonadReader)
|
||||
import Control.Monad.State (MonadState)
|
||||
import U.Codebase.Sqlite.Connection (Connection)
|
||||
import U.Codebase.Sqlite.DbId (CausalHashId, HashId, ObjectId)
|
||||
import U.Codebase.Sqlite.ObjectType (ObjectType)
|
||||
import qualified U.Codebase.Sqlite.ObjectType as OT
|
||||
import qualified U.Codebase.Sqlite.Queries as Q
|
||||
import qualified U.Codebase.Sqlite.Reference as S.Reference
|
||||
import U.Codebase.Sync (Sync (Sync))
|
||||
import qualified U.Codebase.Sync as Sync
|
||||
import qualified U.Codebase.WatchKind as WK
|
||||
import Unison.Prelude (ByteString, Map, MonadIO)
|
||||
import Unison.Reference (Pos)
|
||||
import Unison.Referent (ConstructorId)
|
||||
|
||||
-- lookupCtor :: ConstructorMapping -> ObjectId -> Pos -> ConstructorId -> Maybe (Pos, ConstructorId)
|
||||
-- lookupCtor (ConstructorMapping cm) oid pos cid =
|
||||
-- Map.lookup oid cm >>= (Vector.!? fromIntegral pos) >>= (Vector.!? cid)
|
||||
|
||||
-- lookupTermRef :: TermLookup -> S.Reference -> Maybe S.Reference
|
||||
-- lookupTermRef _tl (ReferenceBuiltin t) = Just (ReferenceBuiltin t)
|
||||
-- lookupTermRef tl (ReferenceDerived id) = ReferenceDerived <$> lookupTermRefId tl id
|
||||
|
||||
-- lookupTermRefId :: TermLookup -> S.Reference.Id -> Maybe S.Reference.Id
|
||||
-- lookupTermRefId tl (Id oid pos) = Id oid <$> lookupTermPos tl oid pos
|
||||
|
||||
-- lookupTermPos :: TermLookup -> ObjectId -> Pos -> Maybe Pos
|
||||
-- lookupTermPos (TermLookup tl) oid pos = Map.lookup oid tl >>= (Vector.!? fromIntegral pos)
|
||||
|
||||
-- newtype ConstructorMapping = ConstructorMapping (Map ObjectId (Vector (Vector (Pos, ConstructorId))))
|
||||
-- newtype TermLookup = TermLookup (Map ObjectId (Vector Pos))
|
||||
|
||||
data MigrationState = MigrationState
|
||||
{ declLookup :: Map ObjectId (Map Pos Pos),
|
||||
ctorLookup :: Map (ObjectId, Pos) (Map ConstructorId ConstructorId),
|
||||
termLookup :: Map ObjectId (Map Pos Pos)
|
||||
}
|
||||
|
||||
{-
|
||||
* Load entire codebase as a list
|
||||
* Pick a term from the codebase
|
||||
* Look up the references inside the term
|
||||
* If any haven't been processed, add them to the "to process" stack, push the term you were working on back onto that stack
|
||||
* Rebuild & rehash the term, store that
|
||||
* For any data constructor terms inside,
|
||||
* Store a map from old ConstructorId to new, based on the old and new reference hashes
|
||||
* After rebuilding a cycle, map old Pos to new
|
||||
-}
|
||||
|
||||
-- Q: can we plan to hold the whole mapping in memory? ✅
|
||||
-- Q: a) update database in-place? or b) write to separate database and then overwrite? leaning (b).
|
||||
-- note: we do need to rebuild namespaces, although we don't need to rehash them.
|
||||
|
||||
-- cycle position index `Pos`
|
||||
-- constructor index `ConstructorId`
|
||||
|
||||
{-
|
||||
data Maybe a = (Just Bar | Nothing X)
|
||||
|
||||
-- changes due to missing size from ref(Y)
|
||||
data X = MkX Y
|
||||
|
||||
-- know old hash and old cycle positions
|
||||
data Y = MkY Int
|
||||
-}
|
||||
|
||||
data Entity
|
||||
= O ObjectId
|
||||
| C CausalHashId
|
||||
| W WK.WatchKind S.Reference.IdH
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
data Env = Env {db :: Connection}
|
||||
|
||||
-- -> m (TrySyncResult h)
|
||||
migrationSync ::
|
||||
(MonadIO m, MonadState MigrationState m, MonadReader Env m) =>
|
||||
Sync m Entity
|
||||
migrationSync = Sync \case
|
||||
-- To sync an object,
|
||||
-- * If we have already synced it, we are done.
|
||||
-- * Otherwise, read the object from the database and switch on its object type.
|
||||
-- * See next steps below v
|
||||
--
|
||||
-- To sync a term component object,
|
||||
-- * If we have not already synced all dependencies, push syncing them onto the front of the work queue.
|
||||
-- * Otherwise, ???
|
||||
--
|
||||
-- To sync a decl component object,
|
||||
-- * If we have not already synced all dependencies, push syncing them onto the front of the work queue.
|
||||
-- * Otherwise, ???
|
||||
--
|
||||
-- To sync a namespace object,
|
||||
-- * Deserialize it and compute its dependencies (terms, types, patches, children).
|
||||
-- * If we have not already synced all of its dependencies, push syncing them onto the front of the work queue.
|
||||
-- * To sync a 'BranchFull',
|
||||
-- * We need to make a new 'BranchFull' in memory, then insert it into the database under a new object id.
|
||||
-- * Wait, we need to preserve the ordering of the types/terms, either by not changing them (but the orderings of the
|
||||
-- reference ids used in keys is definitely not preserved by this migration), or by permuting the local id vectors,
|
||||
-- but we may be at a level too low or high for us to care?
|
||||
-- * Its 'LocalBranch' must have all references changed in-place per the (old (object id, pos) => new (object id, pos)) mapping.
|
||||
-- position of any term/decl within its component has changed. Therefore, we need to adjust each referent's component position
|
||||
-- * We need to recompute the References contained in the Branch body since the pos within the reference may have changed.
|
||||
-- We can look up the position changes in our Migration State, they must have been added when computing the objects pointed to by the reference.
|
||||
-- * The normalized object IDs within the body _likely_ don't need to change.
|
||||
-- * Its 'BranchLocalIds' must be translated from the old codebase object IDs to the new object IDs,
|
||||
-- we can use our MigrationState to look these up, since they must have already been migrated.
|
||||
-- * To sync a 'BranchDiff',
|
||||
-- * ???
|
||||
--
|
||||
-- To sync a patch object, ???
|
||||
--
|
||||
-- To sync a Causal
|
||||
--- * If we haven't yet synced its parents, push them onto the work queue
|
||||
--- * If we haven't yet synced the causal's value (namespace), push it onto the work queue.
|
||||
--- * Rehash the Causal's valueHash AND CausalHash, and add the new causal, its hashes, and hash objects to the codebase under a fresh object ID
|
||||
O objId -> do
|
||||
let alreadySynced :: m Bool
|
||||
alreadySynced = undefined
|
||||
alreadySynced >>= \case
|
||||
False -> do
|
||||
(hId, objType, bytes) <- runSrc $ Q.loadObjectWithHashIdAndTypeById oId
|
||||
migrateObject objType hId bytes
|
||||
True -> pure Sync.PreviouslyDone
|
||||
-- result <- runValidateT @(Set Entity) @m @ObjectId case objType of
|
||||
-- To sync a causal,
|
||||
-- 1. ???
|
||||
-- 2. Synced
|
||||
C causalHashID -> _
|
||||
-- To sync a watch result,
|
||||
-- 1. ???
|
||||
-- 2. Synced
|
||||
W watchKind idH -> _
|
||||
|
||||
-- data ObjectType
|
||||
-- = TermComponent -- 0
|
||||
-- | DeclComponent -- 1
|
||||
-- | Namespace -- 2
|
||||
-- | Patch -- 3
|
||||
|
||||
migrateObject :: ObjectType -> HashId -> ByteString -> m _
|
||||
migrateObject objType hash bytes = case objType of
|
||||
OT.TermComponent -> migrateTermComponent hash bytes
|
||||
OT.DeclComponent -> migrateTermComponent hash bytes
|
||||
OT.Namespace -> migrateNamespace hash bytes
|
||||
OT.Patch -> migratePatch hash bytes
|
||||
|
||||
migratePatch :: HashId -> ByteString -> m _
|
||||
migratePatch = error "not implemented"
|
||||
|
||||
migrateNamespace :: HashId -> ByteString -> m _
|
||||
migrateNamespace = error "not implemented"
|
||||
|
||||
migrateTermComponent :: HashId -> ByteString -> m _
|
||||
migrateTermComponent = error "not implemented"
|
||||
|
||||
-- | migrate sqlite codebase from version 1 to 2, return False and rollback on failure
|
||||
migrateSchema12 :: Applicative m => Connection -> m Bool
|
||||
migrateSchema12 db = do
|
||||
-- todo: drop and recreate corrected type/mentions index schema
|
||||
-- do we want to garbage collect at this time? ✅
|
||||
-- or just convert everything without going in dependency order? ✅
|
||||
error "todo: go through "
|
||||
-- todo: double-hash all the types and produce an constructor mapping
|
||||
-- object ids will stay the same
|
||||
-- todo: rehash all the terms using the new constructor mapping
|
||||
-- and adding the type to the term
|
||||
-- do we want to diff namespaces at this time? ❌
|
||||
-- do we want to look at supporting multiple simultaneous representations of objects at this time?
|
||||
pure "todo: migrate12"
|
||||
pure True
|
||||
|
||||
-- -- remember that the component order might be different
|
||||
-- rehashDeclComponent :: [Decl v a] -> (Hash, ConstructorMappings)
|
||||
-- rehashDeclComponent decls = fmap decls <&> \case
|
||||
--
|
||||
-- --
|
||||
-- error "todo: rehashDeclComponent"
|
||||
|
||||
-- rewriteDeclComponent :: DeclFormat.LocallyIndexedComponent -> (Hash, DeclFormat.LocallyIndexedComponent, ConstructorMappings)
|
||||
-- rewriteDeclComponent =
|
||||
-- --
|
||||
-- error "todo: rehashDeclComponent"
|
||||
|
||||
-- rehashDeclComponent :: [Decl v a] -> (Hash, DeclFormat.LocallyIndexedComponent, ConstructorMappings)
|
||||
|
||||
-- rehashTermComponent :: ConstructorMappings -> TermFormat.LocallyIndexedComponent -> (Hash, TermFormat.LocallyIndexedComponent)
|
||||
-- rehashTermComponent = error "todo: rehashTermComponent"
|
||||
|
||||
-- -- getConstructor :: ConstructorMappings -> ObjectId -> Pos -> ConstructorId
|
||||
-- -- getConstructor cm
|
@ -73,6 +73,7 @@ library
|
||||
Unison.Codebase.SqliteCodebase.Branch.Dependencies
|
||||
Unison.Codebase.SqliteCodebase.Conversions
|
||||
Unison.Codebase.SqliteCodebase.GitError
|
||||
Unison.Codebase.SqliteCodebase.MigrateSchema12
|
||||
Unison.Codebase.SqliteCodebase.SyncEphemeral
|
||||
Unison.Codebase.SyncMode
|
||||
Unison.Codebase.TermEdit
|
||||
|
Loading…
Reference in New Issue
Block a user