first group commit

This commit is contained in:
Arya Irani 2021-10-19 11:01:26 -04:00
parent 3c8d2ac83f
commit 6d5ac667a5
4 changed files with 296 additions and 6 deletions

View File

@ -6,12 +6,18 @@ import U.Codebase.Sqlite.Branch.Full (LocalBranch)
import U.Codebase.Sqlite.DbId (CausalHashId, BranchObjectId, ObjectId, PatchObjectId, TextId)
import Data.ByteString (ByteString)
-- | A 'BranchFormat' is a deserialized namespace object (@object.bytes@).
--
-- you can use the exact same `BranchLocalIds` when converting between `Full` and `Diff`
data BranchFormat
= Full BranchLocalIds LocalBranch
| Diff BranchObjectId BranchLocalIds LocalDiff
deriving Show
-- | A 'BranchLocalIds' is a mapping between local ids (local to this object) encoded as offsets, and actual database ids.
--
-- For example, a @branchTextLookup@ vector of @[50, 74]@ means "local id 0 corresponds to database text id 50, and
-- local id 1 corresponds to database text id 74".
data BranchLocalIds = LocalIds
{ branchTextLookup :: Vector TextId,
branchDefnLookup :: Vector ObjectId,
@ -23,3 +29,85 @@ data BranchLocalIds = LocalIds
data SyncBranchFormat
= SyncFull BranchLocalIds ByteString
| SyncDiff BranchObjectId BranchLocalIds ByteString
{-
projects.arya.message = "hello, world" -> <text constant> -> #abc
projects.arya.program = printLine message -> printLine #abc -> #def
projects.arya {
terms = { "message" -> #abc
, "program" -> #def
}
}
text table =
{ 1 -> "hello, world"
, 2 -> "message"
, 3 -> "program"
}
hash table =
{ 10 -> "abc"
, 11 -> "def"
}
object table =
{ ...
}
projects.arya {
terms = { TextId 2 -> Reference { builtin = null, object = ObjectId 20, position = 0 }
, TextId 3 -> Reference { builtin = null, object = ObjectId 21, position = 0 }
}
}
stored in original codebase:
projects.arya = BranchFormat.Full {
localIds = {
text = [2, 3]
hash = [10, 11]
object = [20, 21]
}
localBranch = {
terms = { LocalTextId 0 -> Reference { builtin = null, object = LocalObjectId 0, position = 0 }
, LocalTextId 1 -> Reference { builtin = null, object = LocalObjectId 1, position = 0 }
}
...
}
}
write to dest codebase:
text table =
{ ...
, 901 -> "hello, world"
, 902 -> "message"
, 903 -> "program"
}
hash table =
{ ...
, 500 -> "abc"
, 501 -> "def"
}
projects.arya {
-- updated copy of original localIds, with new mapping
localIds = {
text = [902, 903]
hash = [500, 501]
object = [300, 301]
}
-- copy unmodified from original
localBranch = {
terms = { LocalTextId 0 -> Reference { builtin = null, object = LocalObjectId 0, position = 0 }
, LocalTextId 1 -> Reference { builtin = null, object = LocalObjectId 1, position = 0 }
}
...
}
}
-}
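-- The example above amounts to rewriting the lookup vectors while leaving the
-- 'LocalBranch' body untouched. A minimal sketch of that rewrite (an
-- illustration, not part of this commit; `remapBranchLocalIds` and its two
-- lookup arguments are hypothetical, and only the two fields shown above are
-- remapped):
remapBranchLocalIds ::
  (TextId -> TextId) ->
  (ObjectId -> ObjectId) ->
  BranchLocalIds ->
  BranchLocalIds
remapBranchLocalIds lookupDestText lookupDestObject ids =
  ids
    { branchTextLookup = fmap lookupDestText (branchTextLookup ids),
      branchDefnLookup = fmap lookupDestObject (branchDefnLookup ids)
    }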

View File

@ -276,8 +276,8 @@ sqliteCodebase debugName root = do
termCache <- Cache.semispaceCache 8192 -- pure Cache.nullCache -- to disable
typeOfTermCache <- Cache.semispaceCache 8192
declCache <- Cache.semispaceCache 1024
let
startCodebase = do
rootBranchCache <- newTVarIO Nothing
-- The v1 codebase interface has operations to read and write individual definitions
-- whereas the v2 codebase writes them as complete components. These two fields buffer
@ -799,6 +799,9 @@ sqliteCodebase debugName root = do
(Just \l r -> runDB conn $ fromJust <$> before l r)
in code
)
runReaderT Q.schemaVersion conn >>= \case
SchemaVersion 2 -> startCodebase
SchemaVersion 1 -> _migrate12 conn >> startCodebase
v -> shutdownConnection conn $> Left v
-- well one or the other. :zany_face: the thinking being that they wouldn't hash-collide
@ -833,14 +836,16 @@ syncInternal ::
Branch m ->
m ()
syncInternal progress srcConn destConn b = time "syncInternal" do
-- We start a savepoint on the src connection because it seemed to speed things up.
-- Mitchell says: that doesn't sound right... why would that be the case?
-- TODO: look into this; this connection should be used only for reads.
runDB srcConn $ Q.savepoint "sync"
runDB destConn $ Q.savepoint "sync"
result <- runExceptT do
let syncEnv = Sync22.Env srcConn destConn (16 * 1024 * 1024)
-- we want to use sync22 wherever possible
-- so for each source branch, we'll check if it exists in the destination codebase
-- or if it exists in the source codebase, then we can sync22 it
-- if it doesn't exist in the dest or source branch,
-- then just use putBranch to the dest
let se :: forall m a. Functor m => (ExceptT Sync22.Error m a -> ExceptT SyncEphemeral.Error m a)
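-- A rough sketch of the per-branch decision described in the comments above
-- (an assumption, not this commit's code; existsInDest, existsInSrc, and
-- putBranchToDest are hypothetical helpers):
--
-- syncOneBranch b =
--   existsInDest b >>= \case
--     True -> pure () -- already present; nothing to do
--     False ->
--       existsInSrc b >>= \case
--         True -> sync22 b -- fast path: copy objects, remapping ids
--         False -> putBranchToDest b -- fallback: serialize from memory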

View File

@ -0,0 +1,196 @@
{-# LANGUAGE PartialTypeSignatures #-}
{-# LANGUAGE PatternSynonyms #-}
module Unison.Codebase.SqliteCodebase.MigrateSchema12 where
import Control.Monad.Reader (MonadReader)
import Control.Monad.State (MonadState)
import U.Codebase.Sqlite.Connection (Connection)
import U.Codebase.Sqlite.DbId (CausalHashId, HashId, ObjectId)
import U.Codebase.Sqlite.ObjectType (ObjectType)
import qualified U.Codebase.Sqlite.ObjectType as OT
import qualified U.Codebase.Sqlite.Queries as Q
import qualified U.Codebase.Sqlite.Reference as S.Reference
import U.Codebase.Sync (Sync (Sync))
import qualified U.Codebase.Sync as Sync
import qualified U.Codebase.WatchKind as WK
import Unison.Prelude (ByteString, Map, MonadIO)
import Unison.Reference (Pos)
import Unison.Referent (ConstructorId)
-- lookupCtor :: ConstructorMapping -> ObjectId -> Pos -> ConstructorId -> Maybe (Pos, ConstructorId)
-- lookupCtor (ConstructorMapping cm) oid pos cid =
-- Map.lookup oid cm >>= (Vector.!? fromIntegral pos) >>= (Vector.!? cid)
-- lookupTermRef :: TermLookup -> S.Reference -> Maybe S.Reference
-- lookupTermRef _tl (ReferenceBuiltin t) = Just (ReferenceBuiltin t)
-- lookupTermRef tl (ReferenceDerived id) = ReferenceDerived <$> lookupTermRefId tl id
-- lookupTermRefId :: TermLookup -> S.Reference.Id -> Maybe S.Reference.Id
-- lookupTermRefId tl (Id oid pos) = Id oid <$> lookupTermPos tl oid pos
-- lookupTermPos :: TermLookup -> ObjectId -> Pos -> Maybe Pos
-- lookupTermPos (TermLookup tl) oid pos = Map.lookup oid tl >>= (Vector.!? fromIntegral pos)
-- newtype ConstructorMapping = ConstructorMapping (Map ObjectId (Vector (Vector (Pos, ConstructorId))))
-- newtype TermLookup = TermLookup (Map ObjectId (Vector Pos))
data MigrationState = MigrationState
{ declLookup :: Map ObjectId (Map Pos Pos),
ctorLookup :: Map (ObjectId, Pos) (Map ConstructorId ConstructorId),
termLookup :: Map ObjectId (Map Pos Pos)
}
{-
* Load entire codebase as a list
* Pick a term from the codebase
* Look up the references inside the term
* If any haven't been processed, add them to the "to process" stack, push the term you were working on back onto that stack
* Rebuild & rehash the term, store that
* For any data constructor terms inside,
* Store a map from old ConstructorId to new, based on the old and new reference hashes
* After rebuilding a cycle, map old Pos to new
-}
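-- A minimal sketch (an assumption, not part of this commit) of how these maps
-- would be consulted once populated: object ids stay the same, so only the
-- position within a component is remapped.
--
-- remapTermPos :: MigrationState -> ObjectId -> Pos -> Maybe Pos
-- remapTermPos ms oid pos = Map.lookup oid (termLookup ms) >>= Map.lookup pos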
-- Q: can we plan to hold the whole mapping in memory? ✅
-- Q: a) update database in-place? or b) write to separate database and then overwrite? leaning (b).
-- note: we do need to rebuild namespaces, although we don't need to rehash them.
-- cycle position index `Pos`
-- constructor index `ConstructorId`
{-
data Maybe a = (Just Bar | Nothing X)
-- changes due to missing size from ref(Y)
data X = MkX Y
-- know old hash and old cycle positions
data Y = MkY Int
-}
data Entity
= O ObjectId
| C CausalHashId
| W WK.WatchKind S.Reference.IdH
deriving (Eq, Ord, Show)
data Env = Env {db :: Connection}
-- -> m (TrySyncResult h)
migrationSync ::
(MonadIO m, MonadState MigrationState m, MonadReader Env m) =>
Sync m Entity
migrationSync = Sync \case
-- To sync an object,
-- * If we have already synced it, we are done.
-- * Otherwise, read the object from the database and switch on its object type.
-- * See next steps below v
--
-- To sync a term component object,
-- * If we have not already synced all dependencies, push syncing them onto the front of the work queue.
-- * Otherwise, ???
--
-- To sync a decl component object,
-- * If we have not already synced all dependencies, push syncing them onto the front of the work queue.
-- * Otherwise, ???
--
-- To sync a namespace object,
-- * Deserialize it and compute its dependencies (terms, types, patches, children).
-- * If we have not already synced all of its dependencies, push syncing them onto the front of the work queue.
-- * To sync a 'BranchFull',
-- * We need to make a new 'BranchFull' in memory, then insert it into the database under a new object id.
-- * Wait, we need to preserve the ordering of the types/terms, either by not changing them (but the orderings of the
-- reference ids used in keys are definitely not preserved by this migration), or by permuting the local id vectors,
-- but we may be at a level too low or high for us to care?
-- * Its 'LocalBranch' must have all references changed in-place per the (old (object id, pos) => new (object id, pos)) mapping.
-- The position of any term/decl within its component may have changed, so we need to adjust each referent's component position.
-- * We need to recompute the References contained in the Branch body since the pos within the reference may have changed.
-- We can look up the position changes in our MigrationState; they must have been added when computing the objects pointed to by the reference.
-- * The normalized object IDs within the body _likely_ don't need to change.
-- * Its 'BranchLocalIds' must be translated from the old codebase object IDs to the new object IDs,
-- we can use our MigrationState to look these up, since they must have already been migrated.
-- * To sync a 'BranchDiff',
-- * ???
--
-- To sync a patch object, ???
--
-- To sync a Causal
--- * If we haven't yet synced its parents, push them onto the work queue
--- * If we haven't yet synced the causal's value (namespace), push it onto the work queue.
--- * Rehash the Causal's valueHash AND CausalHash, and add the new causal, its hashes, and hash objects to the codebase under a fresh object ID
O objId -> do
let alreadySynced :: m Bool
alreadySynced = undefined
alreadySynced >>= \case
False -> do
(hId, objType, bytes) <- runSrc $ Q.loadObjectWithHashIdAndTypeById objId
migrateObject objType hId bytes
True -> pure Sync.PreviouslyDone
-- result <- runValidateT @(Set Entity) @m @ObjectId case objType of
-- To sync a causal,
-- 1. ???
-- 2. Synced
C causalHashID -> _
-- To sync a watch result,
-- 1. ???
-- 2. Synced
W watchKind idH -> _
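-- The dependency-first pattern sketched in the plan above would presumably use
-- the other 'TrySyncResult' constructors from U.Codebase.Sync, roughly (a
-- sketch with hypothetical helpers, not this commit's code):
--
-- case missingDependencies of
--   [] -> doMigration >> pure Sync.Done
--   deps -> pure (Sync.Missing deps)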
-- data ObjectType
-- = TermComponent -- 0
-- | DeclComponent -- 1
-- | Namespace -- 2
-- | Patch -- 3
migrateObject :: ObjectType -> HashId -> ByteString -> m _
migrateObject objType hash bytes = case objType of
OT.TermComponent -> migrateTermComponent hash bytes
OT.DeclComponent -> migrateDeclComponent hash bytes
OT.Namespace -> migrateNamespace hash bytes
OT.Patch -> migratePatch hash bytes
migratePatch :: HashId -> ByteString -> m _
migratePatch = error "not implemented"
migrateNamespace :: HashId -> ByteString -> m _
migrateNamespace = error "not implemented"
migrateTermComponent :: HashId -> ByteString -> m _
migrateTermComponent = error "not implemented"
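-- assumed counterpart for decl components, referenced from migrateObject above
migrateDeclComponent :: HashId -> ByteString -> m _
migrateDeclComponent = error "not implemented"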
-- | Migrate a sqlite codebase from version 1 to 2; returns False and rolls back on failure.
migrateSchema12 :: Applicative m => Connection -> m Bool
migrateSchema12 db = do
-- todo: drop and recreate corrected type/mentions index schema
-- do we want to garbage collect at this time? ✅
-- or just convert everything without going in dependency order? ✅
error "todo: go through "
-- todo: double-hash all the types and produce a constructor mapping
-- object ids will stay the same
-- todo: rehash all the terms using the new constructor mapping
-- and adding the type to the term
-- do we want to diff namespaces at this time? ❌
-- do we want to look at supporting multiple simultaneous representations of objects at this time?
pure "todo: migrate12"
pure True
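-- The "rolls back on failure" behavior promised in the haddock above maps onto
-- SQLite savepoints: open with SAVEPOINT migrate12, then RELEASE migrate12 on
-- success, or ROLLBACK TO migrate12 followed by RELEASE migrate12 on failure.
-- A sketch (the Q.release and Q.rollbackTo helpers are assumptions):
--
-- do Q.savepoint "migrate12"
--    ok <- runMigration db -- hypothetical
--    if ok
--      then Q.release "migrate12"
--      else Q.rollbackTo "migrate12" >> Q.release "migrate12"
--    pure ok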
-- -- remember that the component order might be different
-- rehashDeclComponent :: [Decl v a] -> (Hash, ConstructorMappings)
-- rehashDeclComponent decls = fmap decls <&> \case
--
-- --
-- error "todo: rehashDeclComponent"
-- rewriteDeclComponent :: DeclFormat.LocallyIndexedComponent -> (Hash, DeclFormat.LocallyIndexedComponent, ConstructorMappings)
-- rewriteDeclComponent =
-- --
-- error "todo: rehashDeclComponent"
-- rehashDeclComponent :: [Decl v a] -> (Hash, DeclFormat.LocallyIndexedComponent, ConstructorMappings)
-- rehashTermComponent :: ConstructorMappings -> TermFormat.LocallyIndexedComponent -> (Hash, TermFormat.LocallyIndexedComponent)
-- rehashTermComponent = error "todo: rehashTermComponent"
-- -- getConstructor :: ConstructorMappings -> ObjectId -> Pos -> ConstructorId
-- -- getConstructor cm

View File

@ -73,6 +73,7 @@ library
Unison.Codebase.SqliteCodebase.Branch.Dependencies
Unison.Codebase.SqliteCodebase.Conversions
Unison.Codebase.SqliteCodebase.GitError
Unison.Codebase.SqliteCodebase.MigrateSchema12
Unison.Codebase.SqliteCodebase.SyncEphemeral
Unison.Codebase.SyncMode
Unison.Codebase.TermEdit