Fill out more scoped operations

2024-10-05 06:07:21 +03:00 · 2023-01-24 10:14:23 -06:00 · 2023-01-24 10:14:23 -06:00 · ac3838fce7
commit ac3838fce7
parent 19011b6710
4 changed files with 284 additions and 32 deletions
--- a/codebase2/codebase-sqlite/U/Codebase/Sqlite/NamedRef.hs
+++ b/codebase2/codebase-sqlite/U/Codebase/Sqlite/NamedRef.hs
@ -5,7 +5,8 @@ import qualified Data.List.NonEmpty as NEL
 import qualified Data.List.NonEmpty as NonEmpty
 import qualified Data.Text as Text
 import Unison.Prelude
-import Unison.Sqlite (FromField (..), FromRow (..), SQLData (..), ToField (..), ToRow (..), field)
+import Unison.Sqlite
+import qualified Unison.Sqlite as Sqlite

 type ReversedSegments = NonEmpty Text

@ -44,3 +45,19 @@ toRowWithNamespace :: ToRow ref => NamedRef ref -> [SQLData]
 toRowWithNamespace nr = toRow nr <> [SQLText namespace]
  where
    namespace = Text.intercalate "." . reverse . NEL.tail . reversedSegments $ nr
+
+-- | Serialise a 'NamedRef' as a row for the scoped name lookup tables.
+--
+-- The scoped format differs from the older name lookup format: it carries an extra
+-- last-name-segment column, and both the reversed name and the namespace are stored with
+-- a trailing '.'.
+--
+-- The resulting row has the form:
+-- [reversedName, namespace, lastNameSegment] <> ref fields...
+namedRefToScopedRow :: ToRow ref => NamedRef ref -> [SQLData]
+namedRefToScopedRow (NamedRef {reversedSegments = segments, ref}) =
+  toRow (nameColumns Sqlite.:. ref)
+  where
+    -- Join the segments with '.' and terminate the result with a final '.'.
+    dotted parts = Text.intercalate "." parts <> "."
+    nameColumns =
+      ( SQLText (dotted (toList segments)),
+        SQLText (dotted (reverse (NEL.tail segments))),
+        SQLText (NEL.head segments)
+      )
--- a/codebase2/codebase-sqlite/U/Codebase/Sqlite/Operations.hs
+++ b/codebase2/codebase-sqlite/U/Codebase/Sqlite/Operations.hs
@ -70,6 +70,7 @@ module U.Codebase.Sqlite.Operations
    updateNameIndex,
    rootNamesByPath,
    NamesByPath (..),
+    checkBranchHashNameLookupExists,

    -- * reflog
    getReflog,
@ -1081,6 +1082,35 @@ updateNameIndex (newTermNames, removedTermNames) (newTypeNames, removedTypeNames
  Q.insertTermNames (fmap (c2sTextReferent *** fmap c2sConstructorType) <$> newTermNames)
  Q.insertTypeNames (fmap c2sTextReference <$> newTypeNames)

+-- | Build the scoped name lookup index for a branch hash.
+--
+-- If an existing index is supplied, its rows are first copied over to the new branch
+-- hash; the given removals and additions are then applied to the new branch hash's rows.
+--
+-- NOTE(review): nothing here records the new branch hash in the @name_lookups@ table,
+-- which is what 'Q.checkBranchHashNameLookupExists' consults. Confirm whether that is
+-- handled elsewhere or still needs a dedicated query.
+buildNameLookupForBranchHash ::
+  -- | The existing name lookup index to copy before applying the diff.
+  -- If Nothing, run the diff against an empty index.
+  Maybe BranchHash ->
+  -- | The branch hash to build the index for.
+  BranchHash ->
+  -- |  (add terms, remove terms)
+  ([S.NamedRef (C.Referent, Maybe C.ConstructorType)], [S.NamedRef C.Referent]) ->
+  -- |  (add types, remove types)
+  ([S.NamedRef C.Reference], [S.NamedRef C.Reference]) ->
+  Transaction ()
+buildNameLookupForBranchHash mayExistingBranchIndex newBranchHash (newTermNames, removedTermNames) (newTypeNames, removedTypeNames) = do
+  Q.ensureScopedNameLookupTables
+  -- Every scoped row is keyed by the new branch hash id, so we need it whether or not
+  -- there is an existing index to copy from.
+  newBranchHashId <- Q.saveBranchHash newBranchHash
+  case mayExistingBranchIndex of
+    Nothing -> pure ()
+    Just existingBranchIndex -> do
+      existingBranchHashId <- Q.saveBranchHash existingBranchIndex
+      Q.copyScopedNameLookup existingBranchHashId newBranchHashId
+  -- Apply the diff to the scoped tables (not the old global name lookup tables).
+  Q.removeScopedTermNames newBranchHashId (fmap c2sTextReferent <$> removedTermNames)
+  Q.removeScopedTypeNames newBranchHashId (fmap c2sTextReference <$> removedTypeNames)
+  Q.insertScopedTermNames newBranchHashId (fmap (c2sTextReferent *** fmap c2sConstructorType) <$> newTermNames)
+  Q.insertScopedTypeNames newBranchHashId (fmap c2sTextReference <$> newTypeNames)
+
+-- | Check whether a name lookup index has already been built for the given branch hash.
+checkBranchHashNameLookupExists :: BranchHash -> Transaction Bool
+checkBranchHashNameLookupExists branchHash =
+  Q.saveBranchHash branchHash >>= Q.checkBranchHashNameLookupExists
+
 data NamesByPath = NamesByPath
  { termNamesInPath :: [S.NamedRef (C.Referent, Maybe C.ConstructorType)],
    typeNamesInPath :: [S.NamedRef C.Reference]
--- a/codebase2/codebase-sqlite/U/Codebase/Sqlite/Queries.hs
+++ b/codebase2/codebase-sqlite/U/Codebase/Sqlite/Queries.hs
@ -135,14 +135,20 @@ module U.Codebase.Sqlite.Queries
    -- * Name Lookup
    ensureNameLookupTables,
    ensureScopedNameLookupTables,
+    copyScopedNameLookup,
    dropNameLookupTables,
    insertTermNames,
    insertTypeNames,
    removeTermNames,
    removeTypeNames,
+    insertScopedTermNames,
+    insertScopedTypeNames,
+    removeScopedTermNames,
+    removeScopedTypeNames,
    rootTermNamesByPath,
    rootTypeNamesByPath,
    getNamespaceDefinitionCount,
+    checkBranchHashNameLookupExists,

    -- * Reflog
    appendReflog,
@ -1610,11 +1616,6 @@ ensureNameLookupTables = do
    [here|
      CREATE INDEX IF NOT EXISTS term_names_by_namespace ON term_name_lookup(namespace)
    |]
-  -- Don't need this index at the moment, but will likely be useful later.
-  -- execute_
-  --   [here|
-  --     CREATE INDEX IF NOT EXISTS term_name_by_referent_lookup ON term_name_lookup(referent_builtin, referent_component_hash, referent_component_index, referent_constructor_index)
-  --   |]
  execute_
    [here|
      CREATE TABLE IF NOT EXISTS type_name_lookup (
@ -1642,42 +1643,84 @@ ensureScopedNameLookupTables = do
  execute_
    [here|
      CREATE TABLE IF NOT EXISTS name_lookups (
-        root_causal_hash_id INTEGER PRIMARY KEY REFERENCES causal(self_hash_id) ON DELETE CASCADE,
+        root_branch_hash_id INTEGER PRIMARY KEY REFERENCES hash(id) ON DELETE CASCADE
      )
    |]

  execute_
    [here|
      CREATE TABLE IF NOT EXISTS scoped_term_name_lookup (
-        root_causal_hash_id INTEGER NOT NULL REFERENCES causal(self_hash_id) ON DELETE CASCADE,
-        -- The name of the term: E.g. map.List.base
+        root_branch_hash_id INTEGER NOT NULL REFERENCES hash(id) ON DELETE CASCADE,
+
+        -- The name of the term in reversed form, with a trailing '.':
+        -- E.g. map.List.base.
+        --
+        -- The trailing '.' is helpful when performing suffix queries where we may not know
+        -- whether the suffix is complete or not, e.g. we could suffix search using any of the
+        -- following globs and it would still find 'map.List.base.':
+        --  map.List.base.*
+        --  map.List.*
+        --  map.*
        reversed_name TEXT NOT NULL,
-        -- The namespace containing this term, not reversed: E.g. base.List
+
+        -- The last name segment of the name. This is used when looking up names for
+        -- suffixification when building PPEs.
+        last_name_segment TEXT NOT NULL,
+
+        -- The namespace containing this definition, not reversed, with a trailing '.'
+        -- The trailing '.' simplifies GLOB queries, so that 'base.*' matches both things in
+        -- 'base' and 'base.List', but not 'base1', which allows us to avoid an OR in our where
+        -- clauses which in turn helps the sqlite query planner use indexes more effectively.
+        --
+        -- example value: 'base.List.'
        namespace TEXT NOT NULL,
        referent_builtin TEXT NULL,
        referent_component_hash TEXT NULL,
        referent_component_index INTEGER NULL,
        referent_constructor_index INTEGER NULL,
        referent_constructor_type INTEGER NULL,
-        PRIMARY KEY (root_causal_hash_id, reversed_name, referent_builtin, referent_component_hash, referent_component_index, referent_constructor_index)
+        PRIMARY KEY (root_branch_hash_id, reversed_name, referent_builtin, referent_component_hash, referent_component_index, referent_constructor_index)
      )
    |]
+
+  -- This index allows finding all names we need to consider within a given namespace for
+  -- suffixification of a name.
+  -- It may seem strange to use last_name_segment rather than a suffix search over reversed_name here;
+  -- but SQLite will only optimize for a single prefix-glob at once, so we can't glob search
+  -- over both namespace and reversed_name, but we can EXACT match on last_name_segment and
+  -- then glob search on the namespace prefix, and have SQLite do the final glob search on
+  -- reversed_name over rows with a matching last segment without using an index and should be plenty fast.
   execute_
     [here|
-      CREATE INDEX IF NOT EXISTS term_names_by_namespace ON scoped_term_name_lookup(root_causal_hash_id, namespace)
+      CREATE INDEX IF NOT EXISTS scoped_term_names_by_namespace_and_last_name_segment ON scoped_term_name_lookup(root_branch_hash_id, last_name_segment, namespace)
+    |]
+  -- This index allows us to find all names with a given ref within a specific namespace.
+  -- Like the other scoped indexes, it must be built on the scoped table: the unscoped
+  -- term_name_lookup table has no root_branch_hash_id column.
+  execute_
+    [here|
+      CREATE INDEX IF NOT EXISTS scoped_term_name_by_referent_lookup ON scoped_term_name_lookup(root_branch_hash_id, referent_builtin, referent_component_hash, referent_component_index, referent_constructor_index, namespace)
+    |]
+
+  -- Allows fetching ALL names within a specific namespace prefix. We currently use this to
+  -- pretty-print on share, but will be replaced with a more precise set of queries soon.
+  execute_
+    [here|
+      CREATE INDEX IF NOT EXISTS scoped_term_names_by_namespace ON scoped_term_name_lookup(root_branch_hash_id, namespace)
     |]
-  -- Don't need this index at the moment, but will likely be useful later.
-  -- execute_
-  --   [here|
-  --     CREATE INDEX IF NOT EXISTS term_name_by_referent_lookup ON term_name_lookup(referent_builtin, referent_component_hash, referent_component_index, referent_constructor_index)
-  --   |]
  execute_
    [here|
      CREATE TABLE IF NOT EXISTS scoped_type_name_lookup (
-        root_causal_hash_id INTEGER NOT NULL,
+        root_branch_hash_id INTEGER NOT NULL REFERENCES hash(id),
        -- The name of the term: E.g. List.base
        reversed_name TEXT NOT NULL,
-        -- The namespace containing this term, not reversed: E.g. base.List
+        -- The last name segment of the name. This is used when looking up names for
+        -- suffixification when building PPEs.
+        last_name_segment TEXT NOT NULL,
+        -- The namespace containing this definition, not reversed, with a trailing '.'
+        -- The trailing '.' simplifies GLOB queries, so that 'base.*' matches both things in
+        -- 'base' and 'base.List', but not 'base1', which allows us to avoid an OR in our where
+        -- clauses which in turn helps the sqlite query planner use indexes more effectively.
+        --
+        -- example value: 'base.List.'
        namespace TEXT NOT NULL,
        reference_builtin TEXT NULL,
        reference_component_hash INTEGER NULL,
@ -1685,11 +1728,67 @@ ensureScopedNameLookupTables = do
        PRIMARY KEY (reversed_name, reference_builtin, reference_component_hash, reference_component_index)
      );
    |]
+
+  -- This index allows finding all names we need to consider within a given namespace for
+  -- suffixification of a name.
+  -- It may seem strange to use last_name_segment rather than a suffix search over reversed_name here;
+  -- but SQLite will only optimize for a single prefix-glob at once, so we can't glob search
+  -- over both namespace and reversed_name, but we can EXACT match on last_name_segment and
+  -- then glob search on the namespace prefix, and have SQLite do the final glob search on
+  -- reversed_name over rows with a matching last segment without using an index and should be plenty fast.
   execute_
     [here|
-      CREATE INDEX IF NOT EXISTS scoped_type_names_by_namespace ON type_name_lookup(root_causal_hash_id, namespace)
+      CREATE INDEX IF NOT EXISTS scoped_type_names_by_namespace_and_last_name_segment ON scoped_type_name_lookup(root_branch_hash_id, last_name_segment, namespace)
     |]
 
+  -- This index allows us to find all names with a given ref within a specific namespace.
+  -- Like the other scoped indexes, it must be built on the scoped table: the unscoped
+  -- type_name_lookup table has no root_branch_hash_id column.
+  execute_
+    [here|
+      CREATE INDEX IF NOT EXISTS scoped_type_name_by_reference_lookup ON scoped_type_name_lookup(root_branch_hash_id, reference_builtin, reference_component_hash, reference_component_index, namespace)
+    |]
+
+  -- Allows fetching ALL names within a specific namespace prefix. We currently use this to
+  -- pretty-print on share, but will be replaced with a more precise set of queries soon.
+  execute_
+    [here|
+      CREATE INDEX IF NOT EXISTS scoped_type_names_by_namespace ON scoped_type_name_lookup(root_branch_hash_id, namespace)
+    |]
+
+-- | Copy every scoped term and type name row keyed by @fromBHId@ into new rows keyed by
+-- @toBHId@.
+--
+-- Both statements bind their parameters in the order (destination id, source id).
+copyScopedNameLookup :: BranchHashId -> BranchHashId -> Transaction ()
+copyScopedNameLookup fromBHId toBHId = do
+  execute termsCopySql (toBHId, fromBHId)
+  execute typesCopySql (toBHId, fromBHId)
+  where
+    -- Columns are listed explicitly so the copy does not depend on the table's column
+    -- order, and so that referent_constructor_type is carried over too; without it the
+    -- SELECT supplies fewer values than the table has columns.
+    termsCopySql =
+      [here|
+        INSERT INTO scoped_term_name_lookup (root_branch_hash_id, reversed_name, last_name_segment, namespace, referent_builtin, referent_component_hash, referent_component_index, referent_constructor_index, referent_constructor_type)
+        SELECT ?, reversed_name, last_name_segment, namespace, referent_builtin, referent_component_hash, referent_component_index, referent_constructor_index, referent_constructor_type
+        FROM scoped_term_name_lookup
+        WHERE root_branch_hash_id = ?
+      |]
+    typesCopySql =
+      [here|
+        INSERT INTO scoped_type_name_lookup (root_branch_hash_id, reversed_name, last_name_segment, namespace, reference_builtin, reference_component_hash, reference_component_index)
+        SELECT ?, reversed_name, last_name_segment, namespace, reference_builtin, reference_component_hash, reference_component_index
+        FROM scoped_type_name_lookup
+        WHERE root_branch_hash_id = ?
+      |]
+
+-- | Report whether the @name_lookups@ table already has an entry for the given root
+-- branch hash id.
+checkBranchHashNameLookupExists :: BranchHashId -> Transaction Bool
+checkBranchHashNameLookupExists hashId =
+  queryOneCol existsSql (Only hashId)
+  where
+    existsSql =
+      [here|
+        SELECT EXISTS (
+          SELECT 1
+          FROM name_lookups
+          WHERE root_branch_hash_id = ?
+          LIMIT 1
+        )
+       |]
+
 -- | Insert the given set of term names into the name lookup table
 insertTermNames :: [NamedRef (Referent.TextReferent, Maybe NamedRef.ConstructorType)] -> Transaction ()
 insertTermNames names = do
@ -1704,6 +1803,18 @@ insertTermNames names = do
        ON CONFLICT DO NOTHING
        |]

+-- | Record the given type names in the @type_name_lookup@ table.
+-- Rows which conflict with an existing row are left untouched.
+insertTypeNames :: [NamedRef (Reference.TextReference)] -> Transaction ()
+insertTypeNames names =
+  executeMany insertSql rows
+  where
+    -- Each row is the 'NamedRef' row followed by its namespace, matching the column order below.
+    rows = map NamedRef.toRowWithNamespace names
+    insertSql =
+      [here|
+      INSERT INTO type_name_lookup (reversed_name, reference_builtin, reference_component_hash, reference_component_index, namespace)
+        VALUES (?, ?, ?, ?, ?)
+        ON CONFLICT DO NOTHING
+        |]
+
 -- | Remove the given set of term names into the name lookup table
 removeTermNames :: [NamedRef Referent.TextReferent] -> Transaction ()
 removeTermNames names = do
@ -1735,6 +1846,71 @@ removeTypeNames names = do
        AND reference_component_index IS ?
        |]

+-- | Record the given term names in the scoped term name lookup table, under the given
+-- root branch hash id. Rows which conflict with an existing row are left untouched.
+insertScopedTermNames :: BranchHashId -> [NamedRef (Referent.TextReferent, Maybe NamedRef.ConstructorType)] -> Transaction ()
+insertScopedTermNames bhId names =
+  executeMany insertSql (map toScopedRow names)
+  where
+    -- Prefix the scoped name row with the branch hash id it belongs to.
+    toScopedRow :: NamedRef (S.Referent.TextReferent, Maybe NamedRef.ConstructorType) -> (Only BranchHashId :. [SQLData])
+    toScopedRow named =
+      Only bhId :. NamedRef.namedRefToScopedRow (withConstructorType <$> named)
+    -- Re-shape the (referent, constructor type) pair into a row: the referent's fields
+    -- followed by the constructor type column.
+    withConstructorType :: (Referent.TextReferent, Maybe NamedRef.ConstructorType) -> (Referent.TextReferent :. Only (Maybe NamedRef.ConstructorType))
+    withConstructorType (referent, constructorType) = referent :. Only constructorType
+    insertSql =
+      [here|
+      INSERT INTO scoped_term_name_lookup (root_branch_hash_id, reversed_name, namespace, last_name_segment, referent_builtin, referent_component_hash, referent_component_index, referent_constructor_index, referent_constructor_type)
+        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+        ON CONFLICT DO NOTHING
+        |]
+
+-- | Insert the given set of type names into the scoped type name lookup table, under the
+-- given root branch hash id. Rows which conflict with an existing row are left untouched.
+insertScopedTypeNames :: BranchHashId -> [NamedRef (Reference.TextReference)] -> Transaction ()
+insertScopedTypeNames bhId names =
+  executeMany sql ((Only bhId :.) . NamedRef.namedRefToScopedRow <$> names)
+  where
+    -- Targets the scoped table: the unscoped type_name_lookup table has neither a
+    -- root_branch_hash_id nor a last_name_segment column.
+    sql =
+      [here|
+      INSERT INTO scoped_type_name_lookup (root_branch_hash_id, reversed_name, namespace, last_name_segment, reference_builtin, reference_component_hash, reference_component_index)
+        VALUES (?, ?, ?, ?, ?, ?, ?)
+        ON CONFLICT DO NOTHING
+        |]
+
+-- | Remove the given set of term names from the scoped term name lookup table, for the
+-- given root branch hash id.
+removeScopedTermNames :: BranchHashId -> [NamedRef Referent.TextReferent] -> Transaction ()
+removeScopedTermNames bhId names = do
+  -- Rows are rendered with 'NamedRef.namedRefToScopedRow' so that the reversed name and
+  -- namespace carry the trailing '.' used by the scoped tables; the plain 'NamedRef' row
+  -- uses the older format and would not match what the scoped insert wrote.
+  executeMany sql ((Only bhId :.) . NamedRef.namedRefToScopedRow <$> names)
+  where
+    -- Parameter order: branch hash id, then [reversedName, namespace, lastNameSegment],
+    -- then the referent's fields.
+    sql =
+      [here|
+      DELETE FROM scoped_term_name_lookup
+        WHERE
+        root_branch_hash_id IS ?
+        AND reversed_name IS ?
+        AND namespace IS ?
+        AND last_name_segment IS ?
+        AND referent_builtin IS ?
+        AND referent_component_hash IS ?
+        AND referent_component_index IS ?
+        AND referent_constructor_index IS ?
+        |]
+
+-- | Remove the given set of type names from the scoped type name lookup table, for the
+-- given root branch hash id.
+removeScopedTypeNames :: BranchHashId -> [NamedRef (Reference.TextReference)] -> Transaction ()
+removeScopedTypeNames bhId names = do
+  -- Rows are rendered with 'NamedRef.namedRefToScopedRow' so that the reversed name and
+  -- namespace carry the trailing '.' used by the scoped tables; the plain 'NamedRef' row
+  -- uses the older format and would not match what the scoped insert wrote.
+  executeMany sql ((Only bhId :.) . NamedRef.namedRefToScopedRow <$> names)
+  where
+    -- Parameter order: branch hash id, then [reversedName, namespace, lastNameSegment],
+    -- then the reference's fields.
+    sql =
+      [here|
+      DELETE FROM scoped_type_name_lookup
+        WHERE
+        root_branch_hash_id IS ?
+        AND reversed_name IS ?
+        AND namespace IS ?
+        AND last_name_segment IS ?
+        AND reference_builtin IS ?
+        AND reference_component_hash IS ?
+        AND reference_component_index IS ?
+        |]
+
 -- | We need to escape any special characters for globbing.
 --
 -- >>> globEscape "Nat.*.doc"
@ -1792,18 +1968,6 @@ getNamespaceDefinitionCount namespace = do
        )
      |]

--- | Insert the given set of type names into the name lookup table
-insertTypeNames :: [NamedRef (Reference.TextReference)] -> Transaction ()
-insertTypeNames names =
-  executeMany sql (NamedRef.toRowWithNamespace <$> names)
-  where
-    sql =
-      [here|
-      INSERT INTO type_name_lookup (reversed_name, reference_builtin, reference_component_hash, reference_component_index, namespace)
-        VALUES (?, ?, ?, ?, ?)
-        ON CONFLICT DO NOTHING
-        |]
-
 -- | Get the list of a term names in the root namespace according to the name lookup index
 rootTermNamesByPath :: Maybe Text -> Transaction [NamedRef (Referent.TextReferent, Maybe NamedRef.ConstructorType)]
 rootTermNamesByPath mayNamespace = do
--- a/parser-typechecker/src/Unison/Codebase/SqliteCodebase/Operations.hs
+++ b/parser-typechecker/src/Unison/Codebase/SqliteCodebase/Operations.hs
@ -655,6 +655,47 @@ updateNameLookupIndex getDeclType pathPrefix mayFromBranchHash toBranchHash = do
        ct <- getDeclType ref
        pure (referent, Just $ Cv.constructorType1to2 ct)

+-- | Ensure a name lookup index exists for the provided branch hash, building one if it
+-- is missing. Does nothing when an index for the target branch hash already exists.
+--
+-- NOTE(review): despite the name, this operates on branch hashes rather than causal
+-- hashes; every lookup it performs is keyed by 'BranchHash'.
+ensureNameLookupForCausalHash ::
+  -- | Looks up the constructor type of the declaration a constructor belongs to.
+  (C.Reference.Reference -> Sqlite.Transaction CT.ConstructorType) ->
+  -- | An optional branch which we may already have an index for.
+  -- If provided and indexed, we can build the name index much faster by copying the index
+  -- then computing only the changes we need to make between the two indexes.
+  Maybe BranchHash ->
+  -- | The branch to ensure an index for.
+  BranchHash ->
+  Sqlite.Transaction ()
+ensureNameLookupForCausalHash getDeclType mayFromBranchHash toBranchHash = do
+  Ops.checkBranchHashNameLookupExists toBranchHash >>= \case
+    True -> pure ()
+    False -> do
+      -- Only diff against (and copy from) the starting branch when it actually has an
+      -- index; otherwise build from scratch against the empty branch.
+      (fromBranch, mayExistingIndexHash) <- case mayFromBranchHash of
+        Nothing -> pure (V2Branch.empty, Nothing)
+        Just fromBH ->
+          Ops.checkBranchHashNameLookupExists fromBH >>= \case
+            True -> do
+              existingBranch <- Ops.expectBranchByBranchHash fromBH
+              pure (existingBranch, Just fromBH)
+            False -> pure (V2Branch.empty, Nothing)
+      toBranch <- Ops.expectBranchByBranchHash toBranchHash
+      treeDiff <- BranchDiff.diffBranches fromBranch toBranch
+      -- No path prefix is in scope here, so names are not prefixed.
+      let namePrefix = Nothing
+      let BranchDiff.NameChanges {termNameAdds, termNameRemovals, typeNameAdds, typeNameRemovals} = BranchDiff.nameChanges namePrefix treeDiff
+      termNameAddsWithCT <- do
+        for termNameAdds \(name, ref) -> do
+          refWithCT <- addReferentCT ref
+          pure $ toNamedRef (name, refWithCT)
+      Ops.buildNameLookupForBranchHash mayExistingIndexHash toBranchHash (termNameAddsWithCT, toNamedRef <$> termNameRemovals) (toNamedRef <$> typeNameAdds, toNamedRef <$> typeNameRemovals)
+  where
+    toNamedRef :: (Name, ref) -> S.NamedRef ref
+    toNamedRef (name, ref) = S.NamedRef {reversedSegments = coerce $ Name.reverseSegments name, ref = ref}
+    -- Constructors are paired with their constructor type; plain refs have none.
+    addReferentCT :: C.Referent.Referent -> Sqlite.Transaction (C.Referent.Referent, Maybe C.Referent.ConstructorType)
+    addReferentCT referent = case referent of
+      C.Referent.Ref {} -> pure (referent, Nothing)
+      C.Referent.Con ref _conId -> do
+        ct <- getDeclType ref
+        pure (referent, Just $ Cv.constructorType1to2 ct)
+
 -- | Compute the root namespace names index which is used by the share server for serving api
 -- requests. Using 'updateNameLookupIndex' is preferred whenever possible, since it's
 -- considerably faster. This can be used to reset the index if it ever gets out of sync due to