Fill out more scoped operations

This commit is contained in:
Chris Penner 2023-01-24 10:14:23 -06:00
parent 19011b6710
commit ac3838fce7
4 changed files with 284 additions and 32 deletions

View File

@ -5,7 +5,8 @@ import qualified Data.List.NonEmpty as NEL
import qualified Data.List.NonEmpty as NonEmpty
import qualified Data.Text as Text
import Unison.Prelude
import Unison.Sqlite (FromField (..), FromRow (..), SQLData (..), ToField (..), ToRow (..), field)
import Unison.Sqlite
import qualified Unison.Sqlite as Sqlite
-- | A non-empty list of textual name segments.
--
-- NOTE(review): as the name suggests, the segments are presumably stored last-segment-first
-- (e.g. @map :| [List, base]@ for @base.List.map@) — confirm against 'NamedRef'.
type ReversedSegments = NonEmpty Text
@ -44,3 +45,19 @@ toRowWithNamespace :: ToRow ref => NamedRef ref -> [SQLData]
toRowWithNamespace nr = toRow nr <> [SQLText namespace]
where
namespace = Text.intercalate "." . reverse . NEL.tail . reversedSegments $ nr
-- | Serialise a 'NamedRef' into the column layout used by the scoped name lookup tables.
--
-- The scoped format differs from the older name lookup format in two ways: it carries an
-- additional @lastNameSegment@ column, and both the namespace and the reversed name are
-- written with a trailing @.@ (so a single-segment name gets the namespace @"."@).
--
-- The resulting row has the form:
-- [reversedName, namespace, lastNameSegment] <> ref fields...
namedRefToScopedRow :: ToRow ref => NamedRef ref -> [SQLData]
namedRefToScopedRow NamedRef {reversedSegments = revSegments, ref} =
  toRow (nameColumns Sqlite.:. ref)
  where
    -- Join the segments with '.' and terminate the result with one more '.'.
    dotTerminated :: [Text] -> Text
    dotTerminated segments = Text.intercalate "." segments <> "."
    nameColumns =
      ( -- The full name, last segment first.
        SQLText (dotTerminated (toList revSegments)),
        -- Everything but the last name segment, flipped back into forward order.
        SQLText (dotTerminated (reverse (NEL.tail revSegments))),
        -- The last name segment is the head of the reversed list.
        SQLText (NEL.head revSegments)
      )

View File

@ -70,6 +70,7 @@ module U.Codebase.Sqlite.Operations
updateNameIndex,
rootNamesByPath,
NamesByPath (..),
checkBranchHashNameLookupExists,
-- * reflog
getReflog,
@ -1081,6 +1082,35 @@ updateNameIndex (newTermNames, removedTermNames) (newTypeNames, removedTypeNames
Q.insertTermNames (fmap (c2sTextReferent *** fmap c2sConstructorType) <$> newTermNames)
Q.insertTypeNames (fmap c2sTextReference <$> newTypeNames)
-- | Build the scoped name lookup index for @newBranchHash@.
--
-- If an existing index is supplied, its rows are first copied over to the new branch hash
-- and the given additions/removals are applied on top of that copy; otherwise the changes
-- are applied to an empty index.
buildNameLookupForBranchHash ::
  -- | The existing name lookup index to copy before applying the diff.
  -- If Nothing, run the diff against an empty index.
  Maybe BranchHash ->
  -- | The branch hash to build the index for.
  BranchHash ->
  -- | (add terms, remove terms)
  ([S.NamedRef (C.Referent, Maybe C.ConstructorType)], [S.NamedRef C.Referent]) ->
  -- | (add types, remove types)
  ([S.NamedRef C.Reference], [S.NamedRef C.Reference]) ->
  Transaction ()
buildNameLookupForBranchHash mayExistingBranchIndex newBranchHash (newTermNames, removedTermNames) (newTypeNames, removedTypeNames) = do
  Q.ensureScopedNameLookupTables
  -- Every scoped row is keyed by the new branch hash id, so it is needed whether or not
  -- there is an existing index to copy from.
  newBranchHashId <- Q.saveBranchHash newBranchHash
  case mayExistingBranchIndex of
    Nothing -> pure ()
    Just existingBranchIndex -> do
      existingBranchHashId <- Q.saveBranchHash existingBranchIndex
      Q.copyScopedNameLookup existingBranchHashId newBranchHashId
  -- Apply the diff to the rows scoped to the new branch hash, not to the global (unscoped)
  -- name lookup tables.
  Q.removeScopedTermNames newBranchHashId (fmap c2sTextReferent <$> removedTermNames)
  Q.removeScopedTypeNames newBranchHashId (fmap c2sTextReference <$> removedTypeNames)
  Q.insertScopedTermNames newBranchHashId (fmap (c2sTextReferent *** fmap c2sConstructorType) <$> newTermNames)
  Q.insertScopedTypeNames newBranchHashId (fmap c2sTextReference <$> newTypeNames)
-- | Check whether we've already got a name lookup index for the given branch hash.
--
-- The hash is passed through 'Q.saveBranchHash' to obtain its id, which is then looked up
-- with 'Q.checkBranchHashNameLookupExists'.
checkBranchHashNameLookupExists :: BranchHash -> Transaction Bool
checkBranchHashNameLookupExists branchHash =
  Q.saveBranchHash branchHash >>= Q.checkBranchHashNameLookupExists
data NamesByPath = NamesByPath
{ termNamesInPath :: [S.NamedRef (C.Referent, Maybe C.ConstructorType)],
typeNamesInPath :: [S.NamedRef C.Reference]

View File

@ -135,14 +135,20 @@ module U.Codebase.Sqlite.Queries
-- * Name Lookup
ensureNameLookupTables,
ensureScopedNameLookupTables,
copyScopedNameLookup,
dropNameLookupTables,
insertTermNames,
insertTypeNames,
removeTermNames,
removeTypeNames,
insertScopedTermNames,
insertScopedTypeNames,
removeScopedTermNames,
removeScopedTypeNames,
rootTermNamesByPath,
rootTypeNamesByPath,
getNamespaceDefinitionCount,
checkBranchHashNameLookupExists,
-- * Reflog
appendReflog,
@ -1610,11 +1616,6 @@ ensureNameLookupTables = do
[here|
CREATE INDEX IF NOT EXISTS term_names_by_namespace ON term_name_lookup(namespace)
|]
-- Don't need this index at the moment, but will likely be useful later.
-- execute_
-- [here|
-- CREATE INDEX IF NOT EXISTS term_name_by_referent_lookup ON term_name_lookup(referent_builtin, referent_component_hash, referent_component_index, referent_constructor_index)
-- |]
execute_
[here|
CREATE TABLE IF NOT EXISTS type_name_lookup (
@ -1642,42 +1643,84 @@ ensureScopedNameLookupTables = do
execute_
[here|
CREATE TABLE IF NOT EXISTS name_lookups (
root_causal_hash_id INTEGER PRIMARY KEY REFERENCES causal(self_hash_id) ON DELETE CASCADE,
root_branch_hash_id INTEGER PRIMARY KEY REFERENCES hash(id) ON DELETE CASCADE
)
|]
execute_
[here|
CREATE TABLE IF NOT EXISTS scoped_term_name_lookup (
root_causal_hash_id INTEGER NOT NULL REFERENCES causal(self_hash_id) ON DELETE CASCADE,
-- The name of the term: E.g. map.List.base
root_branch_hash_id INTEGER NOT NULL REFERENCES hash(id) ON DELETE CASCADE,
-- The name of the term in reversed form, with a trailing '.':
-- E.g. map.List.base.
--
-- The trailing '.' is helpful when performing suffix queries where we may not know
-- whether the suffix is complete or not, e.g. we could suffix search using any of the
-- following globs and it would still find 'map.List.base.':
-- map.List.base.*
-- map.List.*
-- map.*
reversed_name TEXT NOT NULL,
-- The namespace containing this term, not reversed: E.g. base.List
-- The last name segment of the name. This is used when looking up names for
-- suffixification when building PPEs.
last_name_segment TEXT NOT NULL,
-- The namespace containing this definition, not reversed, with a trailing '.'
-- The trailing '.' simplifies GLOB queries, so that 'base.*' matches both things in
-- 'base' and 'base.List', but not 'base1', which allows us to avoid an OR in our where
-- clauses which in turn helps the sqlite query planner use indexes more effectively.
--
-- example value: 'base.List.'
namespace TEXT NOT NULL,
referent_builtin TEXT NULL,
referent_component_hash TEXT NULL,
referent_component_index INTEGER NULL,
referent_constructor_index INTEGER NULL,
referent_constructor_type INTEGER NULL,
PRIMARY KEY (root_causal_hash_id, reversed_name, referent_builtin, referent_component_hash, referent_component_index, referent_constructor_index)
PRIMARY KEY (root_branch_hash_id, reversed_name, referent_builtin, referent_component_hash, referent_component_index, referent_constructor_index)
)
|]
-- This index allows finding all names we need to consider within a given namespace for
-- suffixification of a name.
-- It may seem strange to use last_name_segment rather than a suffix search over reversed_name name here;
-- but SQLite will only optimize for a single prefix-glob at once, so we can't glob search
-- over both namespace and reversed_name, but we can EXACT match on last_name_segment and
-- then glob search on the namespace prefix, and have SQLite do the final glob search on
-- reversed_name over rows with a matching last segment without using an index and should be plenty fast.
-- The index has to be created on the scoped table: root_branch_hash_id and
-- last_name_segment are columns of scoped_term_name_lookup.
execute_
  [here|
    CREATE INDEX IF NOT EXISTS scoped_term_names_by_namespace_and_last_name_segment ON scoped_term_name_lookup(root_branch_hash_id, last_name_segment, namespace)
  |]
-- This index allows us to find all names with a given ref within a specific namespace.
-- It is created on the scoped table, since that is the table carrying root_branch_hash_id.
execute_
  [here|
    CREATE INDEX IF NOT EXISTS scoped_term_name_by_referent_lookup ON scoped_term_name_lookup(root_branch_hash_id, referent_builtin, referent_component_hash, referent_component_index, referent_constructor_index, namespace)
  |]
-- Allows fetching ALL names within a specific namespace prefix. We currently use this to
-- pretty-print on share, but will be replaced with a more precise set of queries soon.
execute_
[here|
CREATE INDEX IF NOT EXISTS scoped_term_names_by_namespace ON scoped_term_name_lookup(root_branch_hash_id, namespace)
|]
-- Don't need this index at the moment, but will likely be useful later.
-- execute_
-- [here|
-- CREATE INDEX IF NOT EXISTS term_name_by_referent_lookup ON term_name_lookup(referent_builtin, referent_component_hash, referent_component_index, referent_constructor_index)
-- |]
execute_
[here|
CREATE TABLE IF NOT EXISTS scoped_type_name_lookup (
root_causal_hash_id INTEGER NOT NULL,
root_branch_hash_id INTEGER NOT NULL REFERENCES hash(id),
-- The name of the term: E.g. List.base
reversed_name TEXT NOT NULL,
-- The namespace containing this term, not reversed: E.g. base.List
-- The last name segment of the name. This is used when looking up names for
-- suffixification when building PPEs.
last_name_segment TEXT NOT NULL,
-- The namespace containing this definition, not reversed, with a trailing '.'
-- The trailing '.' simplifies GLOB queries, so that 'base.*' matches both things in
-- 'base' and 'base.List', but not 'base1', which allows us to avoid an OR in our where
-- clauses which in turn helps the sqlite query planner use indexes more effectively.
--
-- example value: 'base.List.'
namespace TEXT NOT NULL,
reference_builtin TEXT NULL,
reference_component_hash INTEGER NULL,
@ -1685,11 +1728,67 @@ ensureScopedNameLookupTables = do
PRIMARY KEY (reversed_name, reference_builtin, reference_component_hash, reference_component_index)
);
|]
-- This index allows finding all names we need to consider within a given namespace for
-- suffixification of a name.
-- It may seem strange to use last_name_segment rather than a suffix search over reversed_name name here;
-- but SQLite will only optimize for a single prefix-glob at once, so we can't glob search
-- over both namespace and reversed_name, but we can EXACT match on last_name_segment and
-- then glob search on the namespace prefix, and have SQLite do the final glob search on
-- reversed_name over rows with a matching last segment without using an index and should be plenty fast.
-- The index has to be created on the scoped table: root_branch_hash_id and
-- last_name_segment are columns of scoped_type_name_lookup.
execute_
  [here|
    CREATE INDEX IF NOT EXISTS scoped_type_names_by_namespace_and_last_name_segment ON scoped_type_name_lookup(root_branch_hash_id, last_name_segment, namespace)
  |]
-- This index allows us to find all names with a given ref within a specific namespace.
-- It is created on the scoped table, since that is the table carrying root_branch_hash_id.
execute_
  [here|
    CREATE INDEX IF NOT EXISTS scoped_type_name_by_reference_lookup ON scoped_type_name_lookup(root_branch_hash_id, reference_builtin, reference_component_hash, reference_component_index, namespace)
  |]
-- Allows fetching ALL names within a specific namespace prefix. We currently use this to
-- pretty-print on share, but it will be replaced with a more precise set of queries soon.
-- The index is created on the scoped table, which is the one keyed by root_branch_hash_id.
execute_
  [here|
    CREATE INDEX IF NOT EXISTS scoped_type_names_by_namespace ON scoped_type_name_lookup(root_branch_hash_id, namespace)
  |]
-- | Copy every scoped term and type name row belonging to one branch hash over to another.
--
-- The first argument is the branch hash to copy from, the second is the branch hash the
-- copied rows are re-keyed to.
copyScopedNameLookup :: BranchHashId -> BranchHashId -> Transaction ()
copyScopedNameLookup fromBHId toBHId = do
  -- Parameter order follows the SQL: the first '?' is the new key in the SELECT list, the
  -- second '?' is the key being filtered on.
  execute termsCopySql (toBHId, fromBHId)
  execute typesCopySql (toBHId, fromBHId)
  where
    -- The target columns are spelled out so that the copy doesn't depend on the table's
    -- column order, and so that referent_constructor_type is carried across too.
    termsCopySql =
      [here|
        INSERT INTO scoped_term_name_lookup (root_branch_hash_id, reversed_name, last_name_segment, namespace, referent_builtin, referent_component_hash, referent_component_index, referent_constructor_index, referent_constructor_type)
        SELECT ?, reversed_name, last_name_segment, namespace, referent_builtin, referent_component_hash, referent_component_index, referent_constructor_index, referent_constructor_type
        FROM scoped_term_name_lookup
        WHERE root_branch_hash_id = ?
      |]
    typesCopySql =
      [here|
        INSERT INTO scoped_type_name_lookup (root_branch_hash_id, reversed_name, last_name_segment, namespace, reference_builtin, reference_component_hash, reference_component_index)
        SELECT ?, reversed_name, last_name_segment, namespace, reference_builtin, reference_component_hash, reference_component_index
        FROM scoped_type_name_lookup
        WHERE root_branch_hash_id = ?
      |]
-- | Report whether the @name_lookups@ table already has an entry for the desired root
-- branch hash.
checkBranchHashNameLookupExists :: BranchHashId -> Transaction Bool
checkBranchHashNameLookupExists hashId = queryOneCol existsSql (Only hashId)
  where
    existsSql =
      [here|
        SELECT EXISTS (
          SELECT 1
          FROM name_lookups
          WHERE root_branch_hash_id = ?
          LIMIT 1
        )
      |]
-- | Insert the given set of term names into the name lookup table
insertTermNames :: [NamedRef (Referent.TextReferent, Maybe NamedRef.ConstructorType)] -> Transaction ()
insertTermNames names = do
@ -1704,6 +1803,18 @@ insertTermNames names = do
ON CONFLICT DO NOTHING
|]
-- | Add each of the given type names to the @type_name_lookup@ table.
--
-- Rows which conflict with an existing entry are silently left as they are.
insertTypeNames :: [NamedRef (Reference.TextReference)] -> Transaction ()
insertTypeNames names = executeMany insertSql rows
  where
    -- One row per name: the name's own fields followed by its namespace.
    rows = map NamedRef.toRowWithNamespace names
    insertSql =
      [here|
        INSERT INTO type_name_lookup (reversed_name, reference_builtin, reference_component_hash, reference_component_index, namespace)
        VALUES (?, ?, ?, ?, ?)
        ON CONFLICT DO NOTHING
      |]
-- | Remove the given set of term names into the name lookup table
removeTermNames :: [NamedRef Referent.TextReferent] -> Transaction ()
removeTermNames names = do
@ -1735,6 +1846,71 @@ removeTypeNames names = do
AND reference_component_index IS ?
|]
-- | Add each of the given term names to the @scoped_term_name_lookup@ table, keyed by the
-- provided branch hash id.
--
-- Rows which conflict with an existing entry are silently left as they are.
insertScopedTermNames :: BranchHashId -> [NamedRef (Referent.TextReferent, Maybe NamedRef.ConstructorType)] -> Transaction ()
insertScopedTermNames bhId names = executeMany insertSql (map scopedRow names)
  where
    -- The branch hash id comes first, followed by the scoped name columns and ref fields.
    scopedRow :: NamedRef (Referent.TextReferent, Maybe NamedRef.ConstructorType) -> (Only BranchHashId :. [SQLData])
    scopedRow namedRef =
      Only bhId :. NamedRef.namedRefToScopedRow (splitRef <$> namedRef)
    -- Re-shape the (referent, constructor type) pair into a form which can be turned into a
    -- row: the referent's fields followed by a single constructor type field.
    splitRef :: (Referent.TextReferent, Maybe NamedRef.ConstructorType) -> (Referent.TextReferent :. Only (Maybe NamedRef.ConstructorType))
    splitRef (referent, constructorType) = referent :. Only constructorType
    insertSql =
      [here|
        INSERT INTO scoped_term_name_lookup (root_branch_hash_id, reversed_name, namespace, last_name_segment, referent_builtin, referent_component_hash, referent_component_index, referent_constructor_index, referent_constructor_type)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT DO NOTHING
      |]
-- | Insert the given set of type names into the scoped type name lookup table, keyed by
-- the provided branch hash id.
--
-- Rows which conflict with an existing entry are silently left as they are.
insertScopedTypeNames :: BranchHashId -> [NamedRef (Reference.TextReference)] -> Transaction ()
insertScopedTypeNames bhId names =
  executeMany sql ((Only bhId :.) . NamedRef.namedRefToScopedRow <$> names)
  where
    -- Targets the scoped table: the unscoped type_name_lookup insert in this module has no
    -- root_branch_hash_id or last_name_segment columns to write to.
    sql =
      [here|
        INSERT INTO scoped_type_name_lookup (root_branch_hash_id, reversed_name, namespace, last_name_segment, reference_builtin, reference_component_hash, reference_component_index)
        VALUES (?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT DO NOTHING
      |]
-- | Remove the given set of term names from the scoped term name lookup table, restricted
-- to rows belonging to the provided branch hash id.
removeScopedTermNames :: BranchHashId -> [NamedRef Referent.TextReferent] -> Transaction ()
removeScopedTermNames bhId names = do
  -- NOTE(review): 'NamedRef.namedRefToScopedRow' writes reversed_name with a trailing '.';
  -- this delete relies on the 'ToRow' instance of 'NamedRef' rendering the name in the
  -- same form in order to match those rows — confirm.
  executeMany sql ((Only bhId :.) <$> names)
  where
    -- 'IS' is used rather than '=' so that NULL parameters match NULL columns.
    sql =
      [here|
        DELETE FROM scoped_term_name_lookup
        WHERE
        root_branch_hash_id IS ?
        AND reversed_name IS ?
        AND referent_builtin IS ?
        AND referent_component_hash IS ?
        AND referent_component_index IS ?
        AND referent_constructor_index IS ?
      |]
-- | Remove the given set of type names from the scoped type name lookup table, restricted
-- to rows belonging to the provided branch hash id.
removeScopedTypeNames :: BranchHashId -> [NamedRef (Reference.TextReference)] -> Transaction ()
removeScopedTypeNames bhId names = do
  -- NOTE(review): 'NamedRef.namedRefToScopedRow' writes reversed_name with a trailing '.';
  -- this delete relies on the 'ToRow' instance of 'NamedRef' rendering the name in the
  -- same form in order to match those rows — confirm.
  executeMany sql ((Only bhId :.) <$> names)
  where
    -- 'IS' is used rather than '=' so that NULL parameters match NULL columns.
    sql =
      [here|
        DELETE FROM scoped_type_name_lookup
        WHERE
        root_branch_hash_id IS ?
        AND reversed_name IS ?
        AND reference_builtin IS ?
        AND reference_component_hash IS ?
        AND reference_component_index IS ?
      |]
-- | We need to escape any special characters for globbing.
--
-- >>> globEscape "Nat.*.doc"
@ -1792,18 +1968,6 @@ getNamespaceDefinitionCount namespace = do
)
|]
-- | Add each of the given type names to the @type_name_lookup@ table.
--
-- Rows which conflict with an existing entry are silently left as they are.
insertTypeNames :: [NamedRef (Reference.TextReference)] -> Transaction ()
insertTypeNames names = executeMany insertSql rows
  where
    -- One row per name: the name's own fields followed by its namespace.
    rows = map NamedRef.toRowWithNamespace names
    insertSql =
      [here|
        INSERT INTO type_name_lookup (reversed_name, reference_builtin, reference_component_hash, reference_component_index, namespace)
        VALUES (?, ?, ?, ?, ?)
        ON CONFLICT DO NOTHING
      |]
-- | Get the list of a term names in the root namespace according to the name lookup index
rootTermNamesByPath :: Maybe Text -> Transaction [NamedRef (Referent.TextReferent, Maybe NamedRef.ConstructorType)]
rootTermNamesByPath mayNamespace = do

View File

@ -655,6 +655,47 @@ updateNameLookupIndex getDeclType pathPrefix mayFromBranchHash toBranchHash = do
ct <- getDeclType ref
pure (referent, Just $ Cv.constructorType1to2 ct)
-- | Ensure a name lookup index exists for the provided branch hash, building one if needed.
--
-- If an index already exists for @toBranchHash@ this does nothing. Otherwise the branch is
-- diffed against the starting branch (or against the empty branch if there's no usable
-- starting branch) and the resulting name changes are handed to
-- 'Ops.buildNameLookupForBranchHash'.
ensureNameLookupForCausalHash ::
  -- | Fetches the constructor type of the decl a constructor referent belongs to.
  (C.Reference.Reference -> Sqlite.Transaction CT.ConstructorType) ->
  -- | An optional branch which we already have an index for.
  -- If provided, we can build the name index much faster by copying the index then computing only the changes we need to make between the two indexes.
  Maybe BranchHash ->
  -- | The branch to ensure an index for.
  BranchHash ->
  Sqlite.Transaction ()
ensureNameLookupForCausalHash getDeclType mayFromBranchHash toBranchHash = do
  Ops.checkBranchHashNameLookupExists toBranchHash >>= \case
    True -> pure ()
    False -> do
      -- Work out which branch to diff against, and (only if it really has an index) which
      -- existing index should be copied before applying that diff.
      (fromBranch, mayExistingLookupBH) <- case mayFromBranchHash of
        Nothing -> pure (V2Branch.empty, Nothing)
        Just fromBH ->
          Ops.checkBranchHashNameLookupExists fromBH >>= \case
            True -> do
              existingBranch <- Ops.expectBranchByBranchHash fromBH
              pure (existingBranch, Just fromBH)
            -- There's no index to copy from, so diff against the empty branch and build
            -- the whole index from scratch.
            -- TODO: infer a suitable starting branch (one which already has an index)
            -- rather than starting over.
            False -> pure (V2Branch.empty, Nothing)
      toBranch <- Ops.expectBranchByBranchHash toBranchHash
      treeDiff <- BranchDiff.diffBranches fromBranch toBranch
      -- This function takes no path prefix, so names are computed without one.
      let namePrefix = Nothing
      let BranchDiff.NameChanges {termNameAdds, termNameRemovals, typeNameAdds, typeNameRemovals} = BranchDiff.nameChanges namePrefix treeDiff
      termNameAddsWithCT <- do
        for termNameAdds \(name, ref) -> do
          refWithCT <- addReferentCT ref
          pure $ toNamedRef (name, refWithCT)
      Ops.buildNameLookupForBranchHash mayExistingLookupBH toBranchHash (termNameAddsWithCT, toNamedRef <$> termNameRemovals) (toNamedRef <$> typeNameAdds, toNamedRef <$> typeNameRemovals)
  where
    -- Pair a ref with its name, stored as reversed segments.
    toNamedRef :: (Name, ref) -> S.NamedRef ref
    toNamedRef (name, ref) = S.NamedRef {reversedSegments = coerce $ Name.reverseSegments name, ref = ref}
    -- Constructors are annotated with the constructor type of their decl; plain refs get
    -- Nothing.
    addReferentCT :: C.Referent.Referent -> Sqlite.Transaction (C.Referent.Referent, Maybe C.Referent.ConstructorType)
    addReferentCT referent = case referent of
      C.Referent.Ref {} -> pure (referent, Nothing)
      C.Referent.Con ref _conId -> do
        ct <- getDeclType ref
        pure (referent, Just $ Cv.constructorType1to2 ct)
-- | Compute the root namespace names index which is used by the share server for serving api
-- requests. Using 'updateNameLookupIndex' is preferred whenever possible, since it's
-- considerably faster. This can be used to reset the index if it ever gets out of sync due to