damlc: incremental package db initialization (#8541)

* damlc: incremental package db initialization

We keep a hash over all dependencies of a project in the package
database metadata and only recompute the package database if a
dependency changes, i.e. the computed hash changes.

Fixes #4413.
Fixes #8409.

CHANGELOG_BEGIN
CHANGELOG_END

* using Fingerprint

* added tests

* format

* use SdkVersion instead of hardcoded version

* added a reference in tests

* factored out project file template
This commit is contained in:
Robin Krom 2021-01-20 15:52:37 +01:00 committed by GitHub
parent a66a3177dd
commit aa8c1f8fee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 376 additions and 104 deletions

View File

@ -1,11 +1,14 @@
-- Copyright (c) 2021 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
-- SPDX-License-Identifier: Apache-2.0
{-# OPTIONS_GHC -Wno-orphans #-}
module DA.Daml.Options.Packaging.Metadata
( PackageDbMetadata(..),
writeMetadata,
readMetadata,
renamingToFlag,
metadataFile,
) where
import Data.Aeson
@ -23,15 +26,13 @@ import Development.IDE.Types.Location
import GHC.Generics
import qualified "ghc-lib-parser" Module as Ghc
import System.FilePath
import GHC.Fingerprint
-- | Metadata about an initialized package db. We write this to a JSON
-- file in the package db after the package db has been initialized.
--
-- While we can technically reconstruct all this information by
-- reading the DARs again, this is unnecessarily wasteful.
--
-- In the future, we should also be able to include enough metadata in here
-- to decide whether we need to reinitialize the package db or not.
data PackageDbMetadata = PackageDbMetadata
{ directDependencies :: [Ghc.UnitId]
-- ^ Unit ids of direct dependencies. These are exposed by default
@ -40,8 +41,15 @@ data PackageDbMetadata = PackageDbMetadata
-- We do not bother differentiating between exposed and unexposed modules
-- since we already warn on non-exposed modules anyway and this
-- is intended for data-dependencies where everything is exposed.
, fingerprintDependencies :: Fingerprint
-- ^ Hash over all dependency dars. We use this to check whether we need to reinitialize the
-- package db or not.
} deriving Generic
deriving instance Generic Fingerprint
instance ToJSON Fingerprint
instance FromJSON Fingerprint
renamingToFlag :: Ghc.UnitId -> Ghc.ModuleName -> [LF.ModuleName] -> PackageFlag
renamingToFlag unitId prefix modules =
ExposePackage

View File

@ -8,6 +8,7 @@ module DA.Cli.Damlc.Packaging
) where
import qualified "zip-archive" Codec.Archive.Zip as ZipArchive
import Control.Exception.Safe (tryIO)
import Control.Lens (toListOf)
import Control.Monad.Extra
import Control.Monad.Trans.Maybe
@ -27,6 +28,7 @@ import Development.IDE.Core.Service (runActionSync)
import Development.IDE.GHC.Util (hscEnv)
import Development.IDE.Types.Location
import "ghc-lib-parser" DynFlags (DynFlags)
import GHC.Fingerprint
import "ghc-lib-parser" HscTypes as GHC
import "ghc-lib-parser" Module (UnitId, unitIdString)
import qualified Module as GHC
@ -34,8 +36,8 @@ import qualified "ghc-lib-parser" Packages as GHC
import System.Directory.Extra
import System.Exit
import System.FilePath
import System.Info.Extra
import System.IO.Extra
import System.Info.Extra
import System.Process (callProcess)
import "ghc-lib-parser" UniqSet
@ -70,120 +72,121 @@ import SdkVersion
-- and then remap references to those dummy packages to the original DAML-LF
-- package id.
createProjectPackageDb :: NormalizedFilePath -> Options -> PackageSdkVersion -> MS.Map UnitId GHC.ModuleName -> [FilePath] -> [FilePath] -> IO ()
createProjectPackageDb projectRoot (disableScenarioService -> opts) (PackageSdkVersion thisSdkVer) modulePrefixes deps dataDeps
| null dataDeps && all (`elem` basePackages) deps =
-- Initializing the package db is expensive since it requires calling GenerateStablePackages and GeneratePackageMap.
--Therefore we only do it if we actually have a dependency.
clearPackageDb
| otherwise = do
clearPackageDb
deps <- expandSdkPackages (optDamlLfVersion opts) (filter (`notElem` basePackages) deps)
depsExtracted <- mapM extractDar deps
createProjectPackageDb projectRoot (disableScenarioService -> opts) (PackageSdkVersion thisSdkVer) modulePrefixes deps' dataDeps
= do
deps <- expandSdkPackages (optDamlLfVersion opts) (filter (`notElem` basePackages) deps')
(needsReinitalization, depsFingerprint)
<- dbNeedsReinitialization projectRoot deps thisSdkVer (show $ optDamlLfVersion opts)
when needsReinitalization $ do
clearPackageDb
depsExtracted <- mapM extractDar deps
let uniqSdkVersions = nubSort $ thisSdkVer : map edSdkVersions depsExtracted
let depsSdkVersions = map edSdkVersions depsExtracted
unless (all (== thisSdkVer) depsSdkVersions) $
fail $
"Package dependencies from different SDK versions: " ++
intercalate ", " uniqSdkVersions
let uniqSdkVersions = nubSort $ thisSdkVer : map edSdkVersions depsExtracted
let depsSdkVersions = map edSdkVersions depsExtracted
unless (all (== thisSdkVer) depsSdkVersions) $
fail $
"Package dependencies from different SDK versions: " ++
intercalate ", " uniqSdkVersions
-- Register deps at the very beginning. This allows data-dependencies to
-- depend on dependencies which is necessary so that we can reconstruct typeclass
-- instances for a typeclass defined in a library.
-- It does mean that we cant have a dependency from a dependency on a
-- data-dependency but that seems acceptable.
-- See https://github.com/digital-asset/daml/issues/4218 for more details.
-- TODO Enforce this with useful error messages
forM_ depsExtracted $
-- We only have the interface files for the main DALF in a `dependency` so we
-- also only extract the main dalf.
\ExtractedDar{..} -> installDar dbPath edConfFiles edMain edSrcs
-- Register deps at the very beginning. This allows data-dependencies to
-- depend on dependencies which is necessary so that we can reconstruct typeclass
-- instances for a typeclass defined in a library.
-- It does mean that we cant have a dependency from a dependency on a
-- data-dependency but that seems acceptable.
-- See https://github.com/digital-asset/daml/issues/4218 for more details.
-- TODO Enforce this with useful error messages
forM_ depsExtracted $
-- We only have the interface files for the main DALF in a `dependency` so we
-- also only extract the main dalf.
\ExtractedDar{..} -> installDar dbPath edConfFiles edMain edSrcs
loggerH <- getLogger opts "generate package maps"
mbRes <- withDamlIdeState opts loggerH diagnosticsLogger $ \ide -> runActionSync ide $ runMaybeT $
(,) <$> useNoFileE GenerateStablePackages
<*> useE GeneratePackageMap projectRoot
(stablePkgs, PackageMap dependenciesInPkgDb) <- maybe (fail "Failed to generate package info") pure mbRes
let stablePkgIds :: Set LF.PackageId
stablePkgIds = Set.fromList $ map LF.dalfPackageId $ MS.elems stablePkgs
let dependenciesInPkgDbIds =
Set.fromList $ map LF.dalfPackageId $ MS.elems dependenciesInPkgDb
loggerH <- getLogger opts "generate package maps"
mbRes <- withDamlIdeState opts loggerH diagnosticsLogger $ \ide -> runActionSync ide $ runMaybeT $
(,) <$> useNoFileE GenerateStablePackages
<*> useE GeneratePackageMap projectRoot
(stablePkgs, PackageMap dependenciesInPkgDb) <- maybe (fail "Failed to generate package info") pure mbRes
let stablePkgIds :: Set LF.PackageId
stablePkgIds = Set.fromList $ map LF.dalfPackageId $ MS.elems stablePkgs
let dependenciesInPkgDbIds =
Set.fromList $ map LF.dalfPackageId $ MS.elems dependenciesInPkgDb
-- Now handle data-dependencies.
darsFromDataDependencies <- getDarsFromDataDependencies dependenciesInPkgDbIds dataDeps
let dalfsFromDataDependencies = concatMap dalfs darsFromDataDependencies
-- Now handle data-dependencies.
darsFromDataDependencies <- getDarsFromDataDependencies dependenciesInPkgDbIds dataDeps
let dalfsFromDataDependencies = concatMap dalfs darsFromDataDependencies
-- All transitive packages from DARs specified in `dependencies`.
-- This is only used for unit-id collision checks
-- and dependencies on newer LF versions.
darsFromDependencies <- getDarsFromDependencies dependenciesInPkgDbIds depsExtracted
let dalfsFromDependencies = concatMap dalfs darsFromDependencies
-- All transitive packages from DARs specified in `dependencies`.
-- This is only used for unit-id collision checks
-- and dependencies on newer LF versions.
darsFromDependencies <- getDarsFromDependencies dependenciesInPkgDbIds depsExtracted
let dalfsFromDependencies = concatMap dalfs darsFromDependencies
let dependencyInfo = DependencyInfo
{ dependenciesInPkgDb
, dalfsFromDependencies
, dalfsFromDataDependencies
, mainUnitIds =
map (decodedUnitId . mainDalf)
(darsFromDataDependencies ++ darsFromDependencies)
}
let dependencyInfo = DependencyInfo
{ dependenciesInPkgDb
, dalfsFromDependencies
, dalfsFromDataDependencies
, mainUnitIds =
map (decodedUnitId . mainDalf)
(darsFromDataDependencies ++ darsFromDependencies)
}
-- We perform this check before checking for unit id collisions
-- since it provides a more useful error message.
whenLeft
(checkForIncompatibleLfVersions (optDamlLfVersion opts) dependencyInfo)
exitWithError
whenLeft
(checkForUnitIdConflicts dependencyInfo)
exitWithError
-- We perform this check before checking for unit id collisions
-- since it provides a more useful error message.
whenLeft
(checkForIncompatibleLfVersions (optDamlLfVersion opts) dependencyInfo)
exitWithError
whenLeft
(checkForUnitIdConflicts dependencyInfo)
exitWithError
-- We run the checks for duplicate unit ids before
-- to avoid blowing up GHC when setting up the GHC session.
exposedModules <- getExposedModules opts projectRoot
-- We run the checks for duplicate unit ids before
-- to avoid blowing up GHC when setting up the GHC session.
exposedModules <- getExposedModules opts projectRoot
let (depGraph, vertexToNode) = buildLfPackageGraph dalfsFromDataDependencies stablePkgs dependenciesInPkgDb
let (depGraph, vertexToNode) = buildLfPackageGraph dalfsFromDataDependencies stablePkgs dependenciesInPkgDb
validatedModulePrefixes <- either exitWithError pure (prefixModules modulePrefixes (dalfsFromDependencies <> dalfsFromDataDependencies))
validatedModulePrefixes <- either exitWithError pure (prefixModules modulePrefixes (dalfsFromDependencies <> dalfsFromDataDependencies))
-- Iterate over the dependency graph in topological order.
-- We do a topological sort on the transposed graph which ensures that
-- the packages with no dependencies come first and we
-- never process a package without first having processed its dependencies.
forM_ (topSort $ transposeG depGraph) $ \vertex ->
let (pkgNode, pkgId) = vertexToNode vertex in
-- stable packages are mapped to the current version of daml-prim/daml-stdlib
-- so we dont need to generate interface files for them.
unless (pkgId `Set.member` stablePkgIds || pkgId `Set.member` dependenciesInPkgDbIds) $ do
let unitIdStr = unitIdString $ unitId pkgNode
let pkgIdStr = T.unpack $ LF.unPackageId pkgId
let (pkgName, mbPkgVersion) = LF.splitUnitId (unitId pkgNode)
let deps =
[ (unitId depPkgNode, dalfPackage depPkgNode)
| (depPkgNode, depPkgId) <- map vertexToNode $ reachable depGraph vertex
, pkgId /= depPkgId
, not (depPkgId `Set.member` stablePkgIds)
]
let workDir = dbPath </> unitIdStr <> "-" <> pkgIdStr
createDirectoryIfMissing True workDir
-- write the dalf package
BS.writeFile (workDir </> unitIdStr <.> "dalf") $ LF.dalfPackageBytes (dalfPackage pkgNode)
-- Iterate over the dependency graph in topological order.
-- We do a topological sort on the transposed graph which ensures that
-- the packages with no dependencies come first and we
-- never process a package without first having processed its dependencies.
forM_ (topSort $ transposeG depGraph) $ \vertex ->
let (pkgNode, pkgId) = vertexToNode vertex in
-- stable packages are mapped to the current version of daml-prim/daml-stdlib
-- so we dont need to generate interface files for them.
unless (pkgId `Set.member` stablePkgIds || pkgId `Set.member` dependenciesInPkgDbIds) $ do
let unitIdStr = unitIdString $ unitId pkgNode
let pkgIdStr = T.unpack $ LF.unPackageId pkgId
let (pkgName, mbPkgVersion) = LF.splitUnitId (unitId pkgNode)
let deps =
[ (unitId depPkgNode, dalfPackage depPkgNode)
| (depPkgNode, depPkgId) <- map vertexToNode $ reachable depGraph vertex
, pkgId /= depPkgId
, not (depPkgId `Set.member` stablePkgIds)
]
let workDir = dbPath </> unitIdStr <> "-" <> pkgIdStr
createDirectoryIfMissing True workDir
-- write the dalf package
BS.writeFile (workDir </> unitIdStr <.> "dalf") $ LF.dalfPackageBytes (dalfPackage pkgNode)
generateAndInstallIfaceFiles
(LF.extPackagePkg $ LF.dalfPackagePkg $ dalfPackage pkgNode)
(stubSources pkgNode)
opts
workDir
dbPath
projectPackageDatabase
pkgId
pkgName
mbPkgVersion
deps
dependenciesInPkgDb
exposedModules
generateAndInstallIfaceFiles
(LF.extPackagePkg $ LF.dalfPackagePkg $ dalfPackage pkgNode)
(stubSources pkgNode)
opts
workDir
dbPath
projectPackageDatabase
pkgId
pkgName
mbPkgVersion
deps
dependenciesInPkgDb
exposedModules
writeMetadata projectRoot (PackageDbMetadata (mainUnitIds dependencyInfo) validatedModulePrefixes)
writeMetadata
projectRoot
(PackageDbMetadata (mainUnitIds dependencyInfo) validatedModulePrefixes depsFingerprint)
where
dbPath = projectPackageDatabase </> lfVersionString (optDamlLfVersion opts)
clearPackageDb = do
@ -194,6 +197,23 @@ createProjectPackageDb projectRoot (disableScenarioService -> opts) (PackageSdkV
removePathForcibly dbPath
createDirectoryIfMissing True $ dbPath </> "package.conf.d"
-- | Compute the hash over all dependencies and compare it to the one stored in the metadata file in
-- the package db to decide whether to run reinitialization or not.
dbNeedsReinitialization ::
NormalizedFilePath -> [FilePath] -> String -> String -> IO (Bool, Fingerprint)
dbNeedsReinitialization projectRoot allDeps sdkVersion damlLfVersion = do
fileFingerprints <- mapM getFileHash allDeps
let sdkVersionFingerprint = fingerprintString sdkVersion
let damlLfFingerprint = fingerprintString damlLfVersion
let depsFingerprint =
fingerprintFingerprints $ sdkVersionFingerprint : damlLfFingerprint : fileFingerprints
-- Read the metadata of an already existing package database and see if wee need to reinitialize.
errOrmetaData <- tryIO $ readMetadata projectRoot
pure $
case errOrmetaData of
Left _err -> (True, depsFingerprint)
Right metaData -> (fingerprintDependencies metaData /= depsFingerprint, depsFingerprint)
disableScenarioService :: Options -> Options
disableScenarioService opts = opts
{ optScenarioService = EnableScenarioService False

View File

@ -337,6 +337,36 @@ da_haskell_test(
],
)
# Tests for incremental package-db
da_haskell_test(
name = "incremental-package-db",
srcs = ["src/DA/Test/IncrementalPackageDb.hs"],
data = [
"//compiler/damlc",
"//daml-lf/repl",
],
hackage_deps = [
"base",
"containers",
"directory",
"extra",
"filepath",
"process",
"tasty",
"tasty-hunit",
"ghcide",
],
main_function = "DA.Test.IncrementalPackageDb.main",
src_strip_prefix = "src",
visibility = ["//visibility:public"],
deps = [
"//:sdk-version-hs-lib",
"//compiler/damlc/daml-opts:daml-opts-types",
"//libs-haskell/bazel-runfiles",
"//libs-haskell/test-utils",
],
)
# Tests for packaging
da_haskell_test(
name = "packaging",

View File

@ -0,0 +1,214 @@
-- Copyright (c) 2021 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
-- SPDX-License-Identifier: Apache-2.0
module DA.Test.IncrementalPackageDb (main) where
import Control.Monad.Extra
import DA.Bazel.Runfiles
import Data.Foldable
import System.Directory.Extra
import System.FilePath
import System.IO.Extra
import DA.Test.Process
import Test.Tasty
import Test.Tasty.HUnit
import DA.Daml.Options.Packaging.Metadata (metadataFile)
import Development.IDE.Types.Location
import SdkVersion
newtype ExpectReinitialization = ExpectReinitialization Bool
main :: IO ()
main = do
damlc <- locateRunfiles (mainWorkspace </> "compiler" </> "damlc" </> exe "damlc")
defaultMain $ tests damlc
tests :: FilePath -> TestTree
tests damlc =
testGroup
"Incremental package db initialization"
[ test
"Reinitialize when dependency changes"
[ projectFile "proj" ["dependency/dep.dar"]
, ("daml/A.daml"
, unlines
[ "module A where"
, "import B"
, "x : Int"
, "x = f 1"])
]
[("daml/A.daml"
, unlines
[ "module A where"
, "import B"
, "x : Int"
, "x = f 1 + g 2"])
]
[ projectFile "dep" []
, ( "daml/B.daml"
, unlines
[ "module B where"
, "template T"
, " with"
, " p: Party"
, " where"
, " signatory p"
, "f : Int -> Int"
, "f i = i + 1"
])
]
[ ( "daml/B.daml"
, unlines
[ "module B where"
, "template T"
, " with"
, " p: Party"
, " where"
, " signatory p"
, "f : Int -> Int"
, "f i = i + 2"
, "g : Int -> Int"
, "g i = i + 3"
])
]
(ExpectReinitialization True)
(ShouldSucceed True)
, test
"Reinitialize when dependency is added"
[ projectFile "proj" []
, ("daml/A.daml", unlines [ "module A where"])
]
[ projectFile "proj" ["dependency/dep.dar"]
, ("daml/A.daml", unlines
[ "module A where"
, "import B"
, "g x = f x + 1"
])
]
[ projectFile "dep" []
, ( "daml/B.daml"
, unlines
[ "module B where"
, "template T"
, " with"
, " p: Party"
, " where"
, " signatory p"
, "f : Int -> Int"
, "f i = i + 1"
])
]
[]
(ExpectReinitialization True)
(ShouldSucceed True)
, test
"Fail when dependency is removed"
[ projectFile "proj" ["dependency/dep.dar"]
, ("daml/A.daml"
, unlines
["module A where"
, "import B"
, "x : Int"
, "x = f 1"])
]
[ projectFile "proj" []
]
[ ( "daml.yaml"
, unlines
[ "sdk-version: " <> sdkVersion
, "name: dep"
, "source: daml"
, "version: 0.0.1"
, "dependencies:"
, " - daml-prim"
, " - daml-stdlib"
])
, ( "daml/B.daml"
, unlines
[ "module B where"
, "template T"
, " with"
, " p: Party"
, " where"
, " signatory p"
, "f : Int -> Int"
, "f i = i + 1"
])
]
[]
(ExpectReinitialization True)
(ShouldSucceed False)
, test
"No reinitialization when nothing changes"
[ projectFile "proj" ["dependency/dep.dar"]
, ("daml/A.daml"
, unlines
["module A where"
, "import B"])
]
[]
[ projectFile "dep" []
, ( "daml/B.daml"
, unlines
[ "module B where"
, "template T"
, " with"
, " p: Party"
, " where"
, " signatory p"
, "f : Int -> Int"
, "f i = i + 1"
])
]
[]
(ExpectReinitialization False)
(ShouldSucceed True)
]
where
test ::
String
-> [(FilePath, String)]
-> [(FilePath, String)]
-> [(FilePath, String)]
-> [(FilePath, String)]
-> ExpectReinitialization
-> ShouldSucceed
-> TestTree
test name proj projModification dependency dependencyModification (ExpectReinitialization expectReinitialization) (ShouldSucceed shouldSucceed) =
testCase name $
withTempDir $ \dir -> do
let depDir = dir </> "dependency"
let metaFp = metadataFile $ toNormalizedFilePath' dir
let dar = dir </> "out.dar"
let depDar = dir </> "dependency" </> "dep.dar"
writeFiles dir proj
writeFiles depDir dependency
callProcessSilent damlc ["build", "--project-root", depDir, "-o", depDar]
callProcessSilent damlc ["build", "--project-root", dir, "-o", dar]
metaModTime <- getModificationTime metaFp
writeFiles dir projModification
unless (null dependencyModification) $ do
writeFiles depDir dependencyModification
callProcessSilent damlc ["build", "--project-root", depDir, "-o", depDar]
if shouldSucceed
then callProcessSilent damlc ["build", "--project-root", dir, "-o", dar]
else callProcessSilentError damlc ["build", "--project-root", dir, "-o", dar]
newMetaModTime <- getModificationTime metaFp
when expectReinitialization $
assertBool "package-db was not re-initialized" $ newMetaModTime /= metaModTime
writeFiles dir fs =
for_ fs $ \(file, content) -> do
createDirectoryIfMissing True (takeDirectory $ dir </> file)
writeFileUTF8 (dir </> file) content
projectFile name deps =
( "daml.yaml"
, unlines $
[ "sdk-version: " <> sdkVersion
, "name: " <> name
, "source: daml"
, "version: 0.0.1"
, "dependencies:"
, " - daml-prim"
, " - daml-stdlib"
] ++
[" - " <> dep | dep <- deps])