mirror of
https://github.com/digital-asset/daml.git
synced 2024-09-20 01:07:18 +03:00
Fix packaging performance (#6350)
fixes #3150 This PR introduces a patch to GHC to fix the performance of the pattern match checker in the presence of multiple packages which is currently significantly (orders of magnitude) slower than having everything in a single package. I also added a test case that hits this. Here’s what you need to hit this issue: 1. A typeclass with a functional dependency. `HasField` is the obvious candidate for this. 2. A lot of instances of this typeclass in a separate package (this is the only part where the separate package matters). 3. A reasonably large ADT with a bunch of strict fields. 4. A pattern match in the context of some constraints of the typeclass. The constraints can be completely unused. In that case, you will get a significant slowdown in the number of instances, number of constructors and number of constraints (didn’t verify if it’s linear but it is significant which is all that matters). Here’s why this happens: 1. The pattern match checker checks for strict fields if the type is inhabited. 2. This calls `pmTopNormaliseType_maybe` to normalize a type (the details don’t matter) which in turn calls into the typechecker. This function is called very often (presumably linear in the number of constructors but didn’t verify.) 3. The typechecker has some logic in `improveFromInstEnv` for generating additional equations by unifying functional dependencies `a -> b` with constraints in scope and thereby deducing information about `b`. 4. In the pattern match checker the list of instances of the home package is empty since the pattern match checker (apparently) doesn’t actually care about those extra equations. However, the list of instances in the EPS is not empty. This is the issue here: By moving it to an external package we suddenly end up with thousands of instances that we try to unify with the functional dependencies every time we normalize which happens very often. Proposed fix: The solution is rather simple: Since the pattern match checker apparently does not care about the instances of the home package, it almost certainly doesn’t care about instances in general so we just empty the instances of external packages explicitly. Is the fix correct? 1. I verified that the GHC test suite passes with this patch which gives me a reasonable level of confidence. 2. I verified that our own test suite passes. 3. The most dodgy part is actually emptying the instance since the whole EPS stuff is a mutable mess. What could in theory happen is that the PM ends up loading an interface file that mutates this again. However, afaiu it is impossible for the PM to need an interface that the typechecker didnt already need. I did do a bunch of debugging and this is exactly what I observed in my experiments. Alternative ideas and upstreaming: The other option would be to not try and mess with the EPS but somehow have a conditional flag somewhere in the typechecker env to disable this logic in the pattern match checker. However, that sounds significantly more complex so I don’t think it’s worth the effort. GHC 8.10 has a new pattern match checker that has different performance characteristics and seems to do much better here so there is little reason to try and upstream this. I strongly want to avoid upgrading DAML to 8.10 at this point (too much risk, let’s wait until things calm down) changelog_begin - [DAML Compiler] Fix an issue where compilation slowed down significantly when code was split up into several packages. See https://github.com/digital-asset/daml/issues/3150 changelog_end
This commit is contained in:
parent
ea9e09278d
commit
a178f62613
@ -12,7 +12,7 @@ jobs:
|
||||
variables:
|
||||
ghc-lib-sha: 'd2eadb7ef4a6d4f33a6a04056d2e57f11a59ef0f'
|
||||
base-sha: '9c787d4d24f2b515934c8503ee2bbd7cfac4da20'
|
||||
patches: '4650347db9a7e8c65831ab83b80da43b28c3deba 833ca63be2ab14871874ccb6974921e8952802e9'
|
||||
patches: 'd1e1a671bb0e653106c5bce23bfd0f4e1660c503 833ca63be2ab14871874ccb6974921e8952802e9'
|
||||
flavor: 'ghc-8.8.1'
|
||||
steps:
|
||||
- checkout: self
|
||||
|
@ -442,6 +442,38 @@ damlc_compile_test(
|
||||
stack_limit = "100K" if is_windows else "70K",
|
||||
)
|
||||
|
||||
da_haskell_binary(
|
||||
name = "generate-package-pattern-match",
|
||||
srcs = ["src/DA/Test/GeneratePackagePatternMatch.hs"],
|
||||
hackage_deps = [
|
||||
"base",
|
||||
"directory",
|
||||
"filepath",
|
||||
],
|
||||
main_function = "DA.Test.GeneratePackagePatternMatch.main",
|
||||
src_strip_prefix = "src",
|
||||
)
|
||||
|
||||
# This is an SH test since damlc_compile_test does not support packages so far.
|
||||
sh_test(
|
||||
name = "package-pattern-match-perf",
|
||||
srcs = ["src/package-pattern-match-perf.sh"],
|
||||
args = [
|
||||
"$(location //compiler/damlc)",
|
||||
"$(location :generate-package-pattern-match)",
|
||||
],
|
||||
data = [
|
||||
":generate-package-pattern-match",
|
||||
"//compiler/damlc",
|
||||
],
|
||||
toolchains = [
|
||||
"@rules_sh//sh/posix:make_variables",
|
||||
],
|
||||
deps = [
|
||||
"@bazel_tools//tools/bash/runfiles",
|
||||
],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "daml-test-files",
|
||||
srcs = glob(["daml-test-files/**"]),
|
||||
|
@ -0,0 +1,80 @@
|
||||
-- Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
|
||||
-- SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
module DA.Test.GeneratePackagePatternMatch (main) where
|
||||
|
||||
import System.Directory
|
||||
import System.Environment
|
||||
import System.FilePath
|
||||
|
||||
main :: IO ()
|
||||
main = do
|
||||
[dir, numConstrs, numTys] <- getArgs
|
||||
removePathForcibly (dir </> "main")
|
||||
removePathForcibly (dir </> "dep")
|
||||
createDirectoryIfMissing True (dir </> "main")
|
||||
createDirectoryIfMissing True (dir </> "dep")
|
||||
writeFile (dir </> "dep" </> "Dep.daml") (depMod numTys)
|
||||
writeFile (dir </> "dep" </> "ADT.daml") (adtMod numConstrs)
|
||||
writeFile (dir </> "dep" </> "daml.yaml") (damlYaml "dep" "." [])
|
||||
writeFile (dir </> "main" </> "Main.daml") mainMod
|
||||
writeFile (dir </> "main" </> "daml.yaml") (damlYaml "main" "Main.daml" ["../dep/.daml/dist/dep-0.0.1.dar"])
|
||||
where
|
||||
damlYaml name src deps = unlines $
|
||||
[ "sdk-version: 0.0.0"
|
||||
, "name: " <> name
|
||||
, "version: 0.0.1"
|
||||
, "source: " <> src
|
||||
, "dependencies:"
|
||||
, " - daml-stdlib"
|
||||
, " - daml-prim"
|
||||
] <> map (" - " <>) deps
|
||||
-- C is basically HasField but I don’t want to rely on our custom HasField and the
|
||||
-- record dot preprocessor.
|
||||
depMod numTys = unlines $
|
||||
[ "module Dep where"
|
||||
, "data A"
|
||||
, "data B"
|
||||
, "data C"
|
||||
, "data D"
|
||||
, "data E"
|
||||
, "class Cl s a b | s a -> b where"
|
||||
] <> concat
|
||||
[ [ "data " <> ty <> " = " <> ty <> " {}"
|
||||
, "instance Cl A " <> ty <> " ()"
|
||||
, "instance Cl B " <> ty <> " ()"
|
||||
, "instance Cl C " <> ty <> " ()"
|
||||
, "instance Cl D " <> ty <> " ()"
|
||||
, "instance Cl E " <> ty <> " ()"
|
||||
]
|
||||
| i <- [ 1 :: Int .. read numTys ]
|
||||
, let ty = "X" <> show i
|
||||
]
|
||||
adtMod numConstrs = unlines $
|
||||
[ "module ADT (T(..)) where"
|
||||
, "data Opaque = Opaque"
|
||||
, "data Arg = Arg Opaque"
|
||||
, "data T = C0 Arg"
|
||||
] <>
|
||||
[ " | C" <> show i <> " Arg"
|
||||
| i <- [ 1 :: Int .. (read numConstrs - 1) ]
|
||||
]
|
||||
mainMod = unlines
|
||||
[ "module Main where"
|
||||
, "import Dep (Cl, A, B, C, D, E)"
|
||||
, "import qualified ADT"
|
||||
, "f : forall a."
|
||||
, " ( Cl A a ()"
|
||||
, " , Cl B a ()"
|
||||
, " , Cl C a ()"
|
||||
, " , Cl D a ()"
|
||||
, " , Cl E a ()"
|
||||
, " ) => ADT.T -> a"
|
||||
, "f x = case x of"
|
||||
, " ADT.C5 _ -> error \"a\""
|
||||
, " ADT.C6 _ -> error \"b\""
|
||||
, " ADT.C7 _ -> error \"c\""
|
||||
, " ADT.C8 _ -> error \"d\""
|
||||
, " e -> error \"e\""
|
||||
]
|
||||
|
31
compiler/damlc/tests/src/package-pattern-match-perf.sh
Executable file
31
compiler/damlc/tests/src/package-pattern-match-perf.sh
Executable file
@ -0,0 +1,31 @@
|
||||
#!/usr/bin/env bash
|
||||
# Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Copy-pasted from the Bazel Bash runfiles library v2.
|
||||
set -uo pipefail; f=bazel_tools/tools/bash/runfiles/runfiles.bash
|
||||
source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \
|
||||
source "$(grep -sm1 "^$f " "${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null || \
|
||||
source "$0.runfiles/$f" 2>/dev/null || \
|
||||
source "$(grep -sm1 "^$f " "$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
|
||||
source "$(grep -sm1 "^$f " "$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
|
||||
{ echo>&2 "ERROR: cannot find $f"; exit 1; }; f=; set -e
|
||||
# --- end runfiles.bash initialization v2 ---
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
DAMLC="$(rlocation "$TEST_WORKSPACE/$1")"
|
||||
GENERATE="$(rlocation "$TEST_WORKSPACE/$2")"
|
||||
|
||||
DIR=$(mktemp -d)
|
||||
trap "rm -rf $DIR" EXIT
|
||||
|
||||
$GENERATE "$DIR" 2000 2000
|
||||
|
||||
$DAMLC build --project-root $DIR/dep
|
||||
|
||||
# Note that we don’t seem to hit the limits here since the slowness
|
||||
# does not come with larger memomry or stack usage. However, the slow
|
||||
# down is so big that this times out the 300s Bazel timeout whereas
|
||||
# the fixed version runs in about 30s.
|
||||
$DAMLC build --project-root $DIR/main +RTS -s -M100M -K1M -N1
|
@ -3,10 +3,10 @@
|
||||
|
||||
resolver: lts-14.1
|
||||
packages:
|
||||
- archive: https://daml-binaries.da-ext.net/da-ghc-lib/ghc-lib-48295232a0bebf0bf80b90447d0f5890.tar.gz
|
||||
sha256: "32ef1e2aebdd473681e38fbbf4747b88a1e03d00ea90adb9917b613659acad9f"
|
||||
- archive: https://daml-binaries.da-ext.net/da-ghc-lib/ghc-lib-parser-48295232a0bebf0bf80b90447d0f5890.tar.gz
|
||||
sha256: "dbaaca05c677794261e102f1a7d6a4409d368a47feb214b6378fa1169d269ec3"
|
||||
- archive: https://daml-binaries.da-ext.net/da-ghc-lib/ghc-lib-942755169bad510701179c8c40cddd3e.tar.gz
|
||||
sha256: "367f779d564e01a98fe7e9c7f1d7c1a7936759aae9c273eb2dfc6f1ed1c51d07"
|
||||
- archive: https://daml-binaries.da-ext.net/da-ghc-lib/ghc-lib-parser-942755169bad510701179c8c40cddd3e.tar.gz
|
||||
sha256: "bf17b50a11369b67237e6e0e22589f7ead7b482d05e0e3c68654c9be5b3d3fac"
|
||||
- github: digital-asset/hlint
|
||||
commit: "3e78bce69749b22a80fec1e8eb853cc0c100c18e"
|
||||
sha256: "cf39f2b378485afc77ffdad4dbb057d5d9b4dfc5a38c76ddc44e920e537fb0fa"
|
||||
|
Loading…
Reference in New Issue
Block a user