DAML-LF: Add interning for type to DAML-LF 1.dev (#7893)

* DAML-LF: Add interning for type to DAML-LF 1.dev

We add two new features to DAML-LF 1.dev:

* a per package list (or table) of `Type` messages, and
* a new case in the `Type` message which is an index into this table.

In combination, these two features can be used to allow DAML-LF
encoders to perform hash-consing of `Type` messages. We also change the
Haskell implementation of our DAML-LF encoder to do exactly that when
targetting DAML-LF 1.dev.

Doing this has a few benefits:

1. The DALFs produced by `damlc` get smaller: I've seen a case where
   the size dropped from 69MB to 45MB.
2. DAML-LF decoders need to decode less data.
3. Decoded packages use less memory because identical structures are
   now shared. This is particularly helpful in situations where we need
   to keep the interface (or signature) of a package in memory for a
   long time.

This PR mostly takes care of the Haskell implementation. However, we
need to make the Scala implementation of the decoder aware of the new
features as well since we have tests that load DAML-LF 1.dev into the
engine. A decoder and _targeted_ tests on the Scala side will follow
in a separate PR.

CHANGELOG_BEGIN
CHANGELOG_END

* Make jq tests aware of type interning

CHANGELOG_BEGIN
CHANGELOG_END

* Improve jq test

CHANGELOG_BEGIN
CHANGELOG_END

* Apply Remy's suggestions

Co-authored-by: Remy <remy.haemmerle@daml.com>

* Improve the imperative bits

CHANGELOG_BEGIN
CHANGELOG_END

Co-authored-by: Remy <remy.haemmerle@daml.com>
This commit is contained in:
Martin Huschenbett 2020-11-18 12:14:30 +01:00 committed by GitHub
parent 1ac3160a46
commit f0e5bed36f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 194 additions and 18 deletions

View File

@ -135,6 +135,14 @@ featureChoiceObservers = Feature
, featureCppFlag = "DAML_CHOICE_OBSERVERS"
}
featureTypeInterning :: Feature
featureTypeInterning = Feature
{ featureName = "Type interning"
-- TODO Change as part of #7139
, featureMinVersion = versionDev
, featureCppFlag = "DAML_TYPE_INTERNING"
}
allFeatures :: [Feature]
allFeatures =
[ featureNumeric

View File

@ -26,7 +26,7 @@ import qualified Com.Daml.DamlLfDev.DamlLf1 as LF1
import qualified Data.NameMap as NM
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Vector as V
import qualified Data.Vector.Extended as V
import qualified Proto3.Suite as Proto
@ -37,6 +37,7 @@ data DecodeEnv = DecodeEnv
-- erroring out when producing the string interning table.
{ internedStrings :: !(V.Vector (T.Text, Either String UnmangledIdentifier))
, internedDottedNames :: !(V.Vector ([T.Text], Either String [UnmangledIdentifier]))
, internedTypes :: !(V.Vector Type)
, selfPackageRef :: PackageRef
}
@ -177,14 +178,18 @@ decodeInternedDottedName (LF1.InternedDottedName ids) = do
pure (mangled, sequence unmangledOrErr)
decodePackage :: TL.Text -> LF.PackageRef -> LF1.Package -> Either Error Package
decodePackage minorText selfPackageRef (LF1.Package mods internedStringsV internedDottedNamesV metadata) = do
decodePackage minorText selfPackageRef (LF1.Package mods internedStringsV internedDottedNamesV metadata internedTypesV) = do
version <- decodeVersion (decodeString minorText)
let internedStrings = V.map decodeMangledString internedStringsV
let internedDottedNames = V.empty
let internedTypes = V.empty
let env0 = DecodeEnv{..}
internedDottedNames <- runDecode env0 $ mapM decodeInternedDottedName internedDottedNamesV
let env = DecodeEnv{..}
runDecode env $ do
let env1 = env0{internedDottedNames}
internedTypes <- V.constructNE (V.length internedTypesV) $ \prefix i ->
runDecode env1{internedTypes = prefix} $ decodeType (internedTypesV V.! i)
let env2 = env1{internedTypes}
runDecode env2 $ do
Package version <$> decodeNM DuplicateModule decodeModule mods <*> traverse decodePackageMetadata metadata
decodePackageMetadata :: LF1.PackageMetadata -> Decode PackageMetadata
@ -776,6 +781,9 @@ decodeType LF1.Type{..} = mayDecode "typeSum" typeSum $ \case
foldr TForall body <$> traverse decodeTypeVarWithKind (V.toList binders)
LF1.TypeSumStruct (LF1.Type_Struct flds) ->
TStruct <$> mapM (decodeFieldWithType FieldName) (V.toList flds)
LF1.TypeSumInterned n -> do
DecodeEnv{internedTypes} <- ask
lookupInterned internedTypes BadTypeId n
where
decodeWithArgs :: V.Vector LF1.Type -> Decode Type -> Decode Type
decodeWithArgs args fun = foldl' TApp <$> fun <*> traverse decodeType args

View File

@ -19,6 +19,7 @@ import Data.Either
import Data.Functor.Identity
import qualified Data.HashMap.Strict as HMS
import qualified Data.List as L
import qualified Data.Map.Strict as Map
import qualified Data.NameMap as NM
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
@ -51,6 +52,8 @@ data EncodeEnv = EncodeEnv
, internedDottedNames :: !(HMS.HashMap [Int32] Int32)
, nextInternedDottedNameId :: !Int32
-- ^ We track the size of `internedDottedNames` explicitly since `HMS.size` is `O(n)`.
, internedTypes :: !(Map.Map P.TypeSum Int32)
, nextInternedTypeId :: !Int32
}
initEncodeEnv :: Version -> WithInterning -> EncodeEnv
@ -60,6 +63,8 @@ initEncodeEnv version withInterning =
, internedStrings = HMS.empty
, internedDottedNames = HMS.empty
, nextInternedDottedNameId = 0
, internedTypes = Map.empty
, nextInternedTypeId = 0
, ..
}
@ -290,7 +295,8 @@ encodeBuiltinType = P.Enumerated . Right . \case
BTTypeRep -> P.PrimTypeTYPE_REP
encodeType' :: Type -> Encode P.Type
encodeType' typ = fmap (P.Type . Just) $ case typ ^. _TApps of
encodeType' typ = do
ptyp <- case typ ^. _TApps of
(TVar var, args) -> do
type_VarVar <- encodeName unTypeVarName var
type_VarArgs <- encodeList encodeType' args
@ -326,10 +332,28 @@ encodeType' typ = fmap (P.Type . Just) $ case typ ^. _TApps of
-- which we don't support.
(TForall{}, _:_) -> error "Application of TForall"
(TSynApp{}, _:_) -> error "Application of TSynApp"
allocType ptyp
encodeType :: Type -> Encode (Just P.Type)
encodeType t = Just <$> encodeType' t
allocType :: P.TypeSum -> Encode P.Type
allocType ptyp = fmap (P.Type . Just) $ do
env@EncodeEnv{version, withInterning, internedTypes, nextInternedTypeId = n} <- get
if getWithInterning withInterning && version `supports` featureTypeInterning then
case ptyp `Map.lookup` internedTypes of
Just n -> pure (P.TypeSumInterned n)
Nothing -> do
when (n == maxBound) $
error "Type interning table grew too large"
put $! env
{ internedTypes = Map.insert ptyp n internedTypes
, nextInternedTypeId = n + 1
}
pure (P.TypeSumInterned n)
else
pure ptyp
------------------------------------------------------------------------
-- Encoding of expressions
------------------------------------------------------------------------
@ -855,12 +879,14 @@ encodePackageMetadata PackageMetadata{..} = do
encodePackage :: Package -> P.Package
encodePackage (Package version mods metadata) =
let env = initEncodeEnv version (WithInterning True)
((packageModules, packageMetadata), EncodeEnv{internedStrings, internedDottedNames}) =
((packageModules, packageMetadata), EncodeEnv{internedStrings, internedDottedNames, internedTypes}) =
runState ((,) <$> encodeNameMap encodeModule mods <*> traverse encodePackageMetadata metadata) env
packageInternedStrings =
V.fromList $ map (encodeString . fst) $ L.sortOn snd $ HMS.toList internedStrings
packageInternedDottedNames =
V.fromList $ map (P.InternedDottedName . V.fromList . fst) $ L.sortOn snd $ HMS.toList internedDottedNames
packageInternedTypes =
V.fromList $ map (P.Type . Just . fst) $ L.sortOn snd $ Map.toList internedTypes
in
P.Package{..}

View File

@ -23,5 +23,6 @@ data Error
| UnsupportedMinorVersion T.Text
| BadStringId Int32
| BadDottedNameId Int32
| BadTypeId Int32
| ExpectedTCon Type
deriving (Show, Eq)

View File

@ -4,8 +4,8 @@
-- Check that functional dependency metadata is added when available.
-- @SINCE-LF 1.8
-- @QUERY-LF .modules[] | .values[] | .name_with_type | select(lf::get_value_name($pkg) == ["$$$$fdFoo"]) | .type | .forall | select(.vars | length == 2) | .body | .struct | .fields | length == 1
-- @QUERY-LF .modules[] | .values[] | .name_with_type | select(lf::get_value_name($pkg) == ["$$$$fdBar"]) | .type | .forall | select(.vars | length == 5) | .body | .struct | .fields | length == 3
-- @QUERY-LF .modules[] | .values[] | .name_with_type | select(lf::get_value_name($pkg) == ["$$$$fdFoo"]) | .type | lf::norm_ty($pkg) | .forall | select(.vars | length == 2) | .body | lf::norm_ty($pkg) | .struct | .fields | length == 1
-- @QUERY-LF .modules[] | .values[] | .name_with_type | select(lf::get_value_name($pkg) == ["$$$$fdBar"]) | .type | lf::norm_ty($pkg) | .forall | select(.vars | length == 5) | .body | lf::norm_ty($pkg) | .struct | .fields | length == 3
module FunctionalDependencies where
class Foo a b | a -> b where

View File

@ -0,0 +1,25 @@
-- Copyright (c) 2020, Digital Asset (Switzerland) GmbH and/or its affiliates.
-- All rights reserved.
-- Test that interning of types using hash-consing works. We particularly
-- test that sharing works.
-- The code below should produce the following type interning table:
-- 0: a
-- 1: b
-- 2: $0 -> $1
-- 3: $2 -> $2
-- 4: forall a b. $3
-- @TODO Change as part of #7139
-- @SINCE-LF 1.dev
-- @QUERY-LF .interned_types | (length == 5) and (.[0] | .var.var_interned_str | isnormal) and (.[1] | .var.var_interned_str | isnormal) and (.[2].prim | (.args | map(.interned) == [0, 1]) and (.prim == "ARROW")) and (.[3].prim | (.args | map(.interned) == [2, 2]) and (.prim == "ARROW")) and (.[4].forall.body.interned == 3)
-- @QUERY-LF [.modules[].values[]] | all(.name_with_type.type.interned == 4)
module InternedTypes where
ap1: (a -> b) -> a -> b
ap1 f x = f x
-- A second copy to make sure we share across functions.
ap2: (a -> b) -> (a -> b)
ap2 f x = f x

View File

@ -6,10 +6,10 @@
-- @QUERY-LF [ .modules[] | .values[] | select(.name_with_type | lf::get_value_name($pkg) == ["$$fFooOptional0"]) ] | length == 1
-- @QUERY-LF [ .modules[] | .values[] | select(.name_with_type | lf::get_value_name($pkg) == ["$$$$om$$fFooOptional0"]) ] == []
-- @QUERY-LF .modules[] | .values[] | .name_with_type | select(lf::get_value_name($pkg) == ["$$$$om$$fFooOptional"]) | .type.struct.fields[0] | lf::get_field($pkg) == "OVERLAPPING"
-- @QUERY-LF .modules[] | .values[] | .name_with_type | select(lf::get_value_name($pkg) == ["$$$$om$$fFoof"]) | .type.struct.fields[0] | lf::get_field($pkg) == "OVERLAPPABLE"
-- @QUERY-LF .modules[] | .values[] | .name_with_type | select(lf::get_value_name($pkg) == ["$$$$om$$fFoox"]) | .type.struct.fields[0] | lf::get_field($pkg) == "OVERLAPS"
-- @QUERY-LF .modules[] | .values[] | .name_with_type | select(lf::get_value_name($pkg) == ["$$$$om$$fFooBool"]) | .type.struct.fields[0] | lf::get_field($pkg) == "INCOHERENT"
-- @QUERY-LF .modules[] | .values[] | .name_with_type | select(lf::get_value_name($pkg) == ["$$$$om$$fFooOptional"]) | .type | lf::norm_ty($pkg) | .struct.fields[0] | lf::get_field($pkg) == "OVERLAPPING"
-- @QUERY-LF .modules[] | .values[] | .name_with_type | select(lf::get_value_name($pkg) == ["$$$$om$$fFoof"]) | .type | lf::norm_ty($pkg) | .struct.fields[0] | lf::get_field($pkg) == "OVERLAPPABLE"
-- @QUERY-LF .modules[] | .values[] | .name_with_type | select(lf::get_value_name($pkg) == ["$$$$om$$fFoox"]) | .type | lf::norm_ty($pkg) | .struct.fields[0] | lf::get_field($pkg) == "OVERLAPS"
-- @QUERY-LF .modules[] | .values[] | .name_with_type | select(lf::get_value_name($pkg) == ["$$$$om$$fFooBool"]) | .type | lf::norm_ty($pkg) | .struct.fields[0] | lf::get_field($pkg) == "INCOHERENT"
module OverlapPragmas where

View File

@ -17,3 +17,5 @@ def get_field(pkg): .field_interned_str | resolve_interned_string(pkg);
def get_name(pkg): .name_interned_str | resolve_interned_string(pkg);
def get_text(pkg): .text_interned_str | resolve_interned_string(pkg);
def norm_ty(pkg): if has("interned") then pkg.interned_types[.interned] else . end;

View File

@ -13,3 +13,5 @@ def get_field(pkg): .field_str;
def get_name(pkg): .name_str;
def get_text(pkg): .text_str;
def norm_ty(pkg): .;

View File

@ -44,8 +44,9 @@
// 2019-11-12: Add Generic Map (GenMap)
// 2019-12-03: Add (experimental) text primitives.
// 2019-12-05: Add Generic Equality builtin
// 2019-13-10: Add ExerciseByKey Update
// 2020-10-??: Add choice-observers
// 2019-10-13: Add ExerciseByKey Update
// 2020-11-04: Add interning of types
// 2020-11-??: Add choice-observers
syntax = "proto3";
package daml_lf_1;
@ -397,6 +398,8 @@ message Type {
// use standard signed long for future usage.
sint64 nat = 11;
Syn syn = 12; // *Available in versions >= 1.8*
int32 interned = 13; // *Available in versions >= 1.dev*
}
reserved 6; // This was list. Removed in favour of PrimType.LIST
@ -1480,4 +1483,8 @@ message Package {
repeated string interned_strings = 2; // *Available in versions >= 1.6*
repeated InternedDottedName interned_dotted_names = 3; // *Available in versions >= 1.7*
PackageMetadata metadata = 4; // *Available and required in versions >= 1.8*
// Types in the interning table are only allowed to refer to interned types
// at smaller indices.
repeated Type interned_types = 5; // *Available in versions >= 1.dev*
}

View File

@ -51,14 +51,18 @@ private[archive] class DecodeV1(minor: LV.Minor) extends Decode.OfPackage[PLF.Pa
None
}
val env = DecoderEnv(
val env0 = DecoderEnv(
packageId,
internedStrings,
internedDottedNames,
IndexedSeq.empty,
Some(dependencyTracker),
None,
onlySerializableDataDefs,
)
val internedTypes = decodeInternedTypes(env0, lfPackage)
val env = env0.copy(internedTypes = internedTypes)
Package(
modules = lfPackage.getModulesList.asScala.map(env.decodeModule(_)),
directDeps = dependencyTracker.getDependencies,
@ -102,14 +106,19 @@ private[archive] class DecodeV1(minor: LV.Minor) extends Decode.OfPackage[PLF.Pa
throw ParseError(
s"expected exactly one module in proto package, found ${lfScenarioModule.getModulesCount} modules")
DecoderEnv(
val env0 = DecoderEnv(
packageId,
internedStrings,
internedDottedNames,
IndexedSeq.empty,
None,
None,
onlySerializableDataDefs = false
).decodeModule(lfScenarioModule.getModules(0))
)
val internedTypes =
decodeInternedTypes(env0, lfScenarioModule)
val env = env0.copy(internedTypes = internedTypes)
env.decodeModule(lfScenarioModule.getModules(0))
}
@ -138,6 +147,19 @@ private[archive] class DecodeV1(minor: LV.Minor) extends Decode.OfPackage[PLF.Pa
case Right(x) => x
}
private[archive] def decodeInternedTypes(
env: DecoderEnv,
lfPackage: PLF.Package,
): IndexedSeq[Type] = {
val lfTypes = lfPackage.getInternedTypesList
if (!lfTypes.isEmpty)
assertSince(LV.Features.internedTypes, "interned types table")
lfTypes.iterator.asScala
.foldLeft(new mutable.ArrayBuffer[Type](lfTypes.size)) { (buf, typ) =>
buf += env.copy(internedTypes = buf).decodeType(typ)
}
.toIndexedSeq
}
case class PackageDependencyTracker(self: PackageId) {
private val deps = mutable.Set.empty[PackageId]
def markDependency(pkgId: PackageId): Unit =
@ -150,6 +172,7 @@ private[archive] class DecodeV1(minor: LV.Minor) extends Decode.OfPackage[PLF.Pa
packageId: PackageId,
internedStrings: ImmArraySeq[String],
internedDottedNames: ImmArraySeq[DottedName],
internedTypes: IndexedSeq[Type],
optDependencyTracker: Option[PackageDependencyTracker],
optModuleName: Option[ModuleName],
onlySerializableDataDefs: Boolean
@ -653,6 +676,11 @@ private[archive] class DecodeV1(minor: LV.Minor) extends Decode.OfPackage[PLF.Pa
name => throw ParseError(s"TStruct: duplicate field $name"),
identity
))
case PLF.Type.SumCase.INTERNED =>
internedTypes.applyOrElse(
lfType.getInterned,
(index: Int) => throw ParseError(s"invalid internedTypes table index $index"),
)
case PLF.Type.SumCase.SUM_NOT_SET =>
throw ParseError("Type.SUM_NOT_SET")
}

View File

@ -48,12 +48,13 @@ class DecodeV1Spec
private def moduleDecoder(
minVersion: LV.Minor,
stringTable: ImmArraySeq[String] = ImmArraySeq.empty,
dottedNameTable: ImmArraySeq[DottedName] = ImmArraySeq.empty
dottedNameTable: ImmArraySeq[DottedName] = ImmArraySeq.empty,
) = {
new DecodeV1(minVersion).DecoderEnv(
Ref.PackageId.assertFromString("noPkgId"),
stringTable,
dottedNameTable,
IndexedSeq(),
None,
Some(dummyModuleName),
onlySerializableDataDefs = false
@ -120,6 +121,13 @@ class DecodeV1Spec
LV.Minor.Dev,
)
private val preTypeInterningVersions = Table(
"minVersion",
LV.Minor.Stable("6"),
LV.Minor.Stable("7"),
LV.Minor.Stable("8"),
)
private val postContractIdTextConversionVersions = Table(
"minVersion",
// FIXME: https://github.com/digital-asset/daml/issues/7139
@ -871,4 +879,33 @@ class DecodeV1Spec
}
}
"decodeInternedTypes" should {
def pkgWithInternedTypes: DamlLf1.Package = {
val typeNat1 = DamlLf1.Type.newBuilder().setNat(1).build()
DamlLf1.Package
.newBuilder()
.addInternedTypes(typeNat1)
.build()
}
"reject PackageMetadata if lf version < 1.8" in {
forEvery(preTypeInterningVersions) { minVersion =>
val decoder = new DecodeV1(minVersion)
val env = decoder.DecoderEnv(
Ref.PackageId.assertFromString("noPkgId"),
ImmArraySeq.empty,
ImmArraySeq.empty,
IndexedSeq.empty,
None,
None,
onlySerializableDataDefs = false
)
val parseError = the[ParseError] thrownBy decoder.decodeInternedTypes(
env,
pkgWithInternedTypes,
)
parseError.toString should include("interned types table is not supported")
}
}
}
}

View File

@ -65,6 +65,7 @@ object LanguageVersion {
val scenarioMustFailAtMsg = v1_dev
val contractIdTextConversions = v1_dev
val exerciseByKey = v1_dev
val internedTypes = v1_dev
/** Unstable, experimental features. This should stay in 1.dev forever.
* Features implemented with this flag should be moved to a separate

View File

@ -0,0 +1,31 @@
-- Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
-- SPDX-License-Identifier: Apache-2.0
module Data.Vector.Extended (
module Data.Vector,
constructNE,
) where
import Control.Monad.ST
import Data.Vector
import qualified Data.Vector.Mutable as M
-- | /O(n)/ Construct a vector with @n@ elements by repeatedly applying the
-- generator function to the already constructed part of the vector and the
-- index of the current element to construct.
constructNE :: forall a e. Int -> (Vector a -> Int -> Either e a) -> Either e (Vector a)
-- NOTE(MH): This is a copy of `Data.Vector.constructN` with small modifications
-- to pass the current index to `f` and to run in the `Either` monad.
constructNE !n f = runST $ do
v <- M.new n
v' <- unsafeFreeze v
fill v' 0
where
fill :: forall s. Vector a -> Int -> ST s (Either e (Vector a))
fill !v i | i < n = case f (unsafeTake i v) i of
Left e -> return (Left e)
Right x -> seq x $ do
v' <- unsafeThaw v
M.unsafeWrite v' i x
v'' <- unsafeFreeze v'
fill v'' (i+1)
fill v _ = return (Right v)