Downgrade hashing to SHA-1 and other optimizations (#5791)

This change downgrades the hashing algorithm used for caching IR and library bindings from SHA3-224 to SHA-1. SHA-1 is significantly faster and remains perfectly sufficient for the simple integrity checksum we use it for.
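For reference, the checksum in question boils down to something like the following sketch (using the JDK's built-in `MessageDigest` here; the actual code uses Bouncy Castle's `SHA1.Digest`, which implements the same interface):

```java
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;

final class Sha1Checksum {
  // Hex-encoded SHA-1 digest of the given text. SHA-1 is no longer
  // collision-resistant enough for security purposes, but as a freshness
  // checksum for locally written cache files it is more than enough,
  // and noticeably cheaper to compute than SHA3-224.
  static String checksum(String source) throws Exception {
    MessageDigest sha1 = MessageDigest.getInstance("SHA-1");
    byte[] hash = sha1.digest(source.getBytes(StandardCharsets.UTF_8));
    StringBuilder hex = new StringBuilder(hash.length * 2);
    for (byte b : hash) {
      hex.append(String.format("%02x", b));
    }
    return hex.toString();
  }

  public static void main(String[] args) throws Exception {
    System.out.println(checksum("main = IO.println 'Hello'"));
  }
}
```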

Additionally, skip calculating the digest of the serialized cache bytes: if deserialization yields an object of the expected type, we can already be confident about the data's integrity.
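A sketch of that idea with illustrative types (the real implementation does this in `validateReadObject`, shown in the diff below):

```java
// Illustrative sketch: no digest of the serialized blob is computed.
// A corrupted or truncated blob either fails to deserialize or yields
// an object of an unexpected type; in both cases the cache entry is
// simply discarded and recomputed.
final class TypeBasedValidation {
  record CachedIr(String serializedIr) {}  // stand-in for the real IR type

  static CachedIr validate(Object readObject) {
    if (readObject instanceof CachedIr ir) {
      return ir;  // expected type: trust the payload, skip the blob hash
    }
    return null;  // unexpected type: treat the cache entry as invalid
  }

  public static void main(String[] args) {
    System.out.println(validate(new CachedIr("...")));  // accepted
    System.out.println(validate("garbage"));            // rejected: null
  }
}
```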

Don't initialize Jackson's `ObjectMapper` for every metadata serialization/deserialization; constructing an `ObjectMapper` is very costly, so each cache now reuses a single shared instance.
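The fix amounts to hoisting the mapper into a shared static field, roughly like this (a simplified sketch; the `Metadata` shape here merely mirrors the cache metadata records):

```java
import com.fasterxml.jackson.databind.ObjectMapper;

final class MetadataCodec {
  // One ObjectMapper for the lifetime of the class: construction is the
  // expensive part, while the mapper itself is thread-safe for plain
  // readValue/writeValueAsString use.
  private static final ObjectMapper MAPPER = new ObjectMapper();

  public static class Metadata {
    public String sourceHash;
    public String blobHash;
  }

  static String toJson(Metadata meta) throws Exception {
    return MAPPER.writeValueAsString(meta);  // reuse, never re-create
  }

  static Metadata fromJson(String json) throws Exception {
    return MAPPER.readValue(json, Metadata.class);
  }
}
```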

Avoid unnecessary conversions between Scala and Java collections. The back-and-forth `asScala` and `asJava` wrappers are quite expensive on these hot paths.
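For example, building a cache path no longer round-trips through Scala collections just to call `mkString`; joining on the Java side with commons-lang's `StringUtils` does the same job (a simplified sketch of the pattern used in the diff below):

```java
import java.nio.file.Path;
import org.apache.commons.lang3.StringUtils;

final class CachePaths {
  // Before (simplified): CollectionConverters.ListHasAsScala(segments)
  //   .asScala().mkString("/") -- wrapping the list costs more than the join.
  // After: join the plain Java array directly.
  static Path cacheRoot(Path irCacheDirectory, String... segments) {
    return irCacheDirectory.resolve(StringUtils.join(segments, "/"));
  }

  public static void main(String[] args) {
    System.out.println(cacheRoot(
        Path.of("/tmp/ir-cache"), "Standard", "Base", "0.0.0-dev"));
  }
}
```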

Finally, fix an SBT warning emitted when generating the library cache.

Closes https://github.com/enso-org/enso/issues/5763

# Important Notes
The change cuts roughly 0.8-1s from the overall startup time.
It will certainly invalidate existing caches, so it is advised to simply start with a clean slate.
Author: Hubert Plociniczak, 2023-03-09 08:36:59 +01:00 (committed by GitHub)
Commit: 6769ab0ee7 (parent: 5cf58d611b)
10 changed files with 111 additions and 45 deletions


@@ -605,6 +605,7 @@
- [Update to GraalVM 22.3.1][5602]
- [Cache library bindings to optimize import/export resolution][5700]
- [Comparators support partial ordering][5778]
- [Use SHA-1 for calculating hashes of modules' IR and bindings][5791]
[3227]: https://github.com/enso-org/enso/pull/3227
[3248]: https://github.com/enso-org/enso/pull/3248
@@ -703,6 +704,7 @@
[5602]: https://github.com/enso-org/enso/pull/5602
[5700]: https://github.com/enso-org/enso/pull/5700
[5778]: https://github.com/enso-org/enso/pull/5778
[5791]: https://github.com/enso-org/enso/pull/5791
# Enso 2.0.0-alpha.18 (2021-10-12)


@@ -142,9 +142,9 @@ It is a JSON file as follows:
}
```
All hashes are encoded in SHA3-224 format, as is used by other components in the
Engine. The engine version is encoded in the cache path, and hence does not need
to be explicitly specified in the metadata.
All hashes are encoded in SHA1 format, for performance reasons. The engine
version is encoded in the cache path, and hence does not need to be explicitly
specified in the metadata.
### Portability Guarantees


@@ -2,7 +2,7 @@ package org.enso.compiler;
import com.oracle.truffle.api.TruffleFile;
import com.oracle.truffle.api.TruffleLogger;
import org.bouncycastle.jcajce.provider.digest.SHA3;
import org.bouncycastle.jcajce.provider.digest.SHA1;
import org.bouncycastle.util.encoders.Hex;
import org.enso.interpreter.runtime.EnsoContext;
import org.enso.logger.masking.MaskedPath;
@@ -188,10 +188,12 @@ public abstract class Cache<T, M extends Cache.Metadata> {
return loadedCache;
}
logger.log(logLevel, "Unable to load a cache for module [" + stringRepr + "]");
logger.log(logLevel, "Unable to load a cache [" + stringRepr + "]");
} catch (IOException e) {
logger.log(
Level.WARNING, "Unable to load a cache for module [" + stringRepr + "]", e);
Level.WARNING,
"Unable to load a cache [" + stringRepr + "]: " + e.getMessage(),
e);
}
return Optional.empty();
});
@@ -212,15 +214,18 @@
TruffleFile metadataPath = getCacheMetadataPath(cacheRoot);
TruffleFile dataPath = getCacheDataPath(cacheRoot);
Optional<M> optMeta = loadCacheMetadata(metadataPath);
Optional<M> optMeta = loadCacheMetadata(metadataPath, logger);
if (optMeta.isPresent()) {
M meta = optMeta.get();
boolean sourceDigestValid =
computeDigestFromSource(context, logger)
.map(digest -> digest.equals(meta.sourceHash()))
.orElseGet(() -> false);
!needsSourceDigestVerification()
|| computeDigestFromSource(context, logger)
.map(digest -> digest.equals(meta.sourceHash()))
.orElseGet(() -> false);
byte[] blobBytes = dataPath.readAllBytes();
boolean blobDigestValid = computeDigestFromBytes(blobBytes).equals(meta.blobHash());
boolean blobDigestValid =
!needsDataDigestVerification()
|| computeDigestFromBytes(blobBytes).equals(meta.blobHash());
if (sourceDigestValid && blobDigestValid) {
Object readObject;
@@ -270,6 +275,18 @@ public abstract class Cache<T, M extends Cache.Metadata> {
}
}
/**
* Flag indicating if the de-serialization process should compute the hash of the sources from
* which the cache was created and compare it with the stored metadata entry.
*/
protected abstract boolean needsSourceDigestVerification();
/**
* Flag indicating if the de-serialization process should compute the hash of the stored cache and
* compare it with the stored metadata entry.
*/
protected abstract boolean needsDataDigestVerification();
/**
* Validates the deserialized data by returning the expected cached entry, or [[null]].
*
@@ -288,9 +305,9 @@ public abstract class Cache<T, M extends Cache.Metadata> {
* @param path location of the serialized metadata
* @return deserialized metadata, or [[None]] if invalid
*/
private Optional<M> loadCacheMetadata(TruffleFile path) throws IOException {
private Optional<M> loadCacheMetadata(TruffleFile path, TruffleLogger logger) throws IOException {
if (path.isReadable()) {
return metadataFromBytes(path.readAllBytes());
return metadataFromBytes(path.readAllBytes(), logger);
} else {
return Optional.empty();
}
@@ -302,7 +319,7 @@
* @param bytes raw bytes representing metadata
* @return non-empty metadata, if de-serialization was successful
*/
protected abstract Optional<M> metadataFromBytes(byte[] bytes);
protected abstract Optional<M> metadataFromBytes(byte[] bytes, TruffleLogger logger);
/**
* Compute digest of cache's data
@@ -363,7 +380,7 @@
* @return digest used for computing hashes
*/
protected MessageDigest messageDigest() {
return new SHA3.Digest224();
return new SHA1.Digest();
}
/**


@@ -6,6 +6,7 @@ import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.oracle.truffle.api.TruffleFile;
import com.oracle.truffle.api.TruffleLogger;
import org.apache.commons.lang3.StringUtils;
import org.enso.compiler.data.BindingsMap;
import org.enso.editions.LibraryName;
import org.enso.interpreter.runtime.EnsoContext;
@@ -35,14 +36,23 @@ public final class ImportExportCache extends Cache<ImportExportCache.CachedBindi
@Override
protected byte[] metadata(String sourceDigest, String blobDigest, CachedBindings entry) {
var mapper = new ObjectMapper();
try {
return mapper.writeValueAsString(new Metadata(sourceDigest, blobDigest)).getBytes(metadataCharset);
return objectMapper.writeValueAsString(new Metadata(sourceDigest, blobDigest)).getBytes(metadataCharset);
} catch (JsonProcessingException e) {
throw new RuntimeException(e);
}
}
@Override
protected boolean needsSourceDigestVerification() {
return true;
}
@Override
protected boolean needsDataDigestVerification() {
return false;
}
@Override
protected CachedBindings validateReadObject(Object obj, Metadata meta, TruffleLogger logger) throws CacheException {
if (obj instanceof MapToBindings bindings) {
@@ -53,12 +63,13 @@ public final class ImportExportCache extends Cache<ImportExportCache.CachedBindi
}
@Override
protected Optional<Metadata> metadataFromBytes(byte[] bytes) {
protected Optional<Metadata> metadataFromBytes(byte[] bytes, TruffleLogger logger) {
var maybeJsonString = new String(bytes, Cache.metadataCharset);
var mapper = new ObjectMapper();
try {
return Optional.of(mapper.readValue(maybeJsonString, ImportExportCache.Metadata.class));
return Optional.of(objectMapper.readValue(maybeJsonString, ImportExportCache.Metadata.class));
} catch (JsonProcessingException e) {
logger.log(logLevel, "Failed to deserialize library's metadata: " + e.getMessage(), e);
return Optional.empty();
}
}
@@ -84,15 +95,15 @@ public final class ImportExportCache extends Cache<ImportExportCache.CachedBindi
pkg.getBindingsCacheRootForPackage(Info.ensoVersion());
var localCacheRoot = bindingsCacheRoot.resolve(libraryName.namespace());
var distribution = context.getDistributionManager();
var pathSegments = CollectionConverters.ListHasAsScala(Arrays.asList(
var pathSegments = new String[]{
pkg.namespace(),
pkg.name(),
pkg.config().version(),
Info.ensoVersion(),
libraryName.namespace()
)).asScala();
};
var path = distribution.LocallyInstalledDirectories().irCacheDirectory()
.resolve(pathSegments.mkString("/"));
.resolve(StringUtils.join(pathSegments, "/"));
var globalCacheRoot = context.getTruffleFile(path.toFile());
return new Cache.Roots(localCacheRoot, globalCacheRoot);
});
@@ -143,6 +154,9 @@ public final class ImportExportCache extends Cache<ImportExportCache.CachedBindi
@JsonProperty("blob_hash") String blobHash) implements Cache.Metadata {}
private static final String bindingsCacheDataExtension = ".bindings";
private static final String bindingsCacheMetadataExtension = ".bindings.meta";
private final static ObjectMapper objectMapper = new ObjectMapper();
}


@@ -6,10 +6,10 @@ import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.oracle.truffle.api.TruffleLogger;
import com.oracle.truffle.api.source.Source;
import org.apache.commons.lang3.StringUtils;
import org.enso.compiler.core.IR;
import org.enso.interpreter.runtime.EnsoContext;
import org.enso.interpreter.runtime.Module;
import scala.jdk.CollectionConverters;
import org.enso.interpreter.runtime.builtin.Builtins;
import java.io.IOException;
@@ -33,14 +33,23 @@ public final class ModuleCache extends Cache<ModuleCache.CachedModule, ModuleCac
@Override
protected byte[] metadata(String sourceDigest, String blobDigest, CachedModule entry) {
var mapper = new ObjectMapper();
try {
return mapper.writeValueAsBytes(new Metadata(sourceDigest, blobDigest, entry.compilationStage().toString()));
return objectMapper.writeValueAsBytes(new Metadata(sourceDigest, blobDigest, entry.compilationStage().toString()));
} catch (JsonProcessingException e) {
throw new RuntimeException(e);
}
}
@Override
protected boolean needsSourceDigestVerification() {
return true;
}
@Override
protected boolean needsDataDigestVerification() {
return false;
}
@Override
protected CachedModule validateReadObject(Object obj, Metadata meta, TruffleLogger logger) throws CacheException {
if (obj instanceof IR.Module ir) {
@@ -55,12 +64,12 @@ public final class ModuleCache extends Cache<ModuleCache.CachedModule, ModuleCac
}
@Override
protected Optional<Metadata> metadataFromBytes(byte[] bytes) {
protected Optional<Metadata> metadataFromBytes(byte[] bytes, TruffleLogger logger) {
var maybeJsonString = new String(bytes, Cache.metadataCharset);
var mapper = new ObjectMapper();
try {
return Optional.of(mapper.readValue(maybeJsonString, Metadata.class));
return Optional.of(objectMapper.readValue(maybeJsonString, Metadata.class));
} catch (JsonProcessingException e) {
logger.log(logLevel, "Failed to deserialize module's metadata: " + e.getMessage(), e);
return Optional.empty();
}
}
@@ -111,9 +120,8 @@ public final class ModuleCache extends Cache<ModuleCache.CachedModule, ModuleCac
Info.ensoVersion()
));
pathSegmentsJava.addAll(qualName.pathAsJava());
var pathSegments = CollectionConverters.ListHasAsScala(pathSegmentsJava).asScala();
var path = distribution.LocallyInstalledDirectories().irCacheDirectory()
.resolve(pathSegments.mkString("/"));
.resolve(StringUtils.join(pathSegmentsJava, "/"));
var globalCacheRoot = context.getTruffleFile(path.toFile());
return new Cache.Roots(localCacheRoot, globalCacheRoot);
@@ -128,9 +136,8 @@ public final class ModuleCache extends Cache<ModuleCache.CachedModule, ModuleCac
Info.ensoVersion()
));
pathSegmentsJava.addAll(module.getName().pathAsJava());
var pathSegments = CollectionConverters.ListHasAsScala(pathSegmentsJava).asScala();
var path = distribution.LocallyInstalledDirectories().irCacheDirectory()
.resolve(pathSegments.mkString("/"));
.resolve(StringUtils.join(pathSegmentsJava, "/"));
var globalCacheRoot = context.getTruffleFile(path.toFile());
return Optional.of(new Cache.Roots(globalCacheRoot, globalCacheRoot));
@@ -171,6 +178,9 @@ public final class ModuleCache extends Cache<ModuleCache.CachedModule, ModuleCac
@JsonProperty("compilation_stage") String compilationStage) implements Cache.Metadata {}
private final static String irCacheDataExtension = ".ir";
private final static String irCacheMetadataExtension = ".meta";
private final static ObjectMapper objectMapper = new ObjectMapper();
}


@@ -6,6 +6,7 @@ import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.oracle.truffle.api.TruffleFile;
import com.oracle.truffle.api.TruffleLogger;
import org.apache.commons.lang3.StringUtils;
import org.enso.editions.LibraryName;
import org.enso.interpreter.runtime.EnsoContext;
import org.enso.pkg.SourceFile;
@@ -46,6 +47,16 @@ public final class SuggestionsCache
}
}
@Override
protected boolean needsSourceDigestVerification() {
return true;
}
@Override
protected boolean needsDataDigestVerification() {
return false;
}
@Override
protected CachedSuggestions validateReadObject(Object obj, Metadata meta, TruffleLogger logger)
throws CacheException {
@@ -57,11 +68,12 @@ public final class SuggestionsCache
}
@Override
protected Optional<Metadata> metadataFromBytes(byte[] bytes) {
protected Optional<Metadata> metadataFromBytes(byte[] bytes, TruffleLogger logger) {
var maybeJsonString = new String(bytes, Cache.metadataCharset);
try {
return Optional.of(objectMapper.readValue(maybeJsonString, SuggestionsCache.Metadata.class));
} catch (JsonProcessingException e) {
logger.log(logLevel, "Failed to deserialize suggestions' metadata: " + e.getMessage(), e);
return Optional.empty();
}
}
@@ -86,15 +98,15 @@ public final class SuggestionsCache
var bindingsCacheRoot = pkg.getSuggestionsCacheRootForPackage(Info.ensoVersion());
var localCacheRoot = bindingsCacheRoot.resolve(libraryName.namespace());
var distribution = context.getDistributionManager();
var pathSegments = CollectionConverters.ListHasAsScala(Arrays.asList(
var pathSegments = new String[]{
pkg.namespace(),
pkg.name(),
pkg.config().version(),
Info.ensoVersion(),
libraryName.namespace())
).asScala();
libraryName.namespace()
};
var path = distribution.LocallyInstalledDirectories().irCacheDirectory()
.resolve(pathSegments.mkString("/"));
.resolve(StringUtils.join(pathSegments, "/"));
var globalCacheRoot = context.getTruffleFile(path.toFile());
return new Cache.Roots(localCacheRoot, globalCacheRoot);
});


@@ -42,6 +42,7 @@ import com.oracle.truffle.api.TruffleLogger;
import com.oracle.truffle.api.nodes.Node;
import com.oracle.truffle.api.object.Shape;
import java.util.concurrent.ExecutorService;
import java.util.stream.StreamSupport;
import scala.jdk.javaapi.OptionConverters;
@@ -357,7 +358,7 @@ public class EnsoContext {
if (file == null) {
return Optional.empty();
}
return ScalaConversions.asJava(packageRepository.getLoadedPackages()).stream()
return StreamSupport.stream(packageRepository.getLoadedPackagesJava().spliterator(), true)
.filter(pkg -> file.getAbsoluteFile().startsWith(pkg.root().getAbsoluteFile()))
.findFirst();
}


@@ -2,6 +2,7 @@ package org.enso.compiler
import com.oracle.truffle.api.TruffleFile
import com.typesafe.scalalogging.Logger
import org.apache.commons.lang3.StringUtils
import org.enso.distribution.locking.ResourceManager
import org.enso.distribution.{DistributionManager, LanguageHome}
import org.enso.editions.updater.EditionManager
@ -32,6 +33,7 @@ import org.enso.text.buffer.Rope
import java.nio.file.Path
import scala.collection.immutable.ListSet
import scala.jdk.OptionConverters.RichOption
import scala.jdk.CollectionConverters.{IterableHasAsJava, SeqHasAsJava}
import scala.util.{Failure, Try, Using}
/** Manages loaded packages and modules. */
@@ -61,6 +63,9 @@ trait PackageRepository {
/** Get a sequence of currently loaded packages. */
def getLoadedPackages: Seq[Package[TruffleFile]]
/** Get a sequence of currently loaded packages. */
def getLoadedPackagesJava: java.lang.Iterable[Package[TruffleFile]]
/** Get a sequence of currently loaded modules. */
def getLoadedModules: Seq[Module]
@@ -574,6 +579,10 @@ object PackageRepository {
override def getLoadedPackages: Seq[Package[TruffleFile]] =
loadedPackages.values.toSeq.flatten
override def getLoadedPackagesJava
: java.lang.Iterable[Package[TruffleFile]] =
loadedPackages.flatMap(_._2).asJava
/** @inheritdoc */
override def getLoadedModule(qualifiedName: String): Option[Module] =
loadedModules.get(qualifiedName)
@@ -604,8 +613,6 @@
syntheticModule: Module,
refs: List[QualifiedName]
): Unit = {
import scala.jdk.CollectionConverters._
assert(syntheticModule.isSynthetic)
if (!loadedModules.contains(syntheticModule.getName.toString)) {
loadedModules.put(syntheticModule.getName.toString, syntheticModule)
@@ -708,10 +715,9 @@
}
private def readManifest(file: TruffleFile): Try[String] = {
import scala.jdk.CollectionConverters._
if (file.exists())
Using(file.newBufferedReader) { reader =>
reader.lines().iterator().asScala.mkString("\n")
StringUtils.join(reader.lines().iterator(), "\n")
}
else Failure(PackageManager.PackageNotFound())
}


@@ -11,9 +11,13 @@ import scala.jdk.CollectionConverters._
* @param item the name of the item
*/
case class QualifiedName(path: List[String], item: String) {
lazy val qualifiedNameString: String =
(path :+ item).mkString(QualifiedName.separator)
@CompilerDirectives.TruffleBoundary
override def toString: String =
(path :+ item).mkString(QualifiedName.separator)
qualifiedNameString
/** Get the parent of this qualified name.
*


@@ -205,8 +205,8 @@ object DistributionPackage {
"--compile",
path.toString
)
log.info(command.mkString(" "))
val exitCode = command.!
log.debug(command.mkString(" "))
val exitCode = Process(command, None, "JAVA_OPTS"->"-Dorg.jline.terminal.dumb=true").!
if (exitCode != 0) {
throw new RuntimeException(s"Cannot compile $libMajor.$libName.")
}