Add a common-polyglot-core-utils project (#5855)

Adds a common project that allows sharing code between the `runtime` and `std-bits`.

Due to classpath separation and the way it is compiled, the classes will be duplicated - we will have one copy for the `runtime` classpath and another copy as a small JAR for `Standard.Base` library.

This is still much better than having the code duplicated - now at least we have a single source of truth for the shared implementations.

Due to the copying, we should not expand this project too much, but I encourage putting here any methods that would otherwise require us to copy the code itself.

This may be a good place to put parts of the hashing logic to then allow sharing the logic between the `runtime` and the `MultiValueKey` in the `Table` library (cc: @Akirathan).
This commit is contained in:
Radosław Waśko 2023-03-11 10:27:26 +01:00 committed by GitHub
parent 725841467f
commit 263c3ad651
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 151 additions and 105 deletions

View File

@ -1504,6 +1504,7 @@ lazy val runtime = (project in file("engine/runtime"))
}.evaluated,
Benchmark / parallelExecution := false
)
.dependsOn(`common-polyglot-core-utils`)
.dependsOn(`runtime-language-epb`)
.dependsOn(`edition-updater`)
.dependsOn(`interpreter-dsl`)
@ -2025,22 +2026,37 @@ lazy val `std-base` = project
Compile / packageBin / artifactPath :=
`base-polyglot-root` / "std-base.jar",
libraryDependencies ++= Seq(
"com.ibm.icu" % "icu4j" % icuVersion,
"org.graalvm.truffle" % "truffle-api" % graalVersion % "provided",
"org.netbeans.api" % "org-openide-util-lookup" % netbeansApiVersion % "provided"
),
Compile / packageBin := Def.task {
val result = (Compile / packageBin).value
val _ensureCoreIsCompiled =
(`common-polyglot-core-utils` / Compile / packageBin).value
val _ = StdBits
.copyDependencies(
`base-polyglot-root`,
Some("std-base.jar"),
Seq("std-base.jar", "common-polyglot-core-utils.jar"),
ignoreScalaLibrary = true
)
.value
result
}.value
)
.dependsOn(`common-polyglot-core-utils`)
// Java utilities shared between the engine `runtime` and the standard library
// (`std-base`); both of those projects declare `.dependsOn` on this one.
// The packaged JAR is named `common-polyglot-core-utils.jar`; `std-base` lists
// that name in its `StdBits.copyDependencies` call so that the JAR is not
// removed as an unexpected file when dependencies are copied.
lazy val `common-polyglot-core-utils` = project
.in(file("lib/scala/common-polyglot-core-utils"))
.settings(
frgaalJavaCompilerSetting,
// Do not add the Scala standard library as an automatic dependency.
autoScalaLibrary := false,
// Write the packaged JAR under `base-polyglot-root`, the same directory that
// `std-base.jar` is written to.
Compile / packageBin / artifactPath :=
`base-polyglot-root` / "common-polyglot-core-utils.jar",
libraryDependencies ++= Seq(
"com.ibm.icu" % "icu4j" % icuVersion,
// "provided": needed on the compile classpath only.
"org.graalvm.truffle" % "truffle-api" % graalVersion % "provided"
)
)
lazy val `enso-test-java-helpers` = project
.in(file("test/Tests/polyglot-sources/enso-test-java-helpers"))
@ -2093,7 +2109,7 @@ lazy val `std-table` = project
val _ = StdBits
.copyDependencies(
`table-polyglot-root`,
Some("std-table.jar"),
Seq("std-table.jar"),
ignoreScalaLibrary = true
)
.value
@ -2118,7 +2134,7 @@ lazy val `std-image` = project
val _ = StdBits
.copyDependencies(
`image-polyglot-root`,
Some("std-image.jar"),
Seq("std-image.jar"),
ignoreScalaLibrary = true
)
.value
@ -2143,7 +2159,7 @@ lazy val `std-google-api` = project
val _ = StdBits
.copyDependencies(
`google-api-polyglot-root`,
Some("std-google-api.jar"),
Seq("std-google-api.jar"),
ignoreScalaLibrary = true
)
.value
@ -2172,7 +2188,7 @@ lazy val `std-database` = project
val _ = StdBits
.copyDependencies(
`database-polyglot-root`,
Some("std-database.jar"),
Seq("std-database.jar"),
ignoreScalaLibrary = true
)
.value

View File

@ -14,12 +14,12 @@ import java.time.LocalTime;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.DateTimeParseException;
import org.enso.interpreter.dsl.Builtin;
import org.enso.interpreter.runtime.EnsoContext;
import org.enso.interpreter.runtime.data.text.Text;
import org.enso.interpreter.runtime.library.dispatch.TypesLibrary;
import org.enso.polyglot.common_utils.Core_Date_Utils;
@ExportLibrary(InteropLibrary.class)
@ExportLibrary(TypesLibrary.class)
@ -71,12 +71,7 @@ public final class EnsoDateTime implements TruffleObject {
@Builtin.WrapException(from = DateTimeParseException.class)
@CompilerDirectives.TruffleBoundary
public static EnsoDateTime parse(String text) {
String iso = text;
if (text != null && text.length() > 10 && text.charAt(10) == ' ') {
var builder = new StringBuilder(iso);
builder.replace(10, 11, "T");
iso = builder.toString();
}
String iso = Core_Date_Utils.normaliseISODateTime(text);
var datetime = DATE_TIME_FORMATTER.parseBest(iso, ZonedDateTime::from, LocalDateTime::from);
if (datetime instanceof ZonedDateTime zdt) {
@ -273,17 +268,5 @@ public final class EnsoDateTime implements TruffleObject {
private static final EnsoDateTime epochStart =
EnsoDateTime.create(1582, 10, 15, 0, 0, 0, 0, EnsoTimeZone.parse("UTC"));
private static final DateTimeFormatter DATE_TIME_FORMATTER =
new DateTimeFormatterBuilder()
.append(DateTimeFormatter.ISO_LOCAL_DATE_TIME)
.optionalStart()
.parseLenient()
.appendOffsetId()
.optionalEnd()
.optionalStart()
.appendLiteral('[')
.parseCaseSensitive()
.appendZoneRegionId()
.appendLiteral(']')
.toFormatter();
private static final DateTimeFormatter DATE_TIME_FORMATTER = Core_Date_Utils.defaultZonedDateTimeFormatter();
}

View File

@ -1,6 +1,5 @@
package org.enso.interpreter.runtime.data.text;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.Normalizer2;
import com.oracle.truffle.api.CompilerDirectives;
import com.oracle.truffle.api.dsl.Cached;
@ -19,6 +18,7 @@ import org.enso.interpreter.runtime.library.dispatch.TypesLibrary;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import org.enso.interpreter.dsl.Builtin;
import org.enso.polyglot.common_utils.Core_Text_Utils;
/** The main runtime type for Enso's Text. */
@ExportLibrary(InteropLibrary.class)
@ -178,13 +178,7 @@ public final class Text implements TruffleObject {
@CompilerDirectives.TruffleBoundary
private int computeLength() {
BreakIterator iter = BreakIterator.getCharacterInstance();
iter.setText(toString());
int len = 0;
while (iter.next() != BreakIterator.DONE) {
len++;
}
return len;
return Core_Text_Utils.computeGraphemeLength(toString());
}
@CompilerDirectives.TruffleBoundary
@ -193,37 +187,7 @@ public final class Text implements TruffleObject {
boolean allowSideEffects,
@Cached("build()") @Cached.Shared("strings") ToJavaStringNode toJavaStringNode) {
String str = toJavaStringNode.execute(this);
int len = str.length();
int outputLength = len + 2; // Precise if there are no special characters.
// TODO This should be more extensible; while it's still a small fixed set,
// a switch is probably fastest (unconfirmed)
StringBuffer strBuf = new StringBuffer(outputLength);
strBuf.append('\'');
for (int i = 0; i < len; ++i) {
char c = str.charAt(i);
switch (c) {
case '\\' -> strBuf.append("\\\\");
case '\'' -> strBuf.append("\\'");
case '\n' -> strBuf.append("\\n");
case '\t' -> strBuf.append("\\t");
case '\0' -> strBuf.append("\\0");
case '\u0007' -> strBuf.append("\\a");
case '\u0008' -> strBuf.append("\\b");
case '\u000c' -> strBuf.append("\\f");
case '\r' -> strBuf.append("\\r");
case '\u000B' -> strBuf.append("\\v");
case '\u001B' -> strBuf.append("\\e");
default -> strBuf.append(c);
}
}
strBuf.append('\'');
return strBuf.toString();
return Core_Text_Utils.prettyPrint(str);
}
@ExportMessage

View File

@ -0,0 +1,29 @@
package org.enso.polyglot.common_utils;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
public class Core_Date_Utils {
  /**
   * Index of the character separating the date part from the time part in a date time string
   * whose date part is ten characters long (e.g. {@code 2020-01-01}).
   */
  private static final int DATE_TIME_SEPARATOR_INDEX = 10;

  /**
   * The default formatter, built once and shared.
   *
   * <p>It accepts an ISO local date time, optionally followed by an offset ID, optionally followed
   * by a zone region ID enclosed in square brackets. {@link DateTimeFormatter} is immutable and
   * thread-safe, so a single instance can be handed out to every caller instead of rebuilding an
   * identical formatter on each call.
   */
  private static final DateTimeFormatter DEFAULT_ZONED_DATE_TIME_FORMATTER =
      new DateTimeFormatterBuilder()
          .append(DateTimeFormatter.ISO_LOCAL_DATE_TIME)
          .optionalStart()
          .parseLenient()
          .appendOffsetId()
          .optionalEnd()
          .optionalStart()
          .appendLiteral('[')
          .parseCaseSensitive()
          .appendZoneRegionId()
          .appendLiteral(']')
          .toFormatter();

  /**
   * Replace space with T in ISO date time string to make it compatible with ISO format.
   *
   * <p>Only the character at index 10 is inspected; if it is a space it is replaced with {@code T}.
   * Any other input (including {@code null} and strings of 10 characters or fewer) is returned
   * unchanged.
   *
   * @param dateString Raw date time string with either space or T as separator
   * @return ISO format date time string
   */
  public static String normaliseISODateTime(String dateString) {
    boolean hasSpaceSeparator =
        dateString != null
            && dateString.length() > DATE_TIME_SEPARATOR_INDEX
            && dateString.charAt(DATE_TIME_SEPARATOR_INDEX) == ' ';
    if (!hasSpaceSeparator) {
      return dateString;
    }

    var builder = new StringBuilder(dateString);
    builder.setCharAt(DATE_TIME_SEPARATOR_INDEX, 'T');
    return builder.toString();
  }

  /** @return default Date Time formatter for parsing a Date_Time. */
  public static DateTimeFormatter defaultZonedDateTimeFormatter() {
    return DEFAULT_ZONED_DATE_TIME_FORMATTER;
  }
}

View File

@ -0,0 +1,51 @@
package org.enso.polyglot.common_utils;
import com.ibm.icu.text.BreakIterator;
public class Core_Text_Utils {
  /**
   * Measures a string in grapheme clusters rather than in UTF-16 code units.
   *
   * @param text the string to measure
   * @return the number of character boundaries the ICU character break iterator reports for
   *     {@code text}
   */
  public static int computeGraphemeLength(String text) {
    BreakIterator boundaries = BreakIterator.getCharacterInstance();
    boundaries.setText(text);
    int clusters = 0;
    for (int end = boundaries.next(); end != BreakIterator.DONE; end = boundaries.next()) {
      clusters++;
    }
    return clusters;
  }

  /**
   * Renders the string as a single-quoted literal, replacing special characters with their
   * backslash escape sequences.
   *
   * @param str the string to render
   * @return {@code str} wrapped in single quotes with special characters escaped
   */
  public static String prettyPrint(String str) {
    // Text plus the two surrounding quotes; exact when nothing needs escaping.
    StringBuilder quoted = new StringBuilder(str.length() + 2);
    quoted.append('\'');
    for (char c : str.toCharArray()) {
      String escaped = escapeSequenceFor(c);
      if (escaped == null) {
        quoted.append(c);
      } else {
        quoted.append(escaped);
      }
    }
    quoted.append('\'');
    return quoted.toString();
  }

  /**
   * Looks up the escape sequence for a single character.
   *
   * @return the escape sequence, or {@code null} if the character is emitted verbatim
   */
  private static String escapeSequenceFor(char c) {
    // TODO This should be more extensible; while it's still a small fixed set,
    //  a switch is probably fastest (unconfirmed)
    return switch (c) {
      case '\\' -> "\\\\";
      case '\'' -> "\\'";
      case '\n' -> "\\n";
      case '\t' -> "\\t";
      case '\0' -> "\\0";
      case '\u0007' -> "\\a";
      case '\u0008' -> "\\b";
      case '\u000c' -> "\\f";
      case '\r' -> "\\r";
      case '\u000B' -> "\\v";
      case '\u001B' -> "\\e";
      default -> null;
    };
  }
}

View File

@ -0,0 +1,20 @@
/**
* This package contains common utilities which can be used both by the engine runtime and the libraries.
* <p>
* This allows us to avoid duplicating code between the runtime and library projects for operations that need to be
* accessible on both sides.
* <p>
* The utilities that belong here are mostly operations that are builtins of the Enso language but also need to be used
* from our Java libraries where the cost of calling back to Enso methods is relatively high, so accessing the Java
* implementations directly is desirable. The primary example of that is the algorithm for computing the length of a
* string by counting the grapheme clusters.
* <p>
* Due to classpath separation, the class files of this package will be duplicated with one copy embedded in the engine
* and another attached as `common-polyglot-core-utils.jar` placed in the `polyglot` directory of the Standard.Base
* library. But it allows us to avoid duplicating the code, so we can have a single source of truth for each
* implementation.
* <p>
* Due to the copying, the project should not be expanded too much, but all utilities which would end up being
* duplicated are best moved here.
*/
package org.enso.polyglot.common_utils;

View File

@ -12,16 +12,17 @@ object StdBits {
* directory.
*
* @param destination location where to put the dependencies
* @param baseJarName name of the base generated JAR (if any); unexpected
* (old) files are removed, so this task needs to know
* this file's name to avoid removing it
* @param providedJarNames name of JARs generated by the local projects;
* unexpected (old) files are removed, so this task
* needs to know these files' names to avoid removing
* them
* @param ignoreScalaLibrary whether to ignore Scala dependencies that are
added by default by SBT and are not relevant in
* pure-Java projects
*/
def copyDependencies(
destination: File,
baseJarName: Option[String],
providedJarNames: Seq[String],
ignoreScalaLibrary: Boolean
): Def.Initialize[Task[Unit]] =
Def.task {
@ -50,7 +51,7 @@ object StdBits {
Tracked.diffInputs(dependencyStore, FileInfo.hash)(relevantFiles.toSet) {
report =>
val expectedFileNames =
report.checked.map(file => file.getName) ++ baseJarName.toSeq
report.checked.map(file => file.getName) ++ providedJarNames
for (existing <- IO.listFiles(destination)) {
if (!expectedFileNames.contains(existing.getName)) {
log.info(

View File

@ -13,6 +13,7 @@ import org.enso.base.text.CaseFoldedString;
import org.enso.base.text.CaseFoldedString.Grapheme;
import org.enso.base.text.GraphemeSpan;
import org.enso.base.text.Utf16Span;
import org.enso.polyglot.common_utils.Core_Text_Utils;
/** Utils for standard library operations on Text. */
public class Text_Utils {
@ -268,14 +269,8 @@ public class Text_Utils {
* @param str the string to measure
* @return length of the string
*/
private static long grapheme_length(String str) {
BreakIterator iter = BreakIterator.getCharacterInstance();
iter.setText(str);
long len = 0;
while (iter.next() != BreakIterator.DONE) {
len++;
}
return len;
public static long grapheme_length(String str) {
return Core_Text_Utils.computeGraphemeLength(str);
}
/** Returns a prefix of the string not exceeding the provided grapheme length. */
@ -566,4 +561,9 @@ public class Text_Utils {
sb.append(str, current_ix, str.length());
return sb.toString();
}
/** Pretty prints the string, escaping special characters. */
public static String pretty_print(String str) {
return Core_Text_Utils.prettyPrint(str);
}
}

View File

@ -4,6 +4,7 @@ import org.enso.base.time.Date_Time_Utils;
import org.enso.base.time.Date_Utils;
import org.enso.base.time.TimeUtilsBase;
import org.enso.base.time.Time_Of_Day_Utils;
import org.enso.polyglot.common_utils.Core_Date_Utils;
import org.graalvm.polyglot.Value;
import java.time.DateTimeException;
@ -74,10 +75,7 @@ public class Time_Utils {
/** @return default Date Time formatter for parsing a Date_Time. */
public static DateTimeFormatter default_zoned_date_time_formatter() {
return new DateTimeFormatterBuilder().append(DateTimeFormatter.ISO_LOCAL_DATE_TIME)
.optionalStart().parseLenient().appendOffsetId().optionalEnd()
.optionalStart().appendLiteral('[').parseCaseSensitive().appendZoneRegionId().appendLiteral(']')
.toFormatter();
return Core_Date_Utils.defaultZonedDateTimeFormatter();
}
/** @return default Date Time formatter for writing a Date_Time. */
@ -94,13 +92,7 @@ public class Time_Utils {
* @return ISO format date time string
*/
public static String normaliseISODateTime(String dateString) {
if (dateString != null && dateString.length() > 10 && dateString.charAt(10) == ' ') {
var builder = new StringBuilder(dateString);
builder.replace(10, 11, "T");
return builder.toString();
}
return dateString;
return Core_Date_Utils.normaliseISODateTime(dateString);
}
public static String local_date_format(LocalDate date, Object format) {

View File

@ -1,6 +1,7 @@
package org.enso.table.aggregations;
import com.ibm.icu.text.BreakIterator;
import org.enso.base.Text_Utils;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.table.Column;
import org.enso.table.data.table.problems.InvalidAggregation;
@ -26,12 +27,12 @@ public class ShortestOrLongest extends Aggregator {
for (int row : indexes) {
Object value = storage.getItemBoxed(row);
if (value != null) {
if (!(value instanceof String)) {
if (!(value instanceof String asString)) {
this.addProblem(new InvalidAggregation(this.getName(), row, "Not a text value."));
return null;
}
long valueLength = GraphemeLength((String) value);
long valueLength = Text_Utils.grapheme_length(asString);
if (current == null || Long.compare(valueLength, length) == minOrMax) {
length = valueLength;
current = value;
@ -41,16 +42,4 @@ public class ShortestOrLongest extends Aggregator {
return current;
}
private static long GraphemeLength(String text) {
BreakIterator iter = BreakIterator.getCharacterInstance();
iter.setText(text);
int count = 0;
for (int end = iter.next(); end != BreakIterator.DONE; end = iter.next()) {
count++;
}
return count;
}
}

View File

@ -1,5 +1,6 @@
package org.enso.table.data.table;
import org.enso.base.Text_Utils;
import org.enso.base.polyglot.Polyglot_Utils;
import org.enso.table.data.column.builder.object.InferredBuilder;
import org.enso.table.data.column.storage.BoolStorage;
@ -40,8 +41,8 @@ public class Column {
throw new IllegalArgumentException("Column name cannot be empty.");
}
if (name.indexOf('\0') >= 0) {
// TODO pretty?
throw new IllegalArgumentException("Column name "+name+" must not contain the NUL character.");
String pretty = Text_Utils.pretty_print(name);
throw new IllegalArgumentException("Column name "+pretty+" must not contain the NUL character.");
}
}