mirror of
https://github.com/enso-org/enso.git
synced 2024-12-22 16:11:45 +03:00
Add a common-polyglot-core-utils
project (#5855)
Adds a common project that allows sharing code between the `runtime` and `std-bits`. Due to classpath separation and the way it is compiled, the classes will be duplicated - we will have one copy for the `runtime` classpath and another copy as a small JAR for the `Standard.Base` library. This is still much better than having the code duplicated - now at least we have a single source of truth for the shared implementations. Because of this copying we should not expand the project too much, but I encourage putting here any methods that would otherwise require us to copy the code itself. This may be a good place to put parts of the hashing logic, which would then allow sharing that logic between the `runtime` and the `MultiValueKey` in the `Table` library (cc: @Akirathan).
This commit is contained in:
parent
725841467f
commit
263c3ad651
28
build.sbt
28
build.sbt
@ -1504,6 +1504,7 @@ lazy val runtime = (project in file("engine/runtime"))
|
||||
}.evaluated,
|
||||
Benchmark / parallelExecution := false
|
||||
)
|
||||
.dependsOn(`common-polyglot-core-utils`)
|
||||
.dependsOn(`runtime-language-epb`)
|
||||
.dependsOn(`edition-updater`)
|
||||
.dependsOn(`interpreter-dsl`)
|
||||
@ -2025,22 +2026,37 @@ lazy val `std-base` = project
|
||||
Compile / packageBin / artifactPath :=
|
||||
`base-polyglot-root` / "std-base.jar",
|
||||
libraryDependencies ++= Seq(
|
||||
"com.ibm.icu" % "icu4j" % icuVersion,
|
||||
"org.graalvm.truffle" % "truffle-api" % graalVersion % "provided",
|
||||
"org.netbeans.api" % "org-openide-util-lookup" % netbeansApiVersion % "provided"
|
||||
),
|
||||
Compile / packageBin := Def.task {
|
||||
val result = (Compile / packageBin).value
|
||||
val _ensureCoreIsCompiled =
|
||||
(`common-polyglot-core-utils` / Compile / packageBin).value
|
||||
val _ = StdBits
|
||||
.copyDependencies(
|
||||
`base-polyglot-root`,
|
||||
Some("std-base.jar"),
|
||||
Seq("std-base.jar", "common-polyglot-core-utils.jar"),
|
||||
ignoreScalaLibrary = true
|
||||
)
|
||||
.value
|
||||
result
|
||||
}.value
|
||||
)
|
||||
.dependsOn(`common-polyglot-core-utils`)
|
||||
|
||||
lazy val `common-polyglot-core-utils` = project
|
||||
.in(file("lib/scala/common-polyglot-core-utils"))
|
||||
.settings(
|
||||
frgaalJavaCompilerSetting,
|
||||
autoScalaLibrary := false,
|
||||
Compile / packageBin / artifactPath :=
|
||||
`base-polyglot-root` / "common-polyglot-core-utils.jar",
|
||||
libraryDependencies ++= Seq(
|
||||
"com.ibm.icu" % "icu4j" % icuVersion,
|
||||
"org.graalvm.truffle" % "truffle-api" % graalVersion % "provided"
|
||||
)
|
||||
)
|
||||
|
||||
lazy val `enso-test-java-helpers` = project
|
||||
.in(file("test/Tests/polyglot-sources/enso-test-java-helpers"))
|
||||
@ -2093,7 +2109,7 @@ lazy val `std-table` = project
|
||||
val _ = StdBits
|
||||
.copyDependencies(
|
||||
`table-polyglot-root`,
|
||||
Some("std-table.jar"),
|
||||
Seq("std-table.jar"),
|
||||
ignoreScalaLibrary = true
|
||||
)
|
||||
.value
|
||||
@ -2118,7 +2134,7 @@ lazy val `std-image` = project
|
||||
val _ = StdBits
|
||||
.copyDependencies(
|
||||
`image-polyglot-root`,
|
||||
Some("std-image.jar"),
|
||||
Seq("std-image.jar"),
|
||||
ignoreScalaLibrary = true
|
||||
)
|
||||
.value
|
||||
@ -2143,7 +2159,7 @@ lazy val `std-google-api` = project
|
||||
val _ = StdBits
|
||||
.copyDependencies(
|
||||
`google-api-polyglot-root`,
|
||||
Some("std-google-api.jar"),
|
||||
Seq("std-google-api.jar"),
|
||||
ignoreScalaLibrary = true
|
||||
)
|
||||
.value
|
||||
@ -2172,7 +2188,7 @@ lazy val `std-database` = project
|
||||
val _ = StdBits
|
||||
.copyDependencies(
|
||||
`database-polyglot-root`,
|
||||
Some("std-database.jar"),
|
||||
Seq("std-database.jar"),
|
||||
ignoreScalaLibrary = true
|
||||
)
|
||||
.value
|
||||
|
@ -14,12 +14,12 @@ import java.time.LocalTime;
|
||||
import java.time.ZoneId;
|
||||
import java.time.ZonedDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.format.DateTimeFormatterBuilder;
|
||||
import java.time.format.DateTimeParseException;
|
||||
import org.enso.interpreter.dsl.Builtin;
|
||||
import org.enso.interpreter.runtime.EnsoContext;
|
||||
import org.enso.interpreter.runtime.data.text.Text;
|
||||
import org.enso.interpreter.runtime.library.dispatch.TypesLibrary;
|
||||
import org.enso.polyglot.common_utils.Core_Date_Utils;
|
||||
|
||||
@ExportLibrary(InteropLibrary.class)
|
||||
@ExportLibrary(TypesLibrary.class)
|
||||
@ -71,12 +71,7 @@ public final class EnsoDateTime implements TruffleObject {
|
||||
@Builtin.WrapException(from = DateTimeParseException.class)
|
||||
@CompilerDirectives.TruffleBoundary
|
||||
public static EnsoDateTime parse(String text) {
|
||||
String iso = text;
|
||||
if (text != null && text.length() > 10 && text.charAt(10) == ' ') {
|
||||
var builder = new StringBuilder(iso);
|
||||
builder.replace(10, 11, "T");
|
||||
iso = builder.toString();
|
||||
}
|
||||
String iso = Core_Date_Utils.normaliseISODateTime(text);
|
||||
|
||||
var datetime = DATE_TIME_FORMATTER.parseBest(iso, ZonedDateTime::from, LocalDateTime::from);
|
||||
if (datetime instanceof ZonedDateTime zdt) {
|
||||
@ -273,17 +268,5 @@ public final class EnsoDateTime implements TruffleObject {
|
||||
private static final EnsoDateTime epochStart =
|
||||
EnsoDateTime.create(1582, 10, 15, 0, 0, 0, 0, EnsoTimeZone.parse("UTC"));
|
||||
|
||||
private static final DateTimeFormatter DATE_TIME_FORMATTER =
|
||||
new DateTimeFormatterBuilder()
|
||||
.append(DateTimeFormatter.ISO_LOCAL_DATE_TIME)
|
||||
.optionalStart()
|
||||
.parseLenient()
|
||||
.appendOffsetId()
|
||||
.optionalEnd()
|
||||
.optionalStart()
|
||||
.appendLiteral('[')
|
||||
.parseCaseSensitive()
|
||||
.appendZoneRegionId()
|
||||
.appendLiteral(']')
|
||||
.toFormatter();
|
||||
private static final DateTimeFormatter DATE_TIME_FORMATTER = Core_Date_Utils.defaultZonedDateTimeFormatter();
|
||||
}
|
||||
|
@ -1,6 +1,5 @@
|
||||
package org.enso.interpreter.runtime.data.text;
|
||||
|
||||
import com.ibm.icu.text.BreakIterator;
|
||||
import com.ibm.icu.text.Normalizer2;
|
||||
import com.oracle.truffle.api.CompilerDirectives;
|
||||
import com.oracle.truffle.api.dsl.Cached;
|
||||
@ -19,6 +18,7 @@ import org.enso.interpreter.runtime.library.dispatch.TypesLibrary;
|
||||
import java.util.concurrent.locks.Lock;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
import org.enso.interpreter.dsl.Builtin;
|
||||
import org.enso.polyglot.common_utils.Core_Text_Utils;
|
||||
|
||||
/** The main runtime type for Enso's Text. */
|
||||
@ExportLibrary(InteropLibrary.class)
|
||||
@ -178,13 +178,7 @@ public final class Text implements TruffleObject {
|
||||
|
||||
@CompilerDirectives.TruffleBoundary
|
||||
private int computeLength() {
|
||||
BreakIterator iter = BreakIterator.getCharacterInstance();
|
||||
iter.setText(toString());
|
||||
int len = 0;
|
||||
while (iter.next() != BreakIterator.DONE) {
|
||||
len++;
|
||||
}
|
||||
return len;
|
||||
return Core_Text_Utils.computeGraphemeLength(toString());
|
||||
}
|
||||
|
||||
@CompilerDirectives.TruffleBoundary
|
||||
@ -193,37 +187,7 @@ public final class Text implements TruffleObject {
|
||||
boolean allowSideEffects,
|
||||
@Cached("build()") @Cached.Shared("strings") ToJavaStringNode toJavaStringNode) {
|
||||
String str = toJavaStringNode.execute(this);
|
||||
int len = str.length();
|
||||
int outputLength = len + 2; // Precise if there are no special characters.
|
||||
|
||||
// TODO This should be more extensible; while it's still a small fixed set,
|
||||
// a switch is probably fastest (unconfirmed)
|
||||
|
||||
StringBuffer strBuf = new StringBuffer(outputLength);
|
||||
|
||||
strBuf.append('\'');
|
||||
|
||||
for (int i = 0; i < len; ++i) {
|
||||
char c = str.charAt(i);
|
||||
switch (c) {
|
||||
case '\\' -> strBuf.append("\\\\");
|
||||
case '\'' -> strBuf.append("\\'");
|
||||
case '\n' -> strBuf.append("\\n");
|
||||
case '\t' -> strBuf.append("\\t");
|
||||
case '\0' -> strBuf.append("\\0");
|
||||
case '\u0007' -> strBuf.append("\\a");
|
||||
case '\u0008' -> strBuf.append("\\b");
|
||||
case '\u000c' -> strBuf.append("\\f");
|
||||
case '\r' -> strBuf.append("\\r");
|
||||
case '\u000B' -> strBuf.append("\\v");
|
||||
case '\u001B' -> strBuf.append("\\e");
|
||||
default -> strBuf.append(c);
|
||||
}
|
||||
}
|
||||
|
||||
strBuf.append('\'');
|
||||
|
||||
return strBuf.toString();
|
||||
return Core_Text_Utils.prettyPrint(str);
|
||||
}
|
||||
|
||||
@ExportMessage
|
||||
|
@ -0,0 +1,29 @@
|
||||
package org.enso.polyglot.common_utils;
|
||||
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.format.DateTimeFormatterBuilder;
|
||||
|
||||
public class Core_Date_Utils {
  /** Position of the date/time separator in a string shaped like {@code 2020-01-01T10:00:00}. */
  private static final int DATE_TIME_SEPARATOR_INDEX = 10;

  /**
   * Shared parser instance. {@link DateTimeFormatter} is immutable and thread-safe, so it is built
   * once instead of on every call to {@link #defaultZonedDateTimeFormatter()}.
   *
   * <p>Layout: an ISO local date-time, followed by an optional offset id (the builder is switched
   * to lenient parsing just before it), followed by an optional zone region id enclosed in square
   * brackets (the builder is switched to case-sensitive parsing just before the region id).
   */
  private static final DateTimeFormatter DEFAULT_ZONED_DATE_TIME_FORMATTER =
      new DateTimeFormatterBuilder()
          .append(DateTimeFormatter.ISO_LOCAL_DATE_TIME)
          .optionalStart()
          .parseLenient()
          .appendOffsetId()
          .optionalEnd()
          .optionalStart()
          .appendLiteral('[')
          .parseCaseSensitive()
          .appendZoneRegionId()
          .appendLiteral(']')
          .toFormatter();

  /**
   * Replace space with T in ISO date time string to make it compatible with ISO format.
   *
   * <p>Only the character at index 10 (right after a {@code yyyy-MM-dd} date) is inspected. If it
   * is a space it is replaced with {@code 'T'}; every other input, including {@code null} and
   * strings of 10 characters or fewer, is returned unchanged.
   *
   * @param dateString Raw date time string with either space or T as separator
   * @return ISO format date time string
   */
  public static String normaliseISODateTime(String dateString) {
    boolean hasSpaceSeparator =
        dateString != null
            && dateString.length() > DATE_TIME_SEPARATOR_INDEX
            && dateString.charAt(DATE_TIME_SEPARATOR_INDEX) == ' ';
    if (!hasSpaceSeparator) {
      return dateString;
    }

    var builder = new StringBuilder(dateString);
    builder.setCharAt(DATE_TIME_SEPARATOR_INDEX, 'T');
    return builder.toString();
  }

  /** @return default Date Time formatter for parsing a Date_Time. */
  public static DateTimeFormatter defaultZonedDateTimeFormatter() {
    return DEFAULT_ZONED_DATE_TIME_FORMATTER;
  }
}
|
@ -0,0 +1,51 @@
|
||||
package org.enso.polyglot.common_utils;
|
||||
|
||||
import com.ibm.icu.text.BreakIterator;
|
||||
|
||||
public class Core_Text_Utils {
|
||||
/** Computes the length of the string as the number of grapheme clusters it contains. */
|
||||
public static int computeGraphemeLength(String text) {
|
||||
BreakIterator iter = BreakIterator.getCharacterInstance();
|
||||
iter.setText(text);
|
||||
int len = 0;
|
||||
while (iter.next() != BreakIterator.DONE) {
|
||||
len++;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
/** Pretty prints the string, escaping special characters. */
|
||||
public static String prettyPrint(String str) {
|
||||
int len = str.length();
|
||||
int outputLength = len + 2; // Precise if there are no special characters.
|
||||
|
||||
// TODO This should be more extensible; while it's still a small fixed set,
|
||||
// a switch is probably fastest (unconfirmed)
|
||||
|
||||
StringBuilder sb = new StringBuilder(outputLength);
|
||||
|
||||
sb.append('\'');
|
||||
|
||||
for (int i = 0; i < len; ++i) {
|
||||
char c = str.charAt(i);
|
||||
switch (c) {
|
||||
case '\\' -> sb.append("\\\\");
|
||||
case '\'' -> sb.append("\\'");
|
||||
case '\n' -> sb.append("\\n");
|
||||
case '\t' -> sb.append("\\t");
|
||||
case '\0' -> sb.append("\\0");
|
||||
case '\u0007' -> sb.append("\\a");
|
||||
case '\u0008' -> sb.append("\\b");
|
||||
case '\u000c' -> sb.append("\\f");
|
||||
case '\r' -> sb.append("\\r");
|
||||
case '\u000B' -> sb.append("\\v");
|
||||
case '\u001B' -> sb.append("\\e");
|
||||
default -> sb.append(c);
|
||||
}
|
||||
}
|
||||
|
||||
sb.append('\'');
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
@ -0,0 +1,20 @@
|
||||
/**
|
||||
* This package contains common utilities which can be used both by the engine runtime and the libraries.
|
||||
* <p>
|
||||
* This allows us to avoid duplicating code between the runtime and library projects for operations that need to be
|
||||
* accessible on both sides.
|
||||
* <p>
|
||||
* The utilities that belong here are mostly operations that are builtins of the Enso language but also need to be used
|
||||
* from our Java libraries where the cost of calling back to Enso methods is relatively high, so accessing the Java
|
||||
* implementations directly is desirable. The primary example of that is the algorithm for computing the length of a
|
||||
* string by counting the grapheme clusters.
|
||||
* <p>
|
||||
* Due to classpath separation, the class files of this package will be duplicated with one copy embedded in the engine
|
||||
* and another attached as `common-polyglot-core-utils.jar` placed in the `polyglot` directory of the Standard.Base
|
||||
* library. But it allows us to avoid duplicating the code, so we can have a single source of truth for each
|
||||
* implementation.
|
||||
* <p>
|
||||
* Due to the copying, the project should not be expanded too much, but all utilities which would end up being
|
||||
* duplicated are best moved here.
|
||||
*/
|
||||
package org.enso.polyglot.common_utils;
|
@ -12,16 +12,17 @@ object StdBits {
|
||||
* directory.
|
||||
*
|
||||
* @param destination location where to put the dependencies
|
||||
* @param baseJarName name of the base generated JAR (if any); unexpected
|
||||
* (old) files are removed, so this task needs to know
|
||||
* this file's name to avoid removing it
|
||||
* @param providedJarNames names of JARs generated by the local projects;
|
||||
* unexpected (old) files are removed, so this task
|
||||
* needs to know these files' names to avoid removing
|
||||
* them
|
||||
* @param ignoreScalaLibrary whether to ignore Scala dependencies that are
|
||||
added by default by SBT and are not relevant in
|
||||
* pure-Java projects
|
||||
*/
|
||||
def copyDependencies(
|
||||
destination: File,
|
||||
baseJarName: Option[String],
|
||||
providedJarNames: Seq[String],
|
||||
ignoreScalaLibrary: Boolean
|
||||
): Def.Initialize[Task[Unit]] =
|
||||
Def.task {
|
||||
@ -50,7 +51,7 @@ object StdBits {
|
||||
Tracked.diffInputs(dependencyStore, FileInfo.hash)(relevantFiles.toSet) {
|
||||
report =>
|
||||
val expectedFileNames =
|
||||
report.checked.map(file => file.getName) ++ baseJarName.toSeq
|
||||
report.checked.map(file => file.getName) ++ providedJarNames
|
||||
for (existing <- IO.listFiles(destination)) {
|
||||
if (!expectedFileNames.contains(existing.getName)) {
|
||||
log.info(
|
||||
|
@ -13,6 +13,7 @@ import org.enso.base.text.CaseFoldedString;
|
||||
import org.enso.base.text.CaseFoldedString.Grapheme;
|
||||
import org.enso.base.text.GraphemeSpan;
|
||||
import org.enso.base.text.Utf16Span;
|
||||
import org.enso.polyglot.common_utils.Core_Text_Utils;
|
||||
|
||||
/** Utils for standard library operations on Text. */
|
||||
public class Text_Utils {
|
||||
@ -268,14 +269,8 @@ public class Text_Utils {
|
||||
* @param str the string to measure
|
||||
* @return length of the string
|
||||
*/
|
||||
private static long grapheme_length(String str) {
|
||||
BreakIterator iter = BreakIterator.getCharacterInstance();
|
||||
iter.setText(str);
|
||||
long len = 0;
|
||||
while (iter.next() != BreakIterator.DONE) {
|
||||
len++;
|
||||
}
|
||||
return len;
|
||||
public static long grapheme_length(String str) {
|
||||
return Core_Text_Utils.computeGraphemeLength(str);
|
||||
}
|
||||
|
||||
/** Returns a prefix of the string not exceeding the provided grapheme length. */
|
||||
@ -566,4 +561,9 @@ public class Text_Utils {
|
||||
sb.append(str, current_ix, str.length());
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/** Pretty prints the string, escaping special characters. */
|
||||
public static String pretty_print(String str) {
|
||||
return Core_Text_Utils.prettyPrint(str);
|
||||
}
|
||||
}
|
||||
|
@ -4,6 +4,7 @@ import org.enso.base.time.Date_Time_Utils;
|
||||
import org.enso.base.time.Date_Utils;
|
||||
import org.enso.base.time.TimeUtilsBase;
|
||||
import org.enso.base.time.Time_Of_Day_Utils;
|
||||
import org.enso.polyglot.common_utils.Core_Date_Utils;
|
||||
import org.graalvm.polyglot.Value;
|
||||
|
||||
import java.time.DateTimeException;
|
||||
@ -74,10 +75,7 @@ public class Time_Utils {
|
||||
|
||||
/** @return default Date Time formatter for parsing a Date_Time. */
|
||||
public static DateTimeFormatter default_zoned_date_time_formatter() {
|
||||
return new DateTimeFormatterBuilder().append(DateTimeFormatter.ISO_LOCAL_DATE_TIME)
|
||||
.optionalStart().parseLenient().appendOffsetId().optionalEnd()
|
||||
.optionalStart().appendLiteral('[').parseCaseSensitive().appendZoneRegionId().appendLiteral(']')
|
||||
.toFormatter();
|
||||
return Core_Date_Utils.defaultZonedDateTimeFormatter();
|
||||
}
|
||||
|
||||
/** @return default Date Time formatter for writing a Date_Time. */
|
||||
@ -94,13 +92,7 @@ public class Time_Utils {
|
||||
* @return ISO format date time string
|
||||
*/
|
||||
public static String normaliseISODateTime(String dateString) {
|
||||
if (dateString != null && dateString.length() > 10 && dateString.charAt(10) == ' ') {
|
||||
var builder = new StringBuilder(dateString);
|
||||
builder.replace(10, 11, "T");
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
return dateString;
|
||||
return Core_Date_Utils.normaliseISODateTime(dateString);
|
||||
}
|
||||
|
||||
public static String local_date_format(LocalDate date, Object format) {
|
||||
|
@ -1,6 +1,7 @@
|
||||
package org.enso.table.aggregations;
|
||||
|
||||
import com.ibm.icu.text.BreakIterator;
|
||||
import org.enso.base.Text_Utils;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
import org.enso.table.data.table.Column;
|
||||
import org.enso.table.data.table.problems.InvalidAggregation;
|
||||
@ -26,12 +27,12 @@ public class ShortestOrLongest extends Aggregator {
|
||||
for (int row : indexes) {
|
||||
Object value = storage.getItemBoxed(row);
|
||||
if (value != null) {
|
||||
if (!(value instanceof String)) {
|
||||
if (!(value instanceof String asString)) {
|
||||
this.addProblem(new InvalidAggregation(this.getName(), row, "Not a text value."));
|
||||
return null;
|
||||
}
|
||||
|
||||
long valueLength = GraphemeLength((String) value);
|
||||
long valueLength = Text_Utils.grapheme_length(asString);
|
||||
if (current == null || Long.compare(valueLength, length) == minOrMax) {
|
||||
length = valueLength;
|
||||
current = value;
|
||||
@ -41,16 +42,4 @@ public class ShortestOrLongest extends Aggregator {
|
||||
|
||||
return current;
|
||||
}
|
||||
|
||||
private static long GraphemeLength(String text) {
|
||||
BreakIterator iter = BreakIterator.getCharacterInstance();
|
||||
iter.setText(text);
|
||||
|
||||
int count = 0;
|
||||
for (int end = iter.next(); end != BreakIterator.DONE; end = iter.next()) {
|
||||
count++;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
package org.enso.table.data.table;
|
||||
|
||||
import org.enso.base.Text_Utils;
|
||||
import org.enso.base.polyglot.Polyglot_Utils;
|
||||
import org.enso.table.data.column.builder.object.InferredBuilder;
|
||||
import org.enso.table.data.column.storage.BoolStorage;
|
||||
@ -40,8 +41,8 @@ public class Column {
|
||||
throw new IllegalArgumentException("Column name cannot be empty.");
|
||||
}
|
||||
if (name.indexOf('\0') >= 0) {
|
||||
// TODO pretty?
|
||||
throw new IllegalArgumentException("Column name "+name+" must not contain the NUL character.");
|
||||
String pretty = Text_Utils.pretty_print(name);
|
||||
throw new IllegalArgumentException("Column name "+pretty+" must not contain the NUL character.");
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user