mirror of
https://github.com/enso-org/enso.git
synced 2025-01-05 15:42:52 +03:00
Add benchmarks comparing performance of Table operations 'vectorized' in Java vs performed in Enso (#7270)
The added benchmark is a basis for a performance investigation. We compare the performance of the same operation run in Java vs Enso to see what is the overhead and try to get the Enso operations closer to the pure-Java performance.
This commit is contained in:
parent
3e3b823620
commit
56635c9a88
26
build.sbt
26
build.sbt
@ -303,7 +303,8 @@ lazy val enso = (project in file("."))
|
|||||||
`std-table`,
|
`std-table`,
|
||||||
`std-aws`,
|
`std-aws`,
|
||||||
`simple-httpbin`,
|
`simple-httpbin`,
|
||||||
`enso-test-java-helpers`
|
`enso-test-java-helpers`,
|
||||||
|
`exploratory-benchmark-java-helpers`
|
||||||
)
|
)
|
||||||
.settings(Global / concurrentRestrictions += Tags.exclusive(Exclusive))
|
.settings(Global / concurrentRestrictions += Tags.exclusive(Exclusive))
|
||||||
.settings(
|
.settings(
|
||||||
@ -1359,6 +1360,7 @@ lazy val runtime = (project in file("engine/runtime"))
|
|||||||
(Runtime / compile) := (Runtime / compile)
|
(Runtime / compile) := (Runtime / compile)
|
||||||
.dependsOn(`std-base` / Compile / packageBin)
|
.dependsOn(`std-base` / Compile / packageBin)
|
||||||
.dependsOn(`enso-test-java-helpers` / Compile / packageBin)
|
.dependsOn(`enso-test-java-helpers` / Compile / packageBin)
|
||||||
|
.dependsOn(`exploratory-benchmark-java-helpers` / Compile / packageBin)
|
||||||
.dependsOn(`std-image` / Compile / packageBin)
|
.dependsOn(`std-image` / Compile / packageBin)
|
||||||
.dependsOn(`std-database` / Compile / packageBin)
|
.dependsOn(`std-database` / Compile / packageBin)
|
||||||
.dependsOn(`std-google-api` / Compile / packageBin)
|
.dependsOn(`std-google-api` / Compile / packageBin)
|
||||||
@ -2017,6 +2019,26 @@ lazy val `enso-test-java-helpers` = project
|
|||||||
.dependsOn(`std-base` % "provided")
|
.dependsOn(`std-base` % "provided")
|
||||||
.dependsOn(`std-table` % "provided")
|
.dependsOn(`std-table` % "provided")
|
||||||
|
|
||||||
|
// sbt project holding the Java helper classes used by the Exploratory_Benchmarks
// Enso project. Sources live under `polyglot-sources/` and the packaged jar is
// written to `polyglot/java/` inside test/Exploratory_Benchmarks.
lazy val `exploratory-benchmark-java-helpers` = project
|
||||||
|
.in(
|
||||||
|
file(
|
||||||
|
"test/Exploratory_Benchmarks/polyglot-sources/exploratory-benchmark-java-helpers"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.settings(
|
||||||
|
frgaalJavaCompilerSetting,
|
||||||
|
// Java-only project: do not add the Scala standard library as a dependency.
autoScalaLibrary := false,
|
||||||
|
// Redirect the packaged jar to the location given below instead of the default target directory.
Compile / packageBin / artifactPath :=
|
||||||
|
file(
|
||||||
|
"test/Exploratory_Benchmarks/polyglot/java/exploratory-benchmark-java-helpers.jar"
|
||||||
|
),
|
||||||
|
libraryDependencies ++= Seq(
|
||||||
|
"org.graalvm.sdk" % "graal-sdk" % graalMavenPackagesVersion % "provided"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
// Both standard-library projects are "provided": needed to compile, not bundled into this jar.
.dependsOn(`std-base` % "provided")
|
||||||
|
.dependsOn(`std-table` % "provided")
|
||||||
|
|
||||||
lazy val `std-table` = project
|
lazy val `std-table` = project
|
||||||
.in(file("std-bits") / "table")
|
.in(file("std-bits") / "table")
|
||||||
.enablePlugins(Antlr4Plugin)
|
.enablePlugins(Antlr4Plugin)
|
||||||
@ -2340,11 +2362,13 @@ pkgStdLibInternal := Def.inputTask {
|
|||||||
(`std-table` / Compile / packageBin).value
|
(`std-table` / Compile / packageBin).value
|
||||||
case "TestHelpers" =>
|
case "TestHelpers" =>
|
||||||
(`enso-test-java-helpers` / Compile / packageBin).value
|
(`enso-test-java-helpers` / Compile / packageBin).value
|
||||||
|
(`exploratory-benchmark-java-helpers` / Compile / packageBin).value
|
||||||
case "AWS" =>
|
case "AWS" =>
|
||||||
(`std-aws` / Compile / packageBin).value
|
(`std-aws` / Compile / packageBin).value
|
||||||
case _ if buildAllCmd =>
|
case _ if buildAllCmd =>
|
||||||
(`std-base` / Compile / packageBin).value
|
(`std-base` / Compile / packageBin).value
|
||||||
(`enso-test-java-helpers` / Compile / packageBin).value
|
(`enso-test-java-helpers` / Compile / packageBin).value
|
||||||
|
(`exploratory-benchmark-java-helpers` / Compile / packageBin).value
|
||||||
(`std-table` / Compile / packageBin).value
|
(`std-table` / Compile / packageBin).value
|
||||||
(`std-database` / Compile / packageBin).value
|
(`std-database` / Compile / packageBin).value
|
||||||
(`std-image` / Compile / packageBin).value
|
(`std-image` / Compile / packageBin).value
|
||||||
|
@ -47,8 +47,9 @@ type Column
|
|||||||
Column.from_vector "My Column" [1, 2, 3, 4, 5]
|
Column.from_vector "My Column" [1, 2, 3, 4, 5]
|
||||||
from_vector : Text -> Vector -> Column
|
from_vector : Text -> Vector -> Column
|
||||||
from_vector name items =
|
from_vector name items =
|
||||||
|
expected_storage_type = Nothing
|
||||||
Illegal_Argument.handle_java_exception <|
|
Illegal_Argument.handle_java_exception <|
|
||||||
Column.Value (Java_Column.fromItems name items)
|
Column.Value (Java_Column.fromItems name items expected_storage_type)
|
||||||
|
|
||||||
## PRIVATE
|
## PRIVATE
|
||||||
Creates a new column given a name and an internal Java storage.
|
Creates a new column given a name and an internal Java storage.
|
||||||
|
@ -70,6 +70,15 @@ type Bench
|
|||||||
- label: A name for the measurement.
|
- label: A name for the measurement.
|
||||||
- iter_size: The number of runs per iteration.
|
- iter_size: The number of runs per iteration.
|
||||||
- num_iters: The number of iterations per measurement.
|
- num_iters: The number of iterations per measurement.
|
||||||
|
- run_gc_between_iterations: Whether to try running the garbage collector
|
||||||
|
between iterations. Defaults to False. This is helpful when testing
|
||||||
|
memory intensive operations, to ensure that GC runs between iterations
|
||||||
|
and not _during_ iterations. The time taken to run the requested
|
||||||
|
garbage collection will not be counted into the iteration time, however
|
||||||
|
there is no guarantee that the JVM will actually accept the GC hint and
|
||||||
|
it is still possible the JVM may run GC during an iteration. But
|
||||||
|
setting this option to True should make it less likely for GC to
|
||||||
|
interrupt measurements.
|
||||||
|
|
||||||
> Example
|
> Example
|
||||||
Measure a computation called "foo" with an iteration size of 2 and a number
|
Measure a computation called "foo" with an iteration size of 2 and a number
|
||||||
@ -80,8 +89,8 @@ type Bench
|
|||||||
|
|
||||||
example_measure =
|
example_measure =
|
||||||
Bench.measure Examples.get_boolean "foo" iter_size=2 num_iters=1
|
Bench.measure Examples.get_boolean "foo" iter_size=2 num_iters=1
|
||||||
measure : Any -> Text -> Integer -> Integer -> Nothing
|
measure : Any -> Text -> Integer -> Integer -> Boolean -> Nothing
|
||||||
measure ~act label iter_size num_iters =
|
measure ~act label iter_size num_iters run_gc_between_iterations=False =
|
||||||
dry_run = Environment.get "ENSO_BENCHMARK_TEST_DRY_RUN" "False" == "True"
|
dry_run = Environment.get "ENSO_BENCHMARK_TEST_DRY_RUN" "False" == "True"
|
||||||
result = Ref.new 0.0
|
result = Ref.new 0.0
|
||||||
single_call = _ ->
|
single_call = _ ->
|
||||||
@ -90,6 +99,8 @@ type Bench
|
|||||||
x2 = System.nano_time
|
x2 = System.nano_time
|
||||||
x2 - x1
|
x2 - x1
|
||||||
iteration = it_size -> it_num ->
|
iteration = it_size -> it_num ->
|
||||||
|
if run_gc_between_iterations then
|
||||||
|
Runtime.gc
|
||||||
act_it_num = num_iters - it_num
|
act_it_num = num_iters - it_num
|
||||||
res = times it_size single_call
|
res = times it_size single_call
|
||||||
avg = avg_list res
|
avg = avg_list res
|
||||||
|
@ -2,9 +2,11 @@ package org.enso.table.data.table;
|
|||||||
|
|
||||||
import org.enso.base.Text_Utils;
|
import org.enso.base.Text_Utils;
|
||||||
import org.enso.base.polyglot.Polyglot_Utils;
|
import org.enso.base.polyglot.Polyglot_Utils;
|
||||||
|
import org.enso.table.data.column.builder.Builder;
|
||||||
import org.enso.table.data.column.builder.InferredBuilder;
|
import org.enso.table.data.column.builder.InferredBuilder;
|
||||||
import org.enso.table.data.column.storage.BoolStorage;
|
import org.enso.table.data.column.storage.BoolStorage;
|
||||||
import org.enso.table.data.column.storage.Storage;
|
import org.enso.table.data.column.storage.Storage;
|
||||||
|
import org.enso.table.data.column.storage.type.StorageType;
|
||||||
import org.enso.table.data.index.DefaultIndex;
|
import org.enso.table.data.index.DefaultIndex;
|
||||||
import org.enso.table.data.index.Index;
|
import org.enso.table.data.index.Index;
|
||||||
import org.enso.table.data.mask.OrderMask;
|
import org.enso.table.data.mask.OrderMask;
|
||||||
@ -116,18 +118,13 @@ public class Column {
|
|||||||
return new Column(name, storage);
|
return new Column(name, storage);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** Creates a column from an Enso array, ensuring Enso dates are converted to Java dates. */
|
||||||
* Creates a new column with given name and elements.
|
public static Column fromItems(String name, List<Value> items, StorageType expectedType) throws ClassCastException {
|
||||||
*
|
|
||||||
* @param name the name to use
|
|
||||||
* @param items the items contained in the column
|
|
||||||
* @return a column with given name and items
|
|
||||||
*/
|
|
||||||
public static Column fromItems(String name, List<Value> items) {
|
|
||||||
Context context = Context.getCurrent();
|
Context context = Context.getCurrent();
|
||||||
InferredBuilder builder = new InferredBuilder(items.size());
|
int n = items.size();
|
||||||
|
Builder builder = expectedType == null ? new InferredBuilder(n) : Builder.getForType(expectedType, n);
|
||||||
|
|
||||||
// ToDo: This a workaround for an issue with polyglot layer. #5590 is related.
|
// ToDo: This a workaround for an issue with polyglot layer. #5590 is related.
|
||||||
// to revert replace with: for (Value item : items) {
|
|
||||||
for (Object item : items) {
|
for (Object item : items) {
|
||||||
if (item instanceof Value v) {
|
if (item instanceof Value v) {
|
||||||
Object converted = Polyglot_Utils.convertPolyglotValue(v);
|
Object converted = Polyglot_Utils.convertPolyglotValue(v);
|
||||||
@ -142,6 +139,20 @@ public class Column {
|
|||||||
return new Column(name, storage);
|
return new Column(name, storage);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Creates a column from an Enso array. No polyglot conversion happens: every item is handed to
 * the builder exactly as received. This is unsafe.
 *
 * @param name the name of the new column
 * @param items the items to store; each one is appended to the builder unchanged
 * @param expectedType the storage type to build; when null an InferredBuilder is used instead
 * @return a column with the given name backed by the sealed builder's storage
 * @throws ClassCastException declared by the signature - presumably raised by the builder when an
 *     item does not fit the requested storage type (TODO confirm against the Builder classes)
 */
|
||||||
|
public static Column fromItemsNoDateConversion(String name, List<Object> items, StorageType expectedType) throws ClassCastException {
|
||||||
|
Context context = Context.getCurrent();
|
||||||
|
int n = items.size();
|
||||||
|
Builder builder = expectedType == null ? new InferredBuilder(n) : Builder.getForType(expectedType, n);
|
||||||
|
|
||||||
|
for (Object item : items) {
|
||||||
|
// The builder was created above with room for exactly n items, one per element of `items`.
builder.appendNoGrow(item);
|
||||||
|
// Poll the Graal context safepoint once per appended item.
context.safepoint();
|
||||||
|
}
|
||||||
|
var storage = builder.seal();
|
||||||
|
return new Column(name, storage);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new column with given name and elements.
|
* Creates a new column with given name and elements.
|
||||||
*
|
*
|
||||||
@ -155,7 +166,7 @@ public class Column {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (repeat == 1) {
|
if (repeat == 1) {
|
||||||
return fromItems(name, items);
|
return fromItems(name, items, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
Context context = Context.getCurrent();
|
Context context = Context.getCurrent();
|
||||||
|
52
test/Exploratory_Benchmarks/README.md
Normal file
52
test/Exploratory_Benchmarks/README.md
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
# Exploring Table operation performance
|
||||||
|
|
||||||
|
These benchmarks are used to compare various approaches to computing operations
|
||||||
|
on Table columns, to find out what best practices we should use for these and
|
||||||
|
find avenues for optimization of the language and Table implementation.
|
||||||
|
|
||||||
|
These benchmarks are not meant to be used for tracking performance of the
|
||||||
|
current implementation itself. That is supposed to be done by another project -
|
||||||
|
`Table_Benchmarks`.
|
||||||
|
|
||||||
|
## Structure
|
||||||
|
|
||||||
|
Currently, the benchmarks are split into a few files, each exploring some
|
||||||
|
separate topic, like mapping a single column, combining two columns with some
|
||||||
|
operation, or computing an aggregate operation over a column. In each file,
|
||||||
|
there may be a few Enso types, each representing a separate benchmark. Usually,
|
||||||
|
we have two benchmarks for each operation type - one dealing with a primitive
|
||||||
|
value type like integers (`long` in the Java side) and another dealing with a
|
||||||
|
reference type like `String` or `Date`. We expect the performance
|
||||||
|
characteristics between these may differ, e.g. because Java allows using `long`
|
||||||
|
without boxing, so we compare them separately.
|
||||||
|
|
||||||
|
Each Enso type for a given benchmark contains multiple methods which represent
|
||||||
|
various 'approaches' to computing the same operation.
|
||||||
|
|
||||||
|
Each benchmark run has a name that consists of the type that defines it, a dot and
|
||||||
|
the method representing the particular approach, e.g.
|
||||||
|
`Boxed_Map_Test.enso_map_as_vector`.
|
||||||
|
|
||||||
|
## Running
|
||||||
|
|
||||||
|
The runner is very simple. If any options are to be customized, the Enso file
|
||||||
|
itself needs to be modified. One can run the whole project to run all the
|
||||||
|
benchmarks, or run only a specific file.
|
||||||
|
|
||||||
|
## Analysis
|
||||||
|
|
||||||
|
The output of the benchmarks should be saved to a file. Then that file can be
|
||||||
|
loaded using the Enso workflow in `tools/performance/benchmark-analysis`.
|
||||||
|
|
||||||
|
The workflow is tuned to analysing these comparative benchmarks.
|
||||||
|
|
||||||
|
At the top, one can select which file is to be analyzed. Below there is a
|
||||||
|
dropdown that allows selecting one particular benchmark (represented by the type,
|
||||||
|
e.g. `Boxed_Map_Test`). With that selected, one can display a scatter plot
|
||||||
|
visualization comparing various approaches of that one given benchmark. On the
|
||||||
|
plot we can see runtimes of subsequent iterations. Later, we drop the first 40
|
||||||
|
iterations (the number can easily be customized in the workflow) to ensure
|
||||||
|
sufficient warm-up for each benchmark. Then a table is displayed computing the
|
||||||
|
average runtime of each approach and how they compare relative to each other - a
|
||||||
|
dropdown allows selecting one benchmark that will be used as a reference point
|
||||||
|
(100%) for the average runtime comparison.
|
6
test/Exploratory_Benchmarks/package.yaml
Normal file
6
test/Exploratory_Benchmarks/package.yaml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
name: Exploratory_Benchmarks
|
||||||
|
enso-version: default
|
||||||
|
version: 0.0.1
|
||||||
|
license: MIT
|
||||||
|
author: enso-dev@enso.org
|
||||||
|
maintainer: enso-dev@enso.org
|
@ -0,0 +1,109 @@
|
|||||||
|
package org.enso.exploratory_benchmark_helpers;
|
||||||
|
|
||||||
|
import java.util.BitSet;
|
||||||
|
import java.util.function.Function;
|
||||||
|
import org.enso.base.Text_Utils;
|
||||||
|
import org.enso.table.data.column.builder.Builder;
|
||||||
|
import org.enso.table.data.column.builder.InferredBuilder;
|
||||||
|
import org.enso.table.data.column.storage.BoolStorage;
|
||||||
|
import org.enso.table.data.column.storage.Storage;
|
||||||
|
import org.enso.table.data.column.storage.StringStorage;
|
||||||
|
import org.enso.table.data.column.storage.datetime.DateStorage;
|
||||||
|
import org.enso.table.data.column.storage.numeric.LongStorage;
|
||||||
|
import org.enso.table.data.column.storage.type.StorageType;
|
||||||
|
|
||||||
|
public class MapHelpers {
|
||||||
|
/**
 * Concatenates two string storages element by element.
 *
 * <p>A result element is null whenever the element at that index is missing (isNa) in either
 * input.
 *
 * @param storage1 storage providing the left operand of each concatenation
 * @param storage2 storage providing the right operand of each concatenation
 * @return a new StringStorage of the same size holding the concatenated values
 * @throws IllegalArgumentException if the two storages differ in size
 */
public static StringStorage stringConcatBimap(StringStorage storage1, StringStorage storage2) {
|
||||||
|
if (storage1.size() != storage2.size()) {
|
||||||
|
throw new IllegalArgumentException("Storage sizes must match");
|
||||||
|
}
|
||||||
|
|
||||||
|
int n = storage1.size();
|
||||||
|
String[] result = new String[n];
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
if (!storage1.isNa(i) && !storage2.isNa(i)) {
|
||||||
|
result[i] = storage1.getItem(i) + storage2.getItem(i);
|
||||||
|
} else {
|
||||||
|
// Missing in at least one input: the result at this index is missing as well.
result[i] = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new StringStorage(result, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Adds two long storages element by element.
 *
 * <p>Indices where either input is missing (isNa) are recorded in the result's missing-value
 * BitSet; the corresponding slot of the value array keeps its default of 0.
 *
 * @param storage1 storage providing the left operand of each addition
 * @param storage2 storage providing the right operand of each addition
 * @return a new LongStorage of the same size holding the sums
 * @throws IllegalArgumentException if the two storages differ in size
 */
public static LongStorage longAddBimap(LongStorage storage1, LongStorage storage2) {
|
||||||
|
if (storage1.size() != storage2.size()) {
|
||||||
|
throw new IllegalArgumentException("Storage sizes must match");
|
||||||
|
}
|
||||||
|
|
||||||
|
int n = storage1.size();
|
||||||
|
long[] result = new long[n];
|
||||||
|
BitSet missing = new BitSet();
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
if (!storage1.isNa(i) && !storage2.isNa(i)) {
|
||||||
|
result[i] = storage1.getItem(i) + storage2.getItem(i);
|
||||||
|
} else {
|
||||||
|
// Missing in at least one input: mark the index instead of storing a value.
missing.set(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new LongStorage(result, n, missing);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Checks, for every element of a string storage, whether it ends with the given suffix.
 *
 * <p>The check itself is delegated to Text_Utils.ends_with. Missing elements (isNa) are recorded
 * in the result's missing-value BitSet and are not tested.
 *
 * @param storage the texts to test
 * @param suffix the suffix passed to Text_Utils.ends_with for every non-missing element
 * @return a BoolStorage of the same size whose set bits mark the elements that matched
 */
public static BoolStorage textEndsWith(StringStorage storage, String suffix) {
|
||||||
|
int n = storage.size();
|
||||||
|
BitSet result = new BitSet();
|
||||||
|
BitSet missing = new BitSet();
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
if (storage.isNa(i)) {
|
||||||
|
missing.set(i);
|
||||||
|
} else {
|
||||||
|
if (Text_Utils.ends_with(storage.getItem(i), suffix)) {
|
||||||
|
result.set(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// NOTE(review): the trailing `false` is presumably BoolStorage's "negated" flag - confirm.
return new BoolStorage(result, missing, n, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Adds a constant to every element of a long storage.
 *
 * <p>Missing elements (isNa) stay missing: their index is recorded in the result's missing-value
 * BitSet and the value slot keeps its default of 0.
 *
 * @param storage the values to shift
 * @param shift the constant added to each non-missing value
 * @return a new LongStorage of the same size holding the shifted values
 */
public static LongStorage longAdd(LongStorage storage, long shift) {
|
||||||
|
int n = storage.size();
|
||||||
|
long[] result = new long[n];
|
||||||
|
BitSet missing = new BitSet();
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
if (!storage.isNa(i)) {
|
||||||
|
result[i] = storage.getItem(i) + shift;
|
||||||
|
} else {
|
||||||
|
missing.set(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new LongStorage(result, n, missing);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Extracts the year of every element of a date storage.
 *
 * <p>Missing elements (isNa) stay missing: their index is recorded in the result's missing-value
 * BitSet and the value slot keeps its default of 0.
 *
 * @param storage the dates to read
 * @return a new LongStorage of the same size holding getYear() of each non-missing date
 */
public static LongStorage getYear(DateStorage storage) {
|
||||||
|
int n = storage.size();
|
||||||
|
long[] result = new long[n];
|
||||||
|
BitSet missing = new BitSet();
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
if (!storage.isNa(i)) {
|
||||||
|
result[i] = storage.getItem(i).getYear();
|
||||||
|
} else {
|
||||||
|
missing.set(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new LongStorage(result, n, missing);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Maps a callback over every non-missing element of a storage.
 *
 * <p>Each non-missing element is read boxed (getItemBoxed), passed to {@code fn}, and the result
 * is appended to a builder. Missing elements (isNa) are appended as a single null without calling
 * {@code fn}.
 *
 * @param storage the input values
 * @param fn the callback applied to each non-missing boxed value
 * @param expectedType the storage type to build; when null an InferredBuilder is used instead
 * @return the storage produced by sealing the builder
 */
public static Storage<?> mapCallback(
|
||||||
|
Storage<?> storage, Function<Object, Object> fn, StorageType expectedType) {
|
||||||
|
int n = storage.size();
|
||||||
|
Builder builder =
|
||||||
|
expectedType == null ? new InferredBuilder(n) : Builder.getForType(expectedType, n);
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
if (!storage.isNa(i)) {
|
||||||
|
builder.append(fn.apply(storage.getItemBoxed(i)));
|
||||||
|
} else {
|
||||||
|
builder.appendNulls(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return builder.seal();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,46 @@
|
|||||||
|
package org.enso.exploratory_benchmark_helpers;
|
||||||
|
|
||||||
|
import java.time.LocalDate;
|
||||||
|
import org.enso.base.Text_Utils;
|
||||||
|
import org.enso.table.data.column.storage.StringStorage;
|
||||||
|
import org.enso.table.data.column.storage.datetime.DateStorage;
|
||||||
|
import org.enso.table.data.column.storage.numeric.LongStorage;
|
||||||
|
|
||||||
|
public class SimpleStorageAggregateHelpers {
|
||||||
|
/**
 * Sums all non-missing values of a long storage.
 *
 * <p>The total is accumulated in a Java {@code long}, so it wraps around silently on overflow.
 *
 * @param storage the values to sum
 * @return the sum of the non-missing values, or 0 when there are none
 */
public static long sumLongStorage(LongStorage storage) {
|
||||||
|
long sum = 0;
|
||||||
|
for (int i = 0; i < storage.size(); i++) {
|
||||||
|
if (!storage.isNa(i)) {
|
||||||
|
sum += storage.getItem(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Sums the month numbers (getMonthValue) of all non-null dates in a date storage.
 *
 * <p>Unlike the index-based helpers in this class, this iterates directly over the array returned
 * by getData() and treats null entries as missing.
 *
 * @param storage the dates to read
 * @return the sum of the month values, or 0 when there are no non-null dates
 */
public static long sumMonthsOfDateStorage(DateStorage storage) {
|
||||||
|
long sum = 0;
|
||||||
|
// NOTE(review): assumes any entries of getData() beyond the logical size are null - confirm.
for (LocalDate date : storage.getData()) {
|
||||||
|
if (date != null) {
|
||||||
|
sum += date.getMonthValue();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Finds the longest non-missing text in a string storage.
 *
 * <p>Length is measured with Text_Utils.grapheme_length. When several texts share the maximum
 * length the first one encountered is returned, because only a strictly greater length replaces
 * the current candidate.
 *
 * @param storage the texts to scan
 * @return the longest text, or null when the storage has no non-missing element
 */
public static String longestText(StringStorage storage) {
|
||||||
|
// Start below any possible length so that even an empty text can become the candidate.
long longest = -1;
|
||||||
|
String longestText = null;
|
||||||
|
int n = storage.size();
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
if (!storage.isNa(i)) {
|
||||||
|
String text = storage.getItem(i);
|
||||||
|
long length = Text_Utils.grapheme_length(text);
|
||||||
|
if (length > longest) {
|
||||||
|
longest = length;
|
||||||
|
longestText = text;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return longestText;
|
||||||
|
}
|
||||||
|
}
|
3
test/Exploratory_Benchmarks/src/Main.enso
Normal file
3
test/Exploratory_Benchmarks/src/Main.enso
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
import project.Table.Main as Table_Main
|
||||||
|
|
||||||
|
main = Table_Main.spec
|
127
test/Exploratory_Benchmarks/src/Table/Column_Aggregate.enso
Normal file
127
test/Exploratory_Benchmarks/src/Table/Column_Aggregate.enso
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
from Standard.Base import all
|
||||||
|
from Standard.Table import all
|
||||||
|
|
||||||
|
from Standard.Test import Bench
|
||||||
|
|
||||||
|
import project.Table.Common_Setup.Common_Setup
|
||||||
|
import project.Table.Helpers
|
||||||
|
|
||||||
|
polyglot java import org.enso.exploratory_benchmark_helpers.SimpleStorageAggregateHelpers
|
||||||
|
|
||||||
|
## Computes the Longest text in the column - aggregate with no grouping.
|
||||||
|
This is of interest, because in contrast to all benchmarks above, it can generally be done in O(1) memory.
|
||||||
|
type Boxed_Total_Aggregate
|
||||||
|
Instance text_column
|
||||||
|
|
||||||
|
# Approach: the Table `aggregate` operation with `Aggregate_Column.Longest`, reading the single resulting cell.
current_aggregate_implementation self =
|
||||||
|
self.text_column.to_table.aggregate [Aggregate_Column.Longest 0] . at 0 . at 0
|
||||||
|
|
||||||
|
# Approach: the whole loop runs in Java, in `SimpleStorageAggregateHelpers.longestText`.
java_loop self =
|
||||||
|
SimpleStorageAggregateHelpers.longestText self.text_column.java_column.getStorage
|
||||||
|
|
||||||
|
# Approach: an Enso fold over indices, reading items through the result of `to_vector`.
# `Nothing` items are skipped; on equal lengths the earlier item is kept.
enso_aggregate_vector_proxy self =
|
||||||
|
n = self.text_column.length
|
||||||
|
vector_proxy = self.text_column.to_vector
|
||||||
|
(0.up_to n).fold Nothing acc-> ix->
|
||||||
|
item = vector_proxy.at ix
|
||||||
|
if acc.is_nothing then item else
|
||||||
|
if item.is_nothing then acc else
|
||||||
|
if item.length > acc.length then item else acc
|
||||||
|
|
||||||
|
# Approach: the same Enso fold, but reading items from the Java storage with `getItemBoxed`.
enso_aggregate_storage_get_item self =
|
||||||
|
n = self.text_column.length
|
||||||
|
storage = self.text_column.java_column.getStorage
|
||||||
|
(0.up_to n).fold Nothing acc-> ix->
|
||||||
|
item = storage.getItemBoxed ix
|
||||||
|
if acc.is_nothing then item else
|
||||||
|
if item.is_nothing then acc else
|
||||||
|
if item.length > acc.length then item else acc
|
||||||
|
|
||||||
|
# Runs every approach once and passes the results to `Helpers.check_results`.
verify_correctness self =
|
||||||
|
Helpers.check_results [self.current_aggregate_implementation, self.java_loop, self.enso_aggregate_vector_proxy, self.enso_aggregate_storage_get_item]
|
||||||
|
|
||||||
|
## Computes Sum of integers.
|
||||||
|
We have to be careful with `n` because if we use too large values Enso will start using BigInts, while Java will overflow.
|
||||||
|
type Primitive_Total_Aggregate
|
||||||
|
Instance int_column
|
||||||
|
|
||||||
|
# Approach: the Table `aggregate` operation with `Aggregate_Column.Sum`, reading the single resulting cell.
current_aggregate_implementation self =
|
||||||
|
self.int_column.to_table.aggregate [Aggregate_Column.Sum 0] . at 0 . at 0
|
||||||
|
|
||||||
|
# Approach: the whole loop runs in Java, in `SimpleStorageAggregateHelpers.sumLongStorage`.
java_loop self =
|
||||||
|
long_storage = self.int_column.java_column.getStorage
|
||||||
|
SimpleStorageAggregateHelpers.sumLongStorage long_storage
|
||||||
|
|
||||||
|
# Approach: an Enso fold over the result of `to_vector`, skipping `Nothing` items.
enso_aggregate_vector_proxy self =
|
||||||
|
vector_proxy = self.int_column.to_vector
|
||||||
|
vector_proxy.fold 0 acc-> item->
|
||||||
|
if item.is_nothing then acc else
|
||||||
|
acc + item
|
||||||
|
|
||||||
|
# Approach: an Enso fold over indices, using `isNa` / `getItem` on the Java storage directly.
enso_aggregate_storage_get_item self =
|
||||||
|
n = self.int_column.length
|
||||||
|
storage = self.int_column.java_column.getStorage
|
||||||
|
(0.up_to n).fold 0 acc-> ix->
|
||||||
|
if storage.isNa ix then acc else
|
||||||
|
acc + storage.getItem ix
|
||||||
|
|
||||||
|
# Runs every approach once and passes the results to `Helpers.check_results`.
verify_correctness self =
|
||||||
|
Helpers.check_results [self.current_aggregate_implementation, self.java_loop, self.enso_aggregate_vector_proxy, self.enso_aggregate_storage_get_item]
|
||||||
|
|
||||||
|
## An alternative to Boxed_Total_Aggregate. Computing text length is complex due
|
||||||
|
to ICU complexity. This is a simpler one - we get the month of each value and
|
||||||
|
sum these.
|
||||||
|
type Boxed_Sum_Months
|
||||||
|
Instance date_column
|
||||||
|
|
||||||
|
# Approach: the whole loop runs in Java, in `SimpleStorageAggregateHelpers.sumMonthsOfDateStorage`.
java_loop self =
|
||||||
|
date_storage = self.date_column.java_column.getStorage
|
||||||
|
SimpleStorageAggregateHelpers.sumMonthsOfDateStorage date_storage
|
||||||
|
|
||||||
|
# Approach: an Enso fold over the result of `to_vector`, adding `month` of each non-`Nothing` item.
enso_aggregate_vector_proxy self =
|
||||||
|
vector_proxy = self.date_column.to_vector
|
||||||
|
vector_proxy.fold 0 acc-> item->
|
||||||
|
if item.is_nothing then acc else
|
||||||
|
acc + item.month
|
||||||
|
|
||||||
|
# Approach: an Enso fold over indices, reading items from the Java storage with `getItemBoxed`.
enso_aggregate_storage_get_item self =
|
||||||
|
n = self.date_column.length
|
||||||
|
storage = self.date_column.java_column.getStorage
|
||||||
|
(0.up_to n).fold 0 acc-> ix->
|
||||||
|
item = storage.getItemBoxed ix
|
||||||
|
if item.is_nothing then acc else
|
||||||
|
acc + item.month
|
||||||
|
|
||||||
|
# Runs every approach once and passes the results to `Helpers.check_results`.
# Unlike the other aggregate benchmarks in this file, there is no Table `aggregate` approach here.
verify_correctness self =
|
||||||
|
Helpers.check_results [self.java_loop, self.enso_aggregate_vector_proxy, self.enso_aggregate_storage_get_item]
|
||||||
|
|
||||||
|
main = spec (Common_Setup.Config)
|
||||||
|
|
||||||
|
# Runs the aggregate benchmarks: builds the two input tables from `setup`, and for each benchmark
# type first calls `verify_correctness`, then measures every approach with `Bench.measure`.
# Each measurement label has the form `<Type>.<method>`.
spec setup =
|
||||||
|
t = setup.generate_input_table
|
||||||
|
t2 = setup.generate_input_table_date
|
||||||
|
|
||||||
|
iter_size = setup.iter_size
|
||||||
|
num_iterations = setup.num_iterations
|
||||||
|
|
||||||
|
# Using ints2 to get smaller values to avoid integer overflow.
|
||||||
|
primitive_total_aggregate = Primitive_Total_Aggregate.Instance (t.at "ints2")
|
||||||
|
primitive_total_aggregate.verify_correctness
|
||||||
|
# GC not needed here as this should be O(1) memory.
|
||||||
|
Bench.measure (primitive_total_aggregate.current_aggregate_implementation) "Primitive_Total_Aggregate.current_aggregate_implementation" iter_size num_iterations run_gc_between_iterations=False
|
||||||
|
Bench.measure (primitive_total_aggregate.java_loop) "Primitive_Total_Aggregate.java_loop" iter_size num_iterations run_gc_between_iterations=False
|
||||||
|
Bench.measure (primitive_total_aggregate.enso_aggregate_vector_proxy) "Primitive_Total_Aggregate.enso_aggregate_vector_proxy" iter_size num_iterations run_gc_between_iterations=False
|
||||||
|
Bench.measure (primitive_total_aggregate.enso_aggregate_storage_get_item) "Primitive_Total_Aggregate.enso_aggregate_storage_get_item" iter_size num_iterations run_gc_between_iterations=False
|
||||||
|
|
||||||
|
# The date benchmark reads the "dates" column of the second input table.
boxed_sum_months = Boxed_Sum_Months.Instance (t2.at "dates")
|
||||||
|
boxed_sum_months.verify_correctness
|
||||||
|
Bench.measure (boxed_sum_months.java_loop) "Boxed_Sum_Months.java_loop" iter_size num_iterations run_gc_between_iterations=False
|
||||||
|
Bench.measure (boxed_sum_months.enso_aggregate_vector_proxy) "Boxed_Sum_Months.enso_aggregate_vector_proxy" iter_size num_iterations run_gc_between_iterations=False
|
||||||
|
Bench.measure (boxed_sum_months.enso_aggregate_storage_get_item) "Boxed_Sum_Months.enso_aggregate_storage_get_item" iter_size num_iterations run_gc_between_iterations=False
|
||||||
|
|
||||||
|
# The text benchmark reads the "text" column of the first input table.
boxed_total_aggregate = Boxed_Total_Aggregate.Instance (t.at "text")
|
||||||
|
boxed_total_aggregate.verify_correctness
|
||||||
|
Bench.measure (boxed_total_aggregate.current_aggregate_implementation) "Boxed_Total_Aggregate.current_aggregate_implementation" iter_size num_iterations run_gc_between_iterations=False
|
||||||
|
Bench.measure (boxed_total_aggregate.java_loop) "Boxed_Total_Aggregate.java_loop" iter_size num_iterations run_gc_between_iterations=False
|
||||||
|
Bench.measure (boxed_total_aggregate.enso_aggregate_vector_proxy) "Boxed_Total_Aggregate.enso_aggregate_vector_proxy" iter_size num_iterations run_gc_between_iterations=False
|
||||||
|
Bench.measure (boxed_total_aggregate.enso_aggregate_storage_get_item) "Boxed_Total_Aggregate.enso_aggregate_storage_get_item" iter_size num_iterations run_gc_between_iterations=False
|
104
test/Exploratory_Benchmarks/src/Table/Column_Bi_Map.enso
Normal file
104
test/Exploratory_Benchmarks/src/Table/Column_Bi_Map.enso
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
from Standard.Base import all
|
||||||
|
from Standard.Table import all
|
||||||
|
|
||||||
|
from Standard.Test import Bench
|
||||||
|
|
||||||
|
import project.Table.Common_Setup.Common_Setup
|
||||||
|
import project.Table.Helpers
|
||||||
|
|
||||||
|
polyglot java import org.enso.exploratory_benchmark_helpers.MapHelpers
|
||||||
|
polyglot java import org.enso.table.data.column.builder.NumericBuilder
|
||||||
|
polyglot java import org.enso.table.data.column.builder.StringBuilder
|
||||||
|
|
||||||
|
# Adding two String columns
|
||||||
|
type Boxed_Bi_Map_Test
|
||||||
|
Instance text_column_1 text_column_2
|
||||||
|
|
||||||
|
# Approach: the `+` operator applied to the two columns.
current_implementation self =
|
||||||
|
self.text_column_1 + self.text_column_2
|
||||||
|
|
||||||
|
# Approach: the whole loop runs in Java, in `MapHelpers.stringConcatBimap`; the returned storage is wrapped in a column.
java_map self =
|
||||||
|
Column.from_storage "result" <|
|
||||||
|
MapHelpers.stringConcatBimap self.text_column_1.java_column.getStorage self.text_column_2.java_column.getStorage
|
||||||
|
|
||||||
|
# Approach: `zip` the two `to_vector` results with `+` in Enso, then build a column from the resulting vector.
# `convert_polyglot_dates` is forwarded unchanged to `Helpers.column_from_vector`.
enso_map_as_vector self convert_polyglot_dates =
|
||||||
|
vector_proxy_1 = self.text_column_1.to_vector
|
||||||
|
vector_proxy_2 = self.text_column_2.to_vector
|
||||||
|
mapped = vector_proxy_1.zip vector_proxy_2 (+)
|
||||||
|
Helpers.column_from_vector "result" mapped convert_polyglot_dates=convert_polyglot_dates
|
||||||
|
|
||||||
|
# Approach: an Enso loop over indices that reads both Java storages and appends to a Java `StringBuilder` column builder.
# A row gets a null when either input item is `Nothing`. Panics when the column lengths differ.
enso_map_with_builder self =
|
||||||
|
n = self.text_column_1.length
|
||||||
|
if self.text_column_2.length != n then Panic.throw "LENGTH MISMATCH" else
|
||||||
|
builder = StringBuilder.new n
|
||||||
|
storage_1 = self.text_column_1.java_column.getStorage
|
||||||
|
storage_2 = self.text_column_2.java_column.getStorage
|
||||||
|
0.up_to n . each i->
|
||||||
|
item_1 = storage_1.getItemBoxed i
|
||||||
|
item_2 = storage_2.getItemBoxed i
|
||||||
|
if item_1.is_nothing || item_2.is_nothing then builder.appendNulls 1 else
|
||||||
|
res = item_1 + item_2
|
||||||
|
builder.append res
|
||||||
|
Column.from_storage "result" builder.seal
|
||||||
|
|
||||||
|
# Runs every approach once (the vector approach with both flag values) and passes the results to `Helpers.check_results`.
verify_correctness self =
|
||||||
|
Helpers.check_results [self.current_implementation, self.java_map, self.enso_map_as_vector convert_polyglot_dates=True, self.enso_map_as_vector convert_polyglot_dates=False, self.enso_map_with_builder]
|
||||||
|
|
||||||
|
## Benchmark variants for adding two Long columns element by element.
   Every variant returns a column named "result".
type Primitive_Bi_Map_Test
    Instance int_column_1 int_column_2

    ## Uses the `+` operator of the columns directly.
    current_implementation self =
        self.int_column_1 + self.int_column_2

    ## Hands both underlying storages to the Java helper
       `MapHelpers.longAddBimap`.
    java_map self =
        left_storage = self.int_column_1.java_column.getStorage
        right_storage = self.int_column_2.java_column.getStorage
        Column.from_storage "result" (MapHelpers.longAddBimap left_storage right_storage)

    ## Converts both columns to vectors, zips them with `+` and rebuilds a
       column through `Helpers.column_from_vector`.
    enso_map_as_vector self convert_polyglot_dates =
        left_values = self.int_column_1.to_vector
        right_values = self.int_column_2.to_vector
        sums = left_values.zip right_values (+)
        Helpers.column_from_vector "result" sums convert_polyglot_dates=convert_polyglot_dates

    ## Loops over the rows in Enso, checking `isNa` on both storages before
       reading the items with `getItem` and appending their sum with
       `appendLong`. Panics if the columns differ in length.
    enso_map_with_builder self =
        row_count = self.int_column_1.length
        if self.int_column_2.length != row_count then Panic.throw "LENGTH MISMATCH" else
            left_storage = self.int_column_1.java_column.getStorage
            right_storage = self.int_column_2.java_column.getStorage
            result_builder = NumericBuilder.createLongBuilder row_count
            0.up_to row_count . each row->
                if left_storage.isNa row || right_storage.isNa row then result_builder.appendNulls 1 else
                    left = left_storage.getItem row
                    right = right_storage.getItem row
                    result_builder.appendLong (left + right)
            Column.from_storage "result" result_builder.seal

    ## Runs every variant once and passes the results to
       `Helpers.check_results`, with `current_implementation` first.
    verify_correctness self =
        current = self.current_implementation
        java = self.java_map
        vector_with_conversion = self.enso_map_as_vector convert_polyglot_dates=True
        vector_without_conversion = self.enso_map_as_vector convert_polyglot_dates=False
        with_builder = self.enso_map_with_builder
        Helpers.check_results [current, java, vector_with_conversion, vector_without_conversion, with_builder]
|
||||||
|
|
||||||
|
main = spec (Common_Setup.Config)

## Verifies and then measures every variant of the two-column benchmarks,
   first for the integer columns and then for the text columns of the
   generated input table.
spec setup =
    batch_size = setup.iter_size
    iterations = setup.num_iterations
    gc_between = setup.force_gc

    input_table = setup.generate_input_table

    ints_case = Primitive_Bi_Map_Test.Instance (input_table.at "ints") (input_table.at "ints2")
    ints_case.verify_correctness
    Bench.measure (ints_case.current_implementation) "Primitive_Bi_Map_Test.current_implementation" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (ints_case.java_map) "Primitive_Bi_Map_Test.java_map" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (ints_case.enso_map_as_vector convert_polyglot_dates=True) "Primitive_Bi_Map_Test.enso_map_as_vector+date_conversion" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (ints_case.enso_map_as_vector convert_polyglot_dates=False) "Primitive_Bi_Map_Test.enso_map_as_vector" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (ints_case.enso_map_with_builder) "Primitive_Bi_Map_Test.enso_map_with_builder" batch_size iterations run_gc_between_iterations=gc_between

    texts_case = Boxed_Bi_Map_Test.Instance (input_table.at "text") (input_table.at "text2")
    texts_case.verify_correctness
    Bench.measure (texts_case.current_implementation) "Boxed_Bi_Map_Test.current_implementation" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (texts_case.java_map) "Boxed_Bi_Map_Test.java_map" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (texts_case.enso_map_as_vector convert_polyglot_dates=True) "Boxed_Bi_Map_Test.enso_map_as_vector+date_conversion" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (texts_case.enso_map_as_vector convert_polyglot_dates=False) "Boxed_Bi_Map_Test.enso_map_as_vector" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (texts_case.enso_map_with_builder) "Boxed_Bi_Map_Test.enso_map_with_builder" batch_size iterations run_gc_between_iterations=gc_between
|
111
test/Exploratory_Benchmarks/src/Table/Column_Map.enso
Normal file
111
test/Exploratory_Benchmarks/src/Table/Column_Map.enso
Normal file
@ -0,0 +1,111 @@
|
|||||||
|
from Standard.Base import all
|
||||||
|
from Standard.Table import all
|
||||||
|
|
||||||
|
from Standard.Test import Bench
|
||||||
|
|
||||||
|
import project.Table.Common_Setup.Common_Setup
|
||||||
|
import project.Table.Helpers
|
||||||
|
|
||||||
|
polyglot java import org.enso.exploratory_benchmark_helpers.MapHelpers
|
||||||
|
polyglot java import org.enso.table.data.column.builder.BoolBuilder
|
||||||
|
polyglot java import org.enso.table.data.column.builder.NumericBuilder
|
||||||
|
|
||||||
|
## Benchmarks an operation on a boxed value (here: `ends_with` on a String).
   This is the baseline comparison between the vectorized Java operation and
   the approaches implemented in Enso; the goal is for the Enso approaches to
   reach performance comparable to the Java one.
type Boxed_Map_Test
    Instance text_column (suffix : Text)

    ## Uses the `ends_with` operation of the column directly.
    current_implementation self =
        self.text_column.ends_with self.suffix

    ## Hands the underlying storage and the suffix to the Java helper
       `MapHelpers.textEndsWith`.
    java_map self =
        text_storage = self.text_column.java_column.getStorage
        Column.from_storage "result" (MapHelpers.textEndsWith text_storage self.suffix)

    ## Maps `ends_with` over the column converted to a vector and rebuilds a
       column through `Helpers.column_from_vector`.
    enso_map_as_vector self convert_polyglot_dates =
        wanted_suffix = self.suffix
        texts = self.text_column.to_vector
        flags = texts.map text-> text.ends_with wanted_suffix
        Helpers.column_from_vector "result" flags convert_polyglot_dates=convert_polyglot_dates

    ## Loops over the rows in Enso, reading boxed items from the storage and
       appending to a Java `BoolBuilder`; `Nothing` items become nulls.
    enso_map_with_builder self =
        wanted_suffix = self.suffix
        row_count = self.text_column.length
        text_storage = self.text_column.java_column.getStorage
        flag_builder = BoolBuilder.new row_count
        0.up_to row_count . each row->
            text = text_storage.getItemBoxed row
            case text of
                Nothing -> flag_builder.appendNulls 1
                _ -> flag_builder.appendBoolean (text.ends_with wanted_suffix)
        Column.from_storage "result" flag_builder.seal

    ## Runs every variant once and passes the results to
       `Helpers.check_results`, with `current_implementation` first.
    verify_correctness self =
        current = self.current_implementation
        java = self.java_map
        vector_with_conversion = self.enso_map_as_vector convert_polyglot_dates=True
        vector_without_conversion = self.enso_map_as_vector convert_polyglot_dates=False
        with_builder = self.enso_map_with_builder
        Helpers.check_results [current, java, vector_with_conversion, vector_without_conversion, with_builder]
|
||||||
|
|
||||||
|
## Benchmarks an operation on a primitive value that Java stores unboxed
   (here: `+` on a LongStorage).
   This benchmark is more demanding, because the Java side can easily use the
   unboxed values everywhere. Comparable performance may be harder to reach
   here, but we want to know how big the difference is and, ideally, to get
   closer in this case as well.
type Primitive_Map_Test
    Instance int_column (shift : Integer)

    ## Uses the `+` operator of the column directly.
    current_implementation self =
        self.int_column + self.shift

    ## Hands the underlying storage and the shift to the Java helper
       `MapHelpers.longAdd`.
    java_map self =
        int_storage = self.int_column.java_column.getStorage
        Column.from_storage "result" (MapHelpers.longAdd int_storage self.shift)

    ## Maps the addition over the column converted to a vector and rebuilds a
       column through `Helpers.column_from_vector`.
    enso_map_as_vector self convert_polyglot_dates =
        offset = self.shift
        numbers = self.int_column.to_vector
        shifted = numbers.map number-> number + offset
        Helpers.column_from_vector "result" shifted convert_polyglot_dates=convert_polyglot_dates

    ## Loops over the rows in Enso, checking `isNa` before reading each item
       with `getItem` and appending the shifted value with `appendLong`.
    enso_map_with_builder self =
        offset = self.shift
        row_count = self.int_column.length
        int_storage = self.int_column.java_column.getStorage
        long_builder = NumericBuilder.createLongBuilder row_count
        0.up_to row_count . each row->
            case int_storage.isNa row of
                True -> long_builder.appendNulls 1
                False ->
                    number = int_storage.getItem row
                    long_builder.appendLong (number + offset)
        Column.from_storage "result" long_builder.seal

    ## Runs every variant once and passes the results to
       `Helpers.check_results`, with `current_implementation` first.
    verify_correctness self =
        current = self.current_implementation
        java = self.java_map
        vector_with_conversion = self.enso_map_as_vector convert_polyglot_dates=True
        vector_without_conversion = self.enso_map_as_vector convert_polyglot_dates=False
        with_builder = self.enso_map_with_builder
        Helpers.check_results [current, java, vector_with_conversion, vector_without_conversion, with_builder]
|
||||||
|
|
||||||
|
main = spec (Common_Setup.Config)

## Verifies and then measures every variant of the single-column map
   benchmarks, first for the integer column and then for the text column of
   the generated input table.
spec setup =
    batch_size = setup.iter_size
    iterations = setup.num_iterations
    gc_between = setup.force_gc

    input_table = setup.generate_input_table

    ints_case = Primitive_Map_Test.Instance (input_table.at "ints") 42
    ints_case.verify_correctness
    Bench.measure (ints_case.current_implementation) "Primitive_Map_Test.current_implementation" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (ints_case.java_map) "Primitive_Map_Test.java_map" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (ints_case.enso_map_as_vector convert_polyglot_dates=True) "Primitive_Map_Test.enso_map_as_vector+convert_dates" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (ints_case.enso_map_as_vector convert_polyglot_dates=False) "Primitive_Map_Test.enso_map_as_vector" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (ints_case.enso_map_with_builder) "Primitive_Map_Test.enso_map_with_builder" batch_size iterations run_gc_between_iterations=gc_between

    # The text benchmark appears to be the slowest, so it runs last.
    texts_case = Boxed_Map_Test.Instance (input_table.at "text") "5"
    texts_case.verify_correctness
    Bench.measure (texts_case.current_implementation) "Boxed_Map_Test.current_implementation" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (texts_case.java_map) "Boxed_Map_Test.java_map" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (texts_case.enso_map_as_vector convert_polyglot_dates=True) "Boxed_Map_Test.enso_map_as_vector+convert_dates" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (texts_case.enso_map_as_vector convert_polyglot_dates=False) "Boxed_Map_Test.enso_map_as_vector" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (texts_case.enso_map_with_builder) "Boxed_Map_Test.enso_map_with_builder" batch_size iterations run_gc_between_iterations=gc_between
|
77
test/Exploratory_Benchmarks/src/Table/Column_Map_2.enso
Normal file
77
test/Exploratory_Benchmarks/src/Table/Column_Map_2.enso
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
from Standard.Base import all
|
||||||
|
from Standard.Table import all
|
||||||
|
|
||||||
|
from Standard.Test import Bench
|
||||||
|
|
||||||
|
import project.Table.Common_Setup.Common_Setup
|
||||||
|
import project.Table.Helpers
|
||||||
|
|
||||||
|
polyglot java import org.enso.exploratory_benchmark_helpers.MapHelpers
|
||||||
|
polyglot java import org.enso.table.data.column.builder.NumericBuilder
|
||||||
|
|
||||||
|
## A second variant of `Boxed_Map_Test`.
   The first variant is based on `ends_with`, which is a costly operation
   because it relies on ICU and correct grapheme cluster handling. This
   variant uses the much simpler `Date.year` as a second point of comparison.
type Boxed_Map_Test_2
    Instance date_column

    ## Uses the `year` operation of the column directly.
    current_implementation self =
        self.date_column.year

    ## Hands the underlying storage to the Java helper `MapHelpers.getYear`.
    java_map self =
        date_storage = self.date_column.java_column.getStorage
        Column.from_storage "result" (MapHelpers.getYear date_storage)

    ## Maps `year` over the column converted to a vector.
       Opting out of `convert_polyglot_dates` is still correct here, because
       the conversion applies to the output, which is an Integer. If the
       output were another Date, opting out would not be correct.
    enso_map_as_vector self convert_polyglot_dates =
        dates = self.date_column.to_vector
        years = dates.map date-> date.year
        Helpers.column_from_vector "result" years convert_polyglot_dates=convert_polyglot_dates

    ## Loops over the rows in Enso, reading boxed items from the storage and
       appending each year with `appendLong`; `Nothing` items become nulls.
    enso_map_with_builder_append_long self =
        row_count = self.date_column.length
        date_storage = self.date_column.java_column.getStorage
        year_builder = NumericBuilder.createLongBuilder row_count
        0.up_to row_count . each row->
            case date_storage.getItemBoxed row of
                Nothing -> year_builder.appendNulls 1
                date -> year_builder.appendLong date.year
        Column.from_storage "result" year_builder.seal

    ## Same as `enso_map_with_builder_append_long`, except that each year is
       appended with `appendNoGrow` instead of `appendLong`. The original
       author suspected this could be more efficient, hence the separate test.
    enso_map_with_builder_append_object self =
        row_count = self.date_column.length
        date_storage = self.date_column.java_column.getStorage
        year_builder = NumericBuilder.createLongBuilder row_count
        0.up_to row_count . each row->
            case date_storage.getItemBoxed row of
                Nothing -> year_builder.appendNulls 1
                date -> year_builder.appendNoGrow date.year
        Column.from_storage "result" year_builder.seal

    ## Runs every variant once and passes the results to
       `Helpers.check_results`, with `current_implementation` first.
    verify_correctness self =
        current = self.current_implementation
        java = self.java_map
        vector_with_conversion = self.enso_map_as_vector convert_polyglot_dates=True
        vector_without_conversion = self.enso_map_as_vector convert_polyglot_dates=False
        append_long = self.enso_map_with_builder_append_long
        append_object = self.enso_map_with_builder_append_object
        Helpers.check_results [current, java, vector_with_conversion, vector_without_conversion, append_long, append_object]
|
||||||
|
|
||||||
|
main = spec (Common_Setup.Config)

## Verifies and then measures every variant of `Boxed_Map_Test_2` on the
   generated table of dates.
spec setup =
    t = setup.generate_input_table_date

    iter_size = setup.iter_size
    num_iterations = setup.num_iterations
    should_run_gc = setup.force_gc

    boxed_map = Boxed_Map_Test_2.Instance (t.at "dates")
    # Check that all variants agree before timing them, so that a broken
    # variant is reported instead of being silently measured.
    boxed_map.verify_correctness
    Bench.measure (boxed_map.current_implementation) "Boxed_Map_Test_2.current_implementation" iter_size num_iterations run_gc_between_iterations=should_run_gc
    Bench.measure (boxed_map.java_map) "Boxed_Map_Test_2.java_map" iter_size num_iterations run_gc_between_iterations=should_run_gc
    Bench.measure (boxed_map.enso_map_as_vector convert_polyglot_dates=True) "Boxed_Map_Test_2.enso_map_as_vector+convert_dates" iter_size num_iterations run_gc_between_iterations=should_run_gc
    Bench.measure (boxed_map.enso_map_as_vector convert_polyglot_dates=False) "Boxed_Map_Test_2.enso_map_as_vector" iter_size num_iterations run_gc_between_iterations=should_run_gc
    Bench.measure (boxed_map.enso_map_with_builder_append_long) "Boxed_Map_Test_2.enso_map_with_builder_append_long" iter_size num_iterations run_gc_between_iterations=should_run_gc
    Bench.measure (boxed_map.enso_map_with_builder_append_object) "Boxed_Map_Test_2.enso_map_with_builder_append_object" iter_size num_iterations run_gc_between_iterations=should_run_gc
|
19
test/Exploratory_Benchmarks/src/Table/Common_Setup.enso
Normal file
19
test/Exploratory_Benchmarks/src/Table/Common_Setup.enso
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
from Standard.Base import all
|
||||||
|
from Standard.Table import all
|
||||||
|
|
||||||
|
## Shared configuration of the exploratory Table benchmarks, together with
   generators of the input tables.
type Common_Setup
    ## Holds the number of generated rows (`n`) and the settings that the
       benchmark specs pass on to `Bench.measure`.
    Config (n : Integer = 10^6) (iter_size : Integer = 1) (num_iterations : Integer = 100) (force_gc : Boolean = False)

    ## Generates a table with `n` rows and four columns: "ints" holding
       0 up to `n` (exclusive), "ints2" holding the same values reversed and
       taken modulo 20, and "text" / "text2" holding their text forms.
    generate_input_table : Table
    generate_input_table self =
        row_count = self.n
        ascending = (0.up_to row_count).to_vector
        cyclic = ascending.reverse.map (value-> value % 20)
        ascending_texts = ascending.map .to_text
        cyclic_texts = cyclic.map .to_text
        Table.new [["ints", ascending], ["text", ascending_texts], ["ints2", cyclic], ["text2", cyclic_texts]]

    ## Generates a single-column table "dates" with `n` consecutive days
       starting at 1999-01-01.
    generate_input_table_date : Table
    generate_input_table_date self =
        row_count = self.n
        first_day = Date.new 1999 1 1
        days = (0.up_to row_count).map offset->
            first_day . date_add offset Date_Period.Day
        Table.new [["dates", days]]
|
140
test/Exploratory_Benchmarks/src/Table/Enso_Callback.enso
Normal file
140
test/Exploratory_Benchmarks/src/Table/Enso_Callback.enso
Normal file
@ -0,0 +1,140 @@
|
|||||||
|
from Standard.Base import all
|
||||||
|
|
||||||
|
from Standard.Table import all
|
||||||
|
import Standard.Table.Data.Type.Storage
|
||||||
|
|
||||||
|
from Standard.Test import Bench
|
||||||
|
|
||||||
|
import project.Table.Common_Setup.Common_Setup
|
||||||
|
import project.Table.Helpers
|
||||||
|
|
||||||
|
polyglot java import org.enso.exploratory_benchmark_helpers.MapHelpers
|
||||||
|
polyglot java import org.enso.table.data.column.builder.StringBuilder
|
||||||
|
polyglot java import org.enso.table.data.column.builder.NumericBuilder
|
||||||
|
polyglot java import org.enso.table.data.column.operation.map.MapOperationProblemBuilder
|
||||||
|
polyglot java import org.enso.table.data.table.Column as Java_Column
|
||||||
|
|
||||||
|
## Benchmarks an operation that runs an Enso function on every element of a
   column. It compares the cost of calling back into Enso from Java with the
   cost of staying in Enso.
type Boxed_Enso_Callback_Test
    Instance text_column (fn : Text -> Text)

    ## Uses the `map` operation of the column directly.
    current_implementation self =
        self.text_column.map self.fn

    ## Hands the storage, the Enso function and the expected Char storage
       type to the Java helper `MapHelpers.mapCallback`.
    java_roundtrip self =
        char_storage_type = Storage.from_value_type_strict Value_Type.Char
        text_storage = self.text_column.java_column.getStorage
        Column.from_storage "result" (MapHelpers.mapCallback text_storage self.fn char_storage_type)

    ## Maps the function over the column converted to a vector and rebuilds a
       column through `Helpers.column_from_vector`.
    enso_map_as_vector self convert_polyglot_dates =
        texts = self.text_column.to_vector
        transformed = texts.map self.fn
        Helpers.column_from_vector "result" transformed convert_polyglot_dates=convert_polyglot_dates

    ## Loops over the rows in Enso, reading boxed items from the storage and
       appending the function result to a Java `StringBuilder`; `Nothing`
       items become nulls.
    enso_map_with_builder self =
        callback = self.fn
        row_count = self.text_column.length
        text_storage = self.text_column.java_column.getStorage
        text_builder = StringBuilder.new row_count
        0.up_to row_count . each row->
            case text_storage.getItemBoxed row of
                Nothing -> text_builder.appendNulls 1
                text -> text_builder.append (callback text)
        Column.from_storage "result" text_builder.seal

    ## Runs every variant once and passes the results to
       `Helpers.check_results`, with `current_implementation` first.
    verify_correctness self =
        current = self.current_implementation
        roundtrip = self.java_roundtrip
        vector_with_conversion = self.enso_map_as_vector convert_polyglot_dates=True
        vector_without_conversion = self.enso_map_as_vector convert_polyglot_dates=False
        with_builder = self.enso_map_with_builder
        Helpers.check_results [current, roundtrip, vector_with_conversion, vector_without_conversion, with_builder]
|
||||||
|
|
||||||
|
## The callback benchmark applied to a column of a primitive type.
type Primitive_Enso_Callback_Test
    Instance int_column (shift : Integer)

    ## Returns the function applied to every element: it adds `shift`, which
       is read from `self` once, when the function is created.
    fn self =
        offset = self.shift
        number-> number + offset

    ## Uses the `map` operation of the column directly.
    current_implementation_as_map self =
        self.int_column.map self.fn

    ## Performs the addition through the Java helper `MapHelpers.longAdd`,
       without any Enso callback.
    java_vectorized self =
        int_storage = self.int_column.java_column.getStorage
        Column.from_storage "result" (MapHelpers.longAdd int_storage self.shift)

    ## Hands the storage, the Enso function and the expected Integer storage
       type to the Java helper `MapHelpers.mapCallback`.
    java_roundtrip self =
        integer_storage_type = Storage.from_value_type_strict Value_Type.Integer
        int_storage = self.int_column.java_column.getStorage
        Column.from_storage "result" (MapHelpers.mapCallback int_storage self.fn integer_storage_type)

    ## Maps the function over the column converted to a vector and rebuilds a
       column through `Helpers.column_from_vector`.
    enso_map_as_vector_inferred_builder self convert_polyglot_dates =
        numbers = self.int_column.to_vector
        shifted = numbers.map self.fn
        Helpers.column_from_vector "result" shifted convert_polyglot_dates=convert_polyglot_dates

    ## Maps the function over the column converted to a vector and rebuilds a
       column with `Java_Column.fromItemsNoDateConversion`.
    enso_map_as_vector_long_builder self =
        numbers = self.int_column.to_vector
        shifted = numbers.map self.fn
        # Without an expected storage type the builder would be inferred (per
        # the original author's note); an explicit Integer storage type is
        # requested here instead.
        integer_storage_type = Storage.from_value_type_strict Value_Type.Integer
        Column.Value (Java_Column.fromItemsNoDateConversion "result" shifted integer_storage_type)

    ## Loops over the rows in Enso using two storage calls per row: `isNa`
       followed by `getItem`. The function result is appended with `append`.
    enso_map_with_builder_2_calls_unboxed self =
        callback = self.fn
        row_count = self.int_column.length
        int_storage = self.int_column.java_column.getStorage
        long_builder = NumericBuilder.createLongBuilder row_count
        0.up_to row_count . each row->
            case int_storage.isNa row of
                True -> long_builder.appendNulls 1
                False ->
                    number = int_storage.getItem row
                    long_builder.append (callback number)
        Column.from_storage "result" long_builder.seal

    ## Loops over the rows in Enso using a single `getItemBoxed` call per
       row; `Nothing` items become nulls. The function result is appended
       with `append`.
    enso_map_with_builder_1_call_boxed self =
        callback = self.fn
        row_count = self.int_column.length
        int_storage = self.int_column.java_column.getStorage
        long_builder = NumericBuilder.createLongBuilder row_count
        0.up_to row_count . each row->
            case int_storage.getItemBoxed row of
                Nothing -> long_builder.appendNulls 1
                number -> long_builder.append (callback number)
        Column.from_storage "result" long_builder.seal

    ## Runs every variant once and passes the results to
       `Helpers.check_results`, with `current_implementation_as_map` first.
    verify_correctness self =
        current = self.current_implementation_as_map
        vectorized = self.java_vectorized
        roundtrip = self.java_roundtrip
        inferred_with_conversion = self.enso_map_as_vector_inferred_builder convert_polyglot_dates=True
        inferred_without_conversion = self.enso_map_as_vector_inferred_builder convert_polyglot_dates=False
        long_builder = self.enso_map_as_vector_long_builder
        two_calls = self.enso_map_with_builder_2_calls_unboxed
        one_call = self.enso_map_with_builder_1_call_boxed
        Helpers.check_results [current, vectorized, roundtrip, inferred_with_conversion, inferred_without_conversion, long_builder, two_calls, one_call]
|
||||||
|
|
||||||
|
main = spec Common_Setup.Config

## Verifies and then measures every variant of the Enso callback benchmarks,
   first for the integer column and then for the text column of the generated
   input table.
spec setup =
    batch_size = setup.iter_size
    iterations = setup.num_iterations
    gc_between = setup.force_gc

    input_table = setup.generate_input_table

    ints_case = Primitive_Enso_Callback_Test.Instance (input_table.at "ints") 42
    ints_case.verify_correctness
    Bench.measure (ints_case.current_implementation_as_map) "Primitive_Enso_Callback_Test.current_implementation_as_map" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (ints_case.java_vectorized) "Primitive_Enso_Callback_Test.java_vectorized" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (ints_case.java_roundtrip) "Primitive_Enso_Callback_Test.java_roundtrip" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (ints_case.enso_map_as_vector_inferred_builder convert_polyglot_dates=False) "Primitive_Enso_Callback_Test.enso_map_as_vector_inferred_builder" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (ints_case.enso_map_as_vector_inferred_builder convert_polyglot_dates=True) "Primitive_Enso_Callback_Test.enso_map_as_vector_inferred_builder_and_date_conversions" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (ints_case.enso_map_as_vector_long_builder) "Primitive_Enso_Callback_Test.enso_map_as_vector_long_builder" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (ints_case.enso_map_with_builder_2_calls_unboxed) "Primitive_Enso_Callback_Test.enso_map_with_builder_2_calls_unboxed" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (ints_case.enso_map_with_builder_1_call_boxed) "Primitive_Enso_Callback_Test.enso_map_with_builder_1_call_boxed" batch_size iterations run_gc_between_iterations=gc_between

    # The callback used for the text column surrounds each value with bars.
    surround_with_bars text = "|" + text + "|"
    texts_case = Boxed_Enso_Callback_Test.Instance (input_table.at "text") surround_with_bars
    texts_case.verify_correctness
    Bench.measure (texts_case.current_implementation) "Boxed_Enso_Callback_Test.current_implementation" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (texts_case.java_roundtrip) "Boxed_Enso_Callback_Test.java_roundtrip" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (texts_case.enso_map_as_vector convert_polyglot_dates=False) "Boxed_Enso_Callback_Test.enso_map_as_vector_without_date_conversion" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (texts_case.enso_map_as_vector convert_polyglot_dates=True) "Boxed_Enso_Callback_Test.enso_map_as_vector_with_date_conversion" batch_size iterations run_gc_between_iterations=gc_between
    Bench.measure (texts_case.enso_map_with_builder) "Boxed_Enso_Callback_Test.enso_map_with_builder" batch_size iterations run_gc_between_iterations=gc_between
|
34
test/Exploratory_Benchmarks/src/Table/Helpers.enso
Normal file
34
test/Exploratory_Benchmarks/src/Table/Helpers.enso
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
from Standard.Base import all
|
||||||
|
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||||
|
import Standard.Base.Errors.Illegal_State.Illegal_State
|
||||||
|
|
||||||
|
from Standard.Table import all
|
||||||
|
|
||||||
|
polyglot java import org.enso.table.data.table.Column as Java_Column
|
||||||
|
|
||||||
|
## PRIVATE
   Builds a column from a vector of items, essentially mirroring
   `Column.from_vector`, while letting the caller decide whether polyglot
   date conversions are performed.
   Having a separate helper keeps the benchmarks stable regardless of changes
   to `Column.from_vector`.
column_from_vector : Text -> Vector -> Boolean -> Column
column_from_vector name items convert_polyglot_dates =
    # No storage type is requested from the Java side.
    requested_storage_type = Nothing
    Illegal_Argument.handle_java_exception <|
        java_column = if convert_polyglot_dates then Java_Column.fromItems name items requested_storage_type else
            Java_Column.fromItemsNoDateConversion name items requested_storage_type
        Column.Value java_column
|
||||||
|
|
||||||
|
## Checks that all given results are equal to the first one.
   Results that are a `Column` are compared by their `to_vector` contents;
   any other value is compared as is. On the first mismatch, the reference
   and the offending result are printed and an `Illegal_State` panic is
   thrown.
check_results results =
    comparable = results.map candidate-> case candidate of
        _ : Column -> candidate.to_vector
        _ -> candidate
    baseline = comparable.first
    comparable.each_with_index position-> actual->
        if actual != baseline then
            IO.println "Mismatched results: "
            IO.println ("Reference: " + baseline.to_display_text)
            IO.println ("Result (ix=" + position.to_text + "): " + actual.to_display_text)
            Panic.throw (Illegal_State.Error ("The benchmark result ix=" + position.to_text + " does not match the 0th one."))
|
19
test/Exploratory_Benchmarks/src/Table/Main.enso
Normal file
19
test/Exploratory_Benchmarks/src/Table/Main.enso
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
## NOTE
|
||||||
|
This file is _not_ automatically run when the benchmarks are run.
|
||||||
|
It can be used to run all performance tests in a single run.
|
||||||
|
import project.Table.Column_Aggregate
|
||||||
|
import project.Table.Column_Bi_Map
|
||||||
|
import project.Table.Column_Map
|
||||||
|
import project.Table.Column_Map_2
|
||||||
|
import project.Table.Common_Setup.Common_Setup
|
||||||
|
import project.Table.Enso_Callback
|
||||||
|
|
||||||
|
## Runs all exploratory Table benchmarks, one after another, with a single
   default configuration.
spec =
    shared_config = Common_Setup.Config
    Column_Map.spec shared_config
    Column_Map_2.spec shared_config
    Column_Bi_Map.spec shared_config
    Column_Aggregate.spec shared_config
    Enso_Callback.spec shared_config

main = spec
|
5
tools/performance/benchmark-analysis/README.md
Normal file
5
tools/performance/benchmark-analysis/README.md
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
# Analysing benchmarks
|
||||||
|
|
||||||
|
This workflow is prepared mostly to analyse the output of benchmarks from
|
||||||
|
`test/Exploratory_Benchmarks`. See `test/Exploratory_Benchmarks/README.md` for
|
||||||
|
more information.
|
File diff suppressed because it is too large
Load Diff
8
tools/performance/benchmark-analysis/package.yaml
Normal file
8
tools/performance/benchmark-analysis/package.yaml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
name: Vectorized_Benchmarks
|
||||||
|
namespace: local
|
||||||
|
version: 0.0.1
|
||||||
|
license: ""
|
||||||
|
authors: []
|
||||||
|
maintainers: []
|
||||||
|
edition: 0.0.0-dev
|
||||||
|
prefer-local-libraries: true
|
59
tools/performance/benchmark-analysis/src/Main.enso
Normal file
59
tools/performance/benchmark-analysis/src/Main.enso
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user