Add benchmarks comparing performance of Table operations 'vectorized' in Java vs performed in Enso (#7270)

The added benchmark is a basis for a performance investigation.

We compare the performance of the same operation run in Java vs Enso to see what the overhead is and to try to bring the Enso operations closer to pure-Java performance.
This commit is contained in:
Radosław Waśko 2023-07-21 19:25:02 +02:00 committed by GitHub
parent 3e3b823620
commit 56635c9a88
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 6031 additions and 15 deletions

View File

@ -303,7 +303,8 @@ lazy val enso = (project in file("."))
`std-table`,
`std-aws`,
`simple-httpbin`,
`enso-test-java-helpers`
`enso-test-java-helpers`,
`exploratory-benchmark-java-helpers`
)
.settings(Global / concurrentRestrictions += Tags.exclusive(Exclusive))
.settings(
@ -1359,6 +1360,7 @@ lazy val runtime = (project in file("engine/runtime"))
(Runtime / compile) := (Runtime / compile)
.dependsOn(`std-base` / Compile / packageBin)
.dependsOn(`enso-test-java-helpers` / Compile / packageBin)
.dependsOn(`exploratory-benchmark-java-helpers` / Compile / packageBin)
.dependsOn(`std-image` / Compile / packageBin)
.dependsOn(`std-database` / Compile / packageBin)
.dependsOn(`std-google-api` / Compile / packageBin)
@ -2017,6 +2019,26 @@ lazy val `enso-test-java-helpers` = project
.dependsOn(`std-base` % "provided")
.dependsOn(`std-table` % "provided")
// Java helper classes used by the `Exploratory_Benchmarks` Enso project.
// The sources live under the project's `polyglot-sources` directory and the
// packaged jar is written into its `polyglot/java` directory.
lazy val `exploratory-benchmark-java-helpers` = project
  .in(
    file(
      "test/Exploratory_Benchmarks/polyglot-sources/exploratory-benchmark-java-helpers"
    )
  )
  .settings(
    frgaalJavaCompilerSetting,
    // Pure Java project: do not add the Scala standard library.
    autoScalaLibrary := false
  )
  .settings(
    libraryDependencies +=
      "org.graalvm.sdk" % "graal-sdk" % graalMavenPackagesVersion % "provided",
    // Redirect the packaged jar to the location given below.
    Compile / packageBin / artifactPath :=
      file(
        "test/Exploratory_Benchmarks/polyglot/java/exploratory-benchmark-java-helpers.jar"
      )
  )
  // Both standard-library modules are declared with "provided" scope.
  .dependsOn(`std-base` % "provided", `std-table` % "provided")
lazy val `std-table` = project
.in(file("std-bits") / "table")
.enablePlugins(Antlr4Plugin)
@ -2340,11 +2362,13 @@ pkgStdLibInternal := Def.inputTask {
(`std-table` / Compile / packageBin).value
case "TestHelpers" =>
(`enso-test-java-helpers` / Compile / packageBin).value
(`exploratory-benchmark-java-helpers` / Compile / packageBin).value
case "AWS" =>
(`std-aws` / Compile / packageBin).value
case _ if buildAllCmd =>
(`std-base` / Compile / packageBin).value
(`enso-test-java-helpers` / Compile / packageBin).value
(`exploratory-benchmark-java-helpers` / Compile / packageBin).value
(`std-table` / Compile / packageBin).value
(`std-database` / Compile / packageBin).value
(`std-image` / Compile / packageBin).value

View File

@ -47,8 +47,9 @@ type Column
Column.from_vector "My Column" [1, 2, 3, 4, 5]
from_vector : Text -> Vector -> Column
from_vector name items =
expected_storage_type = Nothing
Illegal_Argument.handle_java_exception <|
Column.Value (Java_Column.fromItems name items)
Column.Value (Java_Column.fromItems name items expected_storage_type)
## PRIVATE
Creates a new column given a name and an internal Java storage.

View File

@ -70,6 +70,15 @@ type Bench
- label: A name for the measurement.
- iter_size: The number of runs per iteration.
- num_iters: The number of iterations per measurement.
- run_gc_between_iterations: Whether to try running the garbage collector
between iterations. Defaults to False. This is helpful when testing
memory intensive operations, to ensure that GC runs between iterations
and not _during_ iterations. The time taken to run the requested
garbage collection will not be counted into the iteration time, however
there is no guarantee that the JVM will actually accept the GC hint and
it is still possible the JVM may run GC during an iteration. But
setting this option to True should make it less likely for GC to
interrupt measurements.
> Example
Measure a computation called "foo" with an iteration size of 2 and a number
@ -80,8 +89,8 @@ type Bench
example_measure =
Bench.measure Examples.get_boolean "foo" iter_size=2 num_iters=1
measure : Any -> Text -> Integer -> Integer -> Nothing
measure ~act label iter_size num_iters =
measure : Any -> Text -> Integer -> Integer -> Boolean -> Nothing
measure ~act label iter_size num_iters run_gc_between_iterations=False =
dry_run = Environment.get "ENSO_BENCHMARK_TEST_DRY_RUN" "False" == "True"
result = Ref.new 0.0
single_call = _ ->
@ -90,6 +99,8 @@ type Bench
x2 = System.nano_time
x2 - x1
iteration = it_size -> it_num ->
if run_gc_between_iterations then
Runtime.gc
act_it_num = num_iters - it_num
res = times it_size single_call
avg = avg_list res

View File

@ -2,9 +2,11 @@ package org.enso.table.data.table;
import org.enso.base.Text_Utils;
import org.enso.base.polyglot.Polyglot_Utils;
import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.builder.InferredBuilder;
import org.enso.table.data.column.storage.BoolStorage;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.type.StorageType;
import org.enso.table.data.index.DefaultIndex;
import org.enso.table.data.index.Index;
import org.enso.table.data.mask.OrderMask;
@ -116,18 +118,13 @@ public class Column {
return new Column(name, storage);
}
/**
* Creates a new column with given name and elements.
*
* @param name the name to use
* @param items the items contained in the column
* @return a column with given name and items
*/
public static Column fromItems(String name, List<Value> items) {
/** Creates a column from an Enso array, ensuring Enso dates are converted to Java dates. */
public static Column fromItems(String name, List<Value> items, StorageType expectedType) throws ClassCastException {
Context context = Context.getCurrent();
InferredBuilder builder = new InferredBuilder(items.size());
int n = items.size();
Builder builder = expectedType == null ? new InferredBuilder(n) : Builder.getForType(expectedType, n);
// ToDo: This a workaround for an issue with polyglot layer. #5590 is related.
// to revert replace with: for (Value item : items) {
for (Object item : items) {
if (item instanceof Value v) {
Object converted = Polyglot_Utils.convertPolyglotValue(v);
@ -142,6 +139,20 @@ public class Column {
return new Column(name, storage);
}
/**
 * Creates a column from an Enso array without running any polyglot conversion on the items.
 *
 * <p>This is unsafe: every item is passed to the builder exactly as received.
 *
 * @param name the name of the new column
 * @param items the items to store, appended in iteration order
 * @param expectedType the storage type to build; when {@code null} an {@link InferredBuilder} is
 *     used instead
 * @return a column with the given name backed by the sealed builder storage
 */
public static Column fromItemsNoDateConversion(String name, List<Object> items, StorageType expectedType) throws ClassCastException {
  Context context = Context.getCurrent();
  final int count = items.size();
  final Builder builder;
  if (expectedType != null) {
    builder = Builder.getForType(expectedType, count);
  } else {
    builder = new InferredBuilder(count);
  }
  for (Object element : items) {
    // The builder was created with capacity `count`, so the no-grow append is used.
    builder.appendNoGrow(element);
    context.safepoint();
  }
  return new Column(name, builder.seal());
}
/**
* Creates a new column with given name and elements.
*
@ -155,7 +166,7 @@ public class Column {
}
if (repeat == 1) {
return fromItems(name, items);
return fromItems(name, items, null);
}
Context context = Context.getCurrent();

View File

@ -0,0 +1,52 @@
# Exploring Table operation performance
These benchmarks are used to compare various approaches to computing operations
on Table columns, to find out which best practices we should use for these and
to find avenues for optimization of the language and the Table implementation.
These benchmarks are not meant to be used for tracking performance of the
current implementation itself. That is supposed to be done by another project -
`Table_Benchmarks`.
## Structure
Currently, the benchmarks are split into a few files, each exploring some
separate topic, like mapping a single column, combining two columns with some
operation, or computing an aggregate operation over a column. In each file,
there may be a few Enso types, each representing a separate benchmark. Usually,
we have two benchmarks for each operation type - one dealing with a primitive
value type like integers (`long` in the Java side) and another dealing with a
reference type like `String` or `Date`. We expect that the performance
characteristics of these may differ, e.g. because Java allows using `long`
without boxing, so we compare them separately.
Each Enso type for a given benchmark contains multiple methods which represent
various 'approaches' to computing the same operation.
Each benchmark run has a name that consists of the type that defines it, a dot
and the method representing the particular approach, e.g.
`Boxed_Map_Test.enso_map_as_vector`.
## Running
The runner is very simple. If any options are to be customized, the Enso file
itself needs to be modified. One can run the whole project to run all the
benchmarks, or run only a specific file.
## Analysis
The output of the benchmarks should be saved to a file. Then that file can be
loaded using the Enso workflow in `tools/performance/benchmark-analysis`.
The workflow is tuned to analysing these comparative benchmarks.
At the top, one can select which file is to be analyzed. Below there is a
dropdown for selecting one particular benchmark (represented by the type,
e.g. `Boxed_Map_Test`). With that selected, one can display a scatter plot
visualization comparing the various approaches of that one benchmark. On the
plot we can see the runtimes of subsequent iterations. Later, we drop the first
40 iterations (the number can easily be customized in the workflow) to ensure
sufficient warm-up for each benchmark. Then a table is displayed showing the
average runtime of each approach and how they compare relative to each other - a
dropdown selects the benchmark that will be used as the reference point
(100%) for the average runtime comparison.

View File

@ -0,0 +1,6 @@
name: Exploratory_Benchmarks
enso-version: default
version: 0.0.1
license: MIT
author: enso-dev@enso.org
maintainer: enso-dev@enso.org

View File

@ -0,0 +1,109 @@
package org.enso.exploratory_benchmark_helpers;
import java.util.BitSet;
import java.util.function.Function;
import org.enso.base.Text_Utils;
import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.builder.InferredBuilder;
import org.enso.table.data.column.storage.BoolStorage;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.StringStorage;
import org.enso.table.data.column.storage.datetime.DateStorage;
import org.enso.table.data.column.storage.numeric.LongStorage;
import org.enso.table.data.column.storage.type.StorageType;
/**
 * Hand-written Java implementations of column map operations working directly on storages.
 *
 * <p>The exploratory benchmarks call these from their {@code java_map} / {@code java_roundtrip}
 * approaches.
 */
public class MapHelpers {

  /**
   * Concatenates two string storages row by row.
   *
   * <p>A row of the result is {@code null} when the corresponding row is missing in either input.
   *
   * @throws IllegalArgumentException if the storages differ in size
   */
  public static StringStorage stringConcatBimap(StringStorage storage1, StringStorage storage2) {
    final int size = requireSameSize(storage1.size(), storage2.size());
    String[] concatenated = new String[size];
    for (int row = 0; row < size; row++) {
      boolean anyMissing = storage1.isNa(row) || storage2.isNa(row);
      concatenated[row] = anyMissing ? null : storage1.getItem(row) + storage2.getItem(row);
    }
    return new StringStorage(concatenated, size);
  }

  /**
   * Adds two long storages row by row.
   *
   * <p>A row is flagged as missing in the result when it is missing in either input.
   *
   * @throws IllegalArgumentException if the storages differ in size
   */
  public static LongStorage longAddBimap(LongStorage storage1, LongStorage storage2) {
    final int size = requireSameSize(storage1.size(), storage2.size());
    long[] sums = new long[size];
    BitSet isMissing = new BitSet();
    for (int row = 0; row < size; row++) {
      if (storage1.isNa(row) || storage2.isNa(row)) {
        isMissing.set(row);
        continue;
      }
      sums[row] = storage1.getItem(row) + storage2.getItem(row);
    }
    return new LongStorage(sums, size, isMissing);
  }

  /**
   * Checks, for each row, whether the text ends with {@code suffix} according to {@code
   * Text_Utils.ends_with}. Missing rows stay missing in the result.
   */
  public static BoolStorage textEndsWith(StringStorage storage, String suffix) {
    final int size = storage.size();
    BitSet matches = new BitSet();
    BitSet isMissing = new BitSet();
    for (int row = 0; row < size; row++) {
      if (storage.isNa(row)) {
        isMissing.set(row);
      } else if (Text_Utils.ends_with(storage.getItem(row), suffix)) {
        matches.set(row);
      }
    }
    return new BoolStorage(matches, isMissing, size, false);
  }

  /** Adds {@code shift} to every present row; missing rows stay missing. */
  public static LongStorage longAdd(LongStorage storage, long shift) {
    final int size = storage.size();
    long[] shifted = new long[size];
    BitSet isMissing = new BitSet();
    for (int row = 0; row < size; row++) {
      if (storage.isNa(row)) {
        isMissing.set(row);
        continue;
      }
      shifted[row] = storage.getItem(row) + shift;
    }
    return new LongStorage(shifted, size, isMissing);
  }

  /** Extracts the year of every present date; missing rows stay missing. */
  public static LongStorage getYear(DateStorage storage) {
    final int size = storage.size();
    long[] years = new long[size];
    BitSet isMissing = new BitSet();
    for (int row = 0; row < size; row++) {
      if (storage.isNa(row)) {
        isMissing.set(row);
        continue;
      }
      years[row] = storage.getItem(row).getYear();
    }
    return new LongStorage(years, size, isMissing);
  }

  /**
   * Applies {@code fn} to the boxed value of every present row and collects the results.
   *
   * <p>Missing rows are not passed to {@code fn}; a single null is appended for each of them.
   *
   * @param expectedType storage type of the result; when {@code null} an {@link InferredBuilder}
   *     is used
   */
  public static Storage<?> mapCallback(
      Storage<?> storage, Function<Object, Object> fn, StorageType expectedType) {
    final int size = storage.size();
    final Builder builder;
    if (expectedType != null) {
      builder = Builder.getForType(expectedType, size);
    } else {
      builder = new InferredBuilder(size);
    }
    for (int row = 0; row < size; row++) {
      if (storage.isNa(row)) {
        builder.appendNulls(1);
      } else {
        builder.append(fn.apply(storage.getItemBoxed(row)));
      }
    }
    return builder.seal();
  }

  /** Returns the common size of two storages, or throws if the sizes differ. */
  private static int requireSameSize(int size1, int size2) {
    if (size1 != size2) {
      throw new IllegalArgumentException("Storage sizes must match");
    }
    return size1;
  }
}

View File

@ -0,0 +1,46 @@
package org.enso.exploratory_benchmark_helpers;
import java.time.LocalDate;
import org.enso.base.Text_Utils;
import org.enso.table.data.column.storage.StringStorage;
import org.enso.table.data.column.storage.datetime.DateStorage;
import org.enso.table.data.column.storage.numeric.LongStorage;
/**
 * Hand-written Java loops computing whole-column aggregates directly on storages.
 *
 * <p>The exploratory benchmarks call these from their {@code java_loop} approaches.
 */
public class SimpleStorageAggregateHelpers {

  /** Sums all present values of the storage; missing rows are skipped. */
  public static long sumLongStorage(LongStorage storage) {
    long total = 0;
    for (int row = 0; row < storage.size(); row++) {
      if (storage.isNa(row)) {
        continue;
      }
      total += storage.getItem(row);
    }
    return total;
  }

  /**
   * Sums the month numbers of all non-null dates.
   *
   * <p>Iterates over whatever {@code storage.getData()} returns rather than over row indices.
   */
  public static long sumMonthsOfDateStorage(DateStorage storage) {
    long monthTotal = 0;
    for (LocalDate date : storage.getData()) {
      monthTotal += (date == null) ? 0 : date.getMonthValue();
    }
    return monthTotal;
  }

  /**
   * Finds the text with the greatest length as measured by {@code Text_Utils.grapheme_length}.
   *
   * <p>On ties the earliest row wins, because only a strictly greater length replaces the current
   * best.
   *
   * @return the longest text, or {@code null} when the storage has no present rows
   */
  public static String longestText(StringStorage storage) {
    String best = null;
    long bestLength = -1;
    final int size = storage.size();
    for (int row = 0; row < size; row++) {
      if (storage.isNa(row)) {
        continue;
      }
      String candidate = storage.getItem(row);
      long candidateLength = Text_Utils.grapheme_length(candidate);
      if (candidateLength > bestLength) {
        bestLength = candidateLength;
        best = candidate;
      }
    }
    return best;
  }
}

View File

@ -0,0 +1,3 @@
import project.Table.Main as Table_Main
# Project entry point: delegates to `spec` of `project.Table.Main` (imported as `Table_Main`).
main = Table_Main.spec

View File

@ -0,0 +1,127 @@
from Standard.Base import all
from Standard.Table import all
from Standard.Test import Bench
import project.Table.Common_Setup.Common_Setup
import project.Table.Helpers
polyglot java import org.enso.exploratory_benchmark_helpers.SimpleStorageAggregateHelpers
## Computes the Longest text in the column - aggregate with no grouping.
This is of interest, because in contrast to all benchmarks above, it can generally be done in O(1) memory.
# Benchmark subject holding one text column; each method below is one approach to finding its longest text.
type Boxed_Total_Aggregate
Instance text_column
# Approach: the Table library's `aggregate` with `Aggregate_Column.Longest`, reading the single resulting cell.
current_aggregate_implementation self =
self.text_column.to_table.aggregate [Aggregate_Column.Longest 0] . at 0 . at 0
# Approach: the Java helper `SimpleStorageAggregateHelpers.longestText` applied to the column's Java storage.
java_loop self =
SimpleStorageAggregateHelpers.longestText self.text_column.java_column.getStorage
# Approach: an Enso fold over row indices, reading items from the result of `to_vector`.
# `Nothing` items are skipped; an item replaces the accumulator only when strictly longer.
enso_aggregate_vector_proxy self =
n = self.text_column.length
vector_proxy = self.text_column.to_vector
(0.up_to n).fold Nothing acc-> ix->
item = vector_proxy.at ix
if acc.is_nothing then item else
if item.is_nothing then acc else
if item.length > acc.length then item else acc
# Approach: the same fold, but reading items with `getItemBoxed` on the Java storage.
enso_aggregate_storage_get_item self =
n = self.text_column.length
storage = self.text_column.java_column.getStorage
(0.up_to n).fold Nothing acc-> ix->
item = storage.getItemBoxed ix
if acc.is_nothing then item else
if item.is_nothing then acc else
if item.length > acc.length then item else acc
# Runs every approach once and hands the results to `Helpers.check_results`
# (presumably asserting that they agree - confirm in `Helpers`).
verify_correctness self =
Helpers.check_results [self.current_aggregate_implementation, self.java_loop, self.enso_aggregate_vector_proxy, self.enso_aggregate_storage_get_item]
## Computes Sum of integers.
We have to be careful with `n` because if we use too large values Enso will start using BigInts, while Java will overflow.
# Benchmark subject holding one integer column; each method below is one approach to summing it.
type Primitive_Total_Aggregate
Instance int_column
# Approach: the Table library's `aggregate` with `Aggregate_Column.Sum`, reading the single resulting cell.
current_aggregate_implementation self =
self.int_column.to_table.aggregate [Aggregate_Column.Sum 0] . at 0 . at 0
# Approach: the Java helper `SimpleStorageAggregateHelpers.sumLongStorage` applied to the column's Java storage.
java_loop self =
long_storage = self.int_column.java_column.getStorage
SimpleStorageAggregateHelpers.sumLongStorage long_storage
# Approach: an Enso fold directly over the result of `to_vector`, skipping `Nothing` items.
enso_aggregate_vector_proxy self =
vector_proxy = self.int_column.to_vector
vector_proxy.fold 0 acc-> item->
if item.is_nothing then acc else
acc + item
# Approach: an Enso fold over row indices using `isNa` / `getItem` on the Java storage.
enso_aggregate_storage_get_item self =
n = self.int_column.length
storage = self.int_column.java_column.getStorage
(0.up_to n).fold 0 acc-> ix->
if storage.isNa ix then acc else
acc + storage.getItem ix
# Runs every approach once and hands the results to `Helpers.check_results`
# (presumably asserting that they agree - confirm in `Helpers`).
verify_correctness self =
Helpers.check_results [self.current_aggregate_implementation, self.java_loop, self.enso_aggregate_vector_proxy, self.enso_aggregate_storage_get_item]
## An alternative to Boxed_Total_Aggregate. Computing text length is complex due
to ICU complexity. This is a simpler one - we get the month of each value and
sum these.
# Benchmark subject holding one date column; each method below is one approach to summing the months.
# Unlike the other aggregate benchmarks in this file, there is no `current_aggregate_implementation` approach.
type Boxed_Sum_Months
Instance date_column
# Approach: the Java helper `SimpleStorageAggregateHelpers.sumMonthsOfDateStorage` applied to the column's Java storage.
java_loop self =
date_storage = self.date_column.java_column.getStorage
SimpleStorageAggregateHelpers.sumMonthsOfDateStorage date_storage
# Approach: an Enso fold directly over the result of `to_vector`, skipping `Nothing` items.
enso_aggregate_vector_proxy self =
vector_proxy = self.date_column.to_vector
vector_proxy.fold 0 acc-> item->
if item.is_nothing then acc else
acc + item.month
# Approach: an Enso fold over row indices, reading items with `getItemBoxed` on the Java storage.
enso_aggregate_storage_get_item self =
n = self.date_column.length
storage = self.date_column.java_column.getStorage
(0.up_to n).fold 0 acc-> ix->
item = storage.getItemBoxed ix
if item.is_nothing then acc else
acc + item.month
# Runs every approach once and hands the results to `Helpers.check_results`
# (presumably asserting that they agree - confirm in `Helpers`).
verify_correctness self =
Helpers.check_results [self.java_loop, self.enso_aggregate_vector_proxy, self.enso_aggregate_storage_get_item]
# Running this file on its own uses the default `Common_Setup.Config`.
main = spec (Common_Setup.Config)
# Builds the input tables from `setup`, verifies each benchmark subject and then measures every approach.
# Each measurement label has the form `<Type>.<method>`.
spec setup =
t = setup.generate_input_table
t2 = setup.generate_input_table_date
iter_size = setup.iter_size
num_iterations = setup.num_iterations
# Using ints2 to get smaller values to avoid integer overflow.
primitive_total_aggregate = Primitive_Total_Aggregate.Instance (t.at "ints2")
primitive_total_aggregate.verify_correctness
# GC not needed here as this should be O(1) memory.
# Note: `setup.force_gc` is not read in this spec; GC between iterations is always disabled here.
Bench.measure (primitive_total_aggregate.current_aggregate_implementation) "Primitive_Total_Aggregate.current_aggregate_implementation" iter_size num_iterations run_gc_between_iterations=False
Bench.measure (primitive_total_aggregate.java_loop) "Primitive_Total_Aggregate.java_loop" iter_size num_iterations run_gc_between_iterations=False
Bench.measure (primitive_total_aggregate.enso_aggregate_vector_proxy) "Primitive_Total_Aggregate.enso_aggregate_vector_proxy" iter_size num_iterations run_gc_between_iterations=False
Bench.measure (primitive_total_aggregate.enso_aggregate_storage_get_item) "Primitive_Total_Aggregate.enso_aggregate_storage_get_item" iter_size num_iterations run_gc_between_iterations=False
# Month-sum benchmark on the generated date table.
boxed_sum_months = Boxed_Sum_Months.Instance (t2.at "dates")
boxed_sum_months.verify_correctness
Bench.measure (boxed_sum_months.java_loop) "Boxed_Sum_Months.java_loop" iter_size num_iterations run_gc_between_iterations=False
Bench.measure (boxed_sum_months.enso_aggregate_vector_proxy) "Boxed_Sum_Months.enso_aggregate_vector_proxy" iter_size num_iterations run_gc_between_iterations=False
Bench.measure (boxed_sum_months.enso_aggregate_storage_get_item) "Boxed_Sum_Months.enso_aggregate_storage_get_item" iter_size num_iterations run_gc_between_iterations=False
# Longest-text benchmark on the "text" column.
boxed_total_aggregate = Boxed_Total_Aggregate.Instance (t.at "text")
boxed_total_aggregate.verify_correctness
Bench.measure (boxed_total_aggregate.current_aggregate_implementation) "Boxed_Total_Aggregate.current_aggregate_implementation" iter_size num_iterations run_gc_between_iterations=False
Bench.measure (boxed_total_aggregate.java_loop) "Boxed_Total_Aggregate.java_loop" iter_size num_iterations run_gc_between_iterations=False
Bench.measure (boxed_total_aggregate.enso_aggregate_vector_proxy) "Boxed_Total_Aggregate.enso_aggregate_vector_proxy" iter_size num_iterations run_gc_between_iterations=False
Bench.measure (boxed_total_aggregate.enso_aggregate_storage_get_item) "Boxed_Total_Aggregate.enso_aggregate_storage_get_item" iter_size num_iterations run_gc_between_iterations=False

View File

@ -0,0 +1,104 @@
from Standard.Base import all
from Standard.Table import all
from Standard.Test import Bench
import project.Table.Common_Setup.Common_Setup
import project.Table.Helpers
polyglot java import org.enso.exploratory_benchmark_helpers.MapHelpers
polyglot java import org.enso.table.data.column.builder.NumericBuilder
polyglot java import org.enso.table.data.column.builder.StringBuilder
# Adding two String columns
# Benchmark subject holding two text columns; each method below is one approach to combining them with `+`.
type Boxed_Bi_Map_Test
Instance text_column_1 text_column_2
# Approach: the Table library's own column `+`.
current_implementation self =
self.text_column_1 + self.text_column_2
# Approach: the Java helper `MapHelpers.stringConcatBimap` on both storages, wrapped back into a column.
java_map self =
Column.from_storage "result" <|
MapHelpers.stringConcatBimap self.text_column_1.java_column.getStorage self.text_column_2.java_column.getStorage
# Approach: zip the two `to_vector` results with `+` in Enso, then build a column from the mapped vector.
# `convert_polyglot_dates` is forwarded unchanged to `Helpers.column_from_vector`.
enso_map_as_vector self convert_polyglot_dates =
vector_proxy_1 = self.text_column_1.to_vector
vector_proxy_2 = self.text_column_2.to_vector
mapped = vector_proxy_1.zip vector_proxy_2 (+)
Helpers.column_from_vector "result" mapped convert_polyglot_dates=convert_polyglot_dates
# Approach: an Enso loop over row indices that appends into a Java `StringBuilder` storage builder.
# Throws a panic when the column lengths differ.
enso_map_with_builder self =
n = self.text_column_1.length
if self.text_column_2.length != n then Panic.throw "LENGTH MISMATCH" else
builder = StringBuilder.new n
storage_1 = self.text_column_1.java_column.getStorage
storage_2 = self.text_column_2.java_column.getStorage
0.up_to n . each i->
item_1 = storage_1.getItemBoxed i
item_2 = storage_2.getItemBoxed i
if item_1.is_nothing || item_2.is_nothing then builder.appendNulls 1 else
res = item_1 + item_2
builder.append res
Column.from_storage "result" builder.seal
# Runs every approach once (the vector approach with both flag values) and hands the results to
# `Helpers.check_results` (presumably asserting that they agree - confirm in `Helpers`).
verify_correctness self =
Helpers.check_results [self.current_implementation, self.java_map, self.enso_map_as_vector convert_polyglot_dates=True, self.enso_map_as_vector convert_polyglot_dates=False, self.enso_map_with_builder]
# Adding two Long columns
# Benchmark subject holding two integer columns; each method below is one approach to adding them.
type Primitive_Bi_Map_Test
Instance int_column_1 int_column_2
# Approach: the Table library's own column `+`.
current_implementation self =
self.int_column_1 + self.int_column_2
# Approach: the Java helper `MapHelpers.longAddBimap` on both storages, wrapped back into a column.
java_map self =
Column.from_storage "result" <|
MapHelpers.longAddBimap self.int_column_1.java_column.getStorage self.int_column_2.java_column.getStorage
# Approach: zip the two `to_vector` results with `+` in Enso, then build a column from the mapped vector.
# `convert_polyglot_dates` is forwarded unchanged to `Helpers.column_from_vector`.
enso_map_as_vector self convert_polyglot_dates =
vector_proxy_1 = self.int_column_1.to_vector
vector_proxy_2 = self.int_column_2.to_vector
mapped = vector_proxy_1.zip vector_proxy_2 (+)
Helpers.column_from_vector "result" mapped convert_polyglot_dates=convert_polyglot_dates
# Approach: an Enso loop over row indices that appends into a builder from `NumericBuilder.createLongBuilder`.
# Rows are checked with `isNa` before `getItem` is called. Throws a panic when the column lengths differ.
enso_map_with_builder self =
n = self.int_column_1.length
if self.int_column_2.length != n then Panic.throw "LENGTH MISMATCH" else
builder = NumericBuilder.createLongBuilder n
storage_1 = self.int_column_1.java_column.getStorage
storage_2 = self.int_column_2.java_column.getStorage
0.up_to n . each i->
if storage_1.isNa i || storage_2.isNa i then builder.appendNulls 1 else
item_1 = storage_1.getItem i
item_2 = storage_2.getItem i
res = item_1 + item_2
builder.appendLong res
Column.from_storage "result" builder.seal
# Runs every approach once (the vector approach with both flag values) and hands the results to
# `Helpers.check_results` (presumably asserting that they agree - confirm in `Helpers`).
verify_correctness self =
Helpers.check_results [self.current_implementation, self.java_map, self.enso_map_as_vector convert_polyglot_dates=True, self.enso_map_as_vector convert_polyglot_dates=False, self.enso_map_with_builder]
# Running this file on its own uses the default `Common_Setup.Config`.
main = spec Common_Setup.Config
# Builds the input table from `setup`, verifies each benchmark subject and then measures every approach.
# NOTE(review): the flag-enabled vector runs are labelled `+date_conversion` here, while the
# single-column map benchmarks use `+convert_dates` - confirm whether the analysis workflow relies on either.
spec setup =
t = setup.generate_input_table
iter_size = setup.iter_size
num_iterations = setup.num_iterations
should_run_gc = setup.force_gc
# Integer benchmark: "ints" combined with "ints2".
primitive_bimap = Primitive_Bi_Map_Test.Instance (t.at "ints") (t.at "ints2")
primitive_bimap.verify_correctness
Bench.measure (primitive_bimap.current_implementation) "Primitive_Bi_Map_Test.current_implementation" iter_size num_iterations run_gc_between_iterations=should_run_gc
Bench.measure (primitive_bimap.java_map) "Primitive_Bi_Map_Test.java_map" iter_size num_iterations run_gc_between_iterations=should_run_gc
Bench.measure (primitive_bimap.enso_map_as_vector convert_polyglot_dates=True) "Primitive_Bi_Map_Test.enso_map_as_vector+date_conversion" iter_size num_iterations run_gc_between_iterations=should_run_gc
Bench.measure (primitive_bimap.enso_map_as_vector convert_polyglot_dates=False) "Primitive_Bi_Map_Test.enso_map_as_vector" iter_size num_iterations run_gc_between_iterations=should_run_gc
Bench.measure (primitive_bimap.enso_map_with_builder) "Primitive_Bi_Map_Test.enso_map_with_builder" iter_size num_iterations run_gc_between_iterations=should_run_gc
# Text benchmark: "text" combined with "text2".
boxed_bimap = Boxed_Bi_Map_Test.Instance (t.at "text") (t.at "text2")
boxed_bimap.verify_correctness
Bench.measure (boxed_bimap.current_implementation) "Boxed_Bi_Map_Test.current_implementation" iter_size num_iterations run_gc_between_iterations=should_run_gc
Bench.measure (boxed_bimap.java_map) "Boxed_Bi_Map_Test.java_map" iter_size num_iterations run_gc_between_iterations=should_run_gc
Bench.measure (boxed_bimap.enso_map_as_vector convert_polyglot_dates=True) "Boxed_Bi_Map_Test.enso_map_as_vector+date_conversion" iter_size num_iterations run_gc_between_iterations=should_run_gc
Bench.measure (boxed_bimap.enso_map_as_vector convert_polyglot_dates=False) "Boxed_Bi_Map_Test.enso_map_as_vector" iter_size num_iterations run_gc_between_iterations=should_run_gc
Bench.measure (boxed_bimap.enso_map_with_builder) "Boxed_Bi_Map_Test.enso_map_with_builder" iter_size num_iterations run_gc_between_iterations=should_run_gc

View File

@ -0,0 +1,111 @@
from Standard.Base import all
from Standard.Table import all
from Standard.Test import Bench
import project.Table.Common_Setup.Common_Setup
import project.Table.Helpers
polyglot java import org.enso.exploratory_benchmark_helpers.MapHelpers
polyglot java import org.enso.table.data.column.builder.BoolBuilder
polyglot java import org.enso.table.data.column.builder.NumericBuilder
## This tests an operation on a boxed value (e.g. ends_with on a String).
It is the basic benchmark for comparing the performance between the vectorized Java op and approaches relying on Enso.
We would like to see the Enso approach to have comparable performance to the Java one.
# Benchmark subject holding a text column and a suffix; each method below is one approach to `ends_with`.
type Boxed_Map_Test
Instance text_column (suffix : Text)
# Approach: the Table library's own column `ends_with`.
current_implementation self =
self.text_column.ends_with self.suffix
# Approach: the Java helper `MapHelpers.textEndsWith` on the storage, wrapped back into a column.
java_map self =
Column.from_storage "result" <|
MapHelpers.textEndsWith self.text_column.java_column.getStorage self.suffix
# Approach: map over the `to_vector` result in Enso, then build a column from the mapped vector.
# Note: the lambda calls `ends_with` on every item without a `Nothing` check.
enso_map_as_vector self convert_polyglot_dates =
suffix = self.suffix
vector_proxy = self.text_column.to_vector
mapped = vector_proxy.map x-> x.ends_with suffix
Helpers.column_from_vector "result" mapped convert_polyglot_dates=convert_polyglot_dates
# Approach: an Enso loop over row indices that appends into a Java `BoolBuilder`;
# `Nothing` items become nulls, other items are tested with `ends_with`.
enso_map_with_builder self =
suffix = self.suffix
n = self.text_column.length
builder = BoolBuilder.new n
storage = self.text_column.java_column.getStorage
0.up_to n . each i->
item = storage.getItemBoxed i
case item of
Nothing ->
builder.appendNulls 1
_ ->
b = item.ends_with suffix
builder.appendBoolean b
Column.from_storage "result" builder.seal
# Runs every approach once (the vector approach with both flag values) and hands the results to
# `Helpers.check_results` (presumably asserting that they agree - confirm in `Helpers`).
verify_correctness self =
Helpers.check_results [self.current_implementation, self.java_map, self.enso_map_as_vector convert_polyglot_dates=True, self.enso_map_as_vector convert_polyglot_dates=False, self.enso_map_with_builder]
## This tests an operation on a primitive value, that in Java is stored as unboxed (e.g. + on LongStorage).
This is a more demanding benchmark, because the Java side has an advantage of easily using the unboxed values everywhere.
Here it may be harder to achieve comparable performance, but we want to know what is the difference, and ideally we want to be getting closer here as well.
# Benchmark subject holding an integer column and a shift; each method below is one approach to adding the shift.
type Primitive_Map_Test
Instance int_column (shift : Integer)
# Approach: the Table library's own column `+` with a scalar.
current_implementation self =
self.int_column + self.shift
# Approach: the Java helper `MapHelpers.longAdd` on the storage, wrapped back into a column.
java_map self =
Column.from_storage "result" <|
MapHelpers.longAdd self.int_column.java_column.getStorage self.shift
# Approach: map over the `to_vector` result in Enso, then build a column from the mapped vector.
# Note: the lambda adds the shift to every item without a `Nothing` check.
enso_map_as_vector self convert_polyglot_dates =
shift = self.shift
vector_proxy = self.int_column.to_vector
mapped = vector_proxy.map x-> x + shift
Helpers.column_from_vector "result" mapped convert_polyglot_dates=convert_polyglot_dates
# Approach: an Enso loop over row indices that appends into a builder from `NumericBuilder.createLongBuilder`;
# rows reported by `isNa` become nulls, other rows are read with `getItem` and shifted.
enso_map_with_builder self =
shift = self.shift
n = self.int_column.length
builder = NumericBuilder.createLongBuilder n
storage = self.int_column.java_column.getStorage
0.up_to n . each i->
case storage.isNa i of
True ->
builder.appendNulls 1
False ->
item = storage.getItem i
x = item + shift
builder.appendLong x
Column.from_storage "result" builder.seal
# Runs every approach once (the vector approach with both flag values) and hands the results to
# `Helpers.check_results` (presumably asserting that they agree - confirm in `Helpers`).
verify_correctness self =
Helpers.check_results [self.current_implementation, self.java_map, self.enso_map_as_vector convert_polyglot_dates=True, self.enso_map_as_vector convert_polyglot_dates=False, self.enso_map_with_builder]
# Running this file on its own uses the default `Common_Setup.Config`.
main = spec Common_Setup.Config
# Builds the input table from `setup`, verifies each benchmark subject and then measures every approach.
# Each measurement label has the form `<Type>.<method>`, with `+convert_dates` appended for the
# vector approach run with `convert_polyglot_dates=True`.
spec setup =
t = setup.generate_input_table
iter_size = setup.iter_size
num_iterations = setup.num_iterations
should_run_gc = setup.force_gc
# Integer benchmark: add 42 to the "ints" column.
primitive_map = Primitive_Map_Test.Instance (t.at "ints") 42
primitive_map.verify_correctness
Bench.measure (primitive_map.current_implementation) "Primitive_Map_Test.current_implementation" iter_size num_iterations run_gc_between_iterations=should_run_gc
Bench.measure (primitive_map.java_map) "Primitive_Map_Test.java_map" iter_size num_iterations run_gc_between_iterations=should_run_gc
Bench.measure (primitive_map.enso_map_as_vector convert_polyglot_dates=True) "Primitive_Map_Test.enso_map_as_vector+convert_dates" iter_size num_iterations run_gc_between_iterations=should_run_gc
Bench.measure (primitive_map.enso_map_as_vector convert_polyglot_dates=False) "Primitive_Map_Test.enso_map_as_vector" iter_size num_iterations run_gc_between_iterations=should_run_gc
Bench.measure (primitive_map.enso_map_with_builder) "Primitive_Map_Test.enso_map_with_builder" iter_size num_iterations run_gc_between_iterations=should_run_gc
# This one seems slowest so I put it at the end.
# Text benchmark: test whether each value of the "text" column ends with "5".
boxed_map = Boxed_Map_Test.Instance (t.at "text") "5"
boxed_map.verify_correctness
Bench.measure (boxed_map.current_implementation) "Boxed_Map_Test.current_implementation" iter_size num_iterations run_gc_between_iterations=should_run_gc
Bench.measure (boxed_map.java_map) "Boxed_Map_Test.java_map" iter_size num_iterations run_gc_between_iterations=should_run_gc
Bench.measure (boxed_map.enso_map_as_vector convert_polyglot_dates=True) "Boxed_Map_Test.enso_map_as_vector+convert_dates" iter_size num_iterations run_gc_between_iterations=should_run_gc
Bench.measure (boxed_map.enso_map_as_vector convert_polyglot_dates=False) "Boxed_Map_Test.enso_map_as_vector" iter_size num_iterations run_gc_between_iterations=should_run_gc
Bench.measure (boxed_map.enso_map_with_builder) "Boxed_Map_Test.enso_map_with_builder" iter_size num_iterations run_gc_between_iterations=should_run_gc

View File

@ -0,0 +1,77 @@
from Standard.Base import all
from Standard.Table import all
from Standard.Test import Bench
import project.Table.Common_Setup.Common_Setup
import project.Table.Helpers
polyglot java import org.enso.exploratory_benchmark_helpers.MapHelpers
polyglot java import org.enso.table.data.column.builder.NumericBuilder
## A second variant of Boxed_Map_Test.
The first one relied on `ends_with` which is actually a costly operation due to reliance on ICU and correct grapheme cluster handling.
So as a second comparison we will do `Date.year` instead which is much simpler.
# Benchmark subject holding one date column; each method below is one approach to extracting the year.
type Boxed_Map_Test_2
Instance date_column
# Approach: the Table library's own column `year`.
current_implementation self =
self.date_column.year
# Approach: the Java helper `MapHelpers.getYear` on the storage, wrapped back into a column.
java_map self =
Column.from_storage "result" <|
MapHelpers.getYear self.date_column.java_column.getStorage
## We can still opt-out of `convert_polyglot_dates`, because this is applied
at output which is Integer. If our output was another Date, we could not
opt-out to remain correct.
enso_map_as_vector self convert_polyglot_dates =
vector_proxy = self.date_column.to_vector
mapped = vector_proxy.map x-> x.year
Helpers.column_from_vector "result" mapped convert_polyglot_dates=convert_polyglot_dates
# Approach: an Enso loop over row indices that appends each year with `appendLong`;
# `Nothing` items become nulls.
enso_map_with_builder_append_long self =
n = self.date_column.length
builder = NumericBuilder.createLongBuilder n
storage = self.date_column.java_column.getStorage
0.up_to n . each i->
case storage.getItemBoxed i of
Nothing ->
builder.appendNulls 1
date ->
builder.appendLong date.year
Column.from_storage "result" builder.seal
## This is the same as above, but uses `appendNoGrow` instead of
`appendLong`. I suspect it could be more efficient, so I'm testing it.
enso_map_with_builder_append_object self =
n = self.date_column.length
builder = NumericBuilder.createLongBuilder n
storage = self.date_column.java_column.getStorage
0.up_to n . each i->
case storage.getItemBoxed i of
Nothing ->
builder.appendNulls 1
date ->
builder.appendNoGrow date.year
Column.from_storage "result" builder.seal
# Runs every approach once (the vector approach with both flag values) and hands the results to
# `Helpers.check_results` (presumably asserting that they agree - confirm in `Helpers`).
verify_correctness self =
Helpers.check_results [self.current_implementation, self.java_map, self.enso_map_as_vector convert_polyglot_dates=True, self.enso_map_as_vector convert_polyglot_dates=False, self.enso_map_with_builder_append_long, self.enso_map_with_builder_append_object]
# Running this file on its own uses the default `Common_Setup.Config`.
main = spec (Common_Setup.Config)
# Builds the date input table from `setup` and measures every approach of `Boxed_Map_Test_2`.
# NOTE(review): `boxed_map.verify_correctness` is defined on the type but never called in this spec,
# so the approaches are measured without being checked first - confirm whether that is intended.
spec setup =
t = setup.generate_input_table_date
iter_size = setup.iter_size
num_iterations = setup.num_iterations
should_run_gc = setup.force_gc
boxed_map = Boxed_Map_Test_2.Instance (t.at "dates")
Bench.measure (boxed_map.current_implementation) "Boxed_Map_Test_2.current_implementation" iter_size num_iterations run_gc_between_iterations=should_run_gc
Bench.measure (boxed_map.java_map) "Boxed_Map_Test_2.java_map" iter_size num_iterations run_gc_between_iterations=should_run_gc
Bench.measure (boxed_map.enso_map_as_vector convert_polyglot_dates=True) "Boxed_Map_Test_2.enso_map_as_vector+convert_dates" iter_size num_iterations run_gc_between_iterations=should_run_gc
Bench.measure (boxed_map.enso_map_as_vector convert_polyglot_dates=False) "Boxed_Map_Test_2.enso_map_as_vector" iter_size num_iterations run_gc_between_iterations=should_run_gc
Bench.measure (boxed_map.enso_map_with_builder_append_long) "Boxed_Map_Test_2.enso_map_with_builder_append_long" iter_size num_iterations run_gc_between_iterations=should_run_gc
Bench.measure (boxed_map.enso_map_with_builder_append_object) "Boxed_Map_Test_2.enso_map_with_builder_append_object" iter_size num_iterations run_gc_between_iterations=should_run_gc

View File

@ -0,0 +1,19 @@
from Standard.Base import all
from Standard.Table import all
## Shared configuration of the exploratory benchmarks, together with
   generators of the input tables the benchmarks run on.
type Common_Setup
    ## Fields:
       - n: number of rows in the generated input tables.
       - iter_size, num_iterations: read by the benchmark specs and forwarded
         to `Bench.measure`.
       - force_gc: read by the benchmark specs and forwarded to
         `Bench.measure` as `run_gc_between_iterations`.
    Config (n : Integer = 10^6) (iter_size : Integer = 1) (num_iterations : Integer = 100) (force_gc : Boolean = False)

    ## Builds a table of `n` rows with four columns: `ints` (0 up to n,
       exclusive), `text` (the same numbers as text), `ints2` (the same
       numbers in reverse order, each taken modulo 20) and `text2` (`ints2`
       as text).
    generate_input_table : Table
    generate_input_table self =
        row_count = self.n
        ascending = 0.up_to row_count . to_vector
        cyclic = ascending.reverse.map (value-> value % 20)
        ints_column = ["ints", ascending]
        text_column = ["text", ascending.map .to_text]
        ints2_column = ["ints2", cyclic]
        text2_column = ["text2", cyclic.map .to_text]
        Table.new [ints_column, text_column, ints2_column, text2_column]

    ## Builds a single-column table (`dates`) of `n` consecutive days,
       starting at 1999-01-01.
    generate_input_table_date : Table
    generate_input_table_date self =
        row_count = self.n
        first_day = Date.new 1999 1 1
        days = 0.up_to row_count . map offset->
            first_day.date_add offset Date_Period.Day
        Table.new [["dates", days]]

View File

@ -0,0 +1,140 @@
from Standard.Base import all
from Standard.Table import all
import Standard.Table.Data.Type.Storage
from Standard.Test import Bench
import project.Table.Common_Setup.Common_Setup
import project.Table.Helpers
polyglot java import org.enso.exploratory_benchmark_helpers.MapHelpers
polyglot java import org.enso.table.data.column.builder.StringBuilder
polyglot java import org.enso.table.data.column.builder.NumericBuilder
polyglot java import org.enso.table.data.column.operation.map.MapOperationProblemBuilder
polyglot java import org.enso.table.data.table.Column as Java_Column
## Benchmarks an operation that runs an Enso function on each element of a
   column. The variants compare the cost of calling back into Enso from Java
   with the cost of staying in Enso.
type Boxed_Enso_Callback_Test
    ## Fields:
       - text_column: the column whose elements are mapped.
       - fn: the Enso function applied to every element.
    Instance text_column (fn : Text -> Text)

    ## Maps the column with the regular `Column.map`.
    current_implementation self =
        self.text_column.map self.fn

    ## Hands the Java storage and the Enso function to
       `MapHelpers.mapCallback`, requesting the storage type of
       `Value_Type.Char` for the result.
    java_roundtrip self =
        char_storage_type = Storage.from_value_type_strict Value_Type.Char
        input_storage = self.text_column.java_column.getStorage
        mapped_storage = MapHelpers.mapCallback input_storage self.fn char_storage_type
        Column.from_storage "result" mapped_storage

    ## Maps the column's vector in Enso and rebuilds a column from it through
       `Helpers.column_from_vector`.

       Arguments:
       - convert_polyglot_dates: forwarded to `Helpers.column_from_vector`.
    enso_map_as_vector self convert_polyglot_dates =
        mapped_items = self.text_column.to_vector.map self.fn
        Helpers.column_from_vector "result" mapped_items convert_polyglot_dates

    ## Walks the Java storage index by index from Enso and appends the mapped
       values to a Java `StringBuilder` column builder.
    enso_map_with_builder self =
        row_count = self.text_column.length
        callback = self.fn
        output = StringBuilder.new row_count
        input_storage = self.text_column.java_column.getStorage
        0.up_to row_count . each ix->
            # A missing value is not passed to the callback; it stays null.
            case input_storage.getItemBoxed ix of
                Nothing ->
                    output.appendNulls 1
                value ->
                    output.append (callback value)
        Column.from_storage "result" output.seal

    ## Runs every variant once and passes the results to
       `Helpers.check_results`, which compares them against the first one.
    verify_correctness self =
        baseline = self.current_implementation
        roundtrip = self.java_roundtrip
        via_vector_converted = self.enso_map_as_vector convert_polyglot_dates=True
        via_vector_raw = self.enso_map_as_vector convert_polyglot_dates=False
        via_builder = self.enso_map_with_builder
        Helpers.check_results [baseline, roundtrip, via_vector_converted, via_vector_raw, via_builder]
## The same kind of benchmark as the boxed callback test, but run on a column
   of a primitive type (integers), to see how such a column behaves.
type Primitive_Enso_Callback_Test
    ## Fields:
       - int_column: the column whose elements are mapped.
       - shift: the integer added to every element.
    Instance int_column (shift : Integer)

    ## The mapped function: adds `shift` to its argument. `shift` is read
       into a local first, so the returned lambda does not refer to `self`.
    fn self =
        delta = self.shift
        value-> value + delta

    ## Maps the column with the regular `Column.map`.
    current_implementation_as_map self =
        self.int_column.map self.fn

    ## Performs the addition through `MapHelpers.longAdd`, without any Enso
       callback.
    java_vectorized self =
        input_storage = self.int_column.java_column.getStorage
        shifted_storage = MapHelpers.longAdd input_storage self.shift
        Column.from_storage "result" shifted_storage

    ## Hands the Java storage and the Enso function to
       `MapHelpers.mapCallback`, requesting the storage type of
       `Value_Type.Integer` for the result.
    java_roundtrip self =
        integer_storage_type = Storage.from_value_type_strict Value_Type.Integer
        input_storage = self.int_column.java_column.getStorage
        mapped_storage = MapHelpers.mapCallback input_storage self.fn integer_storage_type
        Column.from_storage "result" mapped_storage

    ## Maps the column's vector in Enso and rebuilds a column from it through
       `Helpers.column_from_vector`, which passes no expected storage type.

       Arguments:
       - convert_polyglot_dates: forwarded to `Helpers.column_from_vector`.
    enso_map_as_vector_inferred_builder self convert_polyglot_dates =
        mapped_items = self.int_column.to_vector.map self.fn
        Helpers.column_from_vector "result" mapped_items convert_polyglot_dates

    ## Maps the column's vector in Enso and rebuilds a column from it with an
       explicitly requested Integer storage type.
    enso_map_as_vector_long_builder self =
        mapped_items = self.int_column.to_vector.map self.fn
        # The storage type is given explicitly here. Presumably this is what
        # selects the long builder - with no expected storage type the
        # inferred builder would be used instead.
        integer_storage_type = Storage.from_value_type_strict Value_Type.Integer
        java_column = Java_Column.fromItemsNoDateConversion "result" mapped_items integer_storage_type
        Column.Value java_column

    ## Walks the Java storage from Enso making two calls per row: `isNa` to
       detect a missing value and `getItem` to read a present one.
    enso_map_with_builder_2_calls_unboxed self =
        row_count = self.int_column.length
        callback = self.fn
        output = NumericBuilder.createLongBuilder row_count
        input_storage = self.int_column.java_column.getStorage
        0.up_to row_count . each ix->
            case input_storage.isNa ix of
                True ->
                    output.appendNulls 1
                False ->
                    output.append (callback (input_storage.getItem ix))
        Column.from_storage "result" output.seal

    ## Walks the Java storage from Enso making a single `getItemBoxed` call
       per row; a missing value shows up as `Nothing`.
    enso_map_with_builder_1_call_boxed self =
        row_count = self.int_column.length
        callback = self.fn
        output = NumericBuilder.createLongBuilder row_count
        input_storage = self.int_column.java_column.getStorage
        0.up_to row_count . each ix->
            case input_storage.getItemBoxed ix of
                Nothing ->
                    output.appendNulls 1
                value ->
                    output.append (callback value)
        Column.from_storage "result" output.seal

    ## Runs every variant once and passes the results to
       `Helpers.check_results`, which compares them against the first one.
    verify_correctness self =
        baseline = self.current_implementation_as_map
        vectorized = self.java_vectorized
        roundtrip = self.java_roundtrip
        inferred_converted = self.enso_map_as_vector_inferred_builder convert_polyglot_dates=True
        inferred_raw = self.enso_map_as_vector_inferred_builder convert_polyglot_dates=False
        long_builder = self.enso_map_as_vector_long_builder
        two_calls = self.enso_map_with_builder_2_calls_unboxed
        one_call = self.enso_map_with_builder_1_call_boxed
        Helpers.check_results [baseline, vectorized, roundtrip, inferred_converted, inferred_raw, long_builder, two_calls, one_call]
## Entry point for running this file on its own: executes `spec` with a
   `Common_Setup.Config` built from its default field values.
main = spec (Common_Setup.Config)
## Verifies and then measures all variants of the primitive and the boxed
   Enso-callback tests.

   Arguments:
   - setup: the `Common_Setup` configuration; it supplies the input table,
     `iter_size`, `num_iterations` and the `force_gc` flag forwarded to
     `Bench.measure`.
spec setup =
    input = setup.generate_input_table

    iter_size = setup.iter_size
    num_iterations = setup.num_iterations
    force_gc = setup.force_gc

    # Integer column: every element is shifted by 42.
    primitive_test = Primitive_Enso_Callback_Test.Instance (input.at "ints") 42
    primitive_test.verify_correctness
    Bench.measure (primitive_test.current_implementation_as_map) "Primitive_Enso_Callback_Test.current_implementation_as_map" iter_size num_iterations run_gc_between_iterations=force_gc
    Bench.measure (primitive_test.java_vectorized) "Primitive_Enso_Callback_Test.java_vectorized" iter_size num_iterations run_gc_between_iterations=force_gc
    Bench.measure (primitive_test.java_roundtrip) "Primitive_Enso_Callback_Test.java_roundtrip" iter_size num_iterations run_gc_between_iterations=force_gc
    Bench.measure (primitive_test.enso_map_as_vector_inferred_builder convert_polyglot_dates=False) "Primitive_Enso_Callback_Test.enso_map_as_vector_inferred_builder" iter_size num_iterations run_gc_between_iterations=force_gc
    Bench.measure (primitive_test.enso_map_as_vector_inferred_builder convert_polyglot_dates=True) "Primitive_Enso_Callback_Test.enso_map_as_vector_inferred_builder_and_date_conversions" iter_size num_iterations run_gc_between_iterations=force_gc
    Bench.measure (primitive_test.enso_map_as_vector_long_builder) "Primitive_Enso_Callback_Test.enso_map_as_vector_long_builder" iter_size num_iterations run_gc_between_iterations=force_gc
    Bench.measure (primitive_test.enso_map_with_builder_2_calls_unboxed) "Primitive_Enso_Callback_Test.enso_map_with_builder_2_calls_unboxed" iter_size num_iterations run_gc_between_iterations=force_gc
    Bench.measure (primitive_test.enso_map_with_builder_1_call_boxed) "Primitive_Enso_Callback_Test.enso_map_with_builder_1_call_boxed" iter_size num_iterations run_gc_between_iterations=force_gc

    # Text column: every element gets wrapped in a pair of `|` characters.
    surround_with_bars text = "|" + text + "|"
    boxed_test = Boxed_Enso_Callback_Test.Instance (input.at "text") surround_with_bars
    boxed_test.verify_correctness
    Bench.measure (boxed_test.current_implementation) "Boxed_Enso_Callback_Test.current_implementation" iter_size num_iterations run_gc_between_iterations=force_gc
    Bench.measure (boxed_test.java_roundtrip) "Boxed_Enso_Callback_Test.java_roundtrip" iter_size num_iterations run_gc_between_iterations=force_gc
    Bench.measure (boxed_test.enso_map_as_vector convert_polyglot_dates=False) "Boxed_Enso_Callback_Test.enso_map_as_vector_without_date_conversion" iter_size num_iterations run_gc_between_iterations=force_gc
    Bench.measure (boxed_test.enso_map_as_vector convert_polyglot_dates=True) "Boxed_Enso_Callback_Test.enso_map_as_vector_with_date_conversion" iter_size num_iterations run_gc_between_iterations=force_gc
    Bench.measure (boxed_test.enso_map_with_builder) "Boxed_Enso_Callback_Test.enso_map_with_builder" iter_size num_iterations run_gc_between_iterations=force_gc

View File

@ -0,0 +1,34 @@
from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.Errors.Illegal_State.Illegal_State
from Standard.Table import all
polyglot java import org.enso.table.data.table.Column as Java_Column
## PRIVATE
   Builds a column out of a vector, essentially mimicking
   `Column.from_vector`, but letting the caller decide whether polyglot date
   conversions are performed. Having a separate copy keeps the benchmarks
   stable regardless of changes to `Column.from_vector`.

   Arguments:
   - name: name of the resulting column.
   - items: the elements of the column.
   - convert_polyglot_dates: if `True`, the column is built with
     `Java_Column.fromItems`; if `False`, with
     `Java_Column.fromItemsNoDateConversion`.
column_from_vector : Text -> Vector -> Boolean -> Column
column_from_vector name items convert_polyglot_dates =
    # No particular storage type is requested from the Java side.
    requested_storage = Nothing
    Illegal_Argument.handle_java_exception <|
        underlying = case convert_polyglot_dates of
            True -> Java_Column.fromItems name items requested_storage
            False -> Java_Column.fromItemsNoDateConversion name items requested_storage
        Column.Value underlying
## Checks that all the given benchmark results are equal to the first one.
   Results that are a `Column` are compared by their vector of values, any
   other result is compared as-is. On the first mismatch, the reference and
   the offending result are printed and an `Illegal_State` panic is thrown.

   Arguments:
   - results: a vector of results; the element at index 0 is the reference.
check_results results =
    to_comparable value = case value of
        _ : Column -> value.to_vector
        _ -> value
    comparable = results.map to_comparable
    baseline = comparable.first
    comparable.each_with_index position-> candidate->
        if candidate != baseline then
            IO.println "Mismatched results: "
            IO.println "Reference: "+baseline.to_display_text
            IO.println "Result (ix="+position.to_text+"): "+candidate.to_display_text
            Panic.throw (Illegal_State.Error "The benchmark result ix="+position.to_text+" does not match the 0th one.")

View File

@ -0,0 +1,19 @@
## NOTE
This file is _not_ automatically run when the benchmarks are run.
It can be used to run all performance tests in a single run.
import project.Table.Column_Aggregate
import project.Table.Column_Bi_Map
import project.Table.Column_Map
import project.Table.Column_Map_2
import project.Table.Common_Setup.Common_Setup
import project.Table.Enso_Callback
## Runs the `spec` of every Table benchmark module imported by this file,
   one after another, all with the same default `Common_Setup.Config`.
spec =
    shared_config = Common_Setup.Config

    Column_Map.spec shared_config
    Column_Map_2.spec shared_config
    Column_Bi_Map.spec shared_config
    Column_Aggregate.spec shared_config
    Enso_Callback.spec shared_config
## Entry point: running this file executes `spec`.
main = spec

View File

@ -0,0 +1,5 @@
# Analysing benchmarks
This workflow is intended primarily for analysing the output of the benchmarks
in `test/Exploratory_Benchmarks`. See `test/Exploratory_Benchmarks/README.md`
for more information.

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,8 @@
name: Vectorized_Benchmarks
namespace: local
version: 0.0.1
license: ""
authors: []
maintainers: []
edition: 0.0.0-dev
prefer-local-libraries: true

File diff suppressed because one or more lines are too long