mirror of
https://github.com/enso-org/enso.git
synced 2024-12-22 23:31:42 +03:00
Add benchmarks related to add_row_number
performance investigation (#8091)
- Follow-up of #8055.
- Adds a benchmark comparing the performance of the Enso Map and the Java HashMap in two scenarios: _only incremental_ updates (like `Vector.distinct`) and _replacing_ updates (like keeping a counter for each key).
- These benchmarks can be used as a metric for #8090.
This commit is contained in:
parent
cec115d25b
commit
93a31fcc8b
26
build.sbt
26
build.sbt
@ -308,7 +308,8 @@ lazy val enso = (project in file("."))
|
||||
`std-aws`,
|
||||
`simple-httpbin`,
|
||||
`enso-test-java-helpers`,
|
||||
`exploratory-benchmark-java-helpers`
|
||||
`exploratory-benchmark-java-helpers`,
|
||||
`benchmark-java-helpers`
|
||||
)
|
||||
.settings(Global / concurrentRestrictions += Tags.exclusive(Exclusive))
|
||||
.settings(
|
||||
@ -1386,6 +1387,7 @@ lazy val runtime = (project in file("engine/runtime"))
|
||||
(Runtime / compile) := (Runtime / compile)
|
||||
.dependsOn(`std-base` / Compile / packageBin)
|
||||
.dependsOn(`enso-test-java-helpers` / Compile / packageBin)
|
||||
.dependsOn(`benchmark-java-helpers` / Compile / packageBin)
|
||||
.dependsOn(`exploratory-benchmark-java-helpers` / Compile / packageBin)
|
||||
.dependsOn(`std-image` / Compile / packageBin)
|
||||
.dependsOn(`std-database` / Compile / packageBin)
|
||||
@ -2200,6 +2202,26 @@ lazy val `exploratory-benchmark-java-helpers` = project
|
||||
.dependsOn(`std-base` % "provided")
|
||||
.dependsOn(`std-table` % "provided")
|
||||
|
||||
// Java helper classes for the Enso benchmarks located under `test/Benchmarks`.
lazy val `benchmark-java-helpers` = project
  .in(file("test/Benchmarks/polyglot-sources/benchmark-java-helpers"))
  .settings(
    frgaalJavaCompilerSetting,
    autoScalaLibrary := false,
    // The packaged jar is written straight into the benchmark project's
    // polyglot directory.
    Compile / packageBin / artifactPath :=
      file("test/Benchmarks/polyglot/java/benchmark-java-helpers.jar"),
    libraryDependencies ++= Seq(
      "org.graalvm.sdk" % "graal-sdk" % graalMavenPackagesVersion % "provided"
    )
  )
  .dependsOn(
    `std-base`  % "provided",
    `std-table` % "provided"
  )
|
||||
|
||||
lazy val `std-table` = project
|
||||
.in(file("std-bits") / "table")
|
||||
.enablePlugins(Antlr4Plugin)
|
||||
@ -2531,12 +2553,14 @@ pkgStdLibInternal := Def.inputTask {
|
||||
case "TestHelpers" =>
|
||||
(`enso-test-java-helpers` / Compile / packageBin).value
|
||||
(`exploratory-benchmark-java-helpers` / Compile / packageBin).value
|
||||
(`benchmark-java-helpers` / Compile / packageBin).value
|
||||
case "AWS" =>
|
||||
(`std-aws` / Compile / packageBin).value
|
||||
case _ if buildAllCmd =>
|
||||
(`std-base` / Compile / packageBin).value
|
||||
(`enso-test-java-helpers` / Compile / packageBin).value
|
||||
(`exploratory-benchmark-java-helpers` / Compile / packageBin).value
|
||||
(`benchmark-java-helpers` / Compile / packageBin).value
|
||||
(`std-table` / Compile / packageBin).value
|
||||
(`std-database` / Compile / packageBin).value
|
||||
(`std-image` / Compile / packageBin).value
|
||||
|
@ -159,10 +159,10 @@ type Bench
|
||||
count = self.total_specs
|
||||
IO.println <| "Found " + count.to_text + " cases to execute (ETA " + self.estimated_runtime.to_display_text + ")"
|
||||
|
||||
case Environment.get "ENSO_BENCHMARK_REPORT_PATH" of
|
||||
case get_benchmark_report_path of
|
||||
Nothing -> Nothing
|
||||
path ->
|
||||
line = 'Label,Phase,"Invocations count","Average time (ms)"'
|
||||
line = 'Label,Phase,"Invocations count","Average time (ms)","Time Stdev"'
|
||||
line.write path on_existing_file=Existing_File_Behavior.Backup
|
||||
|
||||
self.fold Nothing _-> g-> s->
|
||||
@ -211,8 +211,10 @@ type Bench
|
||||
computation.
|
||||
single_call ~act =
    start = System.nano_time
    # Invoke the computation once and keep its result so it can be checked
    # below. Only this call sits between the two clock reads.
    r = Runtime.no_inline act
    end = System.nano_time
    # If the computation returned a dataflow error, we raise it to a panic - we do not want silent failures in benchmarks.
    # The check runs after `end` was read, so it does not add to the duration.
    Panic.rethrow r
    # Elapsed time, as the difference of the two `System.nano_time` readings.
    end - start
|
||||
|
||||
## Run a single phase of the benchmark.
|
||||
@ -237,13 +239,15 @@ type Bench
|
||||
durations_builder.append dur
|
||||
@Tail_Call go (cur_ns + dur)
|
||||
go phase_start
|
||||
durations = durations_builder.to_vector
|
||||
sum = durations.reduce (_ + _)
|
||||
nanos_in_ms = 1000000
|
||||
durations = durations_builder.to_vector.map (x-> x / nanos_in_ms)
|
||||
stats = durations.compute_bulk [Statistic.Mean, Statistic.Standard_Deviation]
|
||||
avg = stats.first
|
||||
stddev = stats.second
|
||||
run_iters = durations.length
|
||||
avg = (sum / run_iters) / 1000000
|
||||
phase_end = System.nano_time
|
||||
phase_duration = Duration.new nanoseconds=(phase_end - phase_start)
|
||||
Bench.summarize_phase label phase_name run_iters avg phase_duration
|
||||
Bench.summarize_phase label phase_name run_iters avg stddev phase_duration
|
||||
|
||||
## PRIVATE
|
||||
This is a very simple implementation of summarizing the benchmark
|
||||
@ -251,16 +255,17 @@ type Bench
|
||||
|
||||
We may want to improve it later, but it gets the job done to give us
|
||||
simple summary that can be analysed more easily than logs.
|
||||
summarize_phase (label:Text) (phase_name:Text) (invocations:Integer) (average_time:Float) (time_stddev:Float) (phase_duration:Duration) =
    # Both statistics are rendered with the same pattern, for the console
    # output as well as for the report file.
    avg_fmt = average_time.format "#.###"
    stddev_fmt = time_stddev.format "#.###"
    IO.println <| phase_name + " duration: " + (phase_duration.total_milliseconds.format "#.##") + " ms"
    IO.println <| phase_name + " invocations: " + invocations.to_text
    # The standard deviation follows the average in a single pair of parentheses.
    IO.println <| phase_name + " avg time: " + avg_fmt + " ms (+-" + stddev_fmt + ")"

    # When a report path is configured, append one row for this phase:
    # label, phase, invocation count, average time, standard deviation.
    case get_benchmark_report_path of
        Nothing -> Nothing
        path ->
            line = '\n"'+label+'","'+phase_name+'",'+invocations.to_text+','+avg_fmt+','+stddev_fmt
            line.write path on_existing_file=Existing_File_Behavior.Append
|
||||
|
||||
## PRIVATE
|
||||
@ -274,3 +279,7 @@ validate_name name =
|
||||
valid_java_identifier_regex = Regex.compile "[A-Za-z_$][a-zA-Z0-9_$]*"
|
||||
if valid_java_identifier_regex.matches name then Nothing else
|
||||
Panic.throw (Illegal_Argument.Error ("Invalid benchmark name: '" + name + "'"))
|
||||
|
||||
## PRIVATE
   Looks up the `ENSO_BENCHMARK_REPORT_PATH` environment variable. Callers
   treat a `Nothing` result as "no report file requested".
get_benchmark_report_path : Text | Nothing
get_benchmark_report_path =
    Environment.get "ENSO_BENCHMARK_REPORT_PATH"
|
||||
|
@ -73,4 +73,9 @@ public class OrderedMultiValueKey extends MultiValueKeyBase
|
||||
"Currently no hash_code implementation consistent with the ObjectComparator is exposed, so"
|
||||
+ " OrderedMultiValueKey is not hashable.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "OrderedMultiValueKey{row="+rowIndex+"}";
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,36 @@
|
||||
package org.enso.benchmark_helpers;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
|
||||
* Wraps a Java HashMap into an interface hiding it, to ensure that we are calling the raw HashMap
|
||||
* and are not using the Enso conversions that may be applied automatically. This allows us to
|
||||
* compare the raw HashMap performance with other variants.
|
||||
*/
|
||||
public class JavaHashMapWrapper {
|
||||
private final HashMap<Object, Object> map = new HashMap<>();
|
||||
|
||||
public JavaHashMapWrapper insert(Object key, Object value) {
|
||||
map.put(key, value);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Object get(Object key) {
|
||||
return map.get(key);
|
||||
}
|
||||
|
||||
public long size() {
|
||||
return map.size();
|
||||
}
|
||||
|
||||
public Object[][] to_vector() {
|
||||
Object[][] result = new Object[map.size()][2];
|
||||
int i = 0;
|
||||
for (var entry : map.entrySet()) {
|
||||
result[i][0] = entry.getKey();
|
||||
result[i][1] = entry.getValue();
|
||||
i++;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
@ -11,6 +11,7 @@ import project.Table.Arithmetic
|
||||
import project.Table.Column_From_Vector
|
||||
import project.Table.Cross_Tab
|
||||
import project.Table.Sorting
|
||||
import project.Table.Internal.Multi_Value_Key
|
||||
import project.Text.Build
|
||||
import project.Text.Compare
|
||||
import project.Text.Contains
|
||||
@ -22,6 +23,7 @@ import project.Collections
|
||||
import project.Column_Numeric
|
||||
import project.Equality
|
||||
import project.Json_Bench
|
||||
import project.Map.Hash_Map
|
||||
import project.Natural_Order_Sort
|
||||
import project.Number_Parse
|
||||
import project.Numeric
|
||||
@ -39,6 +41,9 @@ all_benchmarks =
|
||||
builder.append Operations.collect_benches
|
||||
builder.append Sort.collect_benches
|
||||
|
||||
# Map
|
||||
builder.append Hash_Map.collect_benches
|
||||
|
||||
# Statistics
|
||||
builder.append Count_Min_Max.collect_benches
|
||||
|
||||
@ -49,6 +54,7 @@ all_benchmarks =
|
||||
builder.append Column_From_Vector.collect_benches
|
||||
builder.append Cross_Tab.collect_benches
|
||||
builder.append Sorting.collect_benches
|
||||
builder.append Multi_Value_Key.collect_benches
|
||||
|
||||
# Text
|
||||
builder.append Build.collect_benches
|
||||
|
59
test/Benchmarks/src/Map/Hash_Map.enso
Normal file
59
test/Benchmarks/src/Map/Hash_Map.enso
Normal file
@ -0,0 +1,59 @@
|
||||
from Standard.Base import all
|
||||
|
||||
from Standard.Table import Column, Value_Type, Auto
|
||||
import Standard.Table.Data.Type.Value_Type.Bits
|
||||
|
||||
from Standard.Test import Bench
|
||||
|
||||
polyglot java import org.enso.benchmark_helpers.JavaHashMapWrapper
|
||||
|
||||
# Warm-up and measurement configuration shared by all groups in this file.
options =
    warmup_conf = Bench.phase_conf 2 2
    measure_conf = Bench.phase_conf 2 3
    Bench.options . set_warmup warmup_conf . set_measure measure_conf
|
||||
|
||||
## Input data for the map benchmarks.
type Data
    # `ints` is a suspended field, so the vector is only built when first used.
    Value ~ints

    ## Creates a `Data` holding `n` pseudo-random integers drawn with the
       bounds `0` and `n.div 100`, so that keys repeat many times.
    create n =
        create_ints =
            # Same fixed argument as the Multi_Value_Key benchmark passes to
            # `Random.new`, so that every run measures the same input instead
            # of a freshly randomized one.
            rng = Random.new 42
            Vector.new n _->
                rng.integer 0 (n.div 100)
        Data.Value create_ints
|
||||
|
||||
## A benchmark scenario parametrized by the map implementation under test.
type Scenario
    # `map_constructor` is called with `Nothing` to obtain a fresh empty map.
    Instance map_constructor

    # Counts distinct values in a vector: a key is inserted only when it is
    # not present yet, so existing entries are never replaced.
    run_distinct self ints =
        empty = self.map_constructor Nothing
        seen = ints.fold empty map-> value->
            if (map.get value).is_nothing then map.insert value True else map
        seen.size

    # Finds the most frequent value in a vector: every element replaces the
    # counter stored under its key, then the largest counter is selected.
    run_count_keys self ints =
        empty = self.map_constructor Nothing
        counts = ints.fold empty map-> value->
            previous = map.get value . if_nothing 0
            map.insert value (previous + 1)
        counts.to_vector.fold (Pair.new Nothing 0) best-> entry->
            occurrences = entry.second
            if occurrences > best.second then Pair.new entry.first occurrences else best
|
||||
|
||||
## Registers the Enso-vs-Java map benchmarks.
collect_benches = Bench.build builder->
    size = 100000
    data = Data.create size
    group_name = "Enso_Hash_Map_" + size.to_text

    builder.group group_name options group_builder->
        # Scenario similar to what is done in distinct
        group_builder.specify "Enso_Incremental" <| Scenario.Instance (_ -> Map.empty) . run_distinct data.ints
        group_builder.specify "Java_Incremental" <| Scenario.Instance (_ -> JavaHashMapWrapper.new) . run_distinct data.ints

        # A scenario similar to what is done in add_row_number with grouping
        group_builder.specify "Enso_Replacement" <| Scenario.Instance (_ -> Map.empty) . run_count_keys data.ints
        group_builder.specify "Java_Replacement" <| Scenario.Instance (_ -> JavaHashMapWrapper.new) . run_count_keys data.ints
|
||||
|
||||
# Entry point for running only the benchmarks defined in this file.
main = collect_benches.run_main
|
119
test/Benchmarks/src/Table/Internal/Multi_Value_Key.enso
Normal file
119
test/Benchmarks/src/Table/Internal/Multi_Value_Key.enso
Normal file
@ -0,0 +1,119 @@
|
||||
from Standard.Base import all
|
||||
|
||||
from Standard.Table import Table, Value_Type, Aggregate_Column
|
||||
import Standard.Table.Internal.Multi_Value_Key.Ordered_Multi_Value_Key
|
||||
import Standard.Table.Internal.Multi_Value_Key.Unordered_Multi_Value_Key
|
||||
from Standard.Test import Bench
|
||||
|
||||
polyglot java import org.enso.table.data.index.OrderedMultiValueKey
|
||||
polyglot java import org.enso.table.data.index.UnorderedMultiValueKey
|
||||
polyglot java import org.enso.base.text.TextFoldingStrategy
|
||||
|
||||
# Warm-up and measurement configuration shared by all groups in this file.
options =
    warmup_conf = Bench.phase_conf 2 3
    measure_conf = Bench.phase_conf 2 2
    Bench.options . set_warmup warmup_conf . set_measure measure_conf
|
||||
|
||||
## A custom two-field object used as a non-primitive key column value.
type My_Pair
    Value x1 x2
|
||||
|
||||
## Custom comparator for `My_Pair`.
type My_Pair_Comparator
    # Orders by the second field first; the first field breaks ties.
    compare x y =
        (Ordering.compare x.x2 y.x2).and_then (Ordering.compare x.x1 y.x1)

    # Combines both fields with a bitwise xor.
    hash x =
        x.x1.bit_xor x.x2

# Registers the comparator above for all `My_Pair` values.
Comparable.from (_:My_Pair) = My_Pair_Comparator
|
||||
|
||||
## Builds the benchmark table with three columns: integers (`X`), texts (`Y`)
   and `My_Pair` objects (`Z`), and checks that the columns got the expected
   value types.
create_table : Integer -> Table
create_table num_rows =
    rng = Random.new 42
    # The columns are generated in this order from the single generator.
    ints = Vector.new num_rows _-> rng.integer min=0 max=100
    texts = Vector.new num_rows _-> rng.integer min=0 max=20 . to_text
    pairs = Vector.new num_rows _->
        first = rng.integer min=0 max=100
        second = rng.integer min=0 max=100
        My_Pair.Value first second
    result = Table.new [["X", ints], ["Y", texts], ["Z", pairs]]

    # Panics when the given condition does not hold.
    ensure condition =
        if condition.not then Panic.throw "Assertion failed"

    ensure ((result.at "X" . value_type) == Value_Type.Integer)
    ensure ((result.at "Y" . value_type) == Value_Type.Char)
    ensure ((result.at "Z" . value_type) == Value_Type.Mixed)
    result
|
||||
|
||||
|
||||
## Input data for the multi-value key benchmarks.
type Data
    # `table` is a suspended field, so the table is only built when first used.
    Value ~table

    ## Wraps the table produced by `create_table` for the given row count.
    create num_rows =
        Data.Value (create_table num_rows)
|
||||
|
||||
## Creates one key per table row with `make_key`, then compares each key with
   its predecessor using `compare_keys`. The comparison results are folded
   into a counter that is returned, so the comparisons cannot be skipped.
compare_ordered_keys make_key table compare_keys =
    row_count = table.row_count
    all_keys = 0.up_to row_count . map make_key
    1.up_to row_count . fold 0 counter-> ix->
        this_key = all_keys.at ix
        prev_key = all_keys.at (ix - 1)
        if compare_keys this_key prev_key then counter+1 else counter-1
|
||||
|
||||
## Creates one key per table row with `make_key` and hashes each of them with
   `get_hash`. The hashes are folded into a checksum (modulo 1997) that is
   returned, so the hash computations cannot be skipped.
compute_hashcodes make_key table get_hash =
    row_count = table.row_count
    all_keys = 0.up_to row_count . map make_key
    all_keys.fold 0 checksum-> key->
        (checksum + (get_hash key)) % 1997
|
||||
|
||||
## Registers benchmarks comparing the Enso and Java multi-value key
   implementations, for primitive key columns (`X`, `Y`) and for a key that
   includes custom objects (`X`, `Z`).
collect_benches = Bench.build builder->
    num_rows = 100000
    data = Data.create num_rows

    # Ordered keys: measures key creation and pairwise comparison.
    builder.group ("Ordered_Multi_Value_Key" + num_rows.to_text) options group_builder->
        run_enso table =
            columns = table.columns
            flags = Vector.fill columns.length False
            compare_ordered_keys (ix-> Ordered_Multi_Value_Key.from_row columns flags ix) table (a-> b-> a < b)

        run_java table =
            storages = table.columns.map col-> col.java_column.getStorage
            flags = Vector.fill storages.length 1
            compare_ordered_keys (ix-> OrderedMultiValueKey.new storages ix flags) table (a-> b-> (a.compareTo b) < 0)

        group_builder.specify "Primitive_Enso" <| run_enso (data.table.select_columns ["X", "Y"])
        group_builder.specify "Primitive_Java" <| run_java (data.table.select_columns ["X", "Y"])
        group_builder.specify "Custom_Object_Enso" <| run_enso (data.table.select_columns ["X", "Z"])
        group_builder.specify "Custom_Object_Java" <| run_java (data.table.select_columns ["X", "Z"])

    # Unordered keys: measures key creation and hash code computation.
    builder.group ("Unordered_Multi_Value_Key" + num_rows.to_text) options group_builder->
        run_enso table =
            columns = table.columns
            compute_hashcodes (ix-> Unordered_Multi_Value_Key.from_row columns ix) table (key-> key.hash_code)

        run_java table =
            storages = table.columns.map col-> col.java_column.getStorage
            foldings = Vector.fill storages.length TextFoldingStrategy.unicodeNormalizedFold
            compute_hashcodes (ix-> UnorderedMultiValueKey.new storages ix foldings) table (key-> key.hashCode)

        group_builder.specify "Primitive_Enso" <| run_enso (data.table.select_columns ["X", "Y"])
        group_builder.specify "Primitive_Java" <| run_java (data.table.select_columns ["X", "Y"])
        group_builder.specify "Custom_Object_Enso" <| run_enso (data.table.select_columns ["X", "Z"])
        group_builder.specify "Custom_Object_Java" <| run_java (data.table.select_columns ["X", "Z"])
|
||||
|
||||
# Entry point for running only the benchmarks defined in this file.
main = collect_benches.run_main
|
Loading…
Reference in New Issue
Block a user