enso/test/Benchmarks/src/Main.enso
Radosław Waśko 1b8b30a68d
Improve performance of Join_Condition.Between by sorting on one dimension (#8212)
- Closes #5303
- Refactors `JoinStrategy` allowing us to 'stack' join strategies on top of each other (to some extent) - currently a `HashJoin` can be followed by another join strategy (currently `SortJoin`)
- Adds benchmarks for join
- Due to limitations of the sorting approach this will still not be as fast as possible for cases where there is more than 1 `Between` condition in a single query - trying to demonstrate that in benchmarks.
- We can replace sorting with d-dimensional [RangeTrees](https://en.wikipedia.org/wiki/Range_tree) to get `O((n + m) log^d n + k)` performance (where `n` and `m` are the sizes of the joined tables, `d` is the number of `Between` conditions used in the query and `k` is the result set size).
- Follow up ticket for consideration later:
#8216
- Closes #8215
- After all, it turned out that `TreeSet` was problematic (because of not enough flexibility with duplicate key handling), so the simplest solution was to immediately implement this sub-task.
- Closes #8204
- Unrelated, but I ran into this here: adds type checks to other arguments of `set`.
- Before, passing a Column as `new_name` (i.e. mistakenly mixing up the order of arguments) led to a hard-to-understand error: ``Method `if_then_else` of type Column could not be found.`` Now it fails with a type error instead: `expected Text got Column`.
2023-11-08 12:59:55 +00:00

112 lines
3.6 KiB
Plaintext

from Standard.Base import all hiding Range
from Standard.Test import Bench
import project.Vector.Array_Proxy_Bench
import project.Vector.Distinct
import project.Vector.Operations
import project.Vector.Sort
import project.Statistics.Count_Min_Max
import project.Table.Add_Row_Number
import project.Table.Aggregate
import project.Table.Arithmetic
import project.Table.Column_From_Vector
import project.Table.Cross_Tab
import project.Table.Join
import project.Table.Sorting
import project.Table.Internal.Multi_Value_Key
import project.Text.Build
import project.Text.Compare
import project.Text.Contains
import project.Text.Pretty
import project.Text.Reverse
import project.Time.Work_Days
import project.Time.Format
import project.Collections
import project.Column_Numeric
import project.Equality
import project.Json_Bench
import project.Map.Hash_Map
import project.Natural_Order_Sort
import project.Number_Parse
import project.Numeric
import project.Range
import project.Sum
import project.Runtime.Panics_And_Errors
from Standard.Base.Runtime import Debug
## Gathers the benchmark suites of every imported benchmark module into a single vector.
all_benchmarks : Vector Bench.All
all_benchmarks =
    # Suites are grouped by area; the final concatenation keeps exactly the
    # order in which the suites are listed below.
    vector_suites = [Array_Proxy_Bench.collect_benches, Distinct.collect_benches, Operations.collect_benches, Sort.collect_benches]
    map_suites = [Hash_Map.collect_benches]
    statistics_suites = [Count_Min_Max.collect_benches]
    table_suites = [Aggregate.collect_benches, Arithmetic.collect_benches, Add_Row_Number.collect_benches, Column_From_Vector.collect_benches, Cross_Tab.collect_benches, Join.collect_benches, Sorting.collect_benches, Multi_Value_Key.collect_benches]
    text_suites = [Build.collect_benches, Compare.collect_benches, Contains.collect_benches, Pretty.collect_benches, Reverse.collect_benches]
    time_suites = [Format.collect_benches, Work_Days.collect_benches]
    # Modules imported directly from `project`, not from one of the area
    # sub-packages above.
    general_suites = [Collections.collect_benches, Column_Numeric.collect_benches, Equality.collect_benches, Json_Bench.collect_benches, Natural_Order_Sort.collect_benches, Number_Parse.collect_benches, Numeric.collect_benches, Range.collect_benches, Sum.collect_benches]
    runtime_suites = [Panics_And_Errors.collect_benches]

    vector_suites + map_suites + statistics_suites + table_suites + text_suites + time_suites + general_suites + runtime_suites
## Prints a summary of the collected benchmark suites and then runs each of them in order.
main =
    suites = all_benchmarks

    # Report how many suites and specs were discovered.
    spec_counts = suites.map .total_specs
    spec_total = spec_counts.fold 0 (+)
    found_message = "Found " + suites.length.to_text + " benchmark suites, containing " + spec_total.to_text + " specs in total."
    IO.println found_message

    # Sum of the `estimated_runtime` reported by each suite.
    suite_runtimes = suites.map .estimated_runtime
    runtime_total = suite_runtimes.fold Duration.zero (+)
    runtime_message = "The minimal estimated run time based on configurations is " + runtime_total.to_display_text + "."
    IO.println runtime_message

    suites.each .run_main
## Prints the name of every benchmark spec together with the configuration of its group.
list_names =
    entries = Vector.new_builder
    all_benchmarks.each suite->
        suite.groups.each group->
            group.specs.each spec->
                # One entry per spec: "<group name>.<spec name> <group configuration>".
                entries.append (group.name + "." + spec.name + " " + group.configuration.to_text)

    listed = entries.to_vector
    header = "Benchmarks: (count = " + listed.length.to_text + ")"
    IO.println header
    listed.each entry->
        IO.println (" " + entry)