mirror of
https://github.com/enso-org/enso.git
synced 2024-12-25 16:22:36 +03:00
1b8b30a68d
- Closes #5303 - Refactors `JoinStrategy` allowing us to 'stack' join strategies on top of each other (to some extent) - currently a `HashJoin` can be followed by another join strategy (currently `SortJoin`) - Adds benchmarks for join - Due to limitations of the sorting approach this will still not be as fast as possible for cases where there is more than 1 `Between` condition in a single query - trying to demonstrate that in benchmarks. - We can replace sorting by d-dimensional [RangeTrees](https://en.wikipedia.org/wiki/Range_tree) to get `O((n + m) log^d n + k)` performance (where `n` and `m` are sizes of joined tables, `d` is the number of `Between` conditions used in the query and `k` is the result set size). - Follow up ticket for consideration later: #8216 - Closes #8215 - After all, it turned out that `TreeSet` was problematic (because of not enough flexibility with duplicate key handling), so the simplest solution was to immediately implement this sub-task. - Closes #8204 - Unrelated, but I ran into this here: adds type checks to other arguments of `set`. - Before, putting in a Column as `new_name` (i.e. mistakenly messing up the order of arguments) led to a hard-to-understand `Method `if_then_else` of type Column could not be found.`; instead, it now fails with the type error `expected Text got Column`.
112 lines
3.6 KiB
Plaintext
112 lines
3.6 KiB
Plaintext
from Standard.Base import all hiding Range
|
|
from Standard.Test import Bench
|
|
import project.Vector.Array_Proxy_Bench
|
|
import project.Vector.Distinct
|
|
import project.Vector.Operations
|
|
import project.Vector.Sort
|
|
import project.Statistics.Count_Min_Max
|
|
import project.Table.Add_Row_Number
|
|
import project.Table.Aggregate
|
|
import project.Table.Arithmetic
|
|
import project.Table.Column_From_Vector
|
|
import project.Table.Cross_Tab
|
|
import project.Table.Join
|
|
import project.Table.Sorting
|
|
import project.Table.Internal.Multi_Value_Key
|
|
import project.Text.Build
|
|
import project.Text.Compare
|
|
import project.Text.Contains
|
|
import project.Text.Pretty
|
|
import project.Text.Reverse
|
|
import project.Time.Work_Days
|
|
import project.Time.Format
|
|
import project.Collections
|
|
import project.Column_Numeric
|
|
import project.Equality
|
|
import project.Json_Bench
|
|
import project.Map.Hash_Map
|
|
import project.Natural_Order_Sort
|
|
import project.Number_Parse
|
|
import project.Numeric
|
|
import project.Range
|
|
import project.Sum
|
|
import project.Runtime.Panics_And_Errors
|
|
from Standard.Base.Runtime import Debug
|
|
|
|
## Gathers the benchmark suites of every benchmark module imported by this
   file into a single vector.

   Each module contributes the result of its `collect_benches`; the entries
   appear in the returned vector in exactly the order they are appended below.
all_benchmarks : Vector Bench.All
all_benchmarks =
    suites = Vector.new_builder

    # Vector
    suites.append Array_Proxy_Bench.collect_benches
    suites.append Distinct.collect_benches
    suites.append Operations.collect_benches
    suites.append Sort.collect_benches

    # Map
    suites.append Hash_Map.collect_benches

    # Statistics
    suites.append Count_Min_Max.collect_benches

    # Table
    suites.append Aggregate.collect_benches
    suites.append Arithmetic.collect_benches
    suites.append Add_Row_Number.collect_benches
    suites.append Column_From_Vector.collect_benches
    suites.append Cross_Tab.collect_benches
    suites.append Join.collect_benches
    suites.append Sorting.collect_benches
    suites.append Multi_Value_Key.collect_benches

    # Text
    suites.append Build.collect_benches
    suites.append Compare.collect_benches
    suites.append Contains.collect_benches
    suites.append Pretty.collect_benches
    suites.append Reverse.collect_benches

    # Time
    suites.append Format.collect_benches
    suites.append Work_Days.collect_benches

    # Other benchmark modules imported directly from the project root
    suites.append Collections.collect_benches
    suites.append Column_Numeric.collect_benches
    suites.append Equality.collect_benches
    suites.append Json_Bench.collect_benches
    suites.append Natural_Order_Sort.collect_benches
    suites.append Number_Parse.collect_benches
    suites.append Numeric.collect_benches
    suites.append Range.collect_benches
    suites.append Sum.collect_benches

    # Runtime
    suites.append Panics_And_Errors.collect_benches

    suites.to_vector
|
|
|
|
## Entry point: prints how many suites and specs were found and the summed
   estimated runtime, then runs every suite through its `run_main`.
main =
    suites = all_benchmarks

    # Sum the per-suite spec counts.
    spec_counts = suites.map .total_specs
    spec_count = spec_counts.fold 0 (+)
    found_message = "Found " + suites.length.to_text + " benchmark suites, containing " + spec_count.to_text + " specs in total."
    IO.println found_message

    # Sum the per-suite runtime estimates, starting from a zero duration.
    runtimes = suites.map .estimated_runtime
    total_runtime = runtimes.fold Duration.zero (+)
    runtime_message = "The minimal estimated run time based on configurations is " + total_runtime.to_display_text + "."
    IO.println runtime_message

    suites.each current_suite-> current_suite.run_main
|
|
|
|
## Prints one line per benchmark spec, preceded by a header line with the
   total count.

   Every line has the form `<group name>.<spec name> <group configuration>`,
   gathered by walking each suite's groups and each group's specs.
list_names =
    labels = Vector.new_builder
    all_benchmarks.each bench_suite->
        bench_suite.groups.each bench_group->
            bench_group.specs.each bench_spec->
                # Qualified spec name followed by the text form of the group's configuration.
                labels.append (bench_group.name + "." + bench_spec.name + " " + bench_group.configuration.to_text)
    collected = labels.to_vector

    header = "Benchmarks: (count = " + collected.length.to_text + ")"
    IO.println header
    collected.each label->
        IO.println (" " + label)
|