Add Join Large Small benchmarks (#8845)

Adds new benchmarks that join a large table to a small table (in both orders), in preparation for the upcoming optimisation in #8342 that will index only the smaller of the two tables.
This commit is contained in:
AdRiley 2024-01-24 19:13:09 +00:00 committed by GitHub
parent 5eb3f3bd1d
commit 1c6898b19d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -150,11 +150,21 @@ create_scenario_antijoin num_rows =
table2 = Table.new [["key", ys]]
Scenario.Value table1 table2
## Builds the scenario used to benchmark joining a very large table with a
   much smaller one, exercising the optimisation in which only the smaller of
   the two tables is indexed.

   `table1` is the large side (1000000 random keys drawn with
   `Random.integer 0 99`) and `table2` is the small side (the keys produced by
   `0.up_to 100`).
create_scenario_large_small_table =
    large_keys = (0.up_to 1000000).map _-> Random.integer 0 99
    small_keys = (0.up_to 100).to_vector
    large_table = Table.new [["key", large_keys]]
    small_table = Table.new [["key", small_keys]]
    Scenario.Value large_table small_table
# Container bundling one scenario per join benchmark; `create` fills every
# field by calling the matching `create_scenario_*` builder.
type Data
# All fields are declared with `~` (presumably suspended, so a scenario is only
# built when a benchmark first reads it — confirm against the Enso docs).
# NOTE(review): the two consecutive `Value` lines look like the before/after
# sides of a diff hunk whose +/- markers were lost; only the longer one (with
# `~large_small_table`) should remain in the real file — confirm.
Value ~equals ~equals_medium_groups ~equals_ignore_case ~between ~mixed ~equals2d ~between2d ~between2d_belts ~antijoin
Value ~equals ~equals_medium_groups ~equals_ignore_case ~between ~mixed ~equals2d ~between2d ~between2d_belts ~antijoin ~large_small_table
# Builds a `Data.Value`, passing `num_rows` to every scenario builder except
# `create_scenario_large_small_table`, which takes no arguments and uses its
# own fixed sizes.
create num_rows =
# NOTE(review): as above, the two `Data.Value` lines appear to be the old and
# new versions of the same line; the second one matches the new constructor.
Data.Value (create_scenario_equals num_rows) (create_scenario_equals_medium_groups num_rows) (create_scenario_equals_ignore_case num_rows) (create_scenario_between num_rows) (create_scenario_mixed num_rows) (create_scenario_equals_2d num_rows) (create_scenario_between_2d num_rows) (create_scenario_between_2d_belts num_rows) (create_scenario_antijoin num_rows)
Data.Value (create_scenario_equals num_rows) (create_scenario_equals_medium_groups num_rows) (create_scenario_equals_ignore_case num_rows) (create_scenario_between num_rows) (create_scenario_mixed num_rows) (create_scenario_equals_2d num_rows) (create_scenario_between_2d num_rows) (create_scenario_between_2d_belts num_rows) (create_scenario_antijoin num_rows) (create_scenario_large_small_table)
collect_benches = Bench.build builder->
num_rows = 50000
@ -218,4 +228,14 @@ collect_benches = Bench.build builder->
r = scenario.table2.join scenario.table1 on="key" join_kind=Join_Kind.Left_Exclusive
assert (r.row_count == 1000)
# Benchmark: the large table (`table1`) is the left side of the join and the
# small table (`table2`) is the right side. Only registered when
# `extended_tests` is set.
if extended_tests then group_builder.specify "Join_Large_Table_to_Small_Table" <|
scenario = data.large_small_table
# No `join_kind` is passed, so the join uses whatever the default kind is.
r = scenario.table1.join scenario.table2 on="key"
# Sanity check: the result is expected to have exactly one row per row of the
# large table (presumably because every key in `table1` occurs once in
# `table2` — verify against `create_scenario_large_small_table`).
assert (r.row_count == scenario.table1.row_count)
# Benchmark: same scenario with the operands swapped, so the small table is the
# left side and the large table is the right side.
if extended_tests then group_builder.specify "Join_Small_Table_to_Large_Table" <|
scenario = data.large_small_table
r = scenario.table2.join scenario.table1 on="key"
# The expected row count is still that of the large table (`table1`), not of
# the left operand.
assert (r.row_count == scenario.table1.row_count)
# Entry point: invokes `run_main` on the value built by `collect_benches`.
main = collect_benches . run_main