Adding most of remaining aggregates to Database Table (#3375)

This commit is contained in:
Radosław Waśko 2022-04-06 12:06:50 +02:00 committed by GitHub
parent 29e3f05f27
commit a71db71645
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 402 additions and 100 deletions

View File

@ -7,6 +7,7 @@ import Standard.Table.Data.Column as Materialized_Column
from Standard.Database.Data.Sql import Sql_Type
from Standard.Database.Data.Table import Integrity_Error
from Standard.Database.Error as Database_Errors import Unsupported_Database_Operation_Error
type Column
@ -118,7 +119,7 @@ type Column
Column _ _ _ other_expr _ ->
case Helpers.check_integrity this operand of
False ->
Error.throw <| Illegal_State_Error "Cannot compare columns coming from different contexts. Only columns of a single table can be compared."
Error.throw <| Unsupported_Database_Operation_Error "Cannot compare columns coming from different contexts. Only columns of a single table can be compared."
True ->
new_expr = IR.Operation op_kind [this.expression, other_expr]
Column this.name this.connection actual_new_type new_expr this.context

View File

@ -0,0 +1,48 @@
from Standard.Base import all
import Standard.Database.Data.Sql
## PRIVATE
   Builds a SQL expression which concatenates a column's values within each
   group, with separators and an optional prefix, suffix and quoting.

   Arguments:
   - make_raw_concat_expr: a function which takes two expressions - a (possibly
     processed) column expression and a separator and returns a SQL expression
     which concatenates the expressions with separators.
   - make_contains_expr: a function which takes two expressions - a string to
     search in and a substring to search for and returns an expression which is
     true if the string contains the substring.
   - has_quote: specifies whether it should expect an additional SQL argument
     containing a quote character.
   - args: the processed SQL arguments from the generator.

   It takes 4 or 5 SQL arguments depending on `has_quote`.
   The SQL arguments are following:
   - the column expression to concatenate,
   - the separator to use when joining the expressions,
   - a prefix to prepend,
   - a suffix to append,
   - a quote character to use to escape separator if it is present inside of a
     column expression; it should be provided only if `has_quote` is `True` and
     must not be empty then. If the quote character occurs in the expression, it
     is escaped by doubling each occurrence.
make_concat make_raw_concat_expr make_contains_expr has_quote args =
    expected_args = if has_quote then 5 else 4
    if args.length != expected_args then Error.throw (Illegal_State_Error "Unexpected number of arguments for the concat operation.") else
        expr = args.at 0
        separator = args.at 1
        prefix = args.at 2
        suffix = args.at 3
        append = Sql.code " || "
        possibly_quoted = case has_quote of
            True ->
                quote = args.at 4
                # A value needs quoting if it contains the (non-empty)
                # separator or the quote character itself.
                includes_separator = separator ++ Sql.code " != '' AND " ++ make_contains_expr expr separator
                ## We use the assumption that `has_quote` is True iff `quote` is not empty.
                includes_quote = make_contains_expr expr quote
                needs_quoting = includes_separator.paren ++ Sql.code " OR " ++ includes_quote.paren
                # Occurrences of the quote character are escaped by doubling them.
                escaped = Sql.code "replace(" ++ expr ++ Sql.code ", " ++ quote ++ Sql.code ", " ++ quote ++ append ++ quote ++ Sql.code ")"
                quoted = quote ++ append ++ escaped ++ append ++ quote
                Sql.code "CASE WHEN " ++ needs_quoting ++ Sql.code " THEN " ++ quoted ++ Sql.code " ELSE " ++ expr ++ Sql.code " END"
            False -> expr
        # NULL values are mapped to empty strings before concatenation.
        transformed_expr = Sql.code "CASE WHEN " ++ expr ++ Sql.code " IS NULL THEN '' ELSE " ++ possibly_quoted.paren ++ Sql.code " END"
        concatenated = make_raw_concat_expr transformed_expr separator
        prefix.paren ++ append ++ concatenated ++ append ++ suffix.paren

View File

@ -3,6 +3,7 @@ from Standard.Base import all
from Standard.Table.Data.Aggregate_Column import all
from Standard.Database.Data.Sql import Sql_Type
import Standard.Database.Data.Dialect
import Standard.Database.Data.Dialect.Helpers
import Standard.Database.Data.Internal.Base_Generator
## PRIVATE
@ -71,10 +72,13 @@ make_internal_generator_dialect =
res.paren
False ->
Error.throw ("Invalid amount of arguments for operation contains")
text = [["starts_with", starts_with], ["contains", contains], ["ends_with", ends_with]]
counts = [here.agg_count_is_null, here.agg_count_empty, here.agg_count_not_empty, here.agg_count_distinct_including_nulls]
stats = [here.agg_median]
my_mappings = text + counts + stats
text = [["starts_with", starts_with], ["contains", contains], ["ends_with", ends_with], here.agg_shortest, here.agg_longest]+here.concat_ops
counts = [here.agg_count_is_null, here.agg_count_empty, here.agg_count_not_empty]
stddev_pop = ["STDDEV_POP", Base_Generator.make_function "stddev_pop"]
stddev_samp = ["STDDEV_SAMP", Base_Generator.make_function "stddev_samp"]
stats = [here.agg_median, here.agg_mode, here.agg_percentile, stddev_pop, stddev_samp]
my_mappings = text + counts + stats + here.first_last_aggregators
Base_Generator.base_dialect . extend_with my_mappings
## PRIVATE
@ -115,10 +119,61 @@ agg_count_empty = Base_Generator.lift_unary_op "COUNT_EMPTY" arg->
agg_count_not_empty = Base_Generator.lift_unary_op "COUNT_NOT_EMPTY" arg->
Sql.code "COUNT(CASE WHEN (" ++ arg.paren ++ Sql.code " IS NOT NULL) AND (" ++ arg.paren ++ Sql.code " != '') THEN 1 END)"
## PRIVATE
agg_count_distinct_including_nulls = Base_Generator.lift_unary_op "COUNT_DISTINCT_INCLUDE_NULL" arg->
Sql.code "(COUNT(DISTINCT " ++ arg.paren ++ Sql.code ") + CASE WHEN COUNT(CASE WHEN " ++ arg.paren ++ Sql.code " IS NULL THEN 1 END) > 0 THEN 1 ELSE 0 END)"
## PRIVATE
   Median aggregation, delegated to the database's `percentile_cont` at 0.5.
agg_median = Base_Generator.lift_unary_op "MEDIAN" expr->
    prefix = Sql.code "percentile_cont(0.5) WITHIN GROUP (ORDER BY "
    prefix ++ expr ++ Sql.code ")"
## PRIVATE
   Mode aggregation, delegated to the database's ordered-set `mode()` function.
agg_mode = Base_Generator.lift_unary_op "MODE" col->
    Sql.code "mode() WITHIN GROUP (ORDER BY " ++ col ++ Sql.code ")"
## PRIVATE
   Percentile aggregation; the first argument is the fraction (0..1), the
   second is the column expression.
agg_percentile = Base_Generator.lift_binary_op "PERCENTILE" fraction-> expr->
    Sql.code "percentile_cont(" ++ fraction ++ Sql.code ") WITHIN GROUP (ORDER BY " ++ expr ++ Sql.code ")"
## PRIVATE
   Implemented by sorting an `array_agg` and picking one of its ends; this is
   not the most efficient strategy, but it composes with the other group-by
   aggregations out-of-the-box. Alternative approaches may be considered in
   the future.
first_last_aggregators =
    make = here.make_first_aggregator
    first = make reverse=False ignore_null=False
    first_not_null = make reverse=False ignore_null=True
    last = make reverse=True ignore_null=False
    last_not_null = make reverse=True ignore_null=True
    [["FIRST", first], ["FIRST_NOT_NULL", first_not_null], ["LAST", last], ["LAST_NOT_NULL", last_not_null]]
## PRIVATE
   Creates a generator for a FIRST/LAST-style aggregation, implemented by
   sorting an `array_agg` of the values and indexing one of its ends.

   Arguments:
   - reverse: if True, the value at the end of the sorted array is picked
     (a LAST-style aggregation); otherwise the first element is picked.
   - ignore_null: if True, rows where the result expression is NULL are
     filtered out before aggregating.
   - args: the processed SQL arguments; the first one is the expression whose
     value is returned and the remaining ones define the ordering.
make_first_aggregator reverse ignore_null args =
    if args.length < 2 then Error.throw (Illegal_State_Error "Insufficient number of arguments for the operation.") else
        result_expr = args.head
        order_exprs = args.tail
        filter_clause = if ignore_null.not then Sql.code "" else
            Sql.code " FILTER (WHERE " ++ result_expr.paren ++ Sql.code " IS NOT NULL)"
        modified_order_exprs =
            order_exprs.map expr-> expr ++ Sql.code " ASC NULLS LAST"
        order_clause =
            Sql.code " ORDER BY " ++ Sql.join "," modified_order_exprs
        # The LAST variants index the sorted array at its length; when NULLs
        # are filtered out, COUNT(expr) counts only the entries that remain
        # in the array.
        index_expr = case reverse of
            True -> if ignore_null.not then Sql.code "COUNT(*)" else
                Sql.code "COUNT(" ++ result_expr ++ Sql.code ")"
            False -> Sql.code "1"
        Sql.code "(array_agg(" ++ result_expr.paren ++ order_clause ++ Sql.code ")" ++ filter_clause ++ Sql.code ")[" ++ index_expr ++ Sql.code "]"
## PRIVATE
   Picks the value with the smallest character length in the group.
agg_shortest = Base_Generator.lift_unary_op "SHORTEST" col->
    order_by = Sql.code " ORDER BY char_length(" ++ col ++ Sql.code ") ASC NULLS LAST"
    Sql.code "(array_agg(" ++ col.paren ++ order_by ++ Sql.code "))[1]"
## PRIVATE
   Picks the value with the largest character length in the group.
agg_longest = Base_Generator.lift_unary_op "LONGEST" col->
    order_by = Sql.code " ORDER BY char_length(" ++ col ++ Sql.code ") DESC NULLS LAST"
    Sql.code "(array_agg(" ++ col.paren ++ order_by ++ Sql.code "))[1]"
## PRIVATE
   Concatenation operations for the Postgres dialect.
concat_ops =
    make_raw_concat_expr expr separator =
        Sql.code "array_to_string(array_agg(" ++ expr ++ Sql.code "), " ++ separator ++ Sql.code ")"
    make_contains_expr expr substring =
        # PostgreSQL's `position` only supports the `substring IN string`
        # syntax - the comma-separated two-argument form used previously is
        # invalid in Postgres and would fail at query time.
        Sql.code "position(" ++ substring ++ Sql.code " in " ++ expr ++ Sql.code ") > 0"
    concat = Helpers.make_concat make_raw_concat_expr make_contains_expr
    [["CONCAT", concat (has_quote=False)], ["CONCAT_QUOTE_IF_NEEDED", concat (has_quote=True)]]

View File

@ -3,6 +3,7 @@ from Standard.Base import all
from Standard.Table.Data.Aggregate_Column import all
from Standard.Database.Data.Sql import Sql_Type
import Standard.Database.Data.Dialect
import Standard.Database.Data.Dialect.Helpers
import Standard.Database.Data.Internal.Base_Generator
## PRIVATE
@ -70,9 +71,10 @@ make_internal_generator_dialect =
res.paren
False ->
Error.throw ("Invalid amount of arguments for operation contains")
text = [["starts_with", starts_with], ["contains", contains], ["ends_with", ends_with]]
counts = [here.agg_count_is_null, here.agg_count_empty, here.agg_count_not_empty, here.agg_count_distinct_including_nulls]
my_mappings = text + counts
text = [["starts_with", starts_with], ["contains", contains], ["ends_with", ends_with]]+here.concat_ops
counts = [here.agg_count_is_null, here.agg_count_empty, here.agg_count_not_empty]
stats = [here.agg_stddev_pop, here.agg_stddev_samp]
my_mappings = text + counts + stats
Base_Generator.base_dialect . extend_with my_mappings
## PRIVATE
@ -114,5 +116,49 @@ agg_count_not_empty = Base_Generator.lift_unary_op "COUNT_NOT_EMPTY" arg->
Sql.code "COALESCE(SUM((" ++ arg.paren ++ Sql.code " IS NOT NULL) AND (" ++ arg.paren ++ Sql.code " != '')), 0)"
## PRIVATE
agg_count_distinct_including_nulls = Base_Generator.lift_unary_op "COUNT_DISTINCT_INCLUDE_NULL" arg->
Sql.code "(COUNT(DISTINCT " ++ arg.paren ++ Sql.code ") + CASE WHEN SUM(" ++ arg.paren ++ Sql.code " IS NULL) > 0 THEN 1 ELSE 0 END)"
## PRIVATE
   Emulates population standard deviation (which SQLite lacks) as
   sqrt((SUM(x*x) - SUM(x)^2 / n) / n).
agg_stddev_pop = Base_Generator.lift_unary_op "STDDEV_POP" col->
    sum_sq = Sql.code "SUM(" ++ col.paren ++ Sql.code "*" ++ col.paren ++ Sql.code ")"
    sq_sum = Sql.code "SUM(" ++ col ++ Sql.code ") * SUM(" ++ col ++ Sql.code ")"
    count = Sql.code "COUNT(" ++ col ++ Sql.code ")"
    variance = Sql.code "(" ++ sum_sq ++ Sql.code " - (" ++ sq_sum ++ Sql.code " / " ++ count ++ Sql.code ")) / " ++ count
    Sql.code "SQRT(" ++ variance ++ Sql.code ")"
## PRIVATE
   Emulates sample standard deviation (which SQLite lacks) as
   sqrt((SUM(x*x) - SUM(x)^2 / n) / (n - 1)).
agg_stddev_samp = Base_Generator.lift_unary_op "STDDEV_SAMP" col->
    sum_sq = Sql.code "SUM(" ++ col.paren ++ Sql.code "*" ++ col.paren ++ Sql.code ")"
    sq_sum = Sql.code "SUM(" ++ col ++ Sql.code ") * SUM(" ++ col ++ Sql.code ")"
    count = Sql.code "COUNT(" ++ col ++ Sql.code ")"
    variance = Sql.code "(" ++ sum_sq ++ Sql.code " - (" ++ sq_sum ++ Sql.code " / " ++ count ++ Sql.code ")) / (" ++ count ++ Sql.code " - 1)"
    Sql.code "SQRT(" ++ variance ++ Sql.code ")"
## PRIVATE
   This is a prototype that doesn't work correctly. Left for reference for
   future implementation. It is not registered in `make_internal_generator_dialect`.
first_last_aggregators =
    first_value = "first_value"
    first = here.window_aggregate first_value ignore_null=False
    first_not_null = here.window_aggregate first_value ignore_null=True
    last_value = "last_value"
    last = here.window_aggregate last_value ignore_null=False
    last_not_null = here.window_aggregate last_value ignore_null=True
    [["FIRST", first], ["FIRST_NOT_NULL", first_not_null], ["LAST", last], ["LAST_NOT_NULL", last_not_null]]
## PRIVATE
   Creates a generator emulating a FIRST/LAST-style aggregation via a window
   function (`first_value` / `last_value`).

   NOTE(review): window functions are evaluated per input row rather than per
   group, which is presumably why this prototype does not compose correctly
   with grouped aggregations - confirm before relying on it.
window_aggregate window_type ignore_null args =
    if args.length < 2 then Error.throw (Illegal_State_Error "Insufficient number of arguments for the operation.") else
        result_expr = args.head
        order_exprs = args.tail
        filter_clause = if ignore_null.not then Sql.code "" else
            Sql.code " FILTER (WHERE " ++ result_expr.paren ++ Sql.code " IS NOT NULL)"
        Sql.code window_type+"(" ++ result_expr ++ Sql.code ")" ++ filter_clause ++ Sql.code " OVER (ORDER BY " ++ Sql.join "," order_exprs ++ Sql.code ")"
## PRIVATE
   Concatenation operations for the SQLite dialect, built on `group_concat`
   and `instr`.
concat_ops =
    raw_concat expr separator =
        Sql.code "group_concat(" ++ expr ++ Sql.code ", " ++ separator ++ Sql.code ")"
    contains expr substring =
        Sql.code "instr(" ++ expr ++ Sql.code ", " ++ substring ++ Sql.code ") > 0"
    concat_builder = Helpers.make_concat raw_concat contains
    [["CONCAT", concat_builder (has_quote=False)], ["CONCAT_QUOTE_IF_NEEDED", concat_builder (has_quote=True)]]

View File

@ -3,6 +3,7 @@ from Standard.Base import all
from Standard.Table.Data.Aggregate_Column import all
import Standard.Database.Data.Internal.IR
from Standard.Database.Data.Sql import Sql_Type
from Standard.Database.Error as Database_Errors import Unsupported_Database_Operation_Error
make_aggregate_column : Table -> Aggregate_Column -> Text -> IR.Internal_Column
make_aggregate_column table aggregate new_name =
@ -11,36 +12,47 @@ make_aggregate_column table aggregate new_name =
IR.Internal_Column new_name sql_type expression
make_expression : Aggregate_Column -> IR.Expression
make_expression aggregate = case aggregate of
Group_By c _ -> c.expression
Count _ -> IR.Operation "COUNT_ROWS" []
Count_Distinct columns _ ignore_nothing -> case columns.length > 1 of
True ->
## TODO
Error.throw (Illegal_State_Error "Multi column distinct is not implemented yet.")
False ->
column = columns.first
make_expression aggregate =
is_non_empty_vector v = if v.is_nothing then False else v.not_empty
case aggregate of
Group_By c _ -> c.expression
Count _ -> IR.Operation "COUNT_ROWS" []
Count_Distinct columns _ ignore_nothing -> if columns.is_empty then Error.throw (Illegal_Argument_Error "Count_Distinct must have at least one column.") else
case ignore_nothing of
True -> IR.Operation "COUNT_DISTINCT" [column.expression]
False -> IR.Operation "COUNT_DISTINCT_INCLUDE_NULL" [column.expression]
Count_Not_Nothing c _ -> IR.Operation "COUNT" [c.expression]
Count_Nothing c _ -> IR.Operation "COUNT_IS_NULL" [c.expression]
Count_Not_Empty c _ -> IR.Operation "COUNT_NOT_EMPTY" [c.expression]
Count_Empty c _ -> IR.Operation "COUNT_EMPTY" [c.expression]
Percentile p c _ -> IR.Operation "PERCENTILE" [IR.Constant Sql_Type.double p, c.expression]
Mode c _ -> IR.Operation "MODE" [c.expression]
First _ _ _ _ -> Error.throw (Illegal_State_Error "Not implemented yet.")
Last _ _ _ _ -> Error.throw (Illegal_State_Error "Not implemented yet.")
Maximum c _ -> IR.Operation "MAX" [c.expression]
Minimum c _ -> IR.Operation "MIN" [c.expression]
Shortest c _ -> IR.Operation "MIN" [c.expression]
Longest c _ -> IR.Operation "MAX" [c.expression]
Standard_Deviation c _ population ->
## TODO!
_ = population
IR.Operation "STD_DEV" [c.expression]
Concatenate c _ _ _ _ _ ->
IR.Operation "CONCAT" [c.expression]
Sum c _ -> IR.Operation "SUM" [c.expression]
Average c _ -> IR.Operation "AVG" [c.expression]
Median c _ -> IR.Operation "MEDIAN" [c.expression]
True -> IR.Operation "COUNT_DISTINCT" (columns.map .expression)
False -> IR.Operation "COUNT_DISTINCT_INCLUDE_NULL" (columns.map .expression)
Count_Not_Nothing c _ -> IR.Operation "COUNT" [c.expression]
Count_Nothing c _ -> IR.Operation "COUNT_IS_NULL" [c.expression]
Count_Not_Empty c _ -> IR.Operation "COUNT_NOT_EMPTY" [c.expression]
Count_Empty c _ -> IR.Operation "COUNT_EMPTY" [c.expression]
Percentile p c _ -> IR.Operation "PERCENTILE" [IR.Constant Sql_Type.double p, c.expression]
Mode c _ -> IR.Operation "MODE" [c.expression]
First c _ ignore_nothing order_by -> case is_non_empty_vector order_by of
False -> Error.throw (Unsupported_Database_Operation_Error "`First` aggregation requires at least one `order_by` column.")
True ->
order_exprs = order_by.map .expression
case ignore_nothing of
False -> IR.Operation "FIRST" [c.expression]+order_exprs
True -> IR.Operation "FIRST_NOT_NULL" [c.expression]+order_exprs
Last c _ ignore_nothing order_by -> case is_non_empty_vector order_by of
False -> Error.throw (Unsupported_Database_Operation_Error "`Last` aggregation requires at least one `order_by` column.")
True ->
order_exprs = order_by.map .expression
case ignore_nothing of
False -> IR.Operation "LAST" [c.expression]+order_exprs
True -> IR.Operation "LAST_NOT_NULL" [c.expression]+order_exprs
Maximum c _ -> IR.Operation "MAX" [c.expression]
Minimum c _ -> IR.Operation "MIN" [c.expression]
Shortest c _ -> IR.Operation "SHORTEST" [c.expression]
Longest c _ -> IR.Operation "LONGEST" [c.expression]
Standard_Deviation c _ population -> case population of
True -> IR.Operation "STDDEV_POP" [c.expression]
False -> IR.Operation "STDDEV_SAMP" [c.expression]
Concatenate c _ separator prefix suffix quote_char ->
base_args = [c.expression, IR.Constant Sql_Type.text separator, IR.Constant Sql_Type.text prefix, IR.Constant Sql_Type.text suffix]
case quote_char.is_empty of
True -> IR.Operation "CONCAT" base_args
False -> IR.Operation "CONCAT_QUOTE_IF_NEEDED" base_args+[IR.Constant Sql_Type.text quote_char]
Sum c _ -> IR.Operation "SUM" [c.expression]
Average c _ -> IR.Operation "AVG" [c.expression]
Median c _ -> IR.Operation "MEDIAN" [c.expression]

View File

@ -4,6 +4,7 @@ import Standard.Database.Data.Sql
import Standard.Database.Data.Internal.IR
from Standard.Database.Data.Sql import Sql_Type
from Standard.Database.Error as Database_Errors import Unsupported_Database_Operation_Error
type Internal_Dialect
@ -50,7 +51,7 @@ make_binary_op name =
op = Sql.code " "+name+" "
(arguments.at 0)++op++(arguments.at 1) . paren
False ->
Error.throw ("Invalid amount of arguments for operation " + name)
Error.throw <| Illegal_State_Error ("Invalid amount of arguments for operation " + name)
## PRIVATE
@ -65,7 +66,7 @@ make_unary_op name =
True ->
(Sql.code name+" ")++(arguments.at 0) . paren
False ->
Error.throw ("Invalid amount of arguments for operation " + name)
Error.throw <| Illegal_State_Error ("Invalid amount of arguments for operation " + name)
## PRIVATE
@ -79,10 +80,26 @@ make_unary_op name =
lift_unary_op : Text -> (Sql.Builder -> Sql.Builder) -> [Text, (Vector Sql.Builder -> Sql.Builder)]
lift_unary_op name function =
generator = arguments -> case arguments.length == 1 of
False -> Error.throw ("Invalid amount of arguments for operation " + name + ".")
False -> Error.throw <| Illegal_State_Error ("Invalid amount of arguments for operation " + name + ".")
True -> function (arguments.at 0)
[name, generator]
## PRIVATE
   A helper function to create a binary operation from a function.

   Arguments:
   - name: Name of the operation, used for error reporting.
   - function: A function taking exactly two arguments: the generated SQL code
     for the two arguments of the operation, and returning the generated SQL
     code for the whole operation.
lift_binary_op : Text -> (Sql.Builder -> Sql.Builder -> Sql.Builder) -> [Text, (Vector Sql.Builder -> Sql.Builder)]
lift_binary_op name function =
    generator = arguments ->
        if arguments.length == 2 then function (arguments.at 0) (arguments.at 1) else
            Error.throw <| Illegal_State_Error ("Invalid amount of arguments for operation " + name + ".")
    [name, generator]
## PRIVATE
A helper function to create a unary operator which is added to the right of
@ -151,14 +168,21 @@ base_dialect =
logic = [bin "AND", bin "OR", unary "NOT"]
compare = [bin "=", bin "!=", bin "<", bin ">", bin "<=", bin ">="]
agg = [fun "MAX", fun "MIN", fun "AVG", fun "SUM"]
counts = [fun "COUNT", ["COUNT_ROWS", here.make_constant "COUNT(*)"], here.count_distinct]
counts = [fun "COUNT", ["COUNT_ROWS", here.make_constant "COUNT(*)"], ["COUNT_DISTINCT", here.count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", here.count_distinct_include_null]]
nulls = [["ISNULL", here.make_right_unary_op "IS NULL"], ["FILLNULL", here.make_function "COALESCE"]]
base_map = Map.from_vector (arith + logic + compare + agg + nulls + counts)
Internal_Dialect base_map here.wrap_in_quotes
## PRIVATE
count_distinct = here.lift_unary_op "COUNT_DISTINCT" arg->
Sql.code "COUNT(DISTINCT " ++ arg.paren ++ Sql.code ")"
## PRIVATE
   Counts distinct combinations of the given expressions.
count_distinct args =
    tuple = Sql.join ", " args
    Sql.code "COUNT(DISTINCT (" ++ tuple ++ Sql.code "))"
## PRIVATE
   Counts distinct combinations of the given expressions, additionally
   counting the all-NULL rows (which `COUNT(DISTINCT ...)` skips) as one
   extra distinct value if any are present.
count_distinct_include_null args =
    null_checks = args.map arg-> arg.paren ++ Sql.code " IS NULL"
    all_null = Sql.join " AND " null_checks
    null_bump = Sql.code "CASE WHEN COUNT(CASE WHEN " ++ all_null ++ Sql.code " THEN 1 END) > 0 THEN 1 ELSE 0 END"
    here.count_distinct args ++ Sql.code " + " ++ null_bump
## PRIVATE
@ -174,7 +198,7 @@ generate_expression dialect expr = case expr of
dialect.wrap_identifier origin ++ dot ++ dialect.wrap_identifier name
IR.Constant sql_type value -> Sql.interpolation sql_type value
IR.Operation kind arguments ->
op = dialect.operation_map.get_or_else kind (Error.throw <| Illegal_State_Error "Operation "+kind+" is not supported.")
op = dialect.operation_map.get_or_else kind (Error.throw <| Unsupported_Database_Operation_Error kind)
parsed_args = arguments.map (here.generate_expression dialect)
op parsed_args
@ -290,4 +314,4 @@ generate_query dialect query = case query of
(Sql.code "SELECT * ") ++ here.generate_select_context dialect ctx
IR.Insert table_name pairs ->
here.generate_insert_query dialect table_name pairs
_ -> Error.throw <| Illegal_State_Error "Unsupported query type."
_ -> Error.throw <| Unsupported_Database_Operation_Error "Unsupported query type."

View File

@ -18,6 +18,7 @@ from Standard.Table.Data.Order_Rule as Order_Rule_Module import Order_Rule
from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column_Selector, By_Index
from Standard.Table.Data.Sort_Method as Sort_Method_Module import Sort_Method
from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior, Report_Warning
from Standard.Database.Error as Database_Errors import Unsupported_Database_Operation_Error
import Standard.Table.Data.Column_Mapping
import Standard.Table.Data.Position
@ -510,7 +511,7 @@ type Table
chosen_nulls = if my_nulls.is_nothing then missing_last else my_nulls
[this.resolve elem . expression, order_to_ir chosen_order, missing_to_ir chosen_nulls]
Order_Rule _ _ _ _ ->
Error.throw <| Illegal_State_Error "Custom comparators are not supported in Database"
Error.throw <| Unsupported_Database_Operation_Error "Custom comparators are not supported in Database"
elems = Helpers.unify_vector_singleton by . map to_ir
new_ctx = this.context.set_orders elems
this.updated_context new_ctx
@ -651,11 +652,15 @@ type Table
resolved_aggregates = validated.valid_columns
key_expressions = key_columns.map .expression
new_ctx = this.context.set_groups key_expressions
new_columns = resolved_aggregates.map p->
results = resolved_aggregates.map p->
agg = p.second
new_name = p.first
Aggregate_Helper.make_aggregate_column this agg new_name
this.updated_context_and_columns new_ctx new_columns
Aggregate_Helper.make_aggregate_column this agg new_name . catch
partitioned = results.partition (_.is_an Internal_Column)
new_columns = partitioned.first
problems = partitioned.second
on_problems.attach_problems_before problems <|
this.updated_context_and_columns new_ctx new_columns
## UNSTABLE
@ -744,7 +749,7 @@ type Table
to_sql =
cols = this.internal_columns.map (c -> [c.name, c.expression])
case cols.is_empty of
True -> Error.throw <| Illegal_State_Error "Cannot generate SQL for a table with no columns."
True -> Error.throw <| Unsupported_Database_Operation_Error "Cannot generate SQL for a table with no columns."
False ->
query = IR.Select cols this.context
this.connection.dialect.generate_sql query

View File

@ -0,0 +1,9 @@
from Standard.Base import all
## Indicates that a requested operation is not supported, for example because a
   particular database backend does not support it.

   Arguments:
   - message: A description of the operation that is not supported.
type Unsupported_Database_Operation_Error message

## Converts the error into a human-readable message.
Unsupported_Database_Operation_Error.to_display_text : Text
Unsupported_Database_Operation_Error.to_display_text =
    "Unsupported database operation: " + this.message

View File

@ -126,7 +126,7 @@ type Aggregate_Column
- prefix: added at the start of the result.
- suffix: added at the end of the result.
- quote_char: character used to quote the values if the value is `Empty`
or contains the separator.
or contains the separator.
type Concatenate (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) (separator:Text="") (prefix:Text="") (suffix:Text="") (quote_char:Text="")
## Creates a new column with the first value in each group. If no rows,

View File

@ -5,7 +5,7 @@ gjb,6,False,74.19642,-11.06,kmqxqkl6qx,431A3D20
nfw,1,True,88.65713,-68.71,"",01896EAB
ivt,7,False,-17.12076,24.83,"",22BAF9F6
zfd,2,True,48.50013,-13.3,"",BFFCC5FC
zoh,1,False,-62.63629,22.61,byo6kn5l3s,CB056D3A
zoh,1,False,-62.63629,22.61,byo6kn5l3sz,CB056D3A
gtt,7,False,-29.82496,87.69,tlujzya1c1,B433E5B9
wmh,9,False,-73.1439,53.1,r6k0uepdx6,9DCB7D82
doc,3,True,28.66156,38.82,utbeyhgq4o,C1C05DD3

1 Code Index Flag Value ValueWithNothing TextWithNothing Hexadecimal
5 nfw 1 True 88.65713 -68.71 01896EAB
6 ivt 7 False -17.12076 24.83 22BAF9F6
7 zfd 2 True 48.50013 -13.3 BFFCC5FC
8 zoh 1 False -62.63629 22.61 byo6kn5l3s byo6kn5l3sz CB056D3A
9 gtt 7 False -29.82496 87.69 tlujzya1c1 B433E5B9
10 wmh 9 False -73.1439 53.1 r6k0uepdx6 9DCB7D82
11 doc 3 True 28.66156 38.82 utbeyhgq4o C1C05DD3

View File

@ -9,16 +9,16 @@ import Standard.Test
import Standard.Test.Problems
import Standard.Base.Error.Problem_Behavior
type Test_Selection problem_handling=True advanced_stats=True text=True first_last=True std_dev=True multi_distinct=True aggregation_problems=True
type Test_Selection problem_handling=True advanced_stats=True text_concat=True text_shortest_longest=True first_last=True first_last_row_order=True std_dev=True multi_distinct=True aggregation_problems=True
all_tests = Test_Selection True True True True True
all_tests = Test_Selection True True True True True True True True True
spec =
file_contents = (Enso_Project.data / "data.csv") . read
table = Table.from_csv file_contents
empty_table = Table.new <| table.columns.map c->[c.name, []]
materialize = x->x
here.aggregate_spec "[In-Memory] " table empty_table materialize
here.aggregate_spec "[In-Memory] " table empty_table materialize test_selection=(Test_Selection first_last=False)
## Runs the common aggregate tests.
@ -135,6 +135,16 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test
materialized.columns.at 5 . at 0 . should_equal -17.960000 epsilon=0.000001
Test.specify "should be able to get first and last values" (pending=if test_selection.first_last.not then "Not supported.") <|
grouped = table.aggregate [First "Index" (order_by = By_Name ["Hexadecimal", "TextWithNothing"]), Last "ValueWithNothing" (order_by = By_Name ["Value"])]
materialized = materialize grouped
grouped.row_count . should_equal 1
materialized.columns.length . should_equal 2
materialized.columns.at 0 . name . should_equal "First Index"
materialized.columns.at 0 . at 0 . should_equal 9
materialized.columns.at 1 . name . should_equal "Last ValueWithNothing"
materialized.columns.at 1 . at 0 . should_equal -89.78 epsilon=0.000001
Test.specify "should be able to get first and last values with default row order" (pending=if test_selection.first_last_row_order.not then "Not supported.") <|
grouped = table.aggregate [First "Index", Last "Value"]
materialized = materialize grouped
grouped.row_count . should_equal 1
@ -158,17 +168,23 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test
materialized.columns.at 3 . name . should_equal "Maximum ValueWithNothing"
materialized.columns.at 3 . at 0 . should_equal 99.95 epsilon=0.000001
Test.specify "should be able to get shortest, longest and concatenated values" (pending=if test_selection.text.not then "Not supported.") <|
grouped = table.aggregate [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
Test.specify "should be able to get shortest and longest text values" (pending=if test_selection.text_shortest_longest.not then "Not supported.") <|
grouped = table.aggregate [Shortest "TextWithNothing", Longest "TextWithNothing"]
materialized = materialize grouped
grouped.row_count . should_equal 1
materialized.columns.length . should_equal 3
materialized.columns.length . should_equal 2
materialized.columns.at 0 . name . should_equal "Shortest TextWithNothing"
materialized.columns.at 0 . at 0 . should_equal "f5"
materialized.columns.at 1 . name . should_equal "Longest TextWithNothing"
materialized.columns.at 1 . at 0 . should_equal "setp295gjvbanana"
materialized.columns.at 2 . name . should_equal "Concatenate Code"
materialized.columns.at 2 . at 0 . length . should_equal 7500
Test.specify "should be able to get concatenated text values" (pending=if test_selection.text_concat.not then "Not supported.") <|
grouped = table.aggregate [Concatenate "Code"]
materialized = materialize grouped
grouped.row_count . should_equal 1
materialized.columns.length . should_equal 1
materialized.columns.at 0 . name . should_equal "Concatenate Code"
materialized.columns.at 0 . at 0 . length . should_equal 7500
Test.group prefix+"Table.aggregate should summarize empty table" pending=pending <|
Test.specify "should be able to count" <|
@ -234,6 +250,16 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test
materialized.columns.at 2 . at 0 . should_equal Nothing
Test.specify "should be able to get first and last values" (pending=if test_selection.first_last.not then "Not supported.") <|
grouped = empty_table.aggregate [First "Index" (order_by = By_Name ["Hexadecimal", "TextWithNothing"]), Last "ValueWithNothing" (order_by = By_Name ["Value"])]
materialized = materialize grouped
grouped.row_count . should_equal 1
materialized.columns.length . should_equal 2
materialized.columns.at 0 . name . should_equal "First Index"
materialized.columns.at 0 . at 0 . should_equal Nothing
materialized.columns.at 1 . name . should_equal "Last ValueWithNothing"
materialized.columns.at 1 . at 0 . should_equal Nothing
Test.specify "should be able to get first and last values with default row order" (pending=if test_selection.first_last_row_order.not then "Not supported.") <|
grouped = empty_table.aggregate [First "Index", Last "Value"]
materialized = materialize grouped
grouped.row_count . should_equal 1
@ -253,17 +279,23 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test
materialized.columns.at 1 . name . should_equal "Maximum ValueWithNothing"
materialized.columns.at 1 . at 0 . should_equal Nothing
Test.specify "should be able to get shortest, longest and concatenated values" (pending=if test_selection.text.not then "Not supported.") <|
grouped = empty_table.aggregate [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
Test.specify "should be able to get shortest and longest text values" (pending=if test_selection.text_shortest_longest.not then "Not supported.") <|
grouped = empty_table.aggregate [Shortest "TextWithNothing", Longest "TextWithNothing"]
materialized = materialize grouped
grouped.row_count . should_equal 1
materialized.columns.length . should_equal 3
materialized.columns.length . should_equal 2
materialized.columns.at 0 . name . should_equal "Shortest TextWithNothing"
materialized.columns.at 0 . at 0 . should_equal Nothing
materialized.columns.at 1 . name . should_equal "Longest TextWithNothing"
materialized.columns.at 1 . at 0 . should_equal Nothing
materialized.columns.at 2 . name . should_equal "Concatenate Code"
materialized.columns.at 2 . at 0 . should_equal Nothing
Test.specify "should be able to get concatenated text values" (pending=if test_selection.text_concat.not then "Not supported.") <|
grouped = empty_table.aggregate [Concatenate "Code"]
materialized = materialize grouped
grouped.row_count . should_equal 1
materialized.columns.length . should_equal 1
materialized.columns.at 0 . name . should_equal "Concatenate Code"
materialized.columns.at 0 . at 0 . should_equal Nothing
Test.group prefix+"Table.aggregate should not summarize empty table when grouped" pending=pending <|
Test.specify "should be able to count" <|
@@ -322,6 +354,15 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test
materialized.columns.at 3 . name . should_equal "25%-ile Value"
Test.specify "should be able to get first and last values" (pending=if test_selection.first_last.not then "Not supported.") <|
grouped = empty_table.aggregate [Group_By 0, First "Index" (order_by = By_Name ["Hexadecimal", "TextWithNothing"]), Last "ValueWithNothing" (order_by = By_Name ["Value"])]
materialized = materialize grouped
grouped.row_count . should_equal 0
materialized.columns.length . should_equal 3
materialized.columns.at 0 . name . should_equal "Code"
materialized.columns.at 1 . name . should_equal "First Index"
materialized.columns.at 2 . name . should_equal "Last ValueWithNothing"
Test.specify "should be able to get first and last values with default row order" (pending=if test_selection.first_last_row_order.not then "Not supported.") <|
grouped = empty_table.aggregate [Group_By 0, First "Index", Last "Value"]
materialized = materialize grouped
grouped.row_count . should_equal 0
@@ -339,15 +380,22 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test
materialized.columns.at 1 . name . should_equal "Minimum Value"
materialized.columns.at 2 . name . should_equal "Maximum ValueWithNothing"
Test.specify "should be able to get shortest, longest and concatenated values" (pending=if test_selection.text.not then "Not supported.") <|
grouped = empty_table.aggregate [Group_By 0, Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
Test.specify "should be able to get shortest and longest text values" (pending=if test_selection.text_shortest_longest.not then "Not supported.") <|
grouped = empty_table.aggregate [Group_By 0, Shortest "TextWithNothing", Longest "TextWithNothing"]
materialized = materialize grouped
grouped.row_count . should_equal 0
materialized.columns.length . should_equal 4
materialized.columns.length . should_equal 3
materialized.columns.at 0 . name . should_equal "Code"
materialized.columns.at 1 . name . should_equal "Shortest TextWithNothing"
materialized.columns.at 2 . name . should_equal "Longest TextWithNothing"
materialized.columns.at 3 . name . should_equal "Concatenate Code"
Test.specify "should be able to get concatenated text values" (pending=if test_selection.text_concat.not then "Not supported.") <|
grouped = empty_table.aggregate [Group_By 0, Concatenate "Code"]
materialized = materialize grouped
grouped.row_count . should_equal 0
materialized.columns.length . should_equal 2
materialized.columns.at 0 . name . should_equal "Code"
materialized.columns.at 1 . name . should_equal "Concatenate Code"
Test.group prefix+"Table.aggregate should be able to group on single field" pending=pending <|
Test.specify "should be able to count" <|
@@ -461,6 +509,19 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test
materialized.columns.at 6 . at idx . should_equal -18.802000 epsilon=0.000001
Test.specify "should be able to get first and last values" (pending=if test_selection.first_last.not then "Not supported.") <|
grouped = table.aggregate [Group_By "Index", First "TextWithNothing" (order_by = By_Name ["Hexadecimal", "Flag"]), Last "ValueWithNothing" (order_by = By_Name ["Value"])]
materialized = materialize grouped
grouped.row_count . should_equal 10
materialized.columns.length . should_equal 3
materialized.columns.at 0 . name . should_equal "Index"
idx = find_row [6] materialized
idx.is_nothing . should_be_false
materialized.columns.at 1 . name . should_equal "First TextWithNothing"
materialized.columns.at 1 . at idx . should_equal "v78nbv8fr1"
materialized.columns.at 2 . name . should_equal "Last ValueWithNothing"
materialized.columns.at 2 . at idx . should_equal 19.77 epsilon=0.000001
Test.specify "should be able to get first and last values with default row order" (pending=if test_selection.first_last_row_order.not then "Not supported.") <|
grouped = table.aggregate [Group_By "Index", First "TextWithNothing", Last "Value"]
materialized = materialize grouped
grouped.row_count . should_equal 10
@@ -490,20 +551,29 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test
materialized.columns.at 4 . name . should_equal "Maximum ValueWithNothing"
materialized.columns.at 4 . at idx . should_equal 99.79 epsilon=0.000001
Test.specify "should be able to get shortest, longest and concatenated values" (pending=if test_selection.text.not then "Not supported.") <|
grouped = table.aggregate [Group_By "Index", Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
Test.specify "should be able to get shortest and longest text values" (pending=if test_selection.text_shortest_longest.not then "Not supported.") <|
grouped = table.aggregate [Group_By "Index", Shortest "TextWithNothing", Longest "TextWithNothing"]
materialized = materialize grouped
grouped.row_count . should_equal 10
materialized.columns.length . should_equal 4
materialized.columns.length . should_equal 3
materialized.columns.at 0 . name . should_equal "Index"
idx = find_row [1] materialized
idx.is_nothing . should_be_false
materialized.columns.at 1 . name . should_equal "Shortest TextWithNothing"
materialized.columns.at 1 . at idx . should_equal "f5"
materialized.columns.at 2 . name . should_equal "Longest TextWithNothing"
materialized.columns.at 2 . at idx . should_equal "byo6kn5l3sz"
Test.specify "should be able to get concatenated text values" (pending=if test_selection.text_concat.not then "Not supported.") <|
grouped = table.aggregate [Group_By "Index", Concatenate "Code"]
materialized = materialize grouped
grouped.row_count . should_equal 10
materialized.columns.length . should_equal 2
materialized.columns.at 0 . name . should_equal "Index"
idx = find_row [6] materialized
idx.is_nothing . should_be_false
materialized.columns.at 1 . name . should_equal "Shortest TextWithNothing"
materialized.columns.at 1 . at idx . should_equal "kmqxqkl6qx"
materialized.columns.at 2 . name . should_equal "Longest TextWithNothing"
materialized.columns.at 2 . at idx . should_equal "kmqxqkl6qx"
materialized.columns.at 3 . name . should_equal "Concatenate Code"
materialized.columns.at 3 . at idx . length . should_equal 783
materialized.columns.at 1 . name . should_equal "Concatenate Code"
materialized.columns.at 1 . at idx . length . should_equal 783
Test.group prefix+"Table.aggregate should be able to group on multiple fields not in left columns" pending=pending <|
Test.specify "should be able to count" <|
@@ -624,6 +694,20 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test
materialized.columns.at 7 . at idx . should_equal -17.174000 epsilon=0.000001
Test.specify "should be able to get first and last values" (pending=if test_selection.first_last.not then "Not supported.") <|
grouped = table.aggregate [Group_By "Flag", First "TextWithNothing" (order_by = By_Name ["Hexadecimal", "Flag"]), Last "ValueWithNothing" (order_by = By_Name ["Value"]), Group_By "Index"]
materialized = materialize grouped
grouped.row_count . should_equal 20
materialized.columns.length . should_equal 4
materialized.columns.at 0 . name . should_equal "Flag"
materialized.columns.at 3 . name . should_equal "Index"
idx = find_row ["False", 6] materialized [0, 3]
idx.is_nothing . should_be_false
materialized.columns.at 1 . name . should_equal "First TextWithNothing"
materialized.columns.at 1 . at idx . should_equal "v78nbv8fr1"
materialized.columns.at 2 . name . should_equal "Last ValueWithNothing"
materialized.columns.at 2 . at idx . should_equal 42.17 epsilon=0.000001
Test.specify "should be able to get first and last values with default row order" (pending=if test_selection.first_last_row_order.not then "Not supported.") <|
grouped = table.aggregate [Group_By "Flag", First "TextWithNothing", Last "Value", Group_By "Index"]
materialized = materialize grouped
grouped.row_count . should_equal 20
@@ -655,21 +739,31 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test
materialized.columns.at 5 . name . should_equal "Maximum ValueWithNothing"
materialized.columns.at 5 . at idx . should_equal 97.17 epsilon=0.000001
Test.specify "should be able to get shortest, longest and concatenated values" (pending=if test_selection.text.not then "Not supported.") <|
grouped = table.aggregate [Group_By "Index", Group_By "Flag", Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
Test.specify "should be able to get shortest and longest text values" (pending=if test_selection.text_shortest_longest.not then "Not supported.") <|
grouped = table.aggregate [Group_By "Index", Group_By "Flag", Shortest "TextWithNothing", Longest "TextWithNothing"]
materialized = materialize grouped
grouped.row_count . should_equal 20
materialized.columns.length . should_equal 5
materialized.columns.length . should_equal 4
materialized.columns.at 0 . name . should_equal "Index"
materialized.columns.at 1 . name . should_equal "Flag"
idx = find_row [1, "False"] materialized
idx.is_nothing . should_be_false
materialized.columns.at 2 . name . should_equal "Shortest TextWithNothing"
materialized.columns.at 2 . at idx . should_equal "f5"
materialized.columns.at 3 . name . should_equal "Longest TextWithNothing"
materialized.columns.at 3 . at idx . should_equal "byo6kn5l3sz"
Test.specify "should be able to get concatenated text values" (pending=if test_selection.text_concat.not then "Not supported.") <|
grouped = table.aggregate [Group_By "Index", Group_By "Flag", Concatenate "Code"]
materialized = materialize grouped
grouped.row_count . should_equal 20
materialized.columns.length . should_equal 3
materialized.columns.at 0 . name . should_equal "Index"
materialized.columns.at 1 . name . should_equal "Flag"
idx = find_row [6, "False"] materialized
idx.is_nothing . should_be_false
materialized.columns.at 2 . name . should_equal "Shortest TextWithNothing"
materialized.columns.at 2 . at idx . should_equal "kmqxqkl6qx"
materialized.columns.at 3 . name . should_equal "Longest TextWithNothing"
materialized.columns.at 3 . at idx . should_equal "kmqxqkl6qx"
materialized.columns.at 4 . name . should_equal "Concatenate Code"
materialized.columns.at 4 . at idx . length . should_equal 381
materialized.columns.at 2 . name . should_equal "Concatenate Code"
materialized.columns.at 2 . at idx . length . should_equal 381
problem_pending = case pending.is_nothing of
False -> pending

View File

@@ -9,6 +9,7 @@ from Standard.Table.Data.Aggregate_Column import all
from Standard.Database import all
from Standard.Database.Data.Sql import Sql_Type
from Standard.Table import No_Such_Column_Error, Order_Rule
from Standard.Database.Error as Database_Errors import Unsupported_Database_Operation_Error
spec =
int = Sql_Type.integer
@@ -58,7 +59,7 @@ spec =
json = Json.from_pairs [["query", Nothing], ["message", "The table has no columns so a query cannot be generated."]]
empty.to_json . should_equal json
empty.columns.length . should_equal 0
empty.to_sql . should_fail_with Illegal_State_Error
empty.to_sql . should_fail_with Unsupported_Database_Operation_Error
Test.group "[Codegen] Building Expressions" <|
Test.specify "should allow building expressions from columns and constants" <|

View File

@@ -52,7 +52,7 @@ run_tests connection pending=Nothing =
here.postgres_specific_spec connection pending=pending
Common_Table_Spec.spec prefix table_builder supports_case_sensitive_columns=True pending=pending
selection = Aggregate_Spec.Test_Selection advanced_stats=False text=False first_last=False std_dev=False multi_distinct=False aggregation_problems=False
selection = Aggregate_Spec.Test_Selection text_shortest_longest=True first_last_row_order=False aggregation_problems=False
agg_in_memory_table = (Enso_Project.data / "data.csv") . read_csv
agg_table = connection.upload_table (Name_Generator.random_name "Agg1") agg_in_memory_table
tables.append agg_table.name

View File

@@ -52,7 +52,7 @@ run_tests connection pending=Nothing =
here.redshift_specific_spec connection pending=pending
Common_Table_Spec.spec prefix table_builder supports_case_sensitive_columns=True pending=pending
selection = Aggregate_Spec.Test_Selection advanced_stats=False text=False first_last=False std_dev=False multi_distinct=False aggregation_problems=False
selection = Aggregate_Spec.Test_Selection text_concat=False text_shortest_longest=False first_last=False first_last_row_order=False multi_distinct=False aggregation_problems=False
agg_in_memory_table = (Enso_Project.data / "data.csv") . read_csv
agg_table = connection.upload_table (Name_Generator.random_name "Agg1") agg_in_memory_table
tables.append agg_table.name

View File

@@ -62,7 +62,14 @@ spec =
here.sqlite_specific_spec connection
Common_Table_Spec.spec prefix table_builder supports_case_sensitive_columns=False
selection = Aggregate_Spec.Test_Selection advanced_stats=False text=False first_last=False std_dev=False multi_distinct=False aggregation_problems=False
## For now `advanced_stats` remain disabled, because SQLite does not provide
aggregate functions for median, mode and percentile and emulating them is
highly problematic. We can rethink in the future how these could be
emulated. Two of the possible solutions are:
- creating complex nested queries using NTILE to compute the stats,
- compiling SQLite library on our own and adding native extensions for
the missing statistics.
selection = Aggregate_Spec.Test_Selection advanced_stats=False text_shortest_longest=False first_last=False first_last_row_order=False multi_distinct=False aggregation_problems=False
agg_in_memory_table = (Enso_Project.data / "data.csv") . read_csv
agg_table = connection.upload_table (Name_Generator.random_name "Agg1") agg_in_memory_table
empty_agg_table = connection.upload_table (Name_Generator.random_name "Agg_Empty") (agg_in_memory_table.take_start 0)