Review Table.order_by (#4104)

This commit is contained in:
Radosław Waśko 2023-01-31 19:29:02 +01:00 committed by GitHub
parent d3b350f460
commit c965ad3455
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 214 additions and 193 deletions

View File

@ -2,7 +2,7 @@ from Standard.Base import all
import Standard.Base.Error.Illegal_Argument.Illegal_Argument
import Standard.Base.Error.Illegal_State.Illegal_State
from Standard.Table import Sort_Column, Sort_Column_Selector
from Standard.Table import Sort_Column
import Standard.Table.Data.Value_Type.Value_Type
import Standard.Table.Data.Column.Column as Materialized_Column
@ -637,7 +637,7 @@ type Column
column.sort Sort_Direction.Descending
sort : Sort_Direction -> Column
sort self order=Sort_Direction.Ascending =
self.to_table.order_by (Sort_Column_Selector.By_Column [Sort_Column.Column self order]) . at self.name
self.to_table.order_by (Sort_Column.Index 0 order) . at 0
## UNSTABLE
Creates a new Column with the specified range of rows from the input

View File

@ -11,7 +11,7 @@ import Standard.Base.Error.Unimplemented.Unimplemented
from Standard.Base.Metadata.Widget import Single_Choice
import Standard.Base.Metadata.Display
from Standard.Table import Auto_Detect, Aggregate_Column, Data_Formatter, Column_Name_Mapping, Column_Selector, Sort_Column_Selector, Sort_Column, Match_Columns, Position
from Standard.Table import Auto_Detect, Aggregate_Column, Data_Formatter, Column_Name_Mapping, Column_Selector, Sort_Column, Match_Columns, Position
import Standard.Table.Data.Column_Type_Selection.Column_Type_Selection
import Standard.Table.Data.Expression.Expression
import Standard.Table.Data.Expression.Expression_Error
@ -20,7 +20,6 @@ import Standard.Table.Data.Join_Kind.Join_Kind
import Standard.Table.Data.Report_Unmatched.Report_Unmatched
import Standard.Table.Data.Row.Row
import Standard.Table.Data.Table.Table as Materialized_Table
import Standard.Table.Internal.Java_Exports
import Standard.Table.Internal.Table_Helpers
import Standard.Table.Internal.Table_Helpers.Table_Column_Helper
import Standard.Table.Internal.Problem_Builder.Problem_Builder
@ -498,7 +497,7 @@ type Table
In the call below, assuming that the table of `t1` contains rows for
numbers 1, 2, ..., 10, will return rows starting from 6 and not an empty
result as one could expect if the limit was applied before the filters.
t1 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A"]) . limit 5
t1 = table.order_by [Sort_Column.Name "A"] . limit 5
t2 = t1.filter 'A' (Greater than=5)
t2.read
limit : Integer -> Table
@ -590,12 +589,12 @@ type Table
> Example
Sorting `table` in ascending order by the value in column `'Quantity'`.
table.order_by (Sort_Column_Selector.By_Name ['Quantity'])
table.order_by ['Quantity']
> Example
Sorting `table` in descending order by the value in column `'Quantity'`.
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity' Sort_Direction.Descending])
table.order_by [Sort_Column.Name 'Quantity' Sort_Direction.Descending]
> Example
Sorting `table` in ascending order by the value in column `'Quantity'`,
@ -608,16 +607,21 @@ type Table
using the value in column `'Rating'` in descending order for breaking
ties.
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity', Sort_Column.Name 'Rating' Sort_Direction.Descending])
table.order_by [Sort_Column.Name 'Quantity', Sort_Column.Name 'Rating' Sort_Direction.Descending]
> Example
Order the table by the second column in ascending order. In case of any
ties, break them based on the 7th column from the end of the table in
descending order.
table.order_by (Sort_Column_Selector.By_Index [1, Sort_Column.Index -7 Sort_Direction.Descending])
order_by : Text | Sort_Column | Vector (Text | Sort_Column) | Sort_Column_Selector -> Text_Ordering -> Table ! Incomparable_Values
order_by self (columns = (Sort_Column_Selector.By_Name [(Sort_Column.Name (self.columns.at 0 . name))])) text_ordering=Text_Ordering.Default = Panic.handle_wrapped_dataflow_error <|
table.order_by [1, Sort_Column.Index -7 Sort_Direction.Descending]
> Example
Sort the table by columns whose names start with the letter `a`, matched case-insensitively.
table.order_by [(Sort_Column.Select_By_Name "a.*" use_regex=True case_sensitivity=Case_Sensitivity.Insensitive)]
order_by : Text | Sort_Column | Vector (Text | Sort_Column) -> Text_Ordering -> Table ! Incomparable_Values
order_by self (columns = ([(Sort_Column.Name (self.columns.at 0 . name))])) text_ordering=Text_Ordering.Default = Panic.handle_wrapped_dataflow_error <|
problem_builder = Problem_Builder.new
columns_for_ordering = Table_Helpers.prepare_order_by self.columns columns problem_builder
problem_builder.attach_problems_before Problem_Behavior.Report_Error <|

View File

@ -29,7 +29,7 @@ make_aggregate_column table aggregate new_name =
requested statistic.
make_expression : Aggregate_Column -> Dialect -> SQL_Expression
make_expression aggregate dialect =
is_non_empty_selector v = if v.is_nothing then False else v.columns.not_empty
is_non_empty_selector v = v.is_nothing.not && v.not_empty
case aggregate of
Group_By c _ -> c.expression
Count _ -> SQL_Expression.Operation "COUNT_ROWS" []
@ -46,14 +46,14 @@ make_expression aggregate dialect =
First c _ ignore_nothing order_by -> case is_non_empty_selector order_by of
False -> Error.throw (Unsupported_Database_Operation.Error "`First` aggregation requires at least one `order_by` column.")
True ->
order_bys = order_by.columns.map c-> dialect.prepare_order_descriptor c.column.as_internal c.direction Text_Ordering.Default
order_bys = order_by.map c-> dialect.prepare_order_descriptor c.column.as_internal c.direction Text_Ordering.Default
case ignore_nothing of
False -> SQL_Expression.Operation "FIRST" [c.expression]+order_bys
True -> SQL_Expression.Operation "FIRST_NOT_NULL" [c.expression]+order_bys
Last c _ ignore_nothing order_by -> case is_non_empty_selector order_by of
False -> Error.throw (Unsupported_Database_Operation.Error "`Last` aggregation requires at least one `order_by` column.")
True ->
order_bys = order_by.columns.map c-> dialect.prepare_order_descriptor c.column.as_internal c.direction Text_Ordering.Default
order_bys = order_by.map c-> dialect.prepare_order_descriptor c.column.as_internal c.direction Text_Ordering.Default
case ignore_nothing of
False -> SQL_Expression.Operation "LAST" [c.expression]+order_bys
True -> SQL_Expression.Operation "LAST_NOT_NULL" [c.expression]+order_bys

View File

@ -48,11 +48,11 @@
break ties in descending order.
import Standard.Examples
from Standard.Table import Sort_Column, Sort_Column_Selector
from Standard.Table import Sort_Column
example_sort =
table = Examples.inventory_table
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "total_stock", Sort_Column.Name "sold_stock" Sort_Direction.Descending])
table.order_by [Sort_Column.Name "total_stock", Sort_Column.Name "sold_stock" Sort_Direction.Descending]
> Example
Compute the number of transactions that each item has participated in, as

View File

@ -27,11 +27,11 @@
Sort the shop inventory based on the per-item price in descending order.
import Standard.Examples
from Standard.Table import Sort_Column_Selector, Sort_Column
from Standard.Table import Sort_Column
example_sort =
table = Examples.inventory_table
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "price" Sort_Direction.Descending])
table.order_by [Sort_Column.Name "price" Sort_Direction.Descending]
> Example
Add two columns to each other.

View File

@ -2,7 +2,7 @@ from Standard.Base import all
import project.Data.Column.Column
import project.Data.Column_Selector.Column_Selector
import project.Data.Sort_Column_Selector.Sort_Column_Selector
import project.Data.Sort_Column.Sort_Column
## Defines an Aggregate Column
type Aggregate_Column
@ -142,7 +142,7 @@ type Aggregate_Column
not missing value returned.
- order_by: required for database tables. Specifies how to order the
results within the group.
First (column:Column|Text|Integer=0) (new_name:Text|Nothing=Nothing) (ignore_nothing:Boolean=True) (order_by:Sort_Column_Selector|Nothing=Nothing)
First (column:Column|Text|Integer=0) (new_name:Text|Nothing=Nothing) (ignore_nothing:Boolean=True) (order_by:Sort_Column|Nothing=Nothing)
## Creates a new column with the last value in each group. If no rows,
evaluates to `Nothing`.
@ -155,7 +155,7 @@ type Aggregate_Column
not missing value returned.
- order_by: required for database tables. Specifies how to order the
results within the group.
Last (column:Column|Text|Integer=0) (new_name:Text|Nothing=Nothing) (ignore_nothing:Boolean=True) (order_by:Sort_Column_Selector|Nothing=Nothing)
Last (column:Column|Text|Integer=0) (new_name:Text|Nothing=Nothing) (ignore_nothing:Boolean=True) (order_by:Sort_Column|Nothing=Nothing)
## Creates a new column with the maximum value in each group. If no rows,
evaluates to `Nothing`.

View File

@ -1,6 +1,20 @@
from Standard.Base import all
type Sort_Column
## A column to sort by, selected by its name.
For case insensitive column name matching, use the `Select_By_Name`
selector.
Name name:Text direction:Sort_Direction=Sort_Direction.Ascending
## A column to sort by, selected by its index.
Index index:Integer direction:Sort_Direction=Sort_Direction.Ascending
Column column:Column direction:Sort_Direction=Sort_Direction.Ascending
## A selector allowing to match columns by their name.
It matches case-insensitively by default and can do regex-based matching if requested.
It is possible for it to match multiple columns, in which case all the
matched columns will be placed in the ORDER BY clause at the position of
this selector. Their relative order between each other will be the same
as in the table.
Select_By_Name name:Text direction:Sort_Direction=Sort_Direction.Ascending case_sensitivity:Case_Sensitivity=Case_Sensitivity.Insensitive use_regex:Boolean=False

View File

@ -1,9 +0,0 @@
from Standard.Base import all
import project.Data.Column.Column
import project.Data.Sort_Column.Sort_Column
type Sort_Column_Selector
By_Name (columns : Vector (Sort_Column.Name | Text)) (matcher:Matcher=Text_Matcher.Case_Sensitive)
By_Index (columns : Vector (Sort_Column.Index | Integer))
By_Column (columns : Vector (Sort_Column.Column | Column))

View File

@ -27,7 +27,6 @@ import project.Data.Report_Unmatched.Report_Unmatched
import project.Data.Row.Row
import project.Data.Storage.Storage
import project.Data.Value_Type.Value_Type
import project.Data.Sort_Column_Selector.Sort_Column_Selector
import project.Data.Sort_Column.Sort_Column
import project.Data.Aggregate_Column.Aggregate_Column
import project.Data.Storage.Storage
@ -566,12 +565,12 @@ type Table
> Example
Sorting `table` in ascending order by the value in column `'Quantity'`.
table.order_by (Sort_Column_Selector.By_Name ['Quantity'])
table.order_by ['Quantity']
> Example
Sorting `table` in descending order by the value in column `'Quantity'`.
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity' Sort_Direction.Descending])
table.order_by [Sort_Column.Name 'Quantity' Sort_Direction.Descending]
> Example
Sorting `table` in ascending order by the value in column `'Quantity'`,
@ -584,24 +583,31 @@ type Table
using the value in column `'Rating'` in descending order for breaking
ties.
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity', Sort_Column.Name 'Rating' Sort_Direction.Descending])
table.order_by [Sort_Column.Name 'Quantity', Sort_Column.Name 'Rating' Sort_Direction.Descending]
> Example
Order the table by the second column in ascending order. In case of any
ties, break them based on the 7th column from the end of the table in
descending order.
table.order_by (Sort_Column_Selector.By_Index [1, Sort_Column.Index -7 Sort_Direction.Descending])
order_by : Text | Sort_Column | Vector (Text | Sort_Column) | Sort_Column_Selector -> Text_Ordering -> Table ! Incomparable_Values
order_by self (columns = (Sort_Column_Selector.By_Name [(Sort_Column.Name (self.columns.at 0 . name))])) text_ordering=Text_Ordering.Default =
table.order_by [1, Sort_Column.Index -7 Sort_Direction.Descending]
> Example
Sort the table by columns whose names start with the letter `a`, matched case-insensitively.
table.order_by [(Sort_Column.Select_By_Name "a.*" use_regex=True case_sensitivity=Case_Sensitivity.Insensitive)]
order_by : Text | Sort_Column | Vector (Text | Sort_Column) -> Text_Ordering -> Table ! Incomparable_Values
order_by self (columns = ([(Sort_Column.Name (self.columns.at 0 . name))])) text_ordering=Text_Ordering.Default =
problem_builder = Problem_Builder.new
columns_for_ordering = Table_Helpers.prepare_order_by self.columns columns problem_builder
problem_builder.attach_problems_before Problem_Behavior.Report_Error <|
selected_columns = columns_for_ordering.map c->c.column.java_column
ordering = columns_for_ordering.map c->c.associated_selector.direction.to_sign
java_columns = columns_for_ordering.map c->
c.column.java_column
directions = columns_for_ordering.map c->
c.associated_selector.direction.to_sign
comparator = Comparator.for_text_ordering text_ordering
java_table = Illegal_Argument.handle_java_exception <| Incomparable_Values.handle_errors <|
self.java_table.orderBy selected_columns.to_array ordering.to_array comparator
self.java_table.orderBy java_columns.to_array directions.to_array comparator
Table.Value java_table
## Returns the distinct set of rows within the specified columns from the

View File

@ -4,7 +4,6 @@ import Standard.Base.Data.Ordering.Comparator
import project.Data.Column.Column
import project.Data.Column_Selector.Column_Selector
import project.Data.Sort_Column.Sort_Column
import project.Data.Sort_Column_Selector.Sort_Column_Selector
import project.Internal.Problem_Builder.Problem_Builder
import project.Internal.Table_Helpers
import project.Internal.Unique_Name_Strategy.Unique_Name_Strategy
@ -113,8 +112,8 @@ all_same_column aggregates =
indices or column references potentially from a different table) are
replaced with column references from the provided table.
Sort_Column_Selectors are resolved to Sort_Column_Select.By_Column with the
matched columns coming from the provided table.
`Sort_Column`s are replaced with column references of matched columns coming
from the provided table.
This preprocess step is required by some helper function, to avoid having
to pass the table reference and resolve the column descriptors all the
@ -142,8 +141,8 @@ resolve_aggregate table problem_builder aggregate_column =
_ ->
columns_for_ordering = Table_Helpers.prepare_order_by table_columns selector problem_builder
sort_columns = columns_for_ordering.map c->
Sort_Column.Column c.column c.associated_selector.direction
Sort_Column_Selector.By_Column sort_columns
Internal_Order_By_Column_Reference.Value c.column c.associated_selector.direction
sort_columns
result = case aggregate_column of
Group_By c new_name -> Group_By (resolve c) new_name
@ -175,6 +174,11 @@ resolve_aggregate table problem_builder aggregate_column =
value, keeping any other dataflow errors intact.
result.catch Internal_Missing_Column_Error (_->Nothing)
## PRIVATE
   Pairs a column resolved against the table with the sort direction taken
   from the selector that matched it. `resolve_aggregate` builds a vector of
   these from the result of `Table_Helpers.prepare_order_by`, and
   `java_aggregator` reads `column` and `direction` back from each entry.
type Internal_Order_By_Column_Reference
## PRIVATE
   - column: the resolved column.
   - direction: the `Sort_Direction` to apply to that column.
Value column direction
## PRIVATE
A marker for missing columns during resolution.
type Internal_Missing_Column_Error
@ -201,13 +205,13 @@ java_aggregator name column =
Mode c _ -> ModeAggregator.new name c.java_column
First c _ ignore_nothing ordering ->
if ordering.is_nothing then FirstAggregator.new name c.java_column ignore_nothing else
order_columns = ordering.columns.map c->c.column.java_column
order_directions = ordering.columns.map c->c.direction.to_sign
order_columns = ordering.map c->c.column.java_column
order_directions = ordering.map c->c.direction.to_sign
FirstAggregator.new name c.java_column ignore_nothing order_columns.to_array order_directions.to_array Comparator.new
Last c _ ignore_nothing ordering ->
if ordering.is_nothing then LastAggregator.new name c.java_column ignore_nothing else
order_columns = ordering.columns.map c->c.column.java_column
order_direction = ordering.columns.map c->c.direction.to_sign
order_columns = ordering.map c->c.column.java_column
order_direction = ordering.map c->c.direction.to_sign
LastAggregator.new name c.java_column ignore_nothing order_columns.to_array order_direction.to_array Comparator.new
Maximum c _ -> MinOrMaxAggregator.new name c.java_column 1 Comparator.new
Minimum c _ -> MinOrMaxAggregator.new name c.java_column -1 Comparator.new

View File

@ -8,7 +8,6 @@ import project.Data.Column.Column
import project.Data.Column_Name_Mapping.Column_Name_Mapping
import project.Data.Column_Selector.Column_Selector
import project.Data.Position.Position
import project.Data.Sort_Column_Selector.Sort_Column_Selector
import project.Data.Sort_Column.Sort_Column
import project.Data.Table.Table
import project.Data.Value_Type.Value_Type
@ -465,41 +464,38 @@ type Column_Transform_Element
Value column associated_selector
## PRIVATE
prepare_order_by : Vector -> Text | Sort_Column | Vector (Text | Sort_Column) | Sort_Column_Selector -> Problem_Builder -> Vector Column_Transform_Element
prepare_order_by : Vector -> Text | Sort_Column | Vector (Text | Sort_Column) -> Problem_Builder -> Vector Column_Transform_Element
prepare_order_by internal_columns column_selectors problem_builder =
selected_elements = case column_selectors of
_ : Text ->
unified_name_selectors = [Sort_Column.Name column_selectors]
select_columns_by_name internal_columns unified_name_selectors Text_Matcher.Case_Sensitive problem_builder name_extractor=(_.name)
Sort_Column.Name _ _ -> select_columns_by_name internal_columns [column_selectors] Text_Matcher.Case_Sensitive problem_builder name_extractor=(_.name)
Sort_Column.Index _ _ -> select_columns_by_index internal_columns [column_selectors] problem_builder index_extractor=(_.index)
Sort_Column.Column _ _ -> select_columns_by_column_reference internal_columns [column_selectors] problem_builder column_extractor=(_.column)
_ : Vector ->
unified_name_selectors = column_selectors.map selector-> case selector of
_ : Text -> Sort_Column.Name selector
Sort_Column.Name _ _ -> selector
_ -> Error.throw (Illegal_Argument.Error "Invalid column selector passed to order_by. Only Text and Sort_Column.Name are allowed as a Vector.")
select_columns_by_name internal_columns unified_name_selectors Text_Matcher.Case_Sensitive problem_builder name_extractor=(_.name)
Sort_Column_Selector.By_Name name_selectors matcher ->
unified_name_selectors = name_selectors.map selector-> case selector of
_ : Text -> Sort_Column.Name selector
Sort_Column.Name _ _ -> selector
_ -> Error.throw (Illegal_Argument.Error "Invalid column selector passed to order_by. Only Text and Sort_Column.Name are allowed for Sort_Column_Selector.By_Name.")
select_columns_by_name internal_columns unified_name_selectors matcher problem_builder name_extractor=(_.name)
Sort_Column_Selector.By_Index index_selectors ->
unified_index_selectors = index_selectors.map selector-> case selector of
_ : Integer -> Sort_Column.Index selector
Sort_Column.Index _ _ -> selector
_ -> Error.throw (Illegal_Argument.Error "Invalid column selector passed to order_by. Only Integer and Sort_Column.Index are allowed for Sort_Column_Selector.By_Index.")
select_columns_by_index internal_columns unified_index_selectors problem_builder index_extractor=(_.index)
Sort_Column_Selector.By_Column column_selectors ->
unified_column_selectors = column_selectors.map selector-> case selector of
Sort_Column.Column _ _ -> selector
## We cannot match by type here, as there is no common `Column`
type - the type is different for In-Memory and Database
tables, and we do not have interfaces yet.
column_reference -> Sort_Column.Column column_reference
select_columns_by_column_reference internal_columns unified_column_selectors problem_builder column_extractor=(_.column)
## Resolves one `order_by` selector into a vector of
   `Column_Transform_Element`s. The vector may be empty (nothing matched) or
   contain several elements (a name selector matching multiple columns).

   Bare `Text` and `Integer` values are first wrapped into `Sort_Column.Name`
   and `Sort_Column.Index`. A `Sort_Column.Name` is then handled as an exact,
   case-sensitive, non-regex `Sort_Column.Select_By_Name`.

   An out-of-range index or a name that matches no column is reported to
   `problem_builder` and contributes no elements. Any other kind of value
   yields an `Illegal_Argument` error instead of an inexhaustive-match panic.
resolve_selector selector = case selector of
    name : Text -> resolve_selector (Sort_Column.Name name)
    ix : Integer -> resolve_selector (Sort_Column.Index ix)
    Sort_Column.Name name direction ->
        resolve_selector (Sort_Column.Select_By_Name name direction Case_Sensitivity.Sensitive use_regex=False)
    Sort_Column.Index ix _ ->
        # Negative indices count from the end of the column list.
        actual_index = if ix < 0 then internal_columns.length+ix else ix
        case (actual_index >= 0) && (actual_index < internal_columns.length) of
            True -> [Column_Transform_Element.Value (internal_columns.at actual_index) selector]
            False ->
                problem_builder.report_oob_indices [ix]
                []
    Sort_Column.Select_By_Name name _ case_sensitivity use_regex ->
        matcher = case use_regex of
            True -> Regex_Matcher.Value case_sensitivity=case_sensitivity
            False -> case case_sensitivity of
                Case_Sensitivity.Default -> Text_Matcher.Case_Sensitive
                Case_Sensitivity.Sensitive -> Text_Matcher.Case_Sensitive
                Case_Sensitivity.Insensitive locale ->
                    Text_Matcher.Case_Insensitive locale=locale
        # All matching columns are kept, in the order they appear in the table.
        matches = internal_columns.filter c->
            matcher.match_single_criterion c.name name
        if matches.is_empty then
            problem_builder.report_missing_input_columns [name]
        matches.map c->
            Column_Transform_Element.Value c selector
    ## Anything else is not a supported selector; report it as a regular
       error so callers get a descriptive message.
    _ ->
        Error.throw (Illegal_Argument.Error "Invalid column selector passed to order_by. Only Text, Integer and Sort_Column values are allowed.")
selectors_vec = case column_selectors of
_ : Vector -> column_selectors
_ -> [column_selectors]
selected_elements = selectors_vec.flat_map resolve_selector
if selected_elements.is_empty then
problem_builder.report_other_warning No_Input_Columns_Selected
selected_elements

View File

@ -11,7 +11,6 @@ import project.Data.Match_Columns.Match_Columns
import project.Data.Position.Position
import project.Data.Report_Unmatched.Report_Unmatched
import project.Data.Sort_Column.Sort_Column
import project.Data.Sort_Column_Selector.Sort_Column_Selector
import project.Data.Table.Table
import project.Data.Table_Conversions
import project.Delimited.Delimited_Format.Delimited_Format
@ -31,7 +30,6 @@ export project.Data.Match_Columns.Match_Columns
export project.Data.Position.Position
export project.Data.Report_Unmatched.Report_Unmatched
export project.Data.Sort_Column.Sort_Column
export project.Data.Sort_Column_Selector.Sort_Column_Selector
export project.Data.Table.Table
export project.Data.Table_Conversions
export project.Delimited.Delimited_Format.Delimited_Format

View File

@ -1,7 +1,6 @@
from Standard.Base import all
from Standard.Table import Table, Sort_Column_Selector
from Standard.Table.Data.Aggregate_Column import all
from Standard.Table import Table, Sort_Column
from Standard.Test import Bench
@ -27,13 +26,13 @@ bench =
dates_table = Table.new [['dates', dates]]
objects_table = Table.new [['objects', objects]]
Bench.measure (ints_table.order_by (Sort_Column_Selector.By_Index [0])) "Table.order_by ints" iter_size num_iterations
Bench.measure (ints_table.order_by [Sort_Column.Index 0]) "Table.order_by ints" iter_size num_iterations
Bench.measure (ints.sort) "Vector.sort ints" iter_size num_iterations
Bench.measure (dates_table.order_by (Sort_Column_Selector.By_Index [0])) "Table.order_by dates" iter_size num_iterations
Bench.measure (dates_table.order_by [Sort_Column.Index 0]) "Table.order_by dates" iter_size num_iterations
Bench.measure (dates.sort) "Vector.sort dates" iter_size num_iterations
Bench.measure (objects_table.order_by (Sort_Column_Selector.By_Index [0])) "Table.order_by objects" iter_size num_iterations
Bench.measure (objects_table.order_by [Sort_Column.Index 0]) "Table.order_by objects" iter_size num_iterations
Bench.measure (objects.sort) "Vector.sort objects" iter_size num_iterations
main = bench

View File

@ -1,6 +1,6 @@
from Standard.Base import all hiding First, Last
from Standard.Table import Table, Sort_Column, Sort_Column_Selector, Column_Selector
from Standard.Table import Table, Sort_Column, Column_Selector
from Standard.Table.Data.Column_Selector.Column_Selector import By_Name
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all
from Standard.Table.Errors import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Aggregation, Floating_Point_Grouping, Unquoted_Delimiter
@ -129,7 +129,7 @@ spec setup =
materialized.columns.at 5 . at 0 . should_equal -17.960000 epsilon=0.000001
Test.specify "should be able to get first and last values" (pending = resolve_pending test_selection.first_last) <|
grouped = table.aggregate [First "Index" (order_by = Sort_Column_Selector.By_Name [Sort_Column.Name "Hexadecimal", Sort_Column.Name "TextWithNothing"]), Last "ValueWithNothing" (order_by = Sort_Column_Selector.By_Name [Sort_Column.Name "Value"])]
grouped = table.aggregate [First "Index" (order_by = [Sort_Column.Name "Hexadecimal", Sort_Column.Name "TextWithNothing"]), Last "ValueWithNothing" (order_by = [Sort_Column.Name "Value"])]
materialized = materialize grouped
grouped.row_count . should_equal 1
materialized.column_count . should_equal 2
@ -139,7 +139,7 @@ spec setup =
materialized.columns.at 1 . at 0 . should_equal -89.78 epsilon=0.000001
Test.specify "should be able to get first and last values with mixed ordering" (pending = resolve_pending test_selection.first_last) <|
grouped = table.aggregate [First "TextWithNothing" (order_by = Sort_Column_Selector.By_Name [Sort_Column.Name "Value" Sort_Direction.Descending, Sort_Column.Name "Code"]), First "TextWithNothing" (order_by = Sort_Column_Selector.By_Name [Sort_Column.Name "Code", Sort_Column.Name "Value" Sort_Direction.Descending]), Last "ValueWithNothing" (order_by = Sort_Column_Selector.By_Name [Sort_Column.Name "Value" Sort_Direction.Descending])]
grouped = table.aggregate [First "TextWithNothing" (order_by = [Sort_Column.Name "Value" Sort_Direction.Descending, Sort_Column.Name "Code"]), First "TextWithNothing" (order_by = [Sort_Column.Name "Code", Sort_Column.Name "Value" Sort_Direction.Descending]), Last "ValueWithNothing" (order_by = [Sort_Column.Name "Value" Sort_Direction.Descending])]
materialized = materialize grouped
grouped.row_count . should_equal 1
materialized.column_count . should_equal 3
@ -258,7 +258,7 @@ spec setup =
materialized.columns.at 2 . at 0 . should_equal Nothing
Test.specify "should be able to get first and last values" (pending = resolve_pending test_selection.first_last) <|
grouped = empty_table.aggregate [First "Index" (order_by = Sort_Column_Selector.By_Name [Sort_Column.Name "Hexadecimal", Sort_Column.Name "TextWithNothing"]), Last "ValueWithNothing" (order_by = Sort_Column_Selector.By_Name [Sort_Column.Name "Value"])]
grouped = empty_table.aggregate [First "Index" (order_by = [Sort_Column.Name "Hexadecimal", Sort_Column.Name "TextWithNothing"]), Last "ValueWithNothing" (order_by = [Sort_Column.Name "Value"])]
materialized = materialize grouped
grouped.row_count . should_equal 1
materialized.column_count . should_equal 2
@ -362,7 +362,7 @@ spec setup =
materialized.columns.at 3 . name . should_equal "25%-ile Value"
Test.specify "should be able to get first and last values" (pending = resolve_pending test_selection.first_last) <|
grouped = empty_table.aggregate [Group_By 0, First "Index" (order_by = Sort_Column_Selector.By_Name [Sort_Column.Name "Hexadecimal", Sort_Column.Name "TextWithNothing"]), Last "ValueWithNothing" (order_by = Sort_Column_Selector.By_Name [Sort_Column.Name "Value"])]
grouped = empty_table.aggregate [Group_By 0, First "Index" (order_by = [Sort_Column.Name "Hexadecimal", Sort_Column.Name "TextWithNothing"]), Last "ValueWithNothing" (order_by = [Sort_Column.Name "Value"])]
materialized = materialize grouped
grouped.row_count . should_equal 0
materialized.column_count . should_equal 3
@ -516,7 +516,7 @@ spec setup =
materialized.columns.at 6 . at idx . should_equal -18.802000 epsilon=0.000001
Test.specify "should be able to get first and last values" (pending = resolve_pending test_selection.first_last) <|
grouped = table.aggregate [Group_By "Index", First "TextWithNothing" (order_by = Sort_Column_Selector.By_Name [Sort_Column.Name "Value", Sort_Column.Name "Flag"]), Last "ValueWithNothing" (order_by = Sort_Column_Selector.By_Name [Sort_Column.Name "Value"])]
grouped = table.aggregate [Group_By "Index", First "TextWithNothing" (order_by = [Sort_Column.Name "Value", Sort_Column.Name "Flag"]), Last "ValueWithNothing" (order_by = [Sort_Column.Name "Value"])]
materialized = materialize grouped
grouped.row_count . should_equal 10
materialized.column_count . should_equal 3
@ -529,7 +529,7 @@ spec setup =
materialized.columns.at 2 . at idx . should_equal -89.78 epsilon=0.000001
Test.specify "should be able to get first and last values with mixed ordering" (pending = resolve_pending test_selection.first_last) <|
grouped = table.aggregate [Group_By "Index", First "TextWithNothing" (order_by = Sort_Column_Selector.By_Name [Sort_Column.Name "Value" Sort_Direction.Descending, Sort_Column.Name "Flag"]), Last "ValueWithNothing" (order_by = Sort_Column_Selector.By_Name [Sort_Column.Name "Value" Sort_Direction.Descending])]
grouped = table.aggregate [Group_By "Index", First "TextWithNothing" (order_by = [Sort_Column.Name "Value" Sort_Direction.Descending, Sort_Column.Name "Flag"]), Last "ValueWithNothing" (order_by = [Sort_Column.Name "Value" Sort_Direction.Descending])]
materialized = materialize grouped
grouped.row_count . should_equal 10
materialized.column_count . should_equal 3
@ -713,7 +713,7 @@ spec setup =
materialized.columns.at 7 . at idx . should_equal -17.174000 epsilon=0.000001
Test.specify "should be able to get first and last values" (pending = resolve_pending test_selection.first_last) <|
grouped = table.aggregate [Group_By "Flag", First "TextWithNothing" (order_by = Sort_Column_Selector.By_Name [Sort_Column.Name "Value", Sort_Column.Name "Flag"]), Last "ValueWithNothing" (order_by = Sort_Column_Selector.By_Name [Sort_Column.Name "Value"]), Group_By "Index"]
grouped = table.aggregate [Group_By "Flag", First "TextWithNothing" (order_by = [Sort_Column.Name "Value", Sort_Column.Name "Flag"]), Last "ValueWithNothing" (order_by = [Sort_Column.Name "Value"]), Group_By "Index"]
materialized = materialize grouped
grouped.row_count . should_equal 20
materialized.column_count . should_equal 4
@ -727,7 +727,7 @@ spec setup =
materialized.columns.at 2 . at idx . should_equal -89.78 epsilon=0.000001
Test.specify "should be able to get first and last values with mixed ordering" (pending = resolve_pending test_selection.first_last) <|
grouped = table.aggregate [Group_By "Flag", First "TextWithNothing" (order_by = Sort_Column_Selector.By_Name [Sort_Column.Name "Value" Sort_Direction.Descending, Sort_Column.Name "Flag"]), Last "ValueWithNothing" (order_by = Sort_Column_Selector.By_Name [Sort_Column.Name "Value" Sort_Direction.Descending]), Group_By "Index"]
grouped = table.aggregate [Group_By "Flag", First "TextWithNothing" (order_by = [Sort_Column.Name "Value" Sort_Direction.Descending, Sort_Column.Name "Flag"]), Last "ValueWithNothing" (order_by = [Sort_Column.Name "Value" Sort_Direction.Descending]), Group_By "Index"]
materialized = materialize grouped
grouped.row_count . should_equal 20
materialized.column_count . should_equal 4
@ -815,7 +815,7 @@ spec setup =
table = table_builder [["A", ["foo", "bar", "foo", "foo"]], ["B", ["a", "b", "c", "d"]]]
result = table.aggregate [Group_By "A", (Concatenate "B" prefix="[[" suffix="]]" separator="; ")]
result.row_count . should_equal 2
materialized = materialize result . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A"])
materialized = materialize result . order_by ([Sort_Column.Name "A"])
materialized.column_count . should_equal 2
materialized.columns.at 0 . name . should_equal "A"
materialized.columns.at 0 . to_vector . should_equal ["bar", "foo"]
@ -895,14 +895,14 @@ spec setup =
r1 = t5.aggregate [Group_By "G", Count_Distinct "A" (ignore_nothing=True)]
r1.row_count . should_equal 2
m1 = materialize r1 . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "G"])
m1 = materialize r1 . order_by ([Sort_Column.Name "G"])
m1.column_count . should_equal 2
m1.columns.first.to_vector . should_equal ["bar", "foo"]
m1.columns.second.to_vector . should_equal [0, 1]
r2 = t5.aggregate [Group_By "G", Count_Distinct "A" (ignore_nothing=False)]
r2.row_count . should_equal 2
m2 = materialize r2 . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "G"])
m2 = materialize r2 . order_by ([Sort_Column.Name "G"])
m2.column_count . should_equal 2
m2.columns.first.to_vector . should_equal ["bar", "foo"]
m2.columns.second.to_vector . should_equal [1, 2]
@ -944,7 +944,7 @@ spec setup =
r2 = table.aggregate [Group_By "G", Average "X"]
r2.row_count.should_equal 2
m2 = materialize r2 . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "G"])
m2 = materialize r2 . order_by ([Sort_Column.Name "G"])
m2.column_count . should_equal 2
m2.columns.first.to_vector . should_equal ["a", "b"]
m2.columns.second.to_vector . should_equal [0.5, 1]
@ -1116,7 +1116,7 @@ spec setup =
Test.group prefix+"Table.aggregate First and Last" <|
Test.specify "should not return the same value for groups with different values but equal ordering keys" (pending = resolve_pending test_selection.first_last) <|
t1 = table_builder [["G", ["a", "a"]], ["X", [1, 2]]]
order = Sort_Column_Selector.By_Name [Sort_Column.Name "G"]
order = [Sort_Column.Name "G"]
r1 = t1.aggregate [First "X" (order_by=order), Last "X" (order_by=order)]
r1.row_count.should_equal 1
m1 = materialize r1
@ -1130,7 +1130,7 @@ spec setup =
table = table_builder [["A", [1, 1, 2, 1]], ["B", [3, 2, 2, 3]], ["C", [11, 12, 13, 14]]]
grouped = table.aggregate [Group_By "B", Group_By "A"]
grouped.row_count . should_equal 3
materialized = materialize grouped . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A", Sort_Column.Name "B"])
materialized = materialize grouped . order_by ([Sort_Column.Name "A", Sort_Column.Name "B"])
materialized.column_count . should_equal 2
materialized.columns.at 1 . name . should_equal "A"
materialized.columns.at 1 . to_vector . should_equal [1, 1, 2]
@ -1158,17 +1158,17 @@ spec setup =
table = table_builder [dates, times, datetimes, ints]
g1 = table.aggregate [Group_By "Date", Sum "Int"]
m1 = materialize g1 . order_by (Sort_Column_Selector.By_Name ["Date"])
m1 = materialize g1 . order_by (["Date"])
m1.at "Date" . to_vector . should_equal [Date.new 1997, Date.new 2000 2 2, Date.new 2022 12 31]
m1.at "Sum Int" . to_vector . should_equal [17, 10, 4]
g2 = table.aggregate [Group_By "Time", Sum "Int"]
m2 = materialize g2 . order_by (Sort_Column_Selector.By_Name ["Time"])
m2 = materialize g2 . order_by (["Time"])
m2.at "Time" . to_vector . should_equal [Time_Of_Day.new, Time_Of_Day.new 0 0 0 500 100 900, Time_Of_Day.new 1 2 3, Time_Of_Day.new 11 25 40]
m2.at "Sum Int" . to_vector . should_equal [9, 2, 4, 16]
g3 = table.aggregate [Group_By "DateTime", Sum "Int"]
m3 = materialize g3 . order_by (Sort_Column_Selector.By_Name ["DateTime"])
m3 = materialize g3 . order_by (["DateTime"])
m3.at "DateTime" . to_vector . should_equal [Date_Time.new 1998, Date_Time.new 1999, Date_Time.new 2022 8 29 17 28 5]
m3.at "Sum Int" . to_vector . should_equal [24, 5, 2]
@ -1356,7 +1356,7 @@ spec setup =
if test_selection.first_last.not then
Test.specify "with First and Last with ordering" <|
table = table_builder [["A", [3,2,1]], ["X", [1,2,3]]]
order = Sort_Column_Selector.By_Name [Sort_Column.Name "A"]
order = [Sort_Column.Name "A"]
expect_sum_and_unsupported_errors 2 <|
table.aggregate [Sum "X", First "X" (order_by=order), Last "X" (order_by=order)]

View File

@ -1,6 +1,6 @@
from Standard.Base import all
from Standard.Table import Column_Selector, Sort_Column, Sort_Column_Selector
from Standard.Table import Column_Selector, Sort_Column
from Standard.Table.Errors import all
from Standard.Test import Test, Problems
@ -45,7 +45,7 @@ spec setup =
a = ["A", ["a", "a", "a", "a", "a", "a"]]
b = ["B", [1, 1, 2, 2, 1, 2]]
c = ["C", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]]
t = table_builder [a, b, c] . order_by (Sort_Column_Selector.By_Name [(Sort_Column.Name "C" Sort_Direction.Descending)])
t = table_builder [a, b, c] . order_by ([(Sort_Column.Name "C" Sort_Direction.Descending)])
r2 = t.distinct ["A", "B"] on_problems=Report_Error |> materialize |> _.order_by "B"
r2.at "A" . to_vector . should_equal ["a", "a"]

View File

@ -1,6 +1,6 @@
from Standard.Base import all
from Standard.Table import Table, Column, Sort_Column, Column_Selector, Sort_Column_Selector, Aggregate_Column
from Standard.Table import Table, Column, Sort_Column, Column_Selector, Aggregate_Column
import Standard.Table.Data.Expression.Expression_Error
from Standard.Test import Test, Test_Suite, Problems

View File

@ -76,7 +76,7 @@ spec setup =
a = ["A", ["a", "a", "a", "a", "a", "a"]]
b = ["B", [1, 1, 2, 2, 1, 2]]
c = ["C", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]]
t = table_builder [a, b, c] . order_by (Sort_Column_Selector.By_Name [(Sort_Column.Name "C" Sort_Direction.Descending)])
t = table_builder [a, b, c] . order_by ([(Sort_Column.Name "C" Sort_Direction.Descending)])
t2 = t.distinct ["A", "B"] on_problems=Report_Error
# Now, reverse the order!

View File

@ -133,7 +133,7 @@ spec setup =
t2 = table_builder [["Z", ['a', 'b', 'c']], ["W", ['x', 'd', 'd']]]
t3 = t1.order_by "X"
t4 = t2.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "Z" Sort_Direction.Descending])
t4 = t2.order_by ([Sort_Column.Name "Z" Sort_Direction.Descending])
t5 = t3.cross_join t4
expect_column_names ["X", "Y", "Z", "W"] t5

View File

@ -42,7 +42,7 @@ spec setup =
t2 = table_builder [["Z", ['a', 'b']], ["W", ['x', 'd']]]
t3 = t1.order_by "X"
t4 = t2.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "Z" Sort_Direction.Descending])
t4 = t2.order_by [Sort_Column.Name "Z" Sort_Direction.Descending]
t5 = t3.zip t4
expect_column_names ["X", "Y", "Z", "W"] t5

View File

@ -2,7 +2,7 @@ from Standard.Base import all
import Standard.Base.Error.Illegal_Argument.Illegal_Argument
import Standard.Base.Error.Incomparable_Values.Incomparable_Values
from Standard.Table import Sort_Column, Sort_Column_Selector
from Standard.Table import Sort_Column
from Standard.Table.Errors import all
from Standard.Test import Test, Problems
@ -36,16 +36,19 @@ spec setup =
table_builder [col1, col2, col3, col4, col5, col6, col7, col8, col9, col10]
Test.specify "should work as shown in the doc examples" <|
t1 = table.order_by (Sort_Column_Selector.By_Name ["alpha"])
t1 = table.order_by ["alpha"]
t1.at "alpha" . to_vector . should_equal [0, 1, 2, 3]
t1.at "gamma" . to_vector . should_equal [4, 3, 2, 1]
## Assumes stable sorting on database engine.
t2 = table.order_by (Sort_Column_Selector.By_Index [1, Sort_Column.Index -8 Sort_Direction.Descending])
t2 = table.order_by [1, Sort_Column.Index -8 Sort_Direction.Descending]
t2.at "beta" . to_vector . should_equal ["a", "a", "b", "b"]
t2.at "gamma" . to_vector . should_equal [3, 1, 4, 2]
t2.at "alpha" . to_vector . should_equal [1, 3, 0, 2]
t3 = table.order_by [Sort_Column.Select_By_Name "a.*" use_regex=True case_sensitivity=Case_Sensitivity.Insensitive]
t3.at "alpha" . to_vector . should_equal [0, 1, 2, 3]
Test.specify "should work with single column name" <|
t1 = table.order_by "alpha"
t1.at "alpha" . to_vector . should_equal [0, 1, 2, 3]
@ -68,53 +71,64 @@ spec setup =
t4.at "alpha" . to_vector . should_equal [3, 2, 1, 0]
t4.at "gamma" . to_vector . should_equal [1, 2, 3, 4]
Test.specify "should allow the selector to mix regex and case insensitive matching" <|
t4 = table.order_by [Sort_Column.Select_By_Name "A.*" use_regex=True case_sensitivity=Case_Sensitivity.Insensitive]
t4.at "alpha" . to_vector . should_equal [0, 1, 2, 3]
Test.specify "should correctly handle regexes matching multiple names" <|
t1 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name ".*ta" Sort_Direction.Descending] (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Sensitive))
t1 = table.order_by [Sort_Column.Select_By_Name ".*ta" Sort_Direction.Descending use_regex=True]
t1.at "beta" . to_vector . should_equal ["b", "b", "a", "a"]
t1.at "delta" . to_vector . should_equal ["a1", "a03", "a2", "a10"]
t1.at "gamma" . to_vector . should_equal [2, 4, 3, 1]
Test.specify "should correctly handle problems: out of bounds indices" <|
selector = Sort_Column_Selector.By_Index [0, 100, Sort_Column.Index -200, Sort_Column.Index 300]
selector = [0, 100, Sort_Column.Index -200, Sort_Column.Index 300]
t1 = table.order_by selector
t1.should_fail_with Column_Indexes_Out_Of_Range
t1.catch . should_equal <|
Column_Indexes_Out_Of_Range.Error [100, -200, 300]
Test.specify "should correctly handle edge-cases: duplicate selectors" <|
selector1 = Sort_Column_Selector.By_Name ["alpha", Sort_Column.Name "alpha" Sort_Direction.Descending]
selector1 = ["alpha", Sort_Column.Name "alpha" Sort_Direction.Descending]
t1 = table.order_by selector1
Problems.assume_no_problems t1
t1.at "alpha" . to_vector . should_equal [0, 1, 2, 3]
t1.at "gamma" . to_vector . should_equal [4, 3, 2, 1]
t2 = table.order_by (Sort_Column_Selector.By_Index [Sort_Column.Index 0, Sort_Column.Index 0 Sort_Direction.Descending])
t2 = table.order_by [Sort_Column.Index 0, Sort_Column.Index 0 Sort_Direction.Descending]
Problems.assume_no_problems t2
t2.at "alpha" . to_vector . should_equal [0, 1, 2, 3]
t2.at "gamma" . to_vector . should_equal [4, 3, 2, 1]
t3 = table.order_by [Sort_Column.Index 0, Sort_Column.Name "alpha" Sort_Direction.Descending]
Problems.assume_no_problems t3
t3.at "alpha" . to_vector . should_equal [0, 1, 2, 3]
t3.at "gamma" . to_vector . should_equal [4, 3, 2, 1]
Test.specify "should correctly handle edge-cases: duplicate matches due to case insensitivity" <|
selector = Sort_Column_Selector.By_Name [Sort_Column.Name "ALPHA", Sort_Column.Name "alpha" Sort_Direction.Descending] Text_Matcher.Case_Insensitive
selector = [Sort_Column.Select_By_Name "ALPHA" case_sensitivity=Case_Sensitivity.Insensitive, Sort_Column.Select_By_Name "alpha" Sort_Direction.Descending]
t1 = table.order_by selector
Problems.assume_no_problems t1
t1.at "alpha" . to_vector . should_equal [0, 1, 2, 3]
t1.at "gamma" . to_vector . should_equal [4, 3, 2, 1]
Test.specify "should correctly handle edge-cases: wrong types in the selectors" <|
t1 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Index 0 Sort_Direction.Descending])
t1.should_fail_with Illegal_Argument
t2 = table.order_by (Sort_Column_Selector.By_Name [0])
t2.should_fail_with Illegal_Argument
t3 = table.order_by (Sort_Column_Selector.By_Index [Sort_Column.Name "alpha" Sort_Direction.Descending])
t3.should_fail_with Illegal_Argument
t4 = table.order_by (Sort_Column_Selector.By_Index ["alpha"])
t4.should_fail_with Illegal_Argument
Test.specify "should correctly handle edge-cases: duplicate matches due to regexes" <|
selector = [Sort_Column.Select_By_Name "a.*" use_regex=True, Sort_Column.Select_By_Name "alpha" Sort_Direction.Descending]
t1 = table.order_by selector
Problems.assume_no_problems t1
t1.at "alpha" . to_vector . should_equal [0, 1, 2, 3]
t1.at "gamma" . to_vector . should_equal [4, 3, 2, 1]
Test.expect_panic_with matcher=Any <|
table.order_by 0
Test.specify "should correctly handle edge-cases: mixed selector types" <|
t1 = table.order_by [Sort_Column.Name "alpha", Sort_Column.Index 1]
t1.at "alpha" . to_vector . should_equal [0, 1, 2, 3]
t1.at "beta" . to_vector . should_equal ["b", "a", "b", "a"]
t1.at "gamma" . to_vector . should_equal [4, 3, 2, 1]
Test.expect_panic_with matcher=Any <|
table.order_by ["alpha", 1]
t2 = table.order_by [Sort_Column.Select_By_Name "a.*a" use_regex=True, Sort_Column.Index 1]
t2.at "alpha" . to_vector . should_equal [0, 1, 2, 3]
t2.at "beta" . to_vector . should_equal ["b", "a", "b", "a"]
t2.at "gamma" . to_vector . should_equal [4, 3, 2, 1]
Test.specify "should work correctly with shorthands" pending="The last test case does not work currently. We want to get rid of Sort_Column_Selector and use a vector directly. TODO" <|
t1 = table.order_by "alpha"
@ -132,37 +146,37 @@ spec setup =
Test.specify "should correctly handle problems: unmatched names" <|
weird_name = '.*?-!@#!"'
selector = Sort_Column_Selector.By_Name [Sort_Column.Name "alpha", "hmm", Sort_Column.Name weird_name]
selector = [Sort_Column.Name "alpha", "hmm", Sort_Column.Name weird_name]
t1 = table.order_by selector
t1.should_fail_with Missing_Input_Columns
t1.catch . should_equal <|
Missing_Input_Columns.Error [Sort_Column.Name "hmm", Sort_Column.Name weird_name]
Missing_Input_Columns.Error ["hmm", weird_name]
Test.specify "should report a problem if no columns are selected for ordering" <|
t2 = table.order_by (Sort_Column_Selector.By_Name [])
t2 = table.order_by []
t2.should_fail_with No_Input_Columns_Selected
Test.specify "should stack consecutive ordering operations" <|
t1 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "alpha"])
t1 = table.order_by [Sort_Column.Name "alpha"]
t1.at "alpha" . to_vector . should_equal [0, 1, 2, 3]
t1.at "beta" . to_vector . should_equal ["b", "a", "b", "a"]
# Now we reverse the order
t2 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "alpha" Sort_Direction.Descending])
t2 = t1.order_by [Sort_Column.Name "alpha" Sort_Direction.Descending]
t2.at "alpha" . to_vector . should_equal [3, 2, 1, 0]
t2.at "beta" . to_vector . should_equal ["a", "b", "a", "b"]
# Now we add another primary ordering, but the order from t1/t2 is kept for tie breaking.
t3 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "beta"])
t3 = t1.order_by [Sort_Column.Name "beta"]
t3.at "beta" . to_vector . should_equal ["a", "a", "b", "b"]
t3.at "alpha" . to_vector . should_equal [1, 3, 0, 2]
t4 = t2.order_by (Sort_Column_Selector.By_Name ["beta"])
t4 = t2.order_by ["beta"]
t4.at "beta" . to_vector . should_equal ["a", "a", "b", "b"]
t4.at "alpha" . to_vector . should_equal [3, 1, 2, 0]
Test.specify "should give priority to the first selected column and use the next ones for breaking ties" <|
t1 = table.order_by (Sort_Column_Selector.By_Name ["beta", Sort_Column.Name "alpha" Sort_Direction.Ascending])
t1 = table.order_by ["beta", Sort_Column.Name "alpha" Sort_Direction.Ascending]
t1.at "beta" . to_vector . should_equal ["a", "a", "b", "b"]
t1.at "alpha" . to_vector . should_equal [1, 3, 0, 2]
t1.at "gamma" . to_vector . should_equal [3, 1, 4, 2]
@ -172,44 +186,39 @@ spec setup =
t1a.at "alpha" . to_vector . should_equal [1, 3, 0, 2]
t1a.at "gamma" . to_vector . should_equal [3, 1, 4, 2]
t2 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "beta", Sort_Column.Name "alpha" Sort_Direction.Descending])
t2 = table.order_by [Sort_Column.Name "beta", Sort_Column.Name "alpha" Sort_Direction.Descending]
t2.at "beta" . to_vector . should_equal ["a", "a", "b", "b"]
t2.at "alpha" . to_vector . should_equal [3, 1, 2, 0]
t2.at "gamma" . to_vector . should_equal [1, 3, 2, 4]
t3 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "alpha", Sort_Column.Name "beta"])
t3 = table.order_by [Sort_Column.Name "alpha", Sort_Column.Name "beta"]
t3.at "alpha" . to_vector . should_equal [0, 1, 2, 3]
t3.at "beta" . to_vector . should_equal ["b", "a", "b", "a"]
t3.at "gamma" . to_vector . should_equal [4, 3, 2, 1]
t4 = table.order_by (Sort_Column_Selector.By_Index [1, Sort_Column.Index 0 Sort_Direction.Ascending])
t4 = table.order_by [Sort_Column.Index 1, Sort_Column.Index 0 Sort_Direction.Ascending]
t4.at "beta" . to_vector . should_equal ["a", "a", "b", "b"]
t4.at "alpha" . to_vector . should_equal [1, 3, 0, 2]
t4.at "gamma" . to_vector . should_equal [3, 1, 4, 2]
t5 = table.order_by (Sort_Column_Selector.By_Column [table.at "beta", Sort_Column.Column (table.at "alpha") Sort_Direction.Ascending])
t5.at "beta" . to_vector . should_equal ["a", "a", "b", "b"]
t5.at "alpha" . to_vector . should_equal [1, 3, 0, 2]
t5.at "gamma" . to_vector . should_equal [3, 1, 4, 2]
Test.specify "should deal with real numbers" <|
t1 = table.order_by (Sort_Column_Selector.By_Name ["tau"])
t1 = table.order_by ["tau"]
t1.at "tau" . to_vector . should_equal [-0.1, 0.5, 1.6, 32.0]
t1.at "alpha" . to_vector . should_equal [1, 2, 0, 3]
Test.specify "should deal with nulls" <|
t1 = table.order_by (Sort_Column_Selector.By_Name ["xi"])
t1 = table.order_by ["xi"]
t1.at "xi" . to_vector . should_equal [Nothing, 0.5, 1.0, 1.5]
t1.at "alpha" . to_vector . should_equal [1, 0, 3, 2]
t2 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "rho"])
t2 = table.order_by [Sort_Column.Name "rho"]
t2.at "rho" . to_vector . should_equal [Nothing, Nothing, "B", "BB"]
t3 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "rho" Sort_Direction.Descending])
t3 = table.order_by [Sort_Column.Name "rho" Sort_Direction.Descending]
t3.at "rho" . to_vector . should_equal ["BB", "B", Nothing, Nothing]
Test.specify "should behave as expected with Unicode normalization, depending on the defaults settings" <|
t1 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "phi"])
t1 = table.order_by [Sort_Column.Name "phi"]
case test_selection.order_by_unicode_normalization_by_default of
True ->
t1.at "phi" . to_vector . should_equal [Nothing, "śa", 's\u0301b', "śc"]
@ -219,46 +228,46 @@ spec setup =
t1.at "alpha" . to_vector . should_equal [2, 1, 0, 3]
Test.specify "should support natural ordering" pending=(if test_selection.natural_ordering.not then "Natural ordering is not supported.") <|
t1 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "delta"]) text_ordering=(Text_Ordering.Default sort_digits_as_numbers=True)
t1 = table.order_by [Sort_Column.Name "delta"] text_ordering=(Text_Ordering.Default sort_digits_as_numbers=True)
t1.at "delta" . to_vector . should_equal ["a1", "a2", "a03", "a10"]
t1.at "alpha" . to_vector . should_equal [2, 1, 0, 3]
t2 = table.order_by (Sort_Column_Selector.By_Name ["delta"]) text_ordering=(Text_Ordering.Default sort_digits_as_numbers=False)
t2 = table.order_by ["delta"] text_ordering=(Text_Ordering.Default sort_digits_as_numbers=False)
t2.at "delta" . to_vector . should_equal ["a03", "a1", "a10", "a2"]
t2.at "alpha" . to_vector . should_equal [0, 2, 3, 1]
Test.specify "should support case insensitive ordering" pending=(if test_selection.case_insensitive_ordering.not then "Case insensitive ordering is not supported.") <|
t1 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "eta"]) text_ordering=(Text_Ordering.Case_Insensitive)
t1 = table.order_by [Sort_Column.Name "eta"] text_ordering=(Text_Ordering.Case_Insensitive)
expected = case test_selection.case_insensitive_ascii_only of
True -> ["Aleph", "alpha", "Beta", "bądź"]
False -> ["Aleph", "alpha", "bądź", "Beta"]
t1.at "eta" . to_vector . should_equal expected
t2 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "eta"]) text_ordering=(Text_Ordering.Case_Sensitive)
t2 = table.order_by [Sort_Column.Name "eta"] text_ordering=(Text_Ordering.Case_Sensitive)
t2.at "eta" . to_vector . should_equal ["Aleph", "Beta", "alpha", "bądź"]
t3 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "psi"]) text_ordering=(Text_Ordering.Case_Insensitive)
t3 = table.order_by [Sort_Column.Name "psi"] text_ordering=(Text_Ordering.Case_Insensitive)
t3.at "psi" . to_vector . should_equal [Nothing, "c01", "c10", "C2"]
t4 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "psi" Sort_Direction.Descending]) text_ordering=(Text_Ordering.Case_Sensitive)
t4 = table.order_by [Sort_Column.Name "psi" Sort_Direction.Descending] text_ordering=(Text_Ordering.Case_Sensitive)
t4.at "psi" . to_vector . should_equal ["c10", "c01", "C2", Nothing]
Test.specify "should support natural and case insensitive ordering at the same time" pending=(if (test_selection.natural_ordering.not || test_selection.case_insensitive_ordering.not) then "Natural ordering or case sensitive ordering is not supported.") <|
t1 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "psi"]) text_ordering=(Text_Ordering.Case_Insensitive sort_digits_as_numbers=True)
t1 = table.order_by [Sort_Column.Name "psi"] text_ordering=(Text_Ordering.Case_Insensitive sort_digits_as_numbers=True)
t1.at "psi" . to_vector . should_equal [Nothing, "c01", "C2", "c10"]
t2 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "psi"]) text_ordering=(Text_Ordering.Default sort_digits_as_numbers=True)
t2 = table.order_by [Sort_Column.Name "psi"] text_ordering=(Text_Ordering.Default sort_digits_as_numbers=True)
t2.at "psi" . to_vector . should_equal [Nothing, "C2", "c01", "c10"]
t3 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "psi"]) text_ordering=(Text_Ordering.Case_Insensitive)
t3 = table.order_by [Sort_Column.Name "psi"] text_ordering=(Text_Ordering.Case_Insensitive)
t3.at "psi" . to_vector . should_equal [Nothing, "c01", "c10", "C2"]
t4 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "psi"])
t4 = table.order_by [Sort_Column.Name "psi"]
t4.at "psi" . to_vector . should_equal [Nothing, "C2", "c01", "c10"]
Test.specify "text ordering settings should not affect numeric columns" <|
ordering = Text_Ordering.Case_Insensitive sort_digits_as_numbers=True
t1 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "alpha"]) text_ordering=ordering
t1 = table.order_by [Sort_Column.Name "alpha"] text_ordering=ordering
t1.at "alpha" . to_vector . should_equal [0, 1, 2, 3]
t1.at "gamma" . to_vector . should_equal [4, 3, 2, 1]

View File

@ -1,7 +1,7 @@
from Standard.Base import all
import Standard.Base.Error.Illegal_State.Illegal_State
from Standard.Table import Sort_Column, Sort_Column_Selector, Column_Selector, Join_Condition
from Standard.Table import Sort_Column, Column_Selector, Join_Condition
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all
from Standard.Table.Errors import No_Input_Columns_Selected, Missing_Input_Columns, No_Such_Column
@ -123,22 +123,22 @@ spec =
Test.group "[Codegen] Sorting" <|
Test.specify "should allow sorting by a single column name" <|
r1 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A"]) . at "B"
r1 = t1.order_by ([Sort_Column.Name "A"]) . at "B"
r1.to_sql.prepare . should_equal ['SELECT "T1"."B" AS "B" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC', []]
r2 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "B" Sort_Direction.Descending]) . at "A"
r2 = t1.order_by ([Sort_Column.Name "B" Sort_Direction.Descending]) . at "A"
r2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A" FROM "T1" AS "T1" ORDER BY "T1"."B" DESC', []]
Test.specify 'should allow sorting by multiple column names' <|
r1 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'A', Sort_Column.Name 'B'])
r1 = t1.order_by ([Sort_Column.Name 'A', Sort_Column.Name 'B'])
r1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC, "T1"."B" ASC', []]
Test.specify 'should allow sorting with specific by-column rules' <|
r1 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A", Sort_Column.Name "B" Sort_Direction.Descending])
r1 = t1.order_by ([Sort_Column.Name "A", Sort_Column.Name "B" Sort_Direction.Descending])
r1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC, "T1"."B" DESC', []]
Test.specify 'should return warnings and errors when passed a non-existent column' <|
t2 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'foobar'])
t2 = t1.order_by ([Sort_Column.Name 'foobar'])
t2.should_fail_with Missing_Input_Columns
Test.group "Helpers" <|

View File

@ -1,6 +1,6 @@
from Standard.Base import all
from Standard.Table import Table, Sort_Column, Column_Selector, Sort_Column_Selector
from Standard.Table import Table, Sort_Column, Column_Selector
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all
from Standard.Table.Errors import No_Input_Columns_Selected, Missing_Input_Columns
@ -67,21 +67,21 @@ spec prefix connection =
Table.new [["id", [1,2,3,4,5,6]], ["name", ["shoes","trousers","dress","skirt","blouse","t-shirt"]], ["quantity", [20,10,20,10,30,30]], ["rating", [3.0,Nothing,7.3,3.0,2.2,Nothing]], ["price", [37.2,42.1,64.1,87.4,13.5,64.2]]]
Test.specify "should allow sorting by a single column name" <|
r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'quantity'])
r_1 = df.order_by ([Sort_Column.Name 'quantity'])
r_1.at 'id' . to_vector . should_equal [2,4,1,3,5,6]
r_3 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'rating' Sort_Direction.Descending])
r_3 = df.order_by ([Sort_Column.Name 'rating' Sort_Direction.Descending])
r_3.at 'id' . to_vector . should_equal [3,1,4,5,2,6]
Test.specify 'should allow sorting by multiple column names' <|
r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'quantity', Sort_Column.Name 'rating'])
r_1 = df.order_by ([Sort_Column.Name 'quantity', Sort_Column.Name 'rating'])
r_1.at 'id' . to_vector . should_equal [2,4,1,3,6,5]
r_2 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'rating' Sort_Direction.Descending, Sort_Column.Name 'quantity' Sort_Direction.Descending])
r_2 = df.order_by ([Sort_Column.Name 'rating' Sort_Direction.Descending, Sort_Column.Name 'quantity' Sort_Direction.Descending])
r_2.at 'id' . to_vector . should_equal [3,1,4,5,6,2]
Test.specify 'should allow sorting with specific by-column rules' <|
r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "quantity", Sort_Column.Name "price" Sort_Direction.Descending])
r_1 = df.order_by ([Sort_Column.Name "quantity", Sort_Column.Name "price" Sort_Direction.Descending])
r_1.at 'id' . to_vector . should_equal [4,2,3,1,6,5]
Test.specify 'should correctly reorder all kinds of columns and leave the original columns untouched' <|
@ -91,7 +91,7 @@ spec prefix connection =
texts = ["foo", "foo", "bar", "baz", "spam"]
df = upload "T8" <|
Table.new [["ord", [0,3,2,4,1]], ["ints", ints], ["reals", reals], ["bools", bools], ["texts", texts]]
r = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'ord'])
r = df.order_by ([Sort_Column.Name 'ord'])
r.at 'ints' . to_vector . should_equal [1, 5, 3, 2, 4]
df.at 'ints' . to_vector . should_equal ints
@ -141,7 +141,7 @@ spec prefix connection =
(InMemory) table are ordered according to a specified column or list
of columns.
determinize_by order_column table =
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name order_column])
table.order_by ([Sort_Column.Name order_column])
Test.specify "should allow counting group sizes and elements" <|
## Names set to lower case to avoid issue with Redshift where columns are

View File

@ -4,7 +4,7 @@ import Standard.Base.Error.Common.Type_Error
import Standard.Base.Error.Illegal_Argument.Illegal_Argument
import Standard.Base.Error.Incomparable_Values.Incomparable_Values
from Standard.Table import Table, Column, Sort_Column, Column_Selector, Sort_Column_Selector, Aggregate_Column
from Standard.Table import Table, Column, Sort_Column, Column_Selector, Aggregate_Column
import Standard.Table.Main as Table_Module
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all hiding First, Last
from Standard.Table.Data.Storage import Storage
@ -367,7 +367,7 @@ spec =
c_1 = ['id', [1, 2, 3, 4, 5, 6]]
c_2 = ['val', [My.Data 1 2, My.Data 3 4, My.Data 2 1, My.Data 5 2, My.Data 7 0, My.Data 4 -1]]
df = Table.new [c_1, c_2]
r = df.order_by (Sort_Column_Selector.By_Name ['val'])
r = df.order_by (['val'])
r.at 'id' . to_vector . should_equal [1,3,6,2,4,5]
Test.specify 'should correctly reorder all kinds of columns and leave the original columns untouched' <|
@ -383,7 +383,7 @@ spec =
mixed_dates = [Date.new 1999 1 2, Date_Time.new 1999 1 2 3 40, Date.new 1999 1 2, Date_Time.new 1999 1 2 3 40, Date.new 2000]
df = Table.new [['ord', ord], ['ints', ints], ['reals', reals], ['bools', bools], ['texts', texts], ['objs', objs], ['dates', dates], ['times', times], ['datetimes', datetimes], ['mixed_dates', mixed_dates]]
r = df.order_by (Sort_Column_Selector.By_Name ['ord'])
r = df.order_by (['ord'])
r.at 'ints' . to_vector . should_equal [1, 5, 3, 2, 4]
df.at 'ints' . to_vector . should_equal ints
@ -410,23 +410,23 @@ spec =
df.at 'datetimes' . to_vector . should_equal datetimes
# TODO move this test to Common_Table_Operations once we support dates there
r2 = df.order_by (Sort_Column_Selector.By_Name ['dates'])
r2 = df.order_by (['dates'])
r2.at 'dates' . to_vector . should_equal [Date.new 1999, Date.new 1999 12 31, Date.new 2000 2 7, Date.new 2000 10 3, Date.new 2020]
r2.at 'ints' . to_vector . should_equal [2, 4, 5, 3, 1]
r3 = df.order_by (Sort_Column_Selector.By_Name ['times'])
r3 = df.order_by (['times'])
r3.at 'times' . to_vector . should_equal [Time_Of_Day.new 1 30 40, Time_Of_Day.new 10 20 30, Time_Of_Day.new 12, Time_Of_Day.new 12 30 0, Time_Of_Day.new 23 59 59]
r3.at 'ints' . to_vector . should_equal [2, 5, 1, 4, 3]
r4 = df.order_by (Sort_Column_Selector.By_Name ['datetimes'])
r4 = df.order_by (['datetimes'])
r4.at 'datetimes' . to_vector . should_equal [Date_Time.new 1999 1 1 1 30 40, Date_Time.new 1999 12 31 12 30 0, Date_Time.new 2000 10 3 10 20 30, Date_Time.new 2000 10 3 23 59 59, Date_Time.new 2020 1 1 12]
r4.at 'ints' . to_vector . should_equal [2, 4, 5, 3, 1]
r5 = df.order_by (Sort_Column_Selector.By_Name ['objs'])
r5 = df.order_by (['objs'])
r5.at 'objs' . to_vector . should_equal [My.Data 2 3, My.Data 6 7, My.Data 8 9, My.Data 10 30, My.Data 100 2]
r5.at 'ints' . to_vector . should_equal [2, 3, 4, 5, 1]
r6 = df.order_by (Sort_Column_Selector.By_Name ['mixed_dates'])
r6 = df.order_by (['mixed_dates'])
r6 . should_fail_with Incomparable_Values
Test.group "Sorting Columns" <|
@ -663,7 +663,7 @@ spec =
texts = ["texts", ['ściana', 'ściana', 'łąka', 's\u0301ciana', 'ła\u0328ka', 'sciana']]
ints = ["ints", [1, 2, 4, 8, 16, 32]]
table = Table.new [texts, ints]
r1 = table.aggregate [Group_By "texts", Sum "ints"] . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "texts"])
r1 = table.aggregate [Group_By "texts", Sum "ints"] . order_by ([Sort_Column.Name "texts"])
r1.at "texts" . to_vector . should_equal ['sciana', 'ściana', 'łąka']
r1.at "Sum ints" . to_vector . should_equal [32, 11, 20]