Removing old functions and tidy up of Table types (#3519)

- Removed `select` method.
- Removed `group` method.
- Removed `Aggregate_Table` type.
- Removed `Order_Rule` type.
- Removed `sort` method from Table.
- Expanded comments on `order_by`.
- Updated comment on `aggregate` in the Database library.
- Updated Visualisation to use new APIs.
- Updated Data Science examples to use new APIs.
- Moved the Examples tests out of Tests into their own test project.
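
As a quick reference, a rough before/after sketch of the replaced calls on the in-memory `Standard.Table` backend. The `example_migration` helper and its column choices are illustrative only; the imports and calls are taken from the examples updated in this diff, and the Database backend exposes the same `order_by`/`aggregate` API.

```
from Standard.Base import all
import Standard.Examples
import Standard.Table.Data.Sort_Column_Selector
import Standard.Table.Data.Sort_Column
from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Name
from Standard.Table.Data.Aggregate_Column import all

example_migration =
    table = Examples.inventory_table
    # `table.select ["item_name", "price"]` becomes:
    selected = table.select_columns (By_Name ["item_name", "price"])
    # `table.sort by="price" order=Sort_Direction.Descending` becomes:
    ordered = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "price" Sort_Direction.Descending])
    # `table.group by="item_name"` plus manual reductions becomes a single call:
    aggregated = table.aggregate [Group_By "item_name", Count Nothing]
    [selected, ordered, aggregated]
```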

# Important Notes
Need to get Examples_Tests added to CI.
James Dunkerley 2022-06-14 14:37:20 +01:00 committed by GitHub
parent e83c36d9d6
commit a0c6fa9c96
30 changed files with 215 additions and 841 deletions

View File

@ -286,6 +286,7 @@ jobs:
$ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Geo_Tests
$ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Visualization_Tests
$ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Image_Tests
$ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Examples_Tests
- name: Compile the Standard Libraries (Unix)
shell: bash
@ -311,6 +312,7 @@ jobs:
$ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Geo_Tests
$ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Visualization_Tests
$ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Image_Tests
$ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Examples_Tests
- name: Test Engine Distribution Without Caches (Windows)
shell: bash
@ -321,6 +323,7 @@ jobs:
$ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Geo_Tests
$ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Visualization_Tests
$ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Image_Tests
$ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Examples_Tests
- name: Compile the Standard Libraries (Windows)
shell: bash
@ -346,6 +349,7 @@ jobs:
$ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Geo_Tests
$ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Visualization_Tests
$ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Image_Tests
$ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Examples_Tests
# Publish
- name: Compress the built artifacts for upload

View File

@ -139,6 +139,8 @@
API and added builders for customizing less common settings.][3516]
- [Allow control of sort direction in `First` and `Last` aggregations.][3517]
- [Implemented `Text.write`, replacing `File.write_text`.][3518]
- [Removed obsolete `select`, `group`, `sort` and related types from tables.]
[3519]
[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -219,6 +221,7 @@
[3516]: https://github.com/enso-org/enso/pull/3516
[3517]: https://github.com/enso-org/enso/pull/3517
[3518]: https://github.com/enso-org/enso/pull/3518
[3519]: https://github.com/enso-org/enso/pull/3519
#### Enso Compiler

View File

@ -24,8 +24,8 @@ component-groups:
- Standard.Base.Join:
exports:
- Standard.Database.Data.Table.Table.join
- Standard.Database.Data.Table.Table.group
- Standard.Database.Data.Table.Table.aggregate
- Standard.Base.Transform:
exports:
- Standard.Database.Data.Table.Table.sort
- Standard.Database.Data.Table.Table.order_by
- Standard.Database.Data.Column.Column.to_table

View File

@ -4,6 +4,8 @@ import Standard.Database.Data.Internal.Helpers
import Standard.Database.Data.Internal.IR
import Standard.Database.Data.Table
import Standard.Table.Data.Column as Materialized_Column
import Standard.Table.Data.Sort_Column_Selector
import Standard.Table.Data.Sort_Column
from Standard.Database.Data.Sql import Sql_Type
from Standard.Database.Data.Table import Integrity_Error
@ -458,12 +460,11 @@ type Column
column.sort
> Example
Sorting `column` in descending order, placing missing values at the
top of the resulting column.
column.sort order=Sort_Direction.Descending missing_last=False
sort : Sort_Direction -> Boolean -> (Any -> Any -> Ordering) | Nothing -> Column
sort order=Sort_Direction.Ascending missing_last=True =
this.to_table.sort by=this order=order missing_last=missing_last . at this.name
Sorting `column` in descending order.
column.sort order=Sort_Direction.Descending
sort : Sort_Direction -> Column
sort order=Sort_Direction.Ascending =
this.to_table.order_by (Sort_Column_Selector.By_Column [Sort_Column.Column this order]) . at this.name
## UNSTABLE
@ -637,4 +638,3 @@ lift_aggregate new_name connection expected_type expr context =
new_ixes = cols.second
new_ctx = IR.subquery_as_ctx subquery . set_index new_ixes
Column new_name connection new_col.sql_type new_col.expression new_ctx

View File

@ -17,7 +17,6 @@ import Standard.Table.Internal.Aggregate_Column_Helper
from Standard.Database.Data.Column as Column_Module import Column, Aggregate_Column_Builder
from Standard.Database.Data.Internal.IR import Internal_Column
from Standard.Table.Data.Table import No_Such_Column_Error
from Standard.Table.Data.Order_Rule as Order_Rule_Module import Order_Rule
from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column_Selector, By_Index
from Standard.Base.Data.Text.Text_Ordering as Text_Ordering_Module import Text_Ordering
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
@ -356,7 +355,7 @@ type Table
Since this Table is backed by an SQL database, the Table returned by the
`limit` method is deterministic only if the Table has been ordered (using
the `sort` method).
the `order_by` method).
Otherwise, no order is imposed, so the returned Table will include at most
`max_rows` rows, but there are no guarantees on which rows will be
@ -365,7 +364,7 @@ type Table
Table is materialized.
The limit is applied at the very end, so the new Table behaves exactly as
the old one, just limitting its results when being materialized.
the old one, just limiting its results when being materialized.
Specifically, applying further filters will still apply to the whole
result set and the limit will be taken after applying these filters.
@ -373,7 +372,7 @@ type Table
In the call below, assuming that the table of `t1` contains rows for
numbers 1, 2, ..., 10, will return rows starting from 6 and not an empty
result as one could expect if the limit was applied before the filters.
t1 = table.sort by='A' . limit 5
t1 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A"]) . limit 5
t2 = t1.where (t1.at 'A' > 5)
t2.to_dataframe
limit : Integer -> Table
@ -481,6 +480,28 @@ type Table
descending order.
table.order_by (Sort_Column_Selector.By_Index [Sort_Column.Index 1, Sort_Column.Index -7 Sort_Direction.Descending])
> Example
Sorting `table` in ascending order by the value in column `'Quantity'`.
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity'])
> Example
Sorting `table` in descending order by the value in column `'Quantity'`.
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity' Sort_Direction.Descending])
> Example
Sorting `table` in ascending order by the value in column `'Quantity'`,
using the value in column `'Rating'` for breaking ties.
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity', Sort_Column.Name 'Rating'])
> Example
Sorting `table` in ascending order by the value in column `'Quantity'`,
using the value in column `'Rating'` in descending order for breaking
ties.
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity', Sort_Column.Name 'Rating' Sort_Direction.Descending])
order_by : Sort_Column_Selector -> Text_Ordering -> Problem_Behavior -> Table
order_by (columns = (Sort_Column_Selector.By_Name [(Sort_Column.Name (this.columns.at 0 . name))])) text_ordering=Text_Ordering on_problems=Report_Warning = Panic.handle_wrapped_dataflow_error <|
problem_builder = Problem_Builder.new
@ -497,91 +518,6 @@ type Table
new_ctx = this.context.add_orders new_order_descriptors
this.updated_context new_ctx
## UNSTABLE
Sorts the table according to the specified rules.
Arguments:
- by: Specifies the columns used for reordering the table. This
argument may be one of:
- a text: The text is treated as a column name.
- a column: Any column, which is an expression computed from this
table.
- an order rule: Specifies both the sorting column and additional
settings, that will take precedence over the global parameters of
this sort operation. The `column` field of the rule may be a text
or a column, with the semantics described above.
- a vector of any of the above: This will result in a hierarchical
sorting, such that the first rule is applied first, the second is
used for breaking ties, etc.
- order: Specifies the default sort order for this operation. All the
rules specified in the `by` argument will default to this setting,
unless specified in the rule.
- missing_last: Specifies the default placement of missing values when
compared to non-missing ones. This setting may be overridden by the
particular rules of the `by` argument. Note thet this argument is
independent from `order`, i.e. missing values will always be sorted
according to this rule, ignoring the ascending / descending setting.
> Example
Sorting `table` in ascending order by the value in column `'Quantity'`
table.sort by='Quantity'
> Example
Sorting `table` in descending order by the value in column `'Quantity'`,
placing missing values at the top of the table.
table.sort by='Quantity' order=Sort_Direction.Descending missing_last=False
> Example
Sorting `table` in ascending order by the value in column `'Quantity'`,
using the value in column `'Rating'` for breaking ties.
table.sort by=['Quantity', 'Rating']
> Example
Sorting `table` in ascending order by the value in column `'Quantity'`,
using the value in column `'Rating'` in descending order for breaking
ties.
table.sort by=['Quantity', Order_Rule 'Rating' (order=Sort_Direction.Descending)]
> Example
Sorting `table` in ascending order by the value in an externally
computed column, using the value in column `'Rating'` for breaking
ties.
quality_ratio = table.at 'Rating' / table.at 'Price'
table.sort by=[quality_ratio, 'Rating']
sort : Text | Column | Order_Rule | Vector.Vector (Text | Column | Order_Rule) -> Sort_Direction -> Boolean -> Table
sort by order=Sort_Direction.Ascending missing_last=True = Panic.recover Any <|
missing_to_ir last = case last of
True -> IR.Nulls_Last
False -> IR.Nulls_First
wrap_elem elem =
IR.Order_Descriptor (this.resolve elem . expression) order (missing_to_ir missing_last) collation=Nothing
to_ir elem = case elem of
Text -> wrap_elem elem
Column _ _ _ _ _ -> wrap_elem elem
Order_Rule elem Nothing my_order my_nulls ->
chosen_order = my_order.if_nothing order
chosen_nulls = my_nulls.if_nothing missing_last
IR.Order_Descriptor (this.resolve elem . expression) chosen_order (missing_to_ir chosen_nulls) collation=Nothing
Order_Rule _ _ _ _ ->
Error.throw <| Unsupported_Database_Operation_Error "Custom comparators are not supported in Database"
elems = Helpers.unify_vector_singleton by . map to_ir
new_ctx = this.context.set_orders elems
this.updated_context new_ctx
## UNSTABLE
Selects a subset of columns from this table by name.
Arguments:
- columns: The names of the columns to select from the table.
select : Vector Text -> Table
select columns =
candidates = this.internal_columns + this.context.meta_index
find_col name = candidates.find (p -> p.name == name)
selected_cols = columns.map (find_col >> .catch) . filter (c -> c.is_nothing.not)
this.updated_columns selected_cols
## UNSTABLE
Efficiently joins two tables based on either the index or a key column.
@ -675,28 +611,31 @@ type Table
Table new_table_name this.connection new_columns new_ctx
## UNSTABLE
## ALIAS group, summarize
Returns an aggregate table resulting from grouping the elements by the
value of the specified column.
Aggregates the rows in a table using any `Group_By` entries in columns.
The columns argument specifies which additional aggregations to perform and to return.
Arguments:
- by: The column names on which to group. If this is not set, the index
will be used for grouping instead.
group : Vector Text | Text | Nothing -> Aggregate_Table
group by=Nothing = Panic.recover Any <|
cols = case by of
Nothing ->
if this.context.meta_index.is_empty then Panic.throw <| Illegal_State_Error "Trying to group by an empty index." else
this.context.meta_index
_ ->
- columns: Vector of `Aggregate_Column` specifying the aggregated table.
- on_problems: Specifies how to handle problems if they occur, reporting
them as warnings by default.
Helpers.unify_vector_singleton by . map (this.resolve >> .as_internal)
exprs = cols.map .expression
new_ctx = this.context.set_groups exprs . set_index cols
Aggregate_Table this.name this.connection this.internal_columns new_ctx
The following problems can occur:
- If a column name is not in the input table, a `Missing_Input_Columns`.
- If a column index is out of range, a `Column_Indexes_Out_Of_Range`.
- If there are no valid columns in the output table, a `No_Output_Columns`.
- If there are invalid column names in the output table, an `Invalid_Output_Column_Names`.
- If there are duplicate column names in the output table, a `Duplicate_Output_Column_Names`.
- If grouping on or computing the `Mode` on a floating point number, a `Floating_Point_Grouping`.
- If an aggregation fails, an `Invalid_Aggregation_Method`.
- If, when concatenating values, an unquoted delimiter is encountered, an `Unquoted_Delimiter`.
- If there are more than 10 issues with a single column, an `Additional_Warnings`.
## Prototype Group By function
> Example
Group by the Key column, count the rows
table.aggregate [Group_By "Key", Count Nothing]
aggregate : [Aggregate_Column] -> Problem_Behavior -> Table
aggregate columns (on_problems=Report_Warning) =
validated = Aggregate_Column_Helper.prepare_aggregate_columns columns this
@ -980,70 +919,6 @@ type Table
# TODO This should ideally be done in a streaming manner, or at least respect the row limits.
this.to_dataframe.write path format on_existing_file column_mapping on_problems
## Represents a table with grouped rows.
type Aggregate_Table
## UNSTABLE
Represents a table with grouped rows.
Arguments:
- name: The name of the table.
- connection: The connection with which the table is associated.
- internal_columns: The internal representation of the table columns.
- context: The context associated with this table.
# type Aggregate_Table (name : Text) (connection : Connection)
# (internal_columns : Vector [Text, IR.Expression])
# (context : IR.Context)
type Aggregate_Table name connection internal_columns context
## UNSTABLE
Returns a vector of aggregate columns in this table.
columns : Vector.Vector
columns = this.internal_columns . map this.make_column
## UNSTABLE
Returns a column containing the number of elements in each group.
count : Column
count =
expr = IR.Operation "COUNT_ROWS" []
# new_name connection expected_type expr context
Column_Module.lift_aggregate "count" this.connection Sql.Sql_Type.integer expr this.context
## UNSTABLE
Returns an aggregate column with the given name, contained in this table.
Arguments:
- name: The name of the aggregate column to get from the aggregate table.
at : Text -> Column ! No_Such_Column_Error
at name =
internal = this.internal_columns.find (p -> p.name == name)
this.make_column internal . map_error (_ -> No_Such_Column_Error name)
## PRIVATE
Helper to create aggregate columns from internal columns.
Arguments:
- internal: The internal column to make into an aggregate column.
make_column : Internal_Column -> Aggregate_Column_Builder
make_column internal =
Aggregate_Column_Builder internal.name this.connection internal.sql_type internal.expression this.context
## PRIVATE
Helper that returns the underlying table from before grouping.
ungrouped : Table
ungrouped =
new_ctx = this.context.set_groups []
new_cols = this.internal_columns.filter col->
turned_into_index = this.context.meta_index.exists i-> i.name == col.name
turned_into_index.not
Table this.name this.connection new_cols new_ctx
type Integrity_Error
## UNSTABLE

View File

@ -10,6 +10,3 @@ export Standard.Database.Connection.Connection
from Standard.Database.Connection.Database export all
import Standard.Table.Data.Table
import Standard.Table.Data.Order_Rule
from Standard.Table.Data.Table export No_Such_Column_Error
from Standard.Table.Data.Order_Rule export Order_Rule

View File

@ -278,14 +278,6 @@ transactions_table : Table.Table
transactions_table =
(Enso_Project.data / "food_shop_transactions.csv") . read
## An aggregate table for the relevant examples.
aggregate_table : Table.Aggregate_Table
aggregate_table =
transactions = here.transactions_table
item_names = here.inventory_table.at "item_name"
with_names = transactions.join item_names on="item_id"
with_names.group by="item_name"
## An example regex match.
match : Default_Engine.Match
match =

View File

@ -49,10 +49,12 @@
break ties in descending order.
import Standard.Examples
import Standard.Table.Data.Sort_Column_Selector
import Standard.Table.Data.Sort_Column
example_sort =
table = Examples.inventory_table
table.sort by=["total_stock", "sold_stock"] order=Sort_Direction.Descending
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "total_stock", Sort_Column.Name "sold_stock" Sort_Direction.Descending])
> Example
Compute the number of transactions that each item has participated in, as

View File

@ -11,9 +11,10 @@
Get the item name and price columns from the shop inventory.
import Standard.Examples
from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Name
example_select =
Examples.inventory_table.select ["item_name", "price"]
Examples.inventory_table.select_columns (By_Name ["item_name", "price"])
> Example
Remove any rows that contain missing values from the table.

View File

@ -24,14 +24,15 @@
example_map = Examples.integer_column.map (x -> x * x)
> Example
Sort the shop inventory based on the per-item price in descending order and
placing missing values at the top of the table.
Sort the shop inventory based on the per-item price in descending order.
import Standard.Examples
import Standard.Table.Data.Sort_Column_Selector
import Standard.Table.Data.Sort_Column
example_sort =
table = Examples.inventory_table
table.sort by="price" order=Sort_Direction.Descending missing_last=false
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "price" Sort_Direction.Descending])
> Example
Add two columns to each other.

View File

@ -29,10 +29,10 @@ component-groups:
- Standard.Base.Join:
exports:
- Standard.Table.Data.Table.Table.join
- Standard.Table.Data.Table.Table.group
- Standard.Table.Data.Table.Table.aggregate
- Standard.Base.Transform:
exports:
- Standard.Table.Data.Table.Table.sort
- Standard.Table.Data.Table.Table.order_by
- Standard.Table.Data.Table.Table.to_csv
- Standard.Table.Data.Column.Column.to_table
- Standard.Base.Output:

View File

@ -996,14 +996,15 @@ type Column
Examples.decimal_column.sort comparator=my_comparator
sort : Sort_Direction -> Boolean -> (Any -> Any -> Ordering) | Nothing -> Column
sort order=Sort_Direction.Ascending missing_last=True comparator=Nothing =
comparator_to_java cmp x y = cmp x y . to_sign
order_bool = case order of
Sort_Direction.Ascending -> True
Sort_Direction.Descending -> False
java_cmp = case comparator of
Nothing -> Nothing
cmp -> Table.comparator_to_java cmp
cmp -> comparator_to_java cmp
rule = OrderBuilder.OrderRule.new this.java_column java_cmp order_bool missing_last
fallback_cmp = Table.comparator_to_java .compare_to
fallback_cmp = comparator_to_java .compare_to
mask = OrderBuilder.buildOrderMask [rule].to_array fallback_cmp
new_col = this.java_column.applyMask mask
Column new_col

View File

@ -1,31 +0,0 @@
from Standard.Base import all
type Order_Rule
## UNSTABLE
A rule used for sorting table-like structures.
Arguments:
- column: a value representing the data dimension by which this rule is
sorting. This type does not specify the underlying representation of a
column, assuming that the sorting engine defines its own column
representation.
- comparator: a function taking two elements of the data being sorted on
and returning an `Ordering`. The function may be `Nothing`, in which
case a natural ordering will be used. Note that certain table backends
(such us database connectors) may not support this field being set to a
non-`Nothing` value.
- order: specifies whether the table should be sorted in an ascending or
descending order. The default value of `Nothing` delegates the decision
to the sorting function. Can be set to `Sort_Direction.Ascending` or
`Sort_Direction.Descending` from the `Base` library, to specify the
ordering.
- missing_last: whether the missing values should be placed at the
beginning or end of the sorted table. Note that this argument is
independent from `order`, i.e. missing values will always be sorted
according to this rule, ignoring the ascending / descending setting.
The default value of `Nothing` delegates the decision to the sorting
function.
type Order_Rule column comparator=Nothing order=Nothing missing_last=Nothing

View File

@ -14,7 +14,6 @@ import Standard.Table.Internal.Parse_Values_Helper
import Standard.Table.Internal.Delimited_Reader
import Standard.Table.Internal.Problem_Builder
from Standard.Table.Data.Order_Rule as Order_Rule_Module import Order_Rule
from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column_Selector, By_Index
from Standard.Table.Data.Column_Type_Selection as Column_Type_Selection_Module import Column_Type_Selection, Auto
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
@ -32,9 +31,9 @@ import Standard.Base.Data.Ordering.Comparator
polyglot java import org.enso.table.data.table.Table as Java_Table
polyglot java import org.enso.table.data.table.Column as Java_Column
polyglot java import org.enso.table.operations.OrderBuilder
polyglot java import org.enso.table.format.csv.Writer as Csv_Writer
polyglot java import org.enso.table.format.xlsx.Writer as Spreadsheet_Writer
polyglot java import org.enso.table.operations.OrderBuilder
polyglot java import java.io.StringReader
## Creates a new table from a vector of `[name, items]` pairs.
@ -498,7 +497,6 @@ type Table
new_names = this.columns.map mapper
this.take_end (this.length - 1) . rename_columns (Column_Mapping.By_Position new_names) on_problems=on_problems
## ALIAS group, summarize
Aggregates the rows in a table using any `Group_By` entries in columns.
@ -572,6 +570,44 @@ type Table
descending order.
table.order_by (Sort_Column_Selector.By_Index [Sort_Column.Index 1, Sort_Column.Index -7 Sort_Direction.Descending])
> Example
Sorting the shop inventory based on the per-item price in ascending
order.
import Standard.Examples
example_sort = Examples.inventory_table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "price"])
> Example
Sort the shop inventory based on the per-item price in descending order
import Standard.Examples
example_sort =
table = Examples.inventory_table
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "price" Sort_Direction.Descending])
> Example
Sort the shop inventory based on the total stock, using the number sold
to break ties in descending order.
import Standard.Examples
example_sort =
table = Examples.inventory_table
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "total_stock" Sort_Direction.Descending, Sort_Column.Name "sold_stock" Sort_Direction.Descending])
> Example
Sort the shop inventory in ascending order by the total stock, using
the number of items sold in descending order to break ties.
import Standard.Examples
import Standard.Table
example_sort =
table = Examples.inventory_table
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "total_stock", Sort_Column.Name "sold_stock" Sort_Direction.Descending])
order_by : Sort_Column_Selector -> Text_Ordering -> Problem_Behavior -> Table
order_by (columns = (Sort_Column_Selector.By_Name [(Sort_Column.Name (this.columns.at 0 . name))])) text_ordering=Text_Ordering on_problems=Report_Warning =
problem_builder = Problem_Builder.new
@ -749,20 +785,6 @@ type Table
Nothing -> Error.throw No_Index_Set_Error
i -> Column.Column i
## Alias Select Columns
Selects a subset of columns from this table by name.
> Example
Get the item name and price columns from the shop inventory.
import Standard.Examples
example_select =
Examples.inventory_table.select ["item_name", "price"]
select : Vector -> Table
select columns = Table (this.java_table.selectColumns columns.to_array)
## ALIAS Join Table
Efficiently joins two tables based on either the index or the specified
@ -876,183 +898,6 @@ type Table
cols = this.columns
here.new [["Column", cols.map .name], ["Items Count", cols.map .count], ["Storage Type", cols.map .storage_type]] . set_index "Column"
## ALIAS Group a Table
Returns an aggregate table resulting from grouping the elements by the
value of the specified column.
Arguments:
- by: The column in the table to perform grouping by. If this argument
is not set, the index is used for grouping instead.
> Example
Compute the number of transactions that each item has participated in,
as well as the number of each item sold across those transactions.
import Standard.Examples
import Standard.Table
example_group =
transactions = Examples.transactions_table
item_names = Examples.inventory_table.at "item_name"
aggregated = transactions.group by="item_id"
num_transactions = aggregated.at "transaction_id" . reduce .length . rename "transaction_count"
num_sold = aggregated.at "quantity" . reduce .sum . rename "num_sold"
Table.join [item_names, num_transactions, num_sold]
group : Text | Nothing -> Aggregate_Table
group by=Nothing =
Aggregate_Table (this.java_table.group by)
## ALIAS Sort Table
UNSTABLE
Sorts the table according to the specified rules.
Arguments:
- by: Specifies the columns used for reordering the table. This argument
may be one of:
- a text: The text is treated as a column name.
- a column: Any column, that may or may not belong to this table.
Sorting by a column will result in reordering the rows of this
table in a way that would result in sorting the given column.
- an order rule: Specifies both the sorting column and additional
settings, that will take precedence over the global parameters of
this sort operation. The `column` field of the rule may be a text
or a column, with the semantics described above.
- a vector of any of the above: This will result in a hierarchical
sorting, such that the first rule is applied first, the second is
used for breaking ties, etc.
- order: Specifies the default sort order for this operation. All the
rules specified in the `by` argument will default to this setting,
unless specified in the rule.
- missing_last: Specifies the default placement of missing values when
compared to non-missing ones. This setting may be overriden by the
particular rules of the `by` argument. Note thet this argument is
independent from `order`, i.e. missing values will always be sorted
according to this rule, ignoring the ascending / descending setting.
> Example
Sorting the shop inventory based on the per-item price in ascending
order.
import Standard.Examples
example_sort = Examples.inventory_table.sort by="price"
> Example
Sort the shop inventory based on the per-item price in descending order
and placing missing values at the top of the table.
import Standard.Examples
example_sort =
table = Examples.inventory_table
table.sort by="price" order=Sort_Direction.Descending missing_last=false
> Example
Sort the shop inventory based on the total stock, using the number sold
to break ties in descending order.
import Standard.Examples
example_sort =
table = Examples.inventory_table
table.sort by=["total_stock", "sold_stock"] order=Sort_Direction.Descending
> Example
Sort the shop inventory in ascending order by the total stock, using
the number of items sold in descending order to break ties.
import Standard.Examples
import Standard.Table
example_sort =
table = Examples.inventory_table
sold_stock_rule = Table.Order_Rule "sold_stock" order=Sort_Direction.Descending
table.sort by=["total_stock", sold_stock_rule]
> Example
Sorting the inventory in descending order based on the percentage of
the total stock sold, using the popularity of the product to break
ties.
import Standard.Examples
import Standard.Table
example_sort =
table = Examples.inventory_table.join Examples.popularity_table
percentage_sold = table.at "sold_stock" / table.at "total_stock"
table.sort by=[percentage_sold, "popularity"] order=Sort_Direction.Descending
> Example
Sort the inventory by the price using a custom comparator function.
import Standard.Examples
import Standard.Table
example_sort =
table = Examples.inventory_table
comparator a b = a.compare_to b*2
price_rule = Table.Order_Rule "price" comparator=comparator
table.sort by=price_rule
sort : Text | Column.Column | Order_Rule | Vector.Vector (Text | Column.Column | Order_Rule) -> Sort_Direction -> Boolean -> Table
sort by order=Sort_Direction.Ascending missing_last=True = Panic.recover Any <|
rules = this.build_java_order_rules by order missing_last
fallback_cmp = here.comparator_to_java .compare_to
mask = OrderBuilder.buildOrderMask rules.to_array fallback_cmp
new_table = this.java_table.applyMask mask
Table new_table
## PRIVATE
Transforms order rules from Enso into Java.
Arguments:
- rules: The rule(s) to convert.
- order: The sorting order.
- missing_last: Whether or not missing values should be ordered last.
build_java_order_rules : (Text | Column.Column. | Order_Rule | Vector (Text | Column.Column | Order_Rule)) -> Sort_Direction -> Boolean -> Vector
build_java_order_rules rules order missing_last = case rules of
Text -> [this.build_java_order_rule rules order missing_last]
Column.Column _ -> [this.build_java_order_rule rules order missing_last]
Order_Rule _ _ _ _ -> [this.build_java_order_rule rules order missing_last]
Vector.Vector _ -> rules.map (this.build_java_order_rule _ order missing_last)
## PRIVATE
Builds a java order rule.
Arguments:
- rule: The rule to convert.
- order: The sort order.
- missing_last: Whether or not missing values should be ordered last.
build_java_order_rule : (Text | Column.Column | Order_Rule) -> Sort_Direction -> Boolean -> OrderRule
build_java_order_rule rule order missing_last =
order_bool = case order of
Sort_Direction.Ascending -> True
Sort_Direction.Descending -> False
case rule of
Text ->
column = Panic.rethrow (this.at rule)
OrderBuilder.OrderRule.new column.java_column Nothing order_bool missing_last
Column.Column c ->
OrderBuilder.OrderRule.new c Nothing order_bool missing_last
Order_Rule col_ref cmp rule_order rule_nulls_last ->
c = case col_ref of
Text -> this.at col_ref . java_column
Column.Column c -> c
o = case rule_order of
Nothing -> order_bool
Sort_Direction.Ascending -> True
Sort_Direction.Descending -> False
nulls = case rule_nulls_last of
Nothing -> missing_last
_ -> rule_nulls_last
java_cmp = case cmp of
Nothing -> Nothing
c -> here.comparator_to_java c
OrderBuilder.OrderRule.new c java_cmp o nulls
## UNSTABLE
Concatenates `other` to `this`.
@ -1414,86 +1259,6 @@ Text.write_to_spreadsheet cell = cell.setCellValue this
which should be set by this method.
Date.write_to_spreadsheet cell = cell.setCellValue this.internal_local_date
## Represents a table with grouped rows.
type Aggregate_Table
## PRIVATE
A table type with grouped rows.
Arguments:
- java_table: The internal representation of the table.
type Aggregate_Table java_table
## Returns a vector of aggregate columns in this table.
> Example
Get a vector of aggregate columns from this table.
import Standard.Examples
example_columns = Examples.aggregate_table.columns
columns : Vector.Vector
columns = Vector.Vector this.java_table.getColumns . map Column.Aggregate_Column
## Returns a table containing columns resulting from calling `values` on
each column in `this`.
> Example
Get the values table from an aggregate table.
import Standard.Examples
example_values = Examples.aggregate_table.values
values : Table
values = this.columns . map (_.values name_suffix='') . reduce .join
## Returns a column containing the number of elements in each group of the
aggregate table.
> Examples
Get the counts for an aggregate table.
import Standard.Examples
example_count = Examples.aggregate_table.count
count : Column
count = Column.Column this.java_table.count
## ALIAS Get a Column
Returns an aggregate column with the given name, contained in this table.
Arguments:
- name: The name of the aggregate column to get.
> Example
Get the transaction ids column from the aggregate table.
import Standard.Examples
example_at = Examples.aggregate_table.at "transaction_id"
at : Text -> Column ! No_Such_Column_Error
at name = case this.java_table.getColumnByName name of
Nothing -> Error.throw (No_Such_Column_Error name)
c -> Column.Aggregate_Column c
## Prints an ASCII-art table with this data to the standard output.
Arguments:
- show_rows: the number of initial rows that should be displayed.
> Example
Pretty-print and display an aggregate table in the console.
import Standard.Examples
example_print = Examples.aggregate_table.print
print : Integer -> Nothing
print show_rows=10 = this.values.print show_rows
## UNSTABLE
An error returned when a non-existent column is being looked up.
@ -1588,17 +1353,6 @@ print_table header rows indices_count format_term =
" " + y
([" " + header_line, divider] + row_lines).join '\n'
## PRIVATE
Wraps the Enso comparator function so it's usable in Java.
Arguments:
- cmp: The Enso comparator function.
- x: The left operand to the comparator.
- y: The right operand to the comparator.
comparator_to_java : (Any -> Any -> Ordering) -> Any -> Any -> Integer
comparator_to_java cmp x y = cmp x y . to_sign
Table.from (that : Text) (format:File_Format.Delimited|File_Format.Fixed_Width = File_Format.Delimited '\t') (on_problems:Problem_Behavior=Report_Warning) =
java_reader = StringReader.new that
Delimited_Reader.read_from_reader format java_reader on_problems

View File

@ -1,6 +1,5 @@
from Standard.Base import all
from Standard.Table.Data.Table as Table_Module import No_Such_Column_Error
from Standard.Table.Data.Column as Column_Module import Column
from Standard.Table.Data.Aggregate_Column import all
from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column_Selector, By_Name, By_Index, By_Column

View File

@ -7,7 +7,6 @@ import Standard.Table.Io.Spreadsheet
import Standard.Table.Io.Spreadsheet_Write_Mode
import Standard.Table.Data.Table
import Standard.Table.Data.Column
import Standard.Table.Data.Order_Rule
import Standard.Table.Model
from Standard.Table.Io.Excel export Excel_Section, Excel_Range, read_excel
@ -19,7 +18,6 @@ export Standard.Table.Model
export Standard.Table.Io.File_Read
from Standard.Table.Data.Table export new, from_rows, join, concat, No_Such_Column_Error, Table
from Standard.Table.Data.Order_Rule export Order_Rule
## ALIAS To Table

View File

@ -1,4 +1,5 @@
from Standard.Base import all
from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Name
import Standard.Table.Data.Table as Dataframe_Table
import Standard.Table.Data.Column as Dataframe_Column
@ -31,8 +32,8 @@ prepare_visualization x max_rows = Helpers.recover_errors <| case x of
# Materialize a table with indices as normal columns (because dataframe does not support multi-indexing).
df = x.reset_index.to_dataframe max_rows
# Then split into actual columns and indices.
vis_df = df.select (x.columns.map .name)
indices = df.select (x.indices.map .name) . columns
vis_df = df.select_columns (By_Name (x.columns.map .name))
indices = df.select_columns (By_Name (x.indices.map .name)) . columns
all_rows_count = x.row_count
here.make_json vis_df indices all_rows_count
@ -43,14 +44,9 @@ prepare_visualization x max_rows = Helpers.recover_errors <| case x of
here.prepare_visualization x.to_table max_rows
# We display aggregates as their ungrouped counterparts.
Dataframe_Table.Aggregate_Table _ ->
ungrouped = Dataframe_Table.Table x.java_table.getUnderlyingTable
here.prepare_visualization ungrouped max_rows
Dataframe_Column.Aggregate_Column _ ->
ungrouped = Dataframe_Column.Column x.java_column.getColumn
here.prepare_visualization ungrouped.to_table max_rows
Database_Table.Aggregate_Table _ _ _ _ ->
here.prepare_visualization x.ungrouped max_rows
Database_Column.Aggregate_Column_Builder _ _ _ _ _ ->
here.prepare_visualization x.ungrouped.to_table max_rows

View File

@ -10,7 +10,6 @@ import org.enso.table.data.index.HashIndex;
import org.enso.table.data.index.Index;
import org.enso.table.data.index.MultiValueIndex;
import org.enso.table.data.mask.OrderMask;
import org.enso.table.data.table.aggregate.AggregateTable;
import org.enso.table.data.table.problems.AggregatedProblems;
import org.enso.table.error.NoSuchColumnException;
import org.enso.table.error.UnexpectedColumnTypeException;
@ -468,11 +467,6 @@ public class Table {
return new Table(newColumns, index);
}
public AggregateTable group(String by) {
Table t = by == null ? this : indexFromColumn(by);
return new AggregateTable(t);
}
/** @return a copy of the Column containing a slice of the original data */
public Table slice(int offset, int limit) {
Column[] newColumns = new Column[columns.length];

View File

@ -1,58 +0,0 @@
package org.enso.table.data.table.aggregate;
import org.enso.table.data.column.operation.aggregate.Aggregator;
import org.enso.table.data.index.Index;
import org.enso.table.data.table.Column;
import java.util.List;
import java.util.function.Function;
import java.util.stream.IntStream;
/** A column wrapper used for aggregation operations. */
public class AggregateColumn {
private final Index uniqueIndex;
private final Column column;
/**
* Creates a new column
*
* @param uniqueIndex the unique index obtained from the column's index
* @param column the wrapped column
*/
public AggregateColumn(Index uniqueIndex, Column column) {
this.uniqueIndex = uniqueIndex;
this.column = column;
}
/**
* Aggregates the groups using a given aggregation operation.
*
* @param aggName name of a vectorized operation that can be used if possible. If null is passed,
* this parameter is unused.
* @param outSuffix a string appended to the name of the resulting column.
* @param aggregatorFunction the function to use if a vectorized operation is not available.
* @param skipNa whether missing values should be passed to the {@code fallback} function.
* @return a column indexed by the unique index of this aggregate, storing results of applying the
* specified operation.
*/
public Column aggregate(
String aggName,
String outSuffix,
Function<List<Object>, Object> aggregatorFunction,
boolean skipNa) {
Aggregator aggregator =
column.getStorage().getAggregator(aggName, aggregatorFunction, skipNa, uniqueIndex.size());
for (int i = 0; i < uniqueIndex.size(); i++) {
IntStream ixes =
column.getIndex().loc(uniqueIndex.iloc(i)).stream().mapToInt(Integer::intValue);
aggregator.nextGroup(ixes);
}
return new Column(column.getName() + outSuffix, uniqueIndex, aggregator.seal());
}
/** @return the underlying (ungrouped) column. */
public Column getColumn() {
return column;
}
}

View File

@ -1,59 +0,0 @@
package org.enso.table.data.table.aggregate;
import org.enso.table.data.column.storage.LongStorage;
import org.enso.table.data.index.Index;
import org.enso.table.data.table.Column;
import org.enso.table.data.table.Table;
import java.util.Arrays;
import java.util.List;
/** Represents a table grouped by a given index. */
public class AggregateTable {
private final Table table;
private final Index uniqueIndex;
/** @param table the underlying table */
public AggregateTable(Table table) {
this.table = table;
this.uniqueIndex = table.getIndex().unique();
}
/** @return a column containing group sizes in this aggregate. */
public Column count() {
long[] counts = new long[uniqueIndex.size()];
for (int i = 0; i < uniqueIndex.size(); i++) {
List<Integer> items = table.getIndex().loc(uniqueIndex.iloc(i));
counts[i] = items == null ? 0 : items.size();
}
LongStorage storage = new LongStorage(counts);
return new Column("count", uniqueIndex, storage);
}
/**
* Returns a column with the given name.
*
* @param n the column name
* @return column with the given name or null if does not exist
*/
public AggregateColumn getColumnByName(String n) {
Column c = table.getColumnByName(n);
if (c == null) {
return null;
} else {
return new AggregateColumn(uniqueIndex, c);
}
}
/** @return Aggregate columns contained in this table. */
public AggregateColumn[] getColumns() {
return Arrays.stream(table.getColumns())
.map(c -> new AggregateColumn(uniqueIndex, c))
.toArray(AggregateColumn[]::new);
}
/** @return the underlying (ungrouped) table. */
public Table getUnderlyingTable() {
return table;
}
}

View File

@ -0,0 +1 @@
This is a set of tests for the `Examples` library for Enso.

View File

@ -0,0 +1,7 @@
name: Tests
namespace: enso_dev
enso-version: default
version: 0.0.1
license: MIT
author: enso-dev@enso.org
maintainer: enso-dev@enso.org

View File

@ -112,17 +112,11 @@ spec = Test.group "Examples" <|
Examples.text_column_1
Examples.text_column_2
Test.specify "should provide an aggregate column" <|
Examples.aggregate_column
Test.specify "should provide various example tables" <|
Examples.inventory_table
Examples.popularity_table
Examples.transactions_table
Test.specify "should provide an aggregate table" <|
Examples.aggregate_table
Test.specify "should provide an example of a regex match" <|
match = Examples.match
match.groups.length . should_equal 5

View File

@ -0,0 +1,8 @@
from Standard.Base import all
import Standard.Test
import project.Examples_Spec
main = Test.Suite.run_main <|
Examples_Spec.spec

View File

@ -830,7 +830,7 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te
table = table_builder [["A", ["foo", "bar", "foo", "foo"]], ["B", ["a", "b", "c", "d"]]]
result = table.aggregate [Group_By "A", (Concatenate "B" prefix="[[" suffix="]]" separator="; ")]
result.row_count . should_equal 2
materialized = materialize result . sort "A"
materialized = materialize result . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A"])
materialized.columns.length . should_equal 2
materialized.columns.at 0 . name . should_equal "A"
materialized.columns.at 0 . to_vector . should_equal ["bar", "foo"]
@ -910,14 +910,14 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te
r1 = t5.aggregate [Group_By "G", Count_Distinct "A" (ignore_nothing=True)]
r1.row_count . should_equal 2
m1 = materialize r1 . sort "G"
m1 = materialize r1 . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "G"])
m1.columns.length . should_equal 2
m1.columns.first.to_vector . should_equal ["bar", "foo"]
m1.columns.second.to_vector . should_equal [0, 1]
r2 = t5.aggregate [Group_By "G", Count_Distinct "A" (ignore_nothing=False)]
r2.row_count . should_equal 2
m2 = materialize r2 . sort "G"
m2 = materialize r2 . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "G"])
m2.columns.length . should_equal 2
m2.columns.first.to_vector . should_equal ["bar", "foo"]
m2.columns.second.to_vector . should_equal [1, 2]
@ -959,7 +959,7 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te
r2 = table.aggregate [Group_By "G", Average "X"]
r2.row_count.should_equal 2
m2 = materialize r2 . sort "G"
m2 = materialize r2 . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "G"])
m2.columns.length . should_equal 2
m2.columns.first.to_vector . should_equal ["a", "b"]
m2.columns.second.to_vector . should_equal [0.5, 1]
@ -1145,7 +1145,7 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te
table = table_builder [["A", [1, 1, 2, 1]], ["B", [3, 2, 2, 3]], ["C", [11, 12, 13, 14]]]
grouped = table.aggregate [Group_By "B", Group_By "A"]
grouped.row_count . should_equal 3
materialized = materialize grouped . sort ["A", "B"]
materialized = materialize grouped . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A", Sort_Column.Name "B"])
materialized.columns.length . should_equal 2
materialized.columns.at 1 . name . should_equal "A"
materialized.columns.at 1 . to_vector . should_equal [1, 1, 2]

View File

@ -1,14 +1,19 @@
from Standard.Base import all
from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Name
import Standard.Table.Data.Sort_Column_Selector
import Standard.Table.Data.Sort_Column
import project.Database.Helpers.Fake_Test_Connection
import Standard.Database.Data.Dialect
import Standard.Database.Data.Table as Table_Module
import Standard.Test
import Standard.Test.Problems
from Standard.Table.Data.Aggregate_Column import all
from Standard.Database import all
from Standard.Database.Data.Sql import Sql_Type
from Standard.Table import No_Such_Column_Error, Order_Rule
from Standard.Table import No_Such_Column_Error
from Standard.Table.Error as Table_Errors import No_Input_Columns_Selected, Missing_Input_Columns
from Standard.Database.Error as Database_Errors import Unsupported_Database_Operation_Error
spec =
@ -38,7 +43,7 @@ spec =
Test.group "[Codegen] Basic Select" <|
Test.specify "should select columns from a table" <|
t1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1"', []]
t2 = t1.select ["C", "B", "undefined"]
t2 = t1.select_columns (By_Name ["C", "B", "undefined"]) reorder=True
t2.to_sql.prepare . should_equal ['SELECT "T1"."C" AS "C", "T1"."B" AS "B" FROM "T1" AS "T1"', []]
foo = t1.at "A" . rename "FOO"
@ -47,7 +52,7 @@ spec =
t3 = t2.set "bar" foo
t3.to_sql.prepare . should_equal ['SELECT "T1"."C" AS "C", "T1"."B" AS "B", "T1"."A" AS "bar" FROM "T1" AS "T1"', []]
Test.specify "should fail if at is called for a nonexisting column" <|
Test.specify "should fail if at is called for a non-existent column" <|
t1.at "undefined" . should_fail_with No_Such_Column_Error
Test.specify "should allow to limit the amount of returned results" <|
@ -55,7 +60,7 @@ spec =
t2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" LIMIT 5', []]
Test.specify "should work correctly when there are no columns" <|
empty = t1.select []
empty = t1.select_columns (By_Name [])
json = Json.from_pairs [["query", Nothing], ["message", "The table has no columns so a query cannot be generated."]]
empty.to_json . should_equal json
empty.columns.length . should_equal 0
@ -146,28 +151,26 @@ spec =
Test.group "[Codegen] Sorting" <|
Test.specify "should allow sorting by a single column name" <|
r1 = t1.sort by="A" . at "B"
r1.to_sql.prepare . should_equal ['SELECT "T1"."B" AS "B" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC NULLS LAST', []]
r1 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A"]) . at "B"
r1.to_sql.prepare . should_equal ['SELECT "T1"."B" AS "B" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC', []]
r2 = t1.sort by="B" missing_last=False order=Sort_Direction.Descending . at "A"
r2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A" FROM "T1" AS "T1" ORDER BY "T1"."B" DESC NULLS FIRST', []]
r2 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "B" Sort_Direction.Descending]) . at "A"
r2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A" FROM "T1" AS "T1" ORDER BY "T1"."B" DESC', []]
Test.specify 'should allow sorting by multiple column names' <|
r1 = t1.sort by=['A', 'B']
r1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC NULLS LAST, "T1"."B" ASC NULLS LAST', []]
Test.specify 'should allow sorting by expressions' <|
sum = t1.at 'A' + t1.at 'B'
r1 = t1.sort by=sum . at "C"
r1.to_sql.prepare . should_equal ['SELECT "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY ("T1"."A" + "T1"."B") ASC NULLS LAST', []]
r1 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'A', Sort_Column.Name 'B'])
r1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC, "T1"."B" ASC', []]
Test.specify 'should allow sorting with specific by-column rules' <|
r1 = t1.sort by=['A', (Order_Rule 'B' order=Sort_Direction.Descending)]
r1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC NULLS LAST, "T1"."B" DESC NULLS LAST', []]
r1 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A", Sort_Column.Name "B" Sort_Direction.Descending])
r1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC, "T1"."B" DESC', []]
Test.specify 'should return dataflow error when passed a non-existent column' <|
r = t1.sort by='foobar'
r.should_fail_with No_Such_Column_Error
Test.specify 'should return warnings and errors when passed a non-existent column' <|
action = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'foobar']) on_problems=_
tester table =
table.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1"', []]
problems = [Missing_Input_Columns [Sort_Column.Name 'foobar'], No_Input_Columns_Selected]
Problems.test_problem_handling action problems tester
Test.group "Helpers" <|
Test.specify "combine_names should combine lists of names" <|

View File

@ -1,7 +1,14 @@
from Standard.Base import all
from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Name
from Standard.Database import all
import Standard.Table.Data.Table as Materialized_Table
import Standard.Table.Data.Sort_Column_Selector
import Standard.Table.Data.Sort_Column
import Standard.Test
import Standard.Test.Problems
from Standard.Table.Error as Table_Errors import No_Input_Columns_Selected, Missing_Input_Columns
import project.Database.Helpers.Name_Generator
from Standard.Table.Data.Aggregate_Column import all
@ -46,7 +53,7 @@ spec prefix connection pending=Nothing =
ix2.name . should_equal 'a'
ix2.to_vector . should_equal [1, 4]
Test.specify "should work correctly when there are no columns" <|
empty = t1.select []
empty = t1.select_columns (By_Name [])
empty.to_dataframe.columns.length . should_equal 0
empty.to_dataframe.row_count . should_equal empty.row_count
Test.specify "should handle bigger result sets" <|
@ -125,24 +132,24 @@ spec prefix connection pending=Nothing =
the Dataframes library, so it is independent of the library under
testing here.
Test.specify "should allow joining tables index-on-index" <|
r_1 = a.set_index 'x' . join (b.set_index 'w') . to_dataframe . sort by=['y', 'z']
r_1 = a.set_index 'x' . join (b.set_index 'w') . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'y', Sort_Column.Name 'z'])
r_1.at 'y' . to_vector . should_equal ['bar', 'baz', 'eggs', 'foo', 'spam', 'spam', 'spam']
r_1.at 'z' . to_vector . should_equal [Nothing, Nothing, 'foo', Nothing, 'bar', 'eggs', 'foo']
r_2 = a.set_index 'y' . join (b.set_index 'z') drop_unmatched=True . to_dataframe . sort by=['x', 'w']
r_2 = a.set_index 'y' . join (b.set_index 'z') drop_unmatched=True . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'x', Sort_Column.Name 'w'])
r_2.at 'x' . to_vector . should_equal [0, 0, 1, 1, 3, 6]
r_2.at 'w' . to_vector . should_equal [3, 6, 3, 5, 5, 3]
Test.specify "should allow joining tables column-on-index" <|
r_1 = a.join (b.set_index 'w') on='x' . to_dataframe . sort by=['y', 'z']
r_1 = a.join (b.set_index 'w') on='x' . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'y', Sort_Column.Name 'z'])
r_1.at 'y' . to_vector . should_equal ['bar', 'baz', 'eggs', 'foo', 'spam', 'spam', 'spam']
r_1.at 'z' . to_vector . should_equal [Nothing, Nothing, 'foo', Nothing, 'bar', 'eggs', 'foo']
r_2 = a.join (b.set_index 'z') drop_unmatched=True on='y' . to_dataframe . sort by=['x', 'w']
r_2 = a.join (b.set_index 'z') drop_unmatched=True on='y' . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'x', Sort_Column.Name 'w'])
r_2.at 'x' . to_vector . should_equal [0, 0, 1, 1, 3, 6]
r_2.at 'w' . to_vector . should_equal [3, 6, 3, 5, 5, 3]
Test.specify "should allow self-joins and append suffixes to disambiguate column names" <|
r_1 = a.join (a.set_index 'x') on='x' . to_dataframe . sort by='x'
r_1 = a.join (a.set_index 'x') on='x' . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'x'])
r_1.columns.map .name . should_equal ['x', 'y_left', 'y_right']
r_1.at 'x' . to_vector . should_equal [0, 1, 3, 6, 7]
expected_y = ['foo', 'bar', 'spam', 'eggs', 'baz']
@ -159,8 +166,8 @@ spec prefix connection pending=Nothing =
ta_2 = ta.set_index "id"
tb_2 = tb.set_index "id"
res = (tc.join ta_2 on="id_a") . join tb_2 on="id_b" left_suffix="_a" right_suffix="_b"
sel = res.select ["name_a", "name_b"]
df = sel.to_dataframe . sort by="name_a"
sel = res.select_columns (By_Name ["name_a", "name_b"])
df = sel.to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "name_a"])
df . at "name_a" . to_vector . should_equal ["Foo", "Hmm"]
df . at "name_b" . to_vector . should_equal ["Bar", "Hmm"]
@ -201,41 +208,6 @@ spec prefix connection pending=Nothing =
empty.columns.length . should_equal 0
empty.to_dataframe.columns.length . should_equal 0
Test.group prefix+"Old Aggregation" pending=pending <|
t = upload "T6" <|
Materialized_Table.new [["name", ["foo", "bar", "foo", "baz", "foo", "bar", "quux"]], ["price", [0.4, 3.5, Nothing, 6.7, Nothing, 97, Nothing]], ["quantity", [10, 20, 30, 40, 50, 60, 70]]]
agg = t.group by='name'
## A helper which makes sure that the groups are ordered according to the index, using the Table library
determinize col =
df = col.to_dataframe.to_table
df.sort by=df.index . at col.name
Test.specify "should allow counting group sizes" <|
determinize agg.count . to_vector . should_equal [2, 1, 3, 1]
Test.specify "should allow aggregating columns with basic arithmetic aggregators" <|
determinize (agg.at 'price' . mean) . to_vector . should_equal [50.25, 6.7, 0.4, Nothing]
determinize (agg.at 'price' . min) . to_vector . should_equal [3.5, 6.7, 0.4, Nothing]
determinize (agg.at 'price' . max) . to_vector . should_equal [97, 6.7, 0.4, Nothing]
Test.specify "should allow to join multiple aggregations" <|
m1 = agg.at 'price' . mean
m2 = agg.at 'quantity' . max
df = (m1.join m2).to_dataframe
df2 = df.sort by=df.index
df2.at 'price_mean' . to_vector . should_equal [50.25, 6.7, 0.4, Nothing]
df2.at 'quantity_max' . to_vector . should_equal [60, 40, 50, 70]
Test.specify "should correctly compute the result size" <|
m = agg.at 'price' . mean
m.length . should_equal m.to_vector.length
m.length . should_equal 4
Test.specify "should correctly count values" <|
m = agg.at 'price' . mean
m.count . should_equal 3
m.count_missing . should_equal 1
Test.group prefix+"Column-wide statistics" pending=pending <|
Test.specify 'should allow computing basic column-wide stats' <|
t7 = upload "T7" <|
@ -251,38 +223,29 @@ spec prefix connection pending=Nothing =
Materialized_Table.new [["id", [1,2,3,4,5,6]], ["name", ["shoes","trousers","dress","skirt","blouse","t-shirt"]], ["quantity", [20,10,20,10,30,30]], ["rating", [3.0,Nothing,7.3,3.0,2.2,Nothing]], ["price", [37.2,42.1,64.1,87.4,13.5,64.2]]]
Test.specify "should allow sorting by a single column name" <|
r_1 = df.sort by="quantity"
r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'quantity'])
r_1.at 'id' . to_vector . should_equal [2,4,1,3,5,6]
r_2 = df.sort by="rating" missing_last=False
r_2.at 'id' . to_vector . should_equal [2,6,5,1,4,3]
r_3 = df.sort by="rating" missing_last=False order=Sort_Direction.Descending
r_3.at 'id' . to_vector . should_equal [2,6,3,1,4,5]
r_3 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'rating' Sort_Direction.Descending])
r_3.at 'id' . to_vector . should_equal [3,1,4,5,2,6]
Test.specify 'should allow sorting by multiple column names' <|
r_1 = df.sort by=['quantity', 'rating']
r_1.at 'id' . to_vector . should_equal [4,2,1,3,5,6]
r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'quantity', Sort_Column.Name 'rating'])
r_1.at 'id' . to_vector . should_equal [2,4,1,3,6,5]
r_2 = df.sort by=['rating', 'quantity'] missing_last=False order=Sort_Direction.Descending
r_2.at 'id' . to_vector . should_equal [6,2,3,1,4,5]
Test.specify 'should allow sorting by external columns' <|
quality_ratio = df.at 'rating' / df.at 'price'
r_1 = df.sort by=quality_ratio
r_1.at 'id' . to_vector . should_equal [4,1,3,5,2,6]
r_2 = df.sort by=['quantity', quality_ratio]
r_2.at 'id' . to_vector . should_equal [4,2,1,3,5,6]
r_2 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'rating' Sort_Direction.Descending, Sort_Column.Name 'quantity' Sort_Direction.Descending])
r_2.at 'id' . to_vector . should_equal [3,1,4,5,6,2]
Test.specify 'should allow sorting with specific by-column rules' <|
r_1 = df.sort by=['quantity', (Order_Rule 'price' order=Sort_Direction.Descending)]
r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "quantity", Sort_Column.Name "price" Sort_Direction.Descending])
r_1.at 'id' . to_vector . should_equal [4,2,3,1,6,5]
Test.specify 'should return dataflow error when passed a non-existent column' <|
r = df.sort by='foobar'
r.should_fail_with No_Such_Column_Error
Test.specify 'should return warnings and errors when passed a non-existent column' <|
action = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'foobar']) on_problems=_
tester table =
table.at 'id' . to_vector . should_equal [1,2,3,4,5,6]
problems = [Missing_Input_Columns [Sort_Column.Name 'foobar'], No_Input_Columns_Selected]
Problems.test_problem_handling action problems tester
Test.specify 'should correctly reorder all kinds of columns and leave the original columns untouched' <|
ints = [1, 2, 3, 4, 5]
@ -291,7 +254,7 @@ spec prefix connection pending=Nothing =
texts = ["foo", "foo", "bar", "baz", "spam"]
df = upload "T8" <|
Materialized_Table.new [["ord", [0,3,2,4,1]], ["ints", ints], ["reals", reals], ["bools", bools], ["texts", texts]]
r = df.sort by='ord'
r = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'ord'])
r.at 'ints' . to_vector . should_equal [1, 5, 3, 2, 4]
df.at 'ints' . to_vector . should_equal ints
@ -309,22 +272,17 @@ spec prefix connection pending=Nothing =
c = df.at 'rating'
r_1 = c.sort
r_1.to_vector.should_equal [2.2, 3.0, 3.0, 7.3, Nothing, Nothing]
r_1.to_vector.should_equal [Nothing, Nothing, 2.2, 3.0, 3.0, 7.3]
r_2 = c.sort order=Sort_Direction.Descending
r_2.to_vector.should_equal [7.3, 3.0, 3.0, 2.2, Nothing, Nothing]
r_3 = c.sort order=Sort_Direction.Descending missing_last=False
r_3.to_vector.should_equal [Nothing, Nothing, 7.3, 3.0, 3.0, 2.2]
Test.group prefix+"Index" pending=pending <|
t0 = upload "Tix" <|
Materialized_Table.new [["ix", [1,2,3]], ["c1", [4,5,6]]]
t = t0.set_index 'ix'
Test.specify "should be accessible by `at` like other columns" <|
t.at 'ix' . to_vector . should_equal t.index.to_vector
Test.specify "should be accessible by `select` like other columns" <|
t.select ['ix'] . columns . first . to_vector . should_equal t.index.to_vector
Test.specify "treated as a column indexed by itself should still correctly compute values" <|
col = t.index+10
vec = [11, 12, 13]
@ -360,7 +318,7 @@ spec prefix connection pending=Nothing =
(InMemory) table are ordered according to a specified column or list
of columns.
determinize_by order_column table =
table.sort by=order_column
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name order_column])
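A hedged usage sketch of this helper (`my_table` and `expected_names` are placeholders, not part of this commit): row order is pinned down via `order_by` before comparing vectors, since query results carry no guaranteed ordering on their own.
    # Order the materialized rows by the "id" column, then assert on a column's values.
    ordered = determinize_by "id" my_table
    ordered.at "name" . to_vector . should_equal expected_names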
Test.specify "should allow counting group sizes and elements" <|
aggregates = [Count Nothing, Count_Not_Nothing "price", Count_Nothing "price"]
View File
@ -1,5 +1,7 @@
from Standard.Base import all
from Standard.Table import all
import Standard.Table.Data.Sort_Column_Selector
import Standard.Table.Data.Sort_Column
from Standard.Table.Data.Table as Table_Internal import Empty_Error
@ -8,7 +10,7 @@ import Standard.Table.Data.Storage
import Standard.Test
import Standard.Test.Problems
import Standard.Visualization
from Standard.Table.Error as Table_Errors import Invalid_Output_Column_Names, Duplicate_Output_Column_Names
from Standard.Table.Error as Table_Errors import Invalid_Output_Column_Names, Duplicate_Output_Column_Names, No_Input_Columns_Selected, Missing_Input_Columns
import project.Common_Table_Spec
@ -321,26 +323,6 @@ spec =
i.at "Items Count" . to_vector . should_equal [3, 2, 4]
i.at "Storage Type" . to_vector . should_equal [Storage.Text, Storage.Integer, Storage.Any]
Test.group "Aggregation" <|
name = ['name', ["foo", "bar", "foo", "baz", "foo", "bar", "quux"]]
price = ['price', [0.4, 3.5, Nothing, 6.7, Nothing, 97, Nothing]]
quantity = ['quantity', [10, 20, 30, 40, 50, 60, 70]]
t = Table.new [name, price, quantity]
agg = t.group by='name'
Test.specify "should allow counting group sizes" <|
agg.count.to_vector.should_equal [3, 2, 1, 1]
Test.specify "should allow aggregating columns with basic arithmetic aggregators" <|
agg.at 'price' . mean . to_vector . should_equal [0.4, 50.25, 6.7, Nothing]
agg.at 'price' . min . to_vector . should_equal [0.4, 3.5, 6.7, Nothing]
Test.specify "should allow aggregating with user-defined aggregate functions" <|
median vec =
sorted = vec.sort
if sorted.is_empty then Nothing else sorted.at (sorted.length-1 / 2).floor
agg.at 'quantity' . reduce median . to_vector . should_equal [30, 20, 40, 70]
Test.group "Column-wide statistics" <|
Test.specify 'should allow computing basic column-wide stats' <|
price = Column.from_vector 'price' [0.4, 3.5, Nothing, 6.7, Nothing, 97, Nothing]
@ -353,54 +335,36 @@ spec =
df = (Enso_Project.data / "clothes.csv").read
Test.specify "should allow sorting by a single column name" <|
r_1 = df.sort by="Quantity"
r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "Quantity"])
r_1.at 'Id' . to_vector . should_equal [2,4,1,3,5,6]
r_2 = df.sort by="Rating" missing_last=False
r_2.at 'Id' . to_vector . should_equal [2,6,5,1,4,3]
r_3 = df.sort by="Rating" missing_last=False order=Sort_Direction.Descending
r_3.at 'Id' . to_vector . should_equal [2,6,3,1,4,5]
r_3 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "Rating" Sort_Direction.Descending])
r_3.at 'Id' . to_vector . should_equal [3,1,4,5,2,6]
Test.specify 'should allow sorting by multiple column names' <|
r_1 = df.sort by=['Quantity', 'Rating']
r_1.at 'Id' . to_vector . should_equal [4,2,1,3,5,6]
r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity', Sort_Column.Name 'Rating'])
r_1.at 'Id' . to_vector . should_equal [2,4,1,3,6,5]
r_2 = df.sort by=['Rating', 'Quantity'] missing_last=False order=Sort_Direction.Descending
r_2.at 'Id' . to_vector . should_equal [6,2,3,1,4,5]
Test.specify 'should allow sorting by external columns' <|
quality_ratio = df.at 'Rating' / df.at 'Price'
r_1 = df.sort by=quality_ratio
r_1.at 'Id' . to_vector . should_equal [4,1,3,5,2,6]
r_2 = df.sort by=['Quantity', quality_ratio]
r_2.at 'Id' . to_vector . should_equal [4,2,1,3,5,6]
r_2 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Rating' Sort_Direction.Descending, Sort_Column.Name 'Quantity' Sort_Direction.Descending])
r_2.at 'Id' . to_vector . should_equal [3,1,4,5,6,2]
Test.specify 'should allow sorting with specific by-column rules' <|
r_1 = df.sort by=['Quantity', (Order_Rule 'Price' order=Sort_Direction.Descending)]
r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "Quantity", Sort_Column.Name "Price" Sort_Direction.Descending])
r_1.at 'Id' . to_vector . should_equal [4,2,3,1,6,5]
Test.specify 'should respect defined comparison operations for custom types' <|
c_1 = ['id', [1, 2, 3, 4, 5, 6]]
c_2 = ['val', [My 1 2, My 3 4, My 2 1, My 5 2, My 7 0, My 4 -1]]
df = Table.new [c_1, c_2]
r = df.sort by='val'
r = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'val'])
r.at 'id' . to_vector . should_equal [1,3,6,2,4,5]
Test.specify 'should allow passing a custom comparator per column and should missing-proof it' <|
c_1 = ['id', [1, 2, 3, 4, 5, 6]]
c_2 = ['val', [My 1 2, My 2 5, My 3 4, My 6 3, Nothing, My 1 0]]
df = Table.new [c_1, c_2]
cmp a b = (a.x-a.y).abs . compare_to (b.x-b.y).abs
r = df.sort by=(Order_Rule 'val' comparator=cmp)
r.at 'id' . to_vector . should_equal [1,3,6,2,4,5]
Test.specify 'should return dataflow error when passed a non-existent column' <|
r = df.sort by='foobar'
r.should_fail_with No_Such_Column_Error
Test.specify 'should return warnings and errors when passed a non-existent column' <|
action = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'foobar']) on_problems=_
tester table =
table.at 'Id' . to_vector . should_equal [1,2,3,4,5,6]
problems = [Missing_Input_Columns [Sort_Column.Name 'foobar'], No_Input_Columns_Selected]
Problems.test_problem_handling action problems tester
Test.specify 'should correctly reorder all kinds of columns and leave the original columns untouched' <|
ord = [0, 3, 2, 4, 1]
@ -411,7 +375,7 @@ spec =
objs = [Cons 1 2, Cons 2 3, Cons 6 7, Cons 8 9, Cons 10 30]
df = Table.new [['ord', ord], ['ints', ints], ['reals', reals], ['bools', bools], ['texts', texts], ['objs', objs]]
r = df.sort by='ord'
r = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'ord'])
r.at 'ints' . to_vector . should_equal [1, 5, 3, 2, 4]
df.at 'ints' . to_vector . should_equal ints
@ -507,9 +471,6 @@ spec =
i = t.index
c.to_vector . should_equal i.to_vector
Test.specify "should be accessible by `select` like other columns" <|
t.select ['ix'] . columns . first . to_vector . should_equal t.index.to_vector
Test.specify "should be able to be set by column" <|
with_index = t.set_index c
with_index.index.to_vector . should_equal c.to_vector
View File
@ -60,8 +60,6 @@ import project.System.File_Spec
import project.System.Process_Spec
import project.System.Reporting_Stream_Decoder_Spec
import project.Examples_Spec
main = Test.Suite.run_main <|
Any_Spec.spec
Array_Spec.spec
@ -70,7 +68,6 @@ main = Test.Suite.run_main <|
Conversion_Spec.spec
Deep_Export_Spec.spec
Error_Spec.spec
Examples_Spec.spec
File_Spec.spec
Reporting_Stream_Decoder_Spec.spec
Http_Header_Spec.spec
View File
@ -3,6 +3,7 @@ from Standard.Base import all
from Standard.Database import all
import Standard.Database.Data.Table as Database_Table
import Standard.Table.Data.Table as Dataframe_Table
from Standard.Table.Data.Aggregate_Column import Group_By, Average
import Standard.Visualization.Table.Visualization as Visualization
import Standard.Test
@ -47,19 +48,9 @@ visualization_spec connection =
json = make_json header=["A"] data=[['a', 'a']] all_rows=3 ixes_header=[] ixes=[]
vis . should_equal json
g = t.group by=["A", "B"] . at "C" . mean
g = t.aggregate [Group_By "A", Group_By "B", Average "C"] . at "Average C"
vis2 = Visualization.prepare_visualization g 1
json2 = make_json header=["C_mean"] data=[[4]] all_rows=2 ixes_header=["A", "B"] ixes=[['a'], [2]]
vis2 . should_equal json2
Test.specify "should visualize database aggregates" <|
agg = t.group by="A"
vis = Visualization.prepare_visualization agg 1
json = make_json header=["B", "C"] data=[[2], [3]] all_rows=3 ixes_header=["A"] ixes=[['a']]
vis . should_equal json
vis2 = Visualization.prepare_visualization (agg.at "C") 1
json2 = make_json header=["C"] data=[[3]] all_rows=3 ixes_header=["A"] ixes=[['a']]
json2 = make_json header=["Average C"] data=[[4.0]] all_rows=2 ixes_header=[] ixes=[]
vis2 . should_equal json2
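The `group`-then-`mean` pattern removed above maps onto a single `aggregate` call; a minimal sketch using the `Group_By` and `Average` constructors imported at the top of this file (column names taken from the test table):
    # One "Average C" value per distinct value of "A".
    stats = t.aggregate [Group_By "A", Average "C"]
    stats.at "Average C" . to_vector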
t2 = Dataframe_Table.new [["A", [1, 2, 3]], ["B", [4, 5, 6]], ["C", [7, 8, 9]]]
@ -78,21 +69,6 @@ visualization_spec connection =
json = make_json header=["A"] data=[[1, 2]] all_rows=3 ixes_header=[""] ixes=[[0, 1]]
vis . should_equal json
g = t2.group by="A" . at "C" . mean
vis2 = Visualization.prepare_visualization g 1
json2 = make_json header=["C_mean"] data=[[7.0]] all_rows=3 ixes_header=["A"] ixes=[[1]]
vis2 . should_equal json2
Test.specify "should visualize dataframe aggregates" <|
agg = t2.group by="A"
vis = Visualization.prepare_visualization agg 1
json = make_json header=["B", "C"] data=[[4], [7]] all_rows=3 ixes_header=["A"] ixes=[[1]]
vis . should_equal json
vis2 = Visualization.prepare_visualization (agg.at "C") 1
json2 = make_json header=["C"] data=[[7]] all_rows=3 ixes_header=["A"] ixes=[[1]]
vis2 . should_equal json2
Test.specify "should handle Vectors" <|
vis = Visualization.prepare_visualization [1, 2, 3] 2