mirror of
https://github.com/enso-org/enso.git
synced 2024-12-22 22:21:40 +03:00
Removing old functions and tidy up of Table types (#3519)
- Removed `select` method. - Removed `group` method. - Removed `Aggregate_Table` type. - Removed `Order_Rule` type. - Removed `sort` method from Table. - Expanded comments on `order_by`. - Update comment on `aggregate` on Database. - Update Visualisation to use new APIs. - Updated Data Science examples to use new APIs. - Moved Examples test out of Tests to own test. # Important Notes Need to get Examples_Tests added to CI.
This commit is contained in:
parent
e83c36d9d6
commit
a0c6fa9c96
4
.github/workflows/scala.yml
vendored
4
.github/workflows/scala.yml
vendored
@ -286,6 +286,7 @@ jobs:
|
||||
$ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Geo_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Visualization_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Image_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Examples_Tests
|
||||
|
||||
- name: Compile the Standard Libraries (Unix)
|
||||
shell: bash
|
||||
@ -311,6 +312,7 @@ jobs:
|
||||
$ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Geo_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Visualization_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Image_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Examples_Tests
|
||||
|
||||
- name: Test Engine Distribution Without Caches (Windows)
|
||||
shell: bash
|
||||
@ -321,6 +323,7 @@ jobs:
|
||||
$ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Geo_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Visualization_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Image_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Examples_Tests
|
||||
|
||||
- name: Compile the Standard Libraries (Windows)
|
||||
shell: bash
|
||||
@ -346,6 +349,7 @@ jobs:
|
||||
$ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Geo_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Visualization_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Image_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Examples_Tests
|
||||
|
||||
# Publish
|
||||
- name: Compress the built artifacts for upload
|
||||
|
@ -139,6 +139,8 @@
|
||||
API and added builders for customizing less common settings.][3516]
|
||||
- [Allow control of sort direction in `First` and `Last` aggregations.][3517]
|
||||
- [Implemented `Text.write`, replacing `File.write_text`.][3518]
|
||||
- [Removed obsolete `select`, `group`, `sort` and related types from tables.]
|
||||
[3519]
|
||||
|
||||
[debug-shortcuts]:
|
||||
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
||||
@ -219,6 +221,7 @@
|
||||
[3516]: https://github.com/enso-org/enso/pull/3516
|
||||
[3517]: https://github.com/enso-org/enso/pull/3517
|
||||
[3518]: https://github.com/enso-org/enso/pull/3518
|
||||
[3519]: https://github.com/enso-org/enso/pull/3519
|
||||
|
||||
#### Enso Compiler
|
||||
|
||||
|
@ -24,8 +24,8 @@ component-groups:
|
||||
- Standard.Base.Join:
|
||||
exports:
|
||||
- Standard.Database.Data.Table.Table.join
|
||||
- Standard.Database.Data.Table.Table.group
|
||||
- Standard.Database.Data.Table.Table.aggregate
|
||||
- Standard.Base.Transform:
|
||||
exports:
|
||||
- Standard.Database.Data.Table.Table.sort
|
||||
- Standard.Database.Data.Table.Table.order_by
|
||||
- Standard.Database.Data.Column.Column.to_table
|
||||
|
@ -4,6 +4,8 @@ import Standard.Database.Data.Internal.Helpers
|
||||
import Standard.Database.Data.Internal.IR
|
||||
import Standard.Database.Data.Table
|
||||
import Standard.Table.Data.Column as Materialized_Column
|
||||
import Standard.Table.Data.Sort_Column_Selector
|
||||
import Standard.Table.Data.Sort_Column
|
||||
|
||||
from Standard.Database.Data.Sql import Sql_Type
|
||||
from Standard.Database.Data.Table import Integrity_Error
|
||||
@ -458,12 +460,11 @@ type Column
|
||||
column.sort
|
||||
|
||||
> Example
|
||||
Sorting `column` in descending order, placing missing values at the
|
||||
top of the resulting column.
|
||||
column.sort order=Sort_Direction.Descending missing_last=False
|
||||
sort : Sort_Direction -> Boolean -> (Any -> Any -> Ordering) | Nothing -> Column
|
||||
sort order=Sort_Direction.Ascending missing_last=True =
|
||||
this.to_table.sort by=this order=order missing_last=missing_last . at this.name
|
||||
Sorting `column` in descending order.
|
||||
column.sort order=Sort_Direction.Descending
|
||||
sort : Sort_Direction -> Column
|
||||
sort order=Sort_Direction.Ascending =
|
||||
this.to_table.order_by (Sort_Column_Selector.By_Column [Sort_Column.Column this order]) . at this.name
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
@ -637,4 +638,3 @@ lift_aggregate new_name connection expected_type expr context =
|
||||
new_ixes = cols.second
|
||||
new_ctx = IR.subquery_as_ctx subquery . set_index new_ixes
|
||||
Column new_name connection new_col.sql_type new_col.expression new_ctx
|
||||
|
||||
|
@ -17,7 +17,6 @@ import Standard.Table.Internal.Aggregate_Column_Helper
|
||||
from Standard.Database.Data.Column as Column_Module import Column, Aggregate_Column_Builder
|
||||
from Standard.Database.Data.Internal.IR import Internal_Column
|
||||
from Standard.Table.Data.Table import No_Such_Column_Error
|
||||
from Standard.Table.Data.Order_Rule as Order_Rule_Module import Order_Rule
|
||||
from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column_Selector, By_Index
|
||||
from Standard.Base.Data.Text.Text_Ordering as Text_Ordering_Module import Text_Ordering
|
||||
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
|
||||
@ -356,7 +355,7 @@ type Table
|
||||
|
||||
Since this Table is backed by an SQL database, the Table returned by the
|
||||
`limit` method is deterministic only if the Table has been ordered (using
|
||||
the `sort` method).
|
||||
the `order_by` method).
|
||||
|
||||
Otherwise, no order is imposed, so the returned Table will include at most
|
||||
`max_rows` rows, but there are no guarantees on which rows will be
|
||||
@ -365,7 +364,7 @@ type Table
|
||||
Table is materialized.
|
||||
|
||||
The limit is applied at the very end, so the new Table behaves exactly as
|
||||
the old one, just limitting its results when being materialized.
|
||||
the old one, just limiting its results when being materialized.
|
||||
Specifically, applying further filters will still apply to the whole
|
||||
result set and the limit will be taken after applying these filters.
|
||||
|
||||
@ -373,7 +372,7 @@ type Table
|
||||
In the call below, assuming that the table of `t1` contains rows for
|
||||
numbers 1, 2, ..., 10, will return rows starting from 6 and not an empty
|
||||
result as one could expect if the limit was applied before the filters.
|
||||
t1 = table.sort by='A' . limit 5
|
||||
t1 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A"]) . limit 5
|
||||
t2 = t1.where (t1.at 'A' > 5)
|
||||
t2.to_dataframe
|
||||
limit : Integer -> Table
|
||||
@ -481,6 +480,28 @@ type Table
|
||||
descending order.
|
||||
|
||||
table.order_by (Sort_Column_Selector.By_Index [Sort_Column.Index 1, Sort_Column.Index -7 Sort_Direction.Descending])
|
||||
> Example
|
||||
Sorting `table` in ascending order by the value in column `'Quantity'`.
|
||||
|
||||
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity'])
|
||||
|
||||
> Example
|
||||
Sorting `table` in descending order by the value in column `'Quantity'`.
|
||||
|
||||
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity' Sort_Direction.Descending])
|
||||
|
||||
> Example
|
||||
Sorting `table` in ascending order by the value in column `'Quantity'`,
|
||||
using the value in column `'Rating'` for breaking ties.
|
||||
|
||||
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity', Sort_Column.Name 'Rating'])
|
||||
|
||||
> Example
|
||||
Sorting `table` in ascending order by the value in column `'Quantity'`,
|
||||
using the value in column `'Rating'` in descending order for breaking
|
||||
ties.
|
||||
|
||||
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity', Sort_Column.Name 'Rating' Sort_Direction.Descending])
|
||||
order_by : Sort_Column_Selector -> Text_Ordering -> Problem_Behavior -> Table
|
||||
order_by (columns = (Sort_Column_Selector.By_Name [(Sort_Column.Name (this.columns.at 0 . name))])) text_ordering=Text_Ordering on_problems=Report_Warning = Panic.handle_wrapped_dataflow_error <|
|
||||
problem_builder = Problem_Builder.new
|
||||
@ -497,91 +518,6 @@ type Table
|
||||
new_ctx = this.context.add_orders new_order_descriptors
|
||||
this.updated_context new_ctx
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Sorts the table according to the specified rules.
|
||||
|
||||
Arguments:
|
||||
- by: Specifies the columns used for reordering the table. This
|
||||
argument may be one of:
|
||||
- a text: The text is treated as a column name.
|
||||
- a column: Any column, which is an expression computed from this
|
||||
table.
|
||||
- an order rule: Specifies both the sorting column and additional
|
||||
settings, that will take precedence over the global parameters of
|
||||
this sort operation. The `column` field of the rule may be a text
|
||||
or a column, with the semantics described above.
|
||||
- a vector of any of the above: This will result in a hierarchical
|
||||
sorting, such that the first rule is applied first, the second is
|
||||
used for breaking ties, etc.
|
||||
- order: Specifies the default sort order for this operation. All the
|
||||
rules specified in the `by` argument will default to this setting,
|
||||
unless specified in the rule.
|
||||
- missing_last: Specifies the default placement of missing values when
|
||||
compared to non-missing ones. This setting may be overridden by the
|
||||
particular rules of the `by` argument. Note that this argument is
|
||||
independent from `order`, i.e. missing values will always be sorted
|
||||
according to this rule, ignoring the ascending / descending setting.
|
||||
|
||||
> Example
|
||||
Sorting `table` in ascending order by the value in column `'Quantity'`
|
||||
table.sort by='Quantity'
|
||||
|
||||
> Example
|
||||
Sorting `table` in descending order by the value in column `'Quantity'`,
|
||||
placing missing values at the top of the table.
|
||||
table.sort by='Quantity' order=Sort_Direction.Descending missing_last=False
|
||||
|
||||
> Example
|
||||
Sorting `table` in ascending order by the value in column `'Quantity'`,
|
||||
using the value in column `'Rating'` for breaking ties.
|
||||
table.sort by=['Quantity', 'Rating']
|
||||
|
||||
> Example
|
||||
Sorting `table` in ascending order by the value in column `'Quantity'`,
|
||||
using the value in column `'Rating'` in descending order for breaking
|
||||
ties.
|
||||
table.sort by=['Quantity', Order_Rule 'Rating' (order=Sort_Direction.Descending)]
|
||||
|
||||
> Example
|
||||
Sorting `table` in ascending order by the value in an externally
|
||||
computed column, using the value in column `'Rating'` for breaking
|
||||
ties.
|
||||
quality_ratio = table.at 'Rating' / table.at 'Price'
|
||||
table.sort by=[quality_ratio, 'Rating']
|
||||
sort : Text | Column | Order_Rule | Vector.Vector (Text | Column | Order_Rule) -> Sort_Direction -> Boolean -> Table
|
||||
sort by order=Sort_Direction.Ascending missing_last=True = Panic.recover Any <|
|
||||
missing_to_ir last = case last of
|
||||
True -> IR.Nulls_Last
|
||||
False -> IR.Nulls_First
|
||||
wrap_elem elem =
|
||||
IR.Order_Descriptor (this.resolve elem . expression) order (missing_to_ir missing_last) collation=Nothing
|
||||
to_ir elem = case elem of
|
||||
Text -> wrap_elem elem
|
||||
Column _ _ _ _ _ -> wrap_elem elem
|
||||
Order_Rule elem Nothing my_order my_nulls ->
|
||||
chosen_order = my_order.if_nothing order
|
||||
chosen_nulls = my_nulls.if_nothing missing_last
|
||||
IR.Order_Descriptor (this.resolve elem . expression) chosen_order (missing_to_ir chosen_nulls) collation=Nothing
|
||||
Order_Rule _ _ _ _ ->
|
||||
Error.throw <| Unsupported_Database_Operation_Error "Custom comparators are not supported in Database"
|
||||
elems = Helpers.unify_vector_singleton by . map to_ir
|
||||
new_ctx = this.context.set_orders elems
|
||||
this.updated_context new_ctx
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Selects a subset of columns from this table by name.
|
||||
|
||||
Arguments:
|
||||
- columns: The names of the columns to select from the table.
|
||||
select : Vector Text -> Table
|
||||
select columns =
|
||||
candidates = this.internal_columns + this.context.meta_index
|
||||
find_col name = candidates.find (p -> p.name == name)
|
||||
selected_cols = columns.map (find_col >> .catch) . filter (c -> c.is_nothing.not)
|
||||
this.updated_columns selected_cols
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Efficiently joins two tables based on either the index or a key column.
|
||||
@ -675,28 +611,31 @@ type Table
|
||||
|
||||
Table new_table_name this.connection new_columns new_ctx
|
||||
|
||||
## UNSTABLE
|
||||
## ALIAS group, summarize
|
||||
|
||||
Returns an aggregate table resulting from grouping the elements by the
|
||||
value of the specified column.
|
||||
Aggregates the rows in a table using any `Group_By` entries in columns.
|
||||
The columns argument specifies which additional aggregations to perform and to return.
|
||||
|
||||
Arguments:
|
||||
- by: The column names on which to group. If this is not set, the index
|
||||
will be used for grouping instead.
|
||||
group : Vector Text | Text | Nothing -> Aggregate_Table
|
||||
group by=Nothing = Panic.recover Any <|
|
||||
cols = case by of
|
||||
Nothing ->
|
||||
if this.context.meta_index.is_empty then Panic.throw <| Illegal_State_Error "Trying to group by an empty index." else
|
||||
this.context.meta_index
|
||||
_ ->
|
||||
- columns: Vector of `Aggregate_Column` specifying the aggregated table.
|
||||
- on_problems: Specifies how to handle problems if they occur, reporting
|
||||
them as warnings by default.
|
||||
|
||||
Helpers.unify_vector_singleton by . map (this.resolve >> .as_internal)
|
||||
exprs = cols.map .expression
|
||||
new_ctx = this.context.set_groups exprs . set_index cols
|
||||
Aggregate_Table this.name this.connection this.internal_columns new_ctx
|
||||
The following problems can occur:
|
||||
- If a column name is not in the input table, a `Missing_Input_Columns`.
|
||||
- If a column index is out of range, a `Column_Indexes_Out_Of_Range`.
|
||||
- If there are no valid columns in the output table, a `No_Output_Columns`.
|
||||
- If there are invalid column names in the output table, a `Invalid_Output_Column_Names`.
|
||||
- If there are duplicate column names in the output table, a `Duplicate_Output_Column_Names`.
|
||||
- If grouping on or computing the `Mode` on a floating point number, a `Floating_Point_Grouping`.
|
||||
- If an aggregation fails, an `Invalid_Aggregation_Method`.
|
||||
- If when concatenating values there is an unquoted delimiter, an `Unquoted_Delimiter`.
|
||||
- If there are more than 10 issues with a single column, an `Additional_Warnings`.
|
||||
|
||||
## Prototype Group By function
|
||||
> Example
|
||||
Group by the Key column, count the rows
|
||||
|
||||
table.aggregate [Group_By "Key", Count Nothing]
|
||||
aggregate : [Aggregate_Column] -> Problem_Behavior -> Table
|
||||
aggregate columns (on_problems=Report_Warning) =
|
||||
validated = Aggregate_Column_Helper.prepare_aggregate_columns columns this
|
||||
@ -980,70 +919,6 @@ type Table
|
||||
# TODO This should ideally be done in a streaming manner, or at least respect the row limits.
|
||||
this.to_dataframe.write path format on_existing_file column_mapping on_problems
|
||||
|
||||
## Represents a table with grouped rows.
|
||||
type Aggregate_Table
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Represents a table with grouped rows.
|
||||
|
||||
Arguments:
|
||||
- name: The name of the table.
|
||||
- connection: The connection with which the table is associated.
|
||||
- internal_columns: The internal representation of the table columns.
|
||||
- context: The context associated with this table.
|
||||
# type Aggregate_Table (name : Text) (connection : Connection)
|
||||
# (internal_columns : Vector [Text, IR.Expression])
|
||||
# (context : IR.Context)
|
||||
type Aggregate_Table name connection internal_columns context
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Returns a vector of aggregate columns in this table.
|
||||
columns : Vector.Vector
|
||||
columns = this.internal_columns . map this.make_column
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Returns a column containing the number of elements in each group.
|
||||
count : Column
|
||||
count =
|
||||
expr = IR.Operation "COUNT_ROWS" []
|
||||
# new_name connection expected_type expr context
|
||||
Column_Module.lift_aggregate "count" this.connection Sql.Sql_Type.integer expr this.context
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Returns an aggregate column with the given name, contained in this table.
|
||||
|
||||
Arguments:
|
||||
- name: The name of the aggregate column to get from the aggregate table.
|
||||
at : Text -> Column ! No_Such_Column_Error
|
||||
at name =
|
||||
internal = this.internal_columns.find (p -> p.name == name)
|
||||
this.make_column internal . map_error (_ -> No_Such_Column_Error name)
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Helper to create aggregate columns from internal columns.
|
||||
|
||||
Arguments:
|
||||
- internal: The internal column to make into an aggregate column.
|
||||
make_column : Internal_Column -> Aggregate_Column_Builder
|
||||
make_column internal =
|
||||
Aggregate_Column_Builder internal.name this.connection internal.sql_type internal.expression this.context
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Helper that returns the underlying table from before grouping.
|
||||
ungrouped : Table
|
||||
ungrouped =
|
||||
new_ctx = this.context.set_groups []
|
||||
new_cols = this.internal_columns.filter col->
|
||||
turned_into_index = this.context.meta_index.exists i-> i.name == col.name
|
||||
turned_into_index.not
|
||||
Table this.name this.connection new_cols new_ctx
|
||||
|
||||
type Integrity_Error
|
||||
|
||||
## UNSTABLE
|
||||
|
@ -10,6 +10,3 @@ export Standard.Database.Connection.Connection
|
||||
from Standard.Database.Connection.Database export all
|
||||
|
||||
import Standard.Table.Data.Table
|
||||
import Standard.Table.Data.Order_Rule
|
||||
from Standard.Table.Data.Table export No_Such_Column_Error
|
||||
from Standard.Table.Data.Order_Rule export Order_Rule
|
||||
|
@ -278,14 +278,6 @@ transactions_table : Table.Table
|
||||
transactions_table =
|
||||
(Enso_Project.data / "food_shop_transactions.csv") . read
|
||||
|
||||
## An aggregate table for the relevant examples.
|
||||
aggregate_table : Table.Aggregate_Table
|
||||
aggregate_table =
|
||||
transactions = here.transactions_table
|
||||
item_names = here.inventory_table.at "item_name"
|
||||
with_names = transactions.join item_names on="item_id"
|
||||
with_names.group by="item_name"
|
||||
|
||||
## An example regex match.
|
||||
match : Default_Engine.Match
|
||||
match =
|
||||
|
@ -49,10 +49,12 @@
|
||||
break ties in descending order.
|
||||
|
||||
import Standard.Examples
|
||||
import Standard.Table.Data.Sort_Column_Selector
|
||||
import Standard.Table.Data.Sort_Column
|
||||
|
||||
example_sort =
|
||||
table = Examples.inventory_table
|
||||
table.sort by=["total_stock", "sold_stock"] order=Sort_Direction.Descending
|
||||
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "total_stock", Sort_Column.Name "sold_stock" Sort_Direction.Descending])
|
||||
|
||||
> Example
|
||||
Compute the number of transactions that each item has participated in, as
|
||||
|
@ -11,9 +11,10 @@
|
||||
Get the item name and price columns from the shop inventory.
|
||||
|
||||
import Standard.Examples
|
||||
from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Name
|
||||
|
||||
example_select =
|
||||
Examples.inventory_table.select ["item_name", "price"]
|
||||
Examples.inventory_table.select_columns (By_Name ["item_name", "price"])
|
||||
|
||||
> Example
|
||||
Remove any rows that contain missing values from the table.
|
||||
|
@ -24,14 +24,15 @@
|
||||
example_map = Examples.integer_column.map (x -> x * x)
|
||||
|
||||
> Example
|
||||
Sort the shop inventory based on the per-item price in descending order and
|
||||
placing missing values at the top of the table.
|
||||
Sort the shop inventory based on the per-item price in descending order.
|
||||
|
||||
import Standard.Examples
|
||||
import Standard.Table.Data.Sort_Column_Selector
|
||||
import Standard.Table.Data.Sort_Column
|
||||
|
||||
example_sort =
|
||||
table = Examples.inventory_table
|
||||
table.sort by="price" order=Sort_Direction.Descending missing_last=false
|
||||
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "price" Sort_Direction.Descending])
|
||||
|
||||
> Example
|
||||
Add two columns to each other.
|
||||
|
@ -29,10 +29,10 @@ component-groups:
|
||||
- Standard.Base.Join:
|
||||
exports:
|
||||
- Standard.Table.Data.Table.Table.join
|
||||
- Standard.Table.Data.Table.Table.group
|
||||
- Standard.Table.Data.Table.Table.aggregate
|
||||
- Standard.Base.Transform:
|
||||
exports:
|
||||
- Standard.Table.Data.Table.Table.sort
|
||||
- Standard.Table.Data.Table.Table.order_by
|
||||
- Standard.Table.Data.Table.Table.to_csv
|
||||
- Standard.Table.Data.Column.Column.to_table
|
||||
- Standard.Base.Output:
|
||||
|
@ -996,14 +996,15 @@ type Column
|
||||
Examples.decimal_column.sort comparator=my_comparator
|
||||
sort : Sort_Direction -> Boolean -> (Any -> Any -> Ordering) | Nothing -> Column
|
||||
sort order=Sort_Direction.Ascending missing_last=True comparator=Nothing =
|
||||
comparator_to_java cmp x y = cmp x y . to_sign
|
||||
order_bool = case order of
|
||||
Sort_Direction.Ascending -> True
|
||||
Sort_Direction.Descending -> False
|
||||
java_cmp = case comparator of
|
||||
Nothing -> Nothing
|
||||
cmp -> Table.comparator_to_java cmp
|
||||
cmp -> comparator_to_java cmp
|
||||
rule = OrderBuilder.OrderRule.new this.java_column java_cmp order_bool missing_last
|
||||
fallback_cmp = Table.comparator_to_java .compare_to
|
||||
fallback_cmp = comparator_to_java .compare_to
|
||||
mask = OrderBuilder.buildOrderMask [rule].to_array fallback_cmp
|
||||
new_col = this.java_column.applyMask mask
|
||||
Column new_col
|
||||
|
@ -1,31 +0,0 @@
|
||||
from Standard.Base import all
|
||||
|
||||
type Order_Rule
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
A rule used for sorting table-like structures.
|
||||
|
||||
Arguments:
|
||||
- column: a value representing the data dimension by which this rule is
|
||||
sorting. This type does not specify the underlying representation of a
|
||||
column, assuming that the sorting engine defines its own column
|
||||
representation.
|
||||
- comparator: a function taking two elements of the data being sorted on
|
||||
and returning an `Ordering`. The function may be `Nothing`, in which
|
||||
case a natural ordering will be used. Note that certain table backends
|
||||
(such as database connectors) may not support this field being set to a
|
||||
non-`Nothing` value.
|
||||
- order: specifies whether the table should be sorted in an ascending or
|
||||
descending order. The default value of `Nothing` delegates the decision
|
||||
to the sorting function. Can be set to `Sort_Direction.Ascending` or
|
||||
`Sort_Direction.Descending` from the `Base` library, to specify the
|
||||
ordering.
|
||||
- missing_last: whether the missing values should be placed at the
|
||||
beginning or end of the sorted table. Note that this argument is
|
||||
independent from `order`, i.e. missing values will always be sorted
|
||||
according to this rule, ignoring the ascending / descending setting.
|
||||
The default value of `Nothing` delegates the decision to the sorting
|
||||
function.
|
||||
type Order_Rule column comparator=Nothing order=Nothing missing_last=Nothing
|
||||
|
@ -14,7 +14,6 @@ import Standard.Table.Internal.Parse_Values_Helper
|
||||
import Standard.Table.Internal.Delimited_Reader
|
||||
import Standard.Table.Internal.Problem_Builder
|
||||
|
||||
from Standard.Table.Data.Order_Rule as Order_Rule_Module import Order_Rule
|
||||
from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column_Selector, By_Index
|
||||
from Standard.Table.Data.Column_Type_Selection as Column_Type_Selection_Module import Column_Type_Selection, Auto
|
||||
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
|
||||
@ -32,9 +31,9 @@ import Standard.Base.Data.Ordering.Comparator
|
||||
|
||||
polyglot java import org.enso.table.data.table.Table as Java_Table
|
||||
polyglot java import org.enso.table.data.table.Column as Java_Column
|
||||
polyglot java import org.enso.table.operations.OrderBuilder
|
||||
polyglot java import org.enso.table.format.csv.Writer as Csv_Writer
|
||||
polyglot java import org.enso.table.format.xlsx.Writer as Spreadsheet_Writer
|
||||
polyglot java import org.enso.table.operations.OrderBuilder
|
||||
polyglot java import java.io.StringReader
|
||||
|
||||
## Creates a new table from a vector of `[name, items]` pairs.
|
||||
@ -498,7 +497,6 @@ type Table
|
||||
new_names = this.columns.map mapper
|
||||
this.take_end (this.length - 1) . rename_columns (Column_Mapping.By_Position new_names) on_problems=on_problems
|
||||
|
||||
|
||||
## ALIAS group, summarize
|
||||
|
||||
Aggregates the rows in a table using any `Group_By` entries in columns.
|
||||
@ -572,6 +570,44 @@ type Table
|
||||
descending order.
|
||||
|
||||
table.order_by (Sort_Column_Selector.By_Index [Sort_Column.Index 1, Sort_Column.Index -7 Sort_Direction.Descending])
|
||||
> Example
|
||||
Sorting the shop inventory based on the per-item price in ascending
|
||||
order.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_sort = Examples.inventory_table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "price"])
|
||||
|
||||
> Example
|
||||
Sort the shop inventory based on the per-item price in descending order
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_sort =
|
||||
table = Examples.inventory_table
|
||||
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "price" Sort_Direction.Descending])
|
||||
|
||||
> Example
|
||||
Sort the shop inventory based on the total stock, using the number sold
|
||||
to break ties in descending order.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_sort =
|
||||
table = Examples.inventory_table
|
||||
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "total_stock" Sort_Direction.Descending, Sort_Column.Name "sold_stock" Sort_Direction.Descending])
|
||||
|
||||
> Example
|
||||
Sort the shop inventory in ascending order by the total stock, using
|
||||
the number of items sold in descending order to break ties.
|
||||
|
||||
import Standard.Examples
|
||||
import Standard.Table
|
||||
|
||||
example_sort =
|
||||
table = Examples.inventory_table
|
||||
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "total_stock", Sort_Column.Name "sold_stock" Sort_Direction.Descending])
|
||||
|
||||
order_by : Sort_Column_Selector -> Text_Ordering -> Problem_Behavior -> Table
|
||||
order_by (columns = (Sort_Column_Selector.By_Name [(Sort_Column.Name (this.columns.at 0 . name))])) text_ordering=Text_Ordering on_problems=Report_Warning =
|
||||
problem_builder = Problem_Builder.new
|
||||
@ -749,20 +785,6 @@ type Table
|
||||
Nothing -> Error.throw No_Index_Set_Error
|
||||
i -> Column.Column i
|
||||
|
||||
## Alias Select Columns
|
||||
|
||||
Selects a subset of columns from this table by name.
|
||||
|
||||
> Example
|
||||
Get the item name and price columns from the shop inventory.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_select =
|
||||
Examples.inventory_table.select ["item_name", "price"]
|
||||
select : Vector -> Table
|
||||
select columns = Table (this.java_table.selectColumns columns.to_array)
|
||||
|
||||
## ALIAS Join Table
|
||||
|
||||
Efficiently joins two tables based on either the index or the specified
|
||||
@ -876,183 +898,6 @@ type Table
|
||||
cols = this.columns
|
||||
here.new [["Column", cols.map .name], ["Items Count", cols.map .count], ["Storage Type", cols.map .storage_type]] . set_index "Column"
|
||||
|
||||
## ALIAS Group a Table
|
||||
|
||||
Returns an aggregate table resulting from grouping the elements by the
|
||||
value of the specified column.
|
||||
|
||||
Arguments:
|
||||
- by: The column in the table to perform grouping by. If this argument
|
||||
is not set, the index is used for grouping instead.
|
||||
|
||||
> Example
|
||||
Compute the number of transactions that each item has participated in,
|
||||
as well as the number of each item sold across those transactions.
|
||||
|
||||
import Standard.Examples
|
||||
import Standard.Table
|
||||
|
||||
example_group =
|
||||
transactions = Examples.transactions_table
|
||||
item_names = Examples.inventory_table.at "item_name"
|
||||
aggregated = transactions.group by="item_id"
|
||||
num_transactions = aggregated.at "transaction_id" . reduce .length . rename "transaction_count"
|
||||
num_sold = aggregated.at "quantity" . reduce .sum . rename "num_sold"
|
||||
Table.join [item_names, num_transactions, num_sold]
|
||||
group : Text | Nothing -> Aggregate_Table
|
||||
group by=Nothing =
|
||||
Aggregate_Table (this.java_table.group by)
|
||||
|
||||
## ALIAS Sort Table
|
||||
UNSTABLE
|
||||
|
||||
Sorts the table according to the specified rules.
|
||||
|
||||
Arguments:
|
||||
- by: Specifies the columns used for reordering the table. This argument
|
||||
may be one of:
|
||||
- a text: The text is treated as a column name.
|
||||
- a column: Any column, that may or may not belong to this table.
|
||||
Sorting by a column will result in reordering the rows of this
|
||||
table in a way that would result in sorting the given column.
|
||||
- an order rule: Specifies both the sorting column and additional
|
||||
settings, that will take precedence over the global parameters of
|
||||
this sort operation. The `column` field of the rule may be a text
|
||||
or a column, with the semantics described above.
|
||||
- a vector of any of the above: This will result in a hierarchical
|
||||
sorting, such that the first rule is applied first, the second is
|
||||
used for breaking ties, etc.
|
||||
- order: Specifies the default sort order for this operation. All the
|
||||
rules specified in the `by` argument will default to this setting,
|
||||
unless specified in the rule.
|
||||
- missing_last: Specifies the default placement of missing values when
|
||||
compared to non-missing ones. This setting may be overridden by the
|
||||
particular rules of the `by` argument. Note that this argument is
|
||||
independent from `order`, i.e. missing values will always be sorted
|
||||
according to this rule, ignoring the ascending / descending setting.
|
||||
|
||||
> Example
|
||||
Sorting the shop inventory based on the per-item price in ascending
|
||||
order.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_sort = Examples.inventory_table.sort by="price"
|
||||
|
||||
> Example
|
||||
Sort the shop inventory based on the per-item price in descending order
|
||||
and placing missing values at the top of the table.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_sort =
|
||||
table = Examples.inventory_table
|
||||
table.sort by="price" order=Sort_Direction.Descending missing_last=false
|
||||
|
||||
> Example
|
||||
Sort the shop inventory based on the total stock, using the number sold
|
||||
to break ties in descending order.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_sort =
|
||||
table = Examples.inventory_table
|
||||
table.sort by=["total_stock", "sold_stock"] order=Sort_Direction.Descending
|
||||
|
||||
> Example
|
||||
Sort the shop inventory in ascending order by the total stock, using
|
||||
the number of items sold in descending order to break ties.
|
||||
|
||||
import Standard.Examples
|
||||
import Standard.Table
|
||||
|
||||
example_sort =
|
||||
table = Examples.inventory_table
|
||||
sold_stock_rule = Table.Order_Rule "sold_stock" order=Sort_Direction.Descending
|
||||
table.sort by=["total_stock", sold_stock_rule]
|
||||
|
||||
> Example
|
||||
Sorting the inventory in descending order based on the percentage of
|
||||
the total stock sold, using the popularity of the product to break
|
||||
ties.
|
||||
|
||||
import Standard.Examples
|
||||
import Standard.Table
|
||||
|
||||
example_sort =
|
||||
table = Examples.inventory_table.join Examples.popularity_table
|
||||
percentage_sold = table.at "sold_stock" / table.at "total_stock"
|
||||
table.sort by=[percentage_sold, "popularity"] order=Sort_Direction.Descending
|
||||
|
||||
> Example
|
||||
Sort the inventory by the price using a custom comparator function.
|
||||
|
||||
import Standard.Examples
|
||||
import Standard.Table
|
||||
|
||||
example_sort =
|
||||
table = Examples.inventory_table
|
||||
comparator a b = a.compare_to b*2
|
||||
price_rule = Table.Order_Rule "price" comparator=comparator
|
||||
table.sort by=price_rule
|
||||
sort : Text | Column.Column | Order_Rule | Vector.Vector (Text | Column.Column | Order_Rule) -> Sort_Direction -> Boolean -> Table
|
||||
sort by order=Sort_Direction.Ascending missing_last=True = Panic.recover Any <|
|
||||
rules = this.build_java_order_rules by order missing_last
|
||||
fallback_cmp = here.comparator_to_java .compare_to
|
||||
mask = OrderBuilder.buildOrderMask rules.to_array fallback_cmp
|
||||
new_table = this.java_table.applyMask mask
|
||||
Table new_table
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Transforms order rules from Enso into Java.
|
||||
|
||||
Arguments:
|
||||
- rules: The rule(s) to convert.
|
||||
- order: The sorting order.
|
||||
- missing_last: Whether or not missing values should be ordered last.
|
||||
build_java_order_rules : (Text | Column.Column. | Order_Rule | Vector (Text | Column.Column | Order_Rule)) -> Sort_Direction -> Boolean -> Vector
|
||||
build_java_order_rules rules order missing_last = case rules of
|
||||
Text -> [this.build_java_order_rule rules order missing_last]
|
||||
Column.Column _ -> [this.build_java_order_rule rules order missing_last]
|
||||
Order_Rule _ _ _ _ -> [this.build_java_order_rule rules order missing_last]
|
||||
Vector.Vector _ -> rules.map (this.build_java_order_rule _ order missing_last)
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Builds a java order rule.
|
||||
|
||||
Arguments:
|
||||
- rule: The rule to convert.
|
||||
- order: The sort order.
|
||||
- missing_last: Whether or not missing values should be ordered last.
|
||||
build_java_order_rule : (Text | Column.Column | Order_Rule) -> Sort_Direction -> Boolean -> OrderRule
|
||||
build_java_order_rule rule order missing_last =
|
||||
order_bool = case order of
|
||||
Sort_Direction.Ascending -> True
|
||||
Sort_Direction.Descending -> False
|
||||
case rule of
|
||||
Text ->
|
||||
column = Panic.rethrow (this.at rule)
|
||||
OrderBuilder.OrderRule.new column.java_column Nothing order_bool missing_last
|
||||
Column.Column c ->
|
||||
OrderBuilder.OrderRule.new c Nothing order_bool missing_last
|
||||
Order_Rule col_ref cmp rule_order rule_nulls_last ->
|
||||
c = case col_ref of
|
||||
Text -> this.at col_ref . java_column
|
||||
Column.Column c -> c
|
||||
o = case rule_order of
|
||||
Nothing -> order_bool
|
||||
Sort_Direction.Ascending -> True
|
||||
Sort_Direction.Descending -> False
|
||||
nulls = case rule_nulls_last of
|
||||
Nothing -> missing_last
|
||||
_ -> rule_nulls_last
|
||||
java_cmp = case cmp of
|
||||
Nothing -> Nothing
|
||||
c -> here.comparator_to_java c
|
||||
OrderBuilder.OrderRule.new c java_cmp o nulls
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Concatenates `other` to `this`.
|
||||
@ -1414,86 +1259,6 @@ Text.write_to_spreadsheet cell = cell.setCellValue this
|
||||
which should be set by this method.
|
||||
Date.write_to_spreadsheet cell = cell.setCellValue this.internal_local_date
|
||||
|
||||
|
||||
|
||||
## Represents a table with grouped rows.
|
||||
type Aggregate_Table
|
||||
|
||||
## PRIVATE
|
||||
|
||||
A table type with grouped rows.
|
||||
|
||||
Arguments:
|
||||
- java_table: The internal representation of the table.
|
||||
type Aggregate_Table java_table
|
||||
|
||||
## Returns a vector of aggregate columns in this table.
|
||||
|
||||
> Example
|
||||
Get a vector of aggregate columns from this table.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_columns = Examples.aggregate_table.columns
|
||||
columns : Vector.Vector
|
||||
columns = Vector.Vector this.java_table.getColumns . map Column.Aggregate_Column
|
||||
|
||||
## Returns a table containing columns resulting from calling `values` on
|
||||
each column in `this`.
|
||||
|
||||
> Example
|
||||
Get the values table from an aggregate table.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_values = Examples.aggregate_table.values
|
||||
values : Table
|
||||
values = this.columns . map (_.values name_suffix='') . reduce .join
|
||||
|
||||
## Returns a column containing the number of elements in each group of the
|
||||
aggregate table.
|
||||
|
||||
> Examples
|
||||
Get the counts for an aggregate table.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_count = Examples.aggregate_table.count
|
||||
count : Column
|
||||
count = Column.Column this.java_table.count
|
||||
|
||||
## ALIAS Get a Column
|
||||
|
||||
Returns an aggregate column with the given name, contained in this table.
|
||||
|
||||
Arguments:
|
||||
- name: The name of the aggregate column to get.
|
||||
|
||||
> Example
|
||||
Get the transaction ids column from the aggregate table.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_at = Examples.aggregate_table.at "transaction_id"
|
||||
at : Text -> Column ! No_Such_Column_Error
|
||||
at name = case this.java_table.getColumnByName name of
|
||||
Nothing -> Error.throw (No_Such_Column_Error name)
|
||||
c -> Column.Aggregate_Column c
|
||||
|
||||
## Prints an ASCII-art table with this data to the standard output.
|
||||
|
||||
Arguments:
|
||||
- show_rows: the number of initial rows that should be displayed.
|
||||
|
||||
> Example
|
||||
Pretty-print and display an aggregate table in the console.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_print = Examples.aggregate_table.print
|
||||
print : Integer -> Nothing
|
||||
print show_rows=10 = this.values.print show_rows
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
An error returned when a non-existent column is being looked up.
|
||||
@ -1588,17 +1353,6 @@ print_table header rows indices_count format_term =
|
||||
" " + y
|
||||
([" " + header_line, divider] + row_lines).join '\n'
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Wraps the Enso comparator function so it's usable in Java.
|
||||
|
||||
Arguments:
|
||||
- cmp: The Enso comparator function.
|
||||
- x: The left operand to the comparator.
|
||||
- y: The right operand to the comparator.
|
||||
comparator_to_java : (Any -> Any -> Ordering) -> Any -> Any -> Integer
|
||||
comparator_to_java cmp x y = cmp x y . to_sign
|
||||
|
||||
Table.from (that : Text) (format:File_Format.Delimited|File_Format.Fixed_Width = File_Format.Delimited '\t') (on_problems:Problem_Behavior=Report_Warning) =
|
||||
java_reader = StringReader.new that
|
||||
Delimited_Reader.read_from_reader format java_reader on_problems
|
||||
|
@ -1,6 +1,5 @@
|
||||
from Standard.Base import all
|
||||
|
||||
from Standard.Table.Data.Table as Table_Module import No_Such_Column_Error
|
||||
from Standard.Table.Data.Column as Column_Module import Column
|
||||
from Standard.Table.Data.Aggregate_Column import all
|
||||
from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column_Selector, By_Name, By_Index, By_Column
|
||||
|
@ -7,7 +7,6 @@ import Standard.Table.Io.Spreadsheet
|
||||
import Standard.Table.Io.Spreadsheet_Write_Mode
|
||||
import Standard.Table.Data.Table
|
||||
import Standard.Table.Data.Column
|
||||
import Standard.Table.Data.Order_Rule
|
||||
import Standard.Table.Model
|
||||
|
||||
from Standard.Table.Io.Excel export Excel_Section, Excel_Range, read_excel
|
||||
@ -19,7 +18,6 @@ export Standard.Table.Model
|
||||
export Standard.Table.Io.File_Read
|
||||
|
||||
from Standard.Table.Data.Table export new, from_rows, join, concat, No_Such_Column_Error, Table
|
||||
from Standard.Table.Data.Order_Rule export Order_Rule
|
||||
|
||||
## ALIAS To Table
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
from Standard.Base import all
|
||||
from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Name
|
||||
|
||||
import Standard.Table.Data.Table as Dataframe_Table
|
||||
import Standard.Table.Data.Column as Dataframe_Column
|
||||
@ -31,8 +32,8 @@ prepare_visualization x max_rows = Helpers.recover_errors <| case x of
|
||||
# Materialize a table with indices as normal columns (because dataframe does not support multi-indexing).
|
||||
df = x.reset_index.to_dataframe max_rows
|
||||
# Then split into actual columns and indices.
|
||||
vis_df = df.select (x.columns.map .name)
|
||||
indices = df.select (x.indices.map .name) . columns
|
||||
vis_df = df.select_columns (By_Name (x.columns.map .name))
|
||||
indices = df.select_columns (By_Name (x.indices.map .name)) . columns
|
||||
all_rows_count = x.row_count
|
||||
here.make_json vis_df indices all_rows_count
|
||||
|
||||
@ -43,14 +44,9 @@ prepare_visualization x max_rows = Helpers.recover_errors <| case x of
|
||||
here.prepare_visualization x.to_table max_rows
|
||||
|
||||
# We display aggregates as their ungrouped counterparts.
|
||||
Dataframe_Table.Aggregate_Table _ ->
|
||||
ungrouped = Dataframe_Table.Table x.java_table.getUnderlyingTable
|
||||
here.prepare_visualization ungrouped max_rows
|
||||
Dataframe_Column.Aggregate_Column _ ->
|
||||
ungrouped = Dataframe_Column.Column x.java_column.getColumn
|
||||
here.prepare_visualization ungrouped.to_table max_rows
|
||||
Database_Table.Aggregate_Table _ _ _ _ ->
|
||||
here.prepare_visualization x.ungrouped max_rows
|
||||
Database_Column.Aggregate_Column_Builder _ _ _ _ _ ->
|
||||
here.prepare_visualization x.ungrouped.to_table max_rows
|
||||
|
||||
|
@ -10,7 +10,6 @@ import org.enso.table.data.index.HashIndex;
|
||||
import org.enso.table.data.index.Index;
|
||||
import org.enso.table.data.index.MultiValueIndex;
|
||||
import org.enso.table.data.mask.OrderMask;
|
||||
import org.enso.table.data.table.aggregate.AggregateTable;
|
||||
import org.enso.table.data.table.problems.AggregatedProblems;
|
||||
import org.enso.table.error.NoSuchColumnException;
|
||||
import org.enso.table.error.UnexpectedColumnTypeException;
|
||||
@ -468,11 +467,6 @@ public class Table {
|
||||
return new Table(newColumns, index);
|
||||
}
|
||||
|
||||
public AggregateTable group(String by) {
|
||||
Table t = by == null ? this : indexFromColumn(by);
|
||||
return new AggregateTable(t);
|
||||
}
|
||||
|
||||
/** @return a copy of the Table containing a slice of the original data */
|
||||
public Table slice(int offset, int limit) {
|
||||
Column[] newColumns = new Column[columns.length];
|
||||
|
@ -1,58 +0,0 @@
|
||||
package org.enso.table.data.table.aggregate;
|
||||
|
||||
import org.enso.table.data.column.operation.aggregate.Aggregator;
|
||||
import org.enso.table.data.index.Index;
|
||||
import org.enso.table.data.table.Column;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
/** A column wrapper used for aggregation operations. */
|
||||
public class AggregateColumn {
|
||||
private final Index uniqueIndex;
|
||||
private final Column column;
|
||||
|
||||
/**
|
||||
* Creates a new column
|
||||
*
|
||||
* @param uniqueIndex the unique index obtained from the column's index
|
||||
* @param column the wrapped column
|
||||
*/
|
||||
public AggregateColumn(Index uniqueIndex, Column column) {
|
||||
this.uniqueIndex = uniqueIndex;
|
||||
this.column = column;
|
||||
}
|
||||
|
||||
/**
|
||||
* Aggregates the groups using a given aggregation operation.
|
||||
*
|
||||
* @param aggName name of a vectorized operation that can be used if possible. If null is passed,
|
||||
* this parameter is unused.
|
||||
* @param outSuffix a string appended to the name of the resulting column.
|
||||
* @param aggregatorFunction the function to use if a vectorized operation is not available.
|
||||
* @param skipNa whether missing values should be passed to the {@code fallback} function.
|
||||
* @return a column indexed by the unique index of this aggregate, storing results of applying the
|
||||
* specified operation.
|
||||
*/
|
||||
public Column aggregate(
|
||||
String aggName,
|
||||
String outSuffix,
|
||||
Function<List<Object>, Object> aggregatorFunction,
|
||||
boolean skipNa) {
|
||||
Aggregator aggregator =
|
||||
column.getStorage().getAggregator(aggName, aggregatorFunction, skipNa, uniqueIndex.size());
|
||||
|
||||
for (int i = 0; i < uniqueIndex.size(); i++) {
|
||||
IntStream ixes =
|
||||
column.getIndex().loc(uniqueIndex.iloc(i)).stream().mapToInt(Integer::intValue);
|
||||
aggregator.nextGroup(ixes);
|
||||
}
|
||||
return new Column(column.getName() + outSuffix, uniqueIndex, aggregator.seal());
|
||||
}
|
||||
|
||||
/** @return the underlying (ungrouped) column. */
|
||||
public Column getColumn() {
|
||||
return column;
|
||||
}
|
||||
}
|
@ -1,59 +0,0 @@
|
||||
package org.enso.table.data.table.aggregate;
|
||||
|
||||
import org.enso.table.data.column.storage.LongStorage;
|
||||
import org.enso.table.data.index.Index;
|
||||
import org.enso.table.data.table.Column;
|
||||
import org.enso.table.data.table.Table;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/** Represents a table grouped by a given index. */
|
||||
public class AggregateTable {
|
||||
private final Table table;
|
||||
private final Index uniqueIndex;
|
||||
|
||||
/** @param table the underlying table */
|
||||
public AggregateTable(Table table) {
|
||||
this.table = table;
|
||||
this.uniqueIndex = table.getIndex().unique();
|
||||
}
|
||||
|
||||
/** @return a column containing group sizes in this aggregate. */
|
||||
public Column count() {
|
||||
long[] counts = new long[uniqueIndex.size()];
|
||||
for (int i = 0; i < uniqueIndex.size(); i++) {
|
||||
List<Integer> items = table.getIndex().loc(uniqueIndex.iloc(i));
|
||||
counts[i] = items == null ? 0 : items.size();
|
||||
}
|
||||
LongStorage storage = new LongStorage(counts);
|
||||
return new Column("count", uniqueIndex, storage);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a column with the given name.
|
||||
*
|
||||
* @param n the column name
|
||||
* @return column with the given name or null if does not exist
|
||||
*/
|
||||
public AggregateColumn getColumnByName(String n) {
|
||||
Column c = table.getColumnByName(n);
|
||||
if (c == null) {
|
||||
return null;
|
||||
} else {
|
||||
return new AggregateColumn(uniqueIndex, c);
|
||||
}
|
||||
}
|
||||
|
||||
/** @return Aggregate columns contained in this table. */
|
||||
public AggregateColumn[] getColumns() {
|
||||
return Arrays.stream(table.getColumns())
|
||||
.map(c -> new AggregateColumn(uniqueIndex, c))
|
||||
.toArray(AggregateColumn[]::new);
|
||||
}
|
||||
|
||||
/** @return the underlying (ungrouped) table. */
|
||||
public Table getUnderlyingTable() {
|
||||
return table;
|
||||
}
|
||||
}
|
1
test/Examples_Tests/README.md
Normal file
1
test/Examples_Tests/README.md
Normal file
@ -0,0 +1 @@
|
||||
This is a set of tests for the `Examples` library for Enso.
|
7
test/Examples_Tests/package.yaml
Normal file
7
test/Examples_Tests/package.yaml
Normal file
@ -0,0 +1,7 @@
|
||||
name: Tests
|
||||
namespace: enso_dev
|
||||
enso-version: default
|
||||
version: 0.0.1
|
||||
license: MIT
|
||||
author: enso-dev@enso.org
|
||||
maintainer: enso-dev@enso.org
|
@ -112,17 +112,11 @@ spec = Test.group "Examples" <|
|
||||
Examples.text_column_1
|
||||
Examples.text_column_2
|
||||
|
||||
Test.specify "should provide an aggregate column" <|
|
||||
Examples.aggregate_column
|
||||
|
||||
Test.specify "should provide various example tables" <|
|
||||
Examples.inventory_table
|
||||
Examples.popularity_table
|
||||
Examples.transactions_table
|
||||
|
||||
Test.specify "should provide an aggregate table" <|
|
||||
Examples.aggregate_table
|
||||
|
||||
Test.specify "should provide an example of a regex match" <|
|
||||
match = Examples.match
|
||||
match.groups.length . should_equal 5
|
8
test/Examples_Tests/src/Main.enso
Normal file
8
test/Examples_Tests/src/Main.enso
Normal file
@ -0,0 +1,8 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import Standard.Test
|
||||
|
||||
import project.Examples_Spec
|
||||
|
||||
main = Test.Suite.run_main <|
|
||||
Examples_Spec.spec
|
@ -830,7 +830,7 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te
|
||||
table = table_builder [["A", ["foo", "bar", "foo", "foo"]], ["B", ["a", "b", "c", "d"]]]
|
||||
result = table.aggregate [Group_By "A", (Concatenate "B" prefix="[[" suffix="]]" separator="; ")]
|
||||
result.row_count . should_equal 2
|
||||
materialized = materialize result . sort "A"
|
||||
materialized = materialize result . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A"])
|
||||
materialized.columns.length . should_equal 2
|
||||
materialized.columns.at 0 . name . should_equal "A"
|
||||
materialized.columns.at 0 . to_vector . should_equal ["bar", "foo"]
|
||||
@ -910,14 +910,14 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te
|
||||
|
||||
r1 = t5.aggregate [Group_By "G", Count_Distinct "A" (ignore_nothing=True)]
|
||||
r1.row_count . should_equal 2
|
||||
m1 = materialize r1 . sort "G"
|
||||
m1 = materialize r1 . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "G"])
|
||||
m1.columns.length . should_equal 2
|
||||
m1.columns.first.to_vector . should_equal ["bar", "foo"]
|
||||
m1.columns.second.to_vector . should_equal [0, 1]
|
||||
|
||||
r2 = t5.aggregate [Group_By "G", Count_Distinct "A" (ignore_nothing=False)]
|
||||
r2.row_count . should_equal 2
|
||||
m2 = materialize r2 . sort "G"
|
||||
m2 = materialize r2 . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "G"])
|
||||
m2.columns.length . should_equal 2
|
||||
m2.columns.first.to_vector . should_equal ["bar", "foo"]
|
||||
m2.columns.second.to_vector . should_equal [1, 2]
|
||||
@ -959,7 +959,7 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te
|
||||
|
||||
r2 = table.aggregate [Group_By "G", Average "X"]
|
||||
r2.row_count.should_equal 2
|
||||
m2 = materialize r2 . sort "G"
|
||||
m2 = materialize r2 . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "G"])
|
||||
m2.columns.length . should_equal 2
|
||||
m2.columns.first.to_vector . should_equal ["a", "b"]
|
||||
m2.columns.second.to_vector . should_equal [0.5, 1]
|
||||
@ -1145,7 +1145,7 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te
|
||||
table = table_builder [["A", [1, 1, 2, 1]], ["B", [3, 2, 2, 3]], ["C", [11, 12, 13, 14]]]
|
||||
grouped = table.aggregate [Group_By "B", Group_By "A"]
|
||||
grouped.row_count . should_equal 3
|
||||
materialized = materialize grouped . sort ["A", "B"]
|
||||
materialized = materialize grouped . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A", Sort_Column.Name "B"])
|
||||
materialized.columns.length . should_equal 2
|
||||
materialized.columns.at 1 . name . should_equal "A"
|
||||
materialized.columns.at 1 . to_vector . should_equal [1, 1, 2]
|
||||
|
@ -1,14 +1,19 @@
|
||||
from Standard.Base import all
|
||||
from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Name
|
||||
import Standard.Table.Data.Sort_Column_Selector
|
||||
import Standard.Table.Data.Sort_Column
|
||||
|
||||
import project.Database.Helpers.Fake_Test_Connection
|
||||
import Standard.Database.Data.Dialect
|
||||
import Standard.Database.Data.Table as Table_Module
|
||||
import Standard.Test
|
||||
import Standard.Test.Problems
|
||||
|
||||
from Standard.Table.Data.Aggregate_Column import all
|
||||
from Standard.Database import all
|
||||
from Standard.Database.Data.Sql import Sql_Type
|
||||
from Standard.Table import No_Such_Column_Error, Order_Rule
|
||||
from Standard.Table import No_Such_Column_Error
|
||||
from Standard.Table.Error as Table_Errors import No_Input_Columns_Selected, Missing_Input_Columns
|
||||
from Standard.Database.Error as Database_Errors import Unsupported_Database_Operation_Error
|
||||
|
||||
spec =
|
||||
@ -38,7 +43,7 @@ spec =
|
||||
Test.group "[Codegen] Basic Select" <|
|
||||
Test.specify "should select columns from a table" <|
|
||||
t1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1"', []]
|
||||
t2 = t1.select ["C", "B", "undefined"]
|
||||
t2 = t1.select_columns (By_Name ["C", "B", "undefined"]) reorder=True
|
||||
t2.to_sql.prepare . should_equal ['SELECT "T1"."C" AS "C", "T1"."B" AS "B" FROM "T1" AS "T1"', []]
|
||||
|
||||
foo = t1.at "A" . rename "FOO"
|
||||
@ -47,7 +52,7 @@ spec =
|
||||
t3 = t2.set "bar" foo
|
||||
t3.to_sql.prepare . should_equal ['SELECT "T1"."C" AS "C", "T1"."B" AS "B", "T1"."A" AS "bar" FROM "T1" AS "T1"', []]
|
||||
|
||||
Test.specify "should fail if at is called for a nonexisting column" <|
|
||||
Test.specify "should fail if at is called for a non-existent column" <|
|
||||
t1.at "undefined" . should_fail_with No_Such_Column_Error
|
||||
|
||||
Test.specify "should allow to limit the amount of returned results" <|
|
||||
@ -55,7 +60,7 @@ spec =
|
||||
t2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" LIMIT 5', []]
|
||||
|
||||
Test.specify "should work correctly when there are no columns" <|
|
||||
empty = t1.select []
|
||||
empty = t1.select_columns (By_Name [])
|
||||
json = Json.from_pairs [["query", Nothing], ["message", "The table has no columns so a query cannot be generated."]]
|
||||
empty.to_json . should_equal json
|
||||
empty.columns.length . should_equal 0
|
||||
@ -146,28 +151,26 @@ spec =
|
||||
|
||||
Test.group "[Codegen] Sorting" <|
|
||||
Test.specify "should allow sorting by a single column name" <|
|
||||
r1 = t1.sort by="A" . at "B"
|
||||
r1.to_sql.prepare . should_equal ['SELECT "T1"."B" AS "B" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC NULLS LAST', []]
|
||||
r1 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A"]) . at "B"
|
||||
r1.to_sql.prepare . should_equal ['SELECT "T1"."B" AS "B" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC', []]
|
||||
|
||||
r2 = t1.sort by="B" missing_last=False order=Sort_Direction.Descending . at "A"
|
||||
r2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A" FROM "T1" AS "T1" ORDER BY "T1"."B" DESC NULLS FIRST', []]
|
||||
r2 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "B" Sort_Direction.Descending]) . at "A"
|
||||
r2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A" FROM "T1" AS "T1" ORDER BY "T1"."B" DESC', []]
|
||||
|
||||
Test.specify 'should allow sorting by multiple column names' <|
|
||||
r1 = t1.sort by=['A', 'B']
|
||||
r1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC NULLS LAST, "T1"."B" ASC NULLS LAST', []]
|
||||
|
||||
Test.specify 'should allow sorting by expressions' <|
|
||||
sum = t1.at 'A' + t1.at 'B'
|
||||
r1 = t1.sort by=sum . at "C"
|
||||
r1.to_sql.prepare . should_equal ['SELECT "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY ("T1"."A" + "T1"."B") ASC NULLS LAST', []]
|
||||
r1 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'A', Sort_Column.Name 'B'])
|
||||
r1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC, "T1"."B" ASC', []]
|
||||
|
||||
Test.specify 'should allow sorting with specific by-column rules' <|
|
||||
r1 = t1.sort by=['A', (Order_Rule 'B' order=Sort_Direction.Descending)]
|
||||
r1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC NULLS LAST, "T1"."B" DESC NULLS LAST', []]
|
||||
r1 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A", Sort_Column.Name "B" Sort_Direction.Descending])
|
||||
r1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC, "T1"."B" DESC', []]
|
||||
|
||||
Test.specify 'should return dataflow error when passed a non-existent column' <|
|
||||
r = t1.sort by='foobar'
|
||||
r.should_fail_with No_Such_Column_Error
|
||||
Test.specify 'should return warnings and errors when passed a non-existent column' <|
|
||||
action = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'foobar']) on_problems=_
|
||||
tester table =
|
||||
table.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1"', []]
|
||||
problems = [Missing_Input_Columns [Sort_Column.Name 'foobar'], No_Input_Columns_Selected]
|
||||
Problems.test_problem_handling action problems tester
|
||||
|
||||
Test.group "Helpers" <|
|
||||
Test.specify "combine_names should combine lists of names" <|
|
||||
|
@ -1,7 +1,14 @@
|
||||
from Standard.Base import all
|
||||
from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Name
|
||||
|
||||
from Standard.Database import all
|
||||
|
||||
import Standard.Table.Data.Table as Materialized_Table
|
||||
import Standard.Table.Data.Sort_Column_Selector
|
||||
import Standard.Table.Data.Sort_Column
|
||||
import Standard.Test
|
||||
import Standard.Test.Problems
|
||||
from Standard.Table.Error as Table_Errors import No_Input_Columns_Selected, Missing_Input_Columns
|
||||
import project.Database.Helpers.Name_Generator
|
||||
|
||||
from Standard.Table.Data.Aggregate_Column import all
|
||||
@ -46,7 +53,7 @@ spec prefix connection pending=Nothing =
|
||||
ix2.name . should_equal 'a'
|
||||
ix2.to_vector . should_equal [1, 4]
|
||||
Test.specify "should work correctly when there are no columns" <|
|
||||
empty = t1.select []
|
||||
empty = t1.select_columns (By_Name [])
|
||||
empty.to_dataframe.columns.length . should_equal 0
|
||||
empty.to_dataframe.row_count . should_equal empty.row_count
|
||||
Test.specify "should handle bigger result sets" <|
|
||||
@ -125,24 +132,24 @@ spec prefix connection pending=Nothing =
|
||||
the Dataframes library, so it is independent of the library under
|
||||
testing here.
|
||||
Test.specify "should allow joining tables index-on-index" <|
|
||||
r_1 = a.set_index 'x' . join (b.set_index 'w') . to_dataframe . sort by=['y', 'z']
|
||||
r_1 = a.set_index 'x' . join (b.set_index 'w') . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'y', Sort_Column.Name 'z'])
|
||||
r_1.at 'y' . to_vector . should_equal ['bar', 'baz', 'eggs', 'foo', 'spam', 'spam', 'spam']
|
||||
r_1.at 'z' . to_vector . should_equal [Nothing, Nothing, 'foo', Nothing, 'bar', 'eggs', 'foo']
|
||||
|
||||
r_2 = a.set_index 'y' . join (b.set_index 'z') drop_unmatched=True . to_dataframe . sort by=['x', 'w']
|
||||
r_2 = a.set_index 'y' . join (b.set_index 'z') drop_unmatched=True . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'x', Sort_Column.Name 'w'])
|
||||
r_2.at 'x' . to_vector . should_equal [0, 0, 1, 1, 3, 6]
|
||||
r_2.at 'w' . to_vector . should_equal [3, 6, 3, 5, 5, 3]
|
||||
|
||||
Test.specify "should allow joining tables column-on-index" <|
|
||||
r_1 = a.join (b.set_index 'w') on='x' . to_dataframe . sort by=['y', 'z']
|
||||
r_1 = a.join (b.set_index 'w') on='x' . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'y', Sort_Column.Name 'z'])
|
||||
r_1.at 'y' . to_vector . should_equal ['bar', 'baz', 'eggs', 'foo', 'spam', 'spam', 'spam']
|
||||
r_1.at 'z' . to_vector . should_equal [Nothing, Nothing, 'foo', Nothing, 'bar', 'eggs', 'foo']
|
||||
r_2 = a.join (b.set_index 'z') drop_unmatched=True on='y' . to_dataframe . sort by=['x', 'w']
|
||||
r_2 = a.join (b.set_index 'z') drop_unmatched=True on='y' . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'x', Sort_Column.Name 'w'])
|
||||
r_2.at 'x' . to_vector . should_equal [0, 0, 1, 1, 3, 6]
|
||||
r_2.at 'w' . to_vector . should_equal [3, 6, 3, 5, 5, 3]
|
||||
|
||||
Test.specify "should allow self-joins and append suffixes to disambiguate column names" <|
|
||||
r_1 = a.join (a.set_index 'x') on='x' . to_dataframe . sort by='x'
|
||||
r_1 = a.join (a.set_index 'x') on='x' . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'x'])
|
||||
r_1.columns.map .name . should_equal ['x', 'y_left', 'y_right']
|
||||
r_1.at 'x' . to_vector . should_equal [0, 1, 3, 6, 7]
|
||||
expected_y = ['foo', 'bar', 'spam', 'eggs', 'baz']
|
||||
@ -159,8 +166,8 @@ spec prefix connection pending=Nothing =
|
||||
ta_2 = ta.set_index "id"
|
||||
tb_2 = tb.set_index "id"
|
||||
res = (tc.join ta_2 on="id_a") . join tb_2 on="id_b" left_suffix="_a" right_suffix="_b"
|
||||
sel = res.select ["name_a", "name_b"]
|
||||
df = sel.to_dataframe . sort by="name_a"
|
||||
sel = res.select_columns (By_Name ["name_a", "name_b"])
|
||||
df = sel.to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "name_a"])
|
||||
df . at "name_a" . to_vector . should_equal ["Foo", "Hmm"]
|
||||
df . at "name_b" . to_vector . should_equal ["Bar", "Hmm"]
|
||||
|
||||
@ -201,41 +208,6 @@ spec prefix connection pending=Nothing =
|
||||
empty.columns.length . should_equal 0
|
||||
empty.to_dataframe.columns.length . should_equal 0
|
||||
|
||||
Test.group prefix+"Old Aggregation" pending=pending <|
|
||||
t = upload "T6" <|
|
||||
Materialized_Table.new [["name", ["foo", "bar", "foo", "baz", "foo", "bar", "quux"]], ["price", [0.4, 3.5, Nothing, 6.7, Nothing, 97, Nothing]], ["quantity", [10, 20, 30, 40, 50, 60, 70]]]
|
||||
agg = t.group by='name'
|
||||
## A helper which makes sure that the groups are ordered according to the index, using the Table library
|
||||
determinize col =
|
||||
df = col.to_dataframe.to_table
|
||||
df.sort by=df.index . at col.name
|
||||
|
||||
Test.specify "should allow counting group sizes" <|
|
||||
determinize agg.count . to_vector . should_equal [2, 1, 3, 1]
|
||||
|
||||
Test.specify "should allow aggregating columns with basic arithmetic aggregators" <|
|
||||
determinize (agg.at 'price' . mean) . to_vector . should_equal [50.25, 6.7, 0.4, Nothing]
|
||||
determinize (agg.at 'price' . min) . to_vector . should_equal [3.5, 6.7, 0.4, Nothing]
|
||||
determinize (agg.at 'price' . max) . to_vector . should_equal [97, 6.7, 0.4, Nothing]
|
||||
|
||||
Test.specify "should allow to join multiple aggregations" <|
|
||||
m1 = agg.at 'price' . mean
|
||||
m2 = agg.at 'quantity' . max
|
||||
df = (m1.join m2).to_dataframe
|
||||
df2 = df.sort by=df.index
|
||||
df2.at 'price_mean' . to_vector . should_equal [50.25, 6.7, 0.4, Nothing]
|
||||
df2.at 'quantity_max' . to_vector . should_equal [60, 40, 50, 70]
|
||||
|
||||
Test.specify "should correctly compute the result size" <|
|
||||
m = agg.at 'price' . mean
|
||||
m.length . should_equal m.to_vector.length
|
||||
m.length . should_equal 4
|
||||
|
||||
Test.specify "should correctly count values" <|
|
||||
m = agg.at 'price' . mean
|
||||
m.count . should_equal 3
|
||||
m.count_missing . should_equal 1
|
||||
|
||||
Test.group prefix+"Column-wide statistics" pending=pending <|
|
||||
Test.specify 'should allow computing basic column-wide stats' <|
|
||||
t7 = upload "T7" <|
|
||||
@ -251,38 +223,29 @@ spec prefix connection pending=Nothing =
|
||||
Materialized_Table.new [["id", [1,2,3,4,5,6]], ["name", ["shoes","trousers","dress","skirt","blouse","t-shirt"]], ["quantity", [20,10,20,10,30,30]], ["rating", [3.0,Nothing,7.3,3.0,2.2,Nothing]], ["price", [37.2,42.1,64.1,87.4,13.5,64.2]]]
|
||||
|
||||
Test.specify "should allow sorting by a single column name" <|
|
||||
r_1 = df.sort by="quantity"
|
||||
r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'quantity'])
|
||||
r_1.at 'id' . to_vector . should_equal [2,4,1,3,5,6]
|
||||
|
||||
r_2 = df.sort by="rating" missing_last=False
|
||||
r_2.at 'id' . to_vector . should_equal [2,6,5,1,4,3]
|
||||
|
||||
r_3 = df.sort by="rating" missing_last=False order=Sort_Direction.Descending
|
||||
r_3.at 'id' . to_vector . should_equal [2,6,3,1,4,5]
|
||||
r_3 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'rating' Sort_Direction.Descending])
|
||||
r_3.at 'id' . to_vector . should_equal [3,1,4,5,2,6]
|
||||
|
||||
Test.specify 'should allow sorting by multiple column names' <|
|
||||
r_1 = df.sort by=['quantity', 'rating']
|
||||
r_1.at 'id' . to_vector . should_equal [4,2,1,3,5,6]
|
||||
r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'quantity', Sort_Column.Name 'rating'])
|
||||
r_1.at 'id' . to_vector . should_equal [2,4,1,3,6,5]
|
||||
|
||||
r_2 = df.sort by=['rating', 'quantity'] missing_last=False order=Sort_Direction.Descending
|
||||
r_2.at 'id' . to_vector . should_equal [6,2,3,1,4,5]
|
||||
|
||||
Test.specify 'should allow sorting by external columns' <|
|
||||
quality_ratio = df.at 'rating' / df.at 'price'
|
||||
|
||||
r_1 = df.sort by=quality_ratio
|
||||
r_1.at 'id' . to_vector . should_equal [4,1,3,5,2,6]
|
||||
|
||||
r_2 = df.sort by=['quantity', quality_ratio]
|
||||
r_2.at 'id' . to_vector . should_equal [4,2,1,3,5,6]
|
||||
r_2 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'rating' Sort_Direction.Descending, Sort_Column.Name 'quantity' Sort_Direction.Descending])
|
||||
r_2.at 'id' . to_vector . should_equal [3,1,4,5,6,2]
|
||||
|
||||
Test.specify 'should allow sorting with specific by-column rules' <|
|
||||
r_1 = df.sort by=['quantity', (Order_Rule 'price' order=Sort_Direction.Descending)]
|
||||
r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "quantity", Sort_Column.Name "price" Sort_Direction.Descending])
|
||||
r_1.at 'id' . to_vector . should_equal [4,2,3,1,6,5]
|
||||
|
||||
Test.specify 'should return dataflow error when passed a non-existent column' <|
|
||||
r = df.sort by='foobar'
|
||||
r.should_fail_with No_Such_Column_Error
|
||||
Test.specify 'should return warnings and errors when passed a non-existent column' <|
|
||||
action = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'foobar']) on_problems=_
|
||||
tester table =
|
||||
table.at 'id' . to_vector . should_equal [1,2,3,4,5,6]
|
||||
problems = [Missing_Input_Columns [Sort_Column.Name 'foobar'], No_Input_Columns_Selected]
|
||||
Problems.test_problem_handling action problems tester
|
||||
|
||||
Test.specify 'should correctly reorder all kinds of columns and leave the original columns untouched' <|
|
||||
ints = [1, 2, 3, 4, 5]
|
||||
@ -291,7 +254,7 @@ spec prefix connection pending=Nothing =
|
||||
texts = ["foo", "foo", "bar", "baz", "spam"]
|
||||
df = upload "T8" <|
|
||||
Materialized_Table.new [["ord", [0,3,2,4,1]], ["ints", ints], ["reals", reals], ["bools", bools], ["texts", texts]]
|
||||
r = df.sort by='ord'
|
||||
r = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'ord'])
|
||||
|
||||
r.at 'ints' . to_vector . should_equal [1, 5, 3, 2, 4]
|
||||
df.at 'ints' . to_vector . should_equal ints
|
||||
@ -309,22 +272,17 @@ spec prefix connection pending=Nothing =
|
||||
c = df.at 'rating'
|
||||
|
||||
r_1 = c.sort
|
||||
r_1.to_vector.should_equal [2.2, 3.0, 3.0, 7.3, Nothing, Nothing]
|
||||
r_1.to_vector.should_equal [Nothing, Nothing, 2.2, 3.0, 3.0, 7.3]
|
||||
|
||||
r_2 = c.sort order=Sort_Direction.Descending
|
||||
r_2.to_vector.should_equal [7.3, 3.0, 3.0, 2.2, Nothing, Nothing]
|
||||
|
||||
r_3 = c.sort order=Sort_Direction.Descending missing_last=False
|
||||
r_3.to_vector.should_equal [Nothing, Nothing, 7.3, 3.0, 3.0, 2.2]
|
||||
|
||||
Test.group prefix+"Index" pending=pending <|
|
||||
t0 = upload "Tix" <|
|
||||
Materialized_Table.new [["ix", [1,2,3]], ["c1", [4,5,6]]]
|
||||
t = t0.set_index 'ix'
|
||||
Test.specify "should be accessible by `at` like other columns" <|
|
||||
t.at 'ix' . to_vector . should_equal t.index.to_vector
|
||||
Test.specify "should be accessible by `select` like other columns" <|
|
||||
t.select ['ix'] . columns . first . to_vector . should_equal t.index.to_vector
|
||||
Test.specify "treated as a column indexed by itself should still correctly compute values" <|
|
||||
col = t.index+10
|
||||
vec = [11, 12, 13]
|
||||
@ -360,7 +318,7 @@ spec prefix connection pending=Nothing =
|
||||
(InMemory) table are ordered according to a specified column or list
|
||||
of columns.
|
||||
determinize_by order_column table =
|
||||
table.sort by=order_column
|
||||
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name order_column])
|
||||
|
||||
Test.specify "should allow counting group sizes and elements" <|
|
||||
aggregates = [Count Nothing, Count_Not_Nothing "price", Count_Nothing "price"]
|
||||
|
@ -1,5 +1,7 @@
|
||||
from Standard.Base import all
|
||||
from Standard.Table import all
|
||||
import Standard.Table.Data.Sort_Column_Selector
|
||||
import Standard.Table.Data.Sort_Column
|
||||
|
||||
from Standard.Table.Data.Table as Table_Internal import Empty_Error
|
||||
|
||||
@ -8,7 +10,7 @@ import Standard.Table.Data.Storage
|
||||
import Standard.Test
|
||||
import Standard.Test.Problems
|
||||
import Standard.Visualization
|
||||
from Standard.Table.Error as Table_Errors import Invalid_Output_Column_Names, Duplicate_Output_Column_Names
|
||||
from Standard.Table.Error as Table_Errors import Invalid_Output_Column_Names, Duplicate_Output_Column_Names, No_Input_Columns_Selected, Missing_Input_Columns
|
||||
|
||||
import project.Common_Table_Spec
|
||||
|
||||
@ -321,26 +323,6 @@ spec =
|
||||
i.at "Items Count" . to_vector . should_equal [3, 2, 4]
|
||||
i.at "Storage Type" . to_vector . should_equal [Storage.Text, Storage.Integer, Storage.Any]
|
||||
|
||||
Test.group "Aggregation" <|
|
||||
name = ['name', ["foo", "bar", "foo", "baz", "foo", "bar", "quux"]]
|
||||
price = ['price', [0.4, 3.5, Nothing, 6.7, Nothing, 97, Nothing]]
|
||||
quantity = ['quantity', [10, 20, 30, 40, 50, 60, 70]]
|
||||
t = Table.new [name, price, quantity]
|
||||
agg = t.group by='name'
|
||||
|
||||
Test.specify "should allow counting group sizes" <|
|
||||
agg.count.to_vector.should_equal [3, 2, 1, 1]
|
||||
|
||||
Test.specify "should allow aggregating columns with basic arithmetic aggregators" <|
|
||||
agg.at 'price' . mean . to_vector . should_equal [0.4, 50.25, 6.7, Nothing]
|
||||
agg.at 'price' . min . to_vector . should_equal [0.4, 3.5, 6.7, Nothing]
|
||||
|
||||
Test.specify "should allow aggregating with user-defined aggregate functions" <|
|
||||
median vec =
|
||||
sorted = vec.sort
|
||||
if sorted.is_empty then Nothing else sorted.at (sorted.length-1 / 2).floor
|
||||
agg.at 'quantity' . reduce median . to_vector . should_equal [30, 20, 40, 70]
|
||||
|
||||
Test.group "Column-wide statistics" <|
|
||||
Test.specify 'should allow computing basic column-wide stats' <|
|
||||
price = Column.from_vector 'price' [0.4, 3.5, Nothing, 6.7, Nothing, 97, Nothing]
|
||||
@ -353,54 +335,36 @@ spec =
|
||||
df = (Enso_Project.data / "clothes.csv").read
|
||||
|
||||
Test.specify "should allow sorting by a single column name" <|
|
||||
r_1 = df.sort by="Quantity"
|
||||
r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "Quantity"])
|
||||
r_1.at 'Id' . to_vector . should_equal [2,4,1,3,5,6]
|
||||
|
||||
r_2 = df.sort by="Rating" missing_last=False
|
||||
r_2.at 'Id' . to_vector . should_equal [2,6,5,1,4,3]
|
||||
|
||||
r_3 = df.sort by="Rating" missing_last=False order=Sort_Direction.Descending
|
||||
r_3.at 'Id' . to_vector . should_equal [2,6,3,1,4,5]
|
||||
r_3 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "Rating" Sort_Direction.Descending])
|
||||
r_3.at 'Id' . to_vector . should_equal [3,1,4,5,2,6]
|
||||
|
||||
Test.specify 'should allow sorting by multiple column names' <|
|
||||
r_1 = df.sort by=['Quantity', 'Rating']
|
||||
r_1.at 'Id' . to_vector . should_equal [4,2,1,3,5,6]
|
||||
r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity', Sort_Column.Name 'Rating'])
|
||||
r_1.at 'Id' . to_vector . should_equal [2,4,1,3,6,5]
|
||||
|
||||
r_2 = df.sort by=['Rating', 'Quantity'] missing_last=False order=Sort_Direction.Descending
|
||||
r_2.at 'Id' . to_vector . should_equal [6,2,3,1,4,5]
|
||||
|
||||
Test.specify 'should allow sorting by external columns' <|
|
||||
quality_ratio = df.at 'Rating' / df.at 'Price'
|
||||
|
||||
r_1 = df.sort by=quality_ratio
|
||||
r_1.at 'Id' . to_vector . should_equal [4,1,3,5,2,6]
|
||||
|
||||
r_2 = df.sort by=['Quantity', quality_ratio]
|
||||
r_2.at 'Id' . to_vector . should_equal [4,2,1,3,5,6]
|
||||
r_2 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Rating' Sort_Direction.Descending, Sort_Column.Name 'Quantity' Sort_Direction.Descending])
|
||||
r_2.at 'Id' . to_vector . should_equal [3,1,4,5,6,2]
|
||||
|
||||
Test.specify 'should allow sorting with specific by-column rules' <|
|
||||
r_1 = df.sort by=['Quantity', (Order_Rule 'Price' order=Sort_Direction.Descending)]
|
||||
r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "Quantity", Sort_Column.Name "Price" Sort_Direction.Descending])
|
||||
r_1.at 'Id' . to_vector . should_equal [4,2,3,1,6,5]
|
||||
|
||||
Test.specify 'should respect defined comparison operations for custom types' <|
|
||||
c_1 = ['id', [1, 2, 3, 4, 5, 6]]
|
||||
c_2 = ['val', [My 1 2, My 3 4, My 2 1, My 5 2, My 7 0, My 4 -1]]
|
||||
df = Table.new [c_1, c_2]
|
||||
r = df.sort by='val'
|
||||
r = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'val'])
|
||||
r.at 'id' . to_vector . should_equal [1,3,6,2,4,5]
|
||||
|
||||
Test.specify 'should allow passing a custom comparator per column and should missing-proof it' <|
|
||||
c_1 = ['id', [1, 2, 3, 4, 5, 6]]
|
||||
c_2 = ['val', [My 1 2, My 2 5, My 3 4, My 6 3, Nothing, My 1 0]]
|
||||
df = Table.new [c_1, c_2]
|
||||
|
||||
cmp a b = (a.x-a.y).abs . compare_to (b.x-b.y).abs
|
||||
r = df.sort by=(Order_Rule 'val' comparator=cmp)
|
||||
r.at 'id' . to_vector . should_equal [1,3,6,2,4,5]
|
||||
|
||||
Test.specify 'should return dataflow error when passed a non-existent column' <|
|
||||
r = df.sort by='foobar'
|
||||
r.should_fail_with No_Such_Column_Error
|
||||
Test.specify 'should return warnings and errors when passed a non-existent column' <|
|
||||
action = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'foobar']) on_problems=_
|
||||
tester table =
|
||||
table.at 'Id' . to_vector . should_equal [1,2,3,4,5,6]
|
||||
problems = [Missing_Input_Columns [Sort_Column.Name 'foobar'], No_Input_Columns_Selected]
|
||||
Problems.test_problem_handling action problems tester
|
||||
|
||||
Test.specify 'should correctly reorder all kinds of columns and leave the original columns untouched' <|
|
||||
ord = [0, 3, 2, 4, 1]
|
||||
@ -411,7 +375,7 @@ spec =
|
||||
objs = [Cons 1 2, Cons 2 3, Cons 6 7, Cons 8 9, Cons 10 30]
|
||||
|
||||
df = Table.new [['ord', ord], ['ints', ints], ['reals', reals], ['bools', bools], ['texts', texts], ['objs', objs]]
|
||||
r = df.sort by='ord'
|
||||
r = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'ord'])
|
||||
|
||||
r.at 'ints' . to_vector . should_equal [1, 5, 3, 2, 4]
|
||||
df.at 'ints' . to_vector . should_equal ints
|
||||
@ -507,9 +471,6 @@ spec =
|
||||
i = t.index
|
||||
c.to_vector . should_equal i.to_vector
|
||||
|
||||
Test.specify "should be accessible by `select` like other columns" <|
|
||||
t.select ['ix'] . columns . first . to_vector . should_equal t.index.to_vector
|
||||
|
||||
Test.specify "should be able to be set by column" <|
|
||||
with_index = t.set_index c
|
||||
with_index.index.to_vector . should_equal c.to_vector
|
||||
|
@ -60,8 +60,6 @@ import project.System.File_Spec
|
||||
import project.System.Process_Spec
|
||||
import project.System.Reporting_Stream_Decoder_Spec
|
||||
|
||||
import project.Examples_Spec
|
||||
|
||||
main = Test.Suite.run_main <|
|
||||
Any_Spec.spec
|
||||
Array_Spec.spec
|
||||
@ -70,7 +68,6 @@ main = Test.Suite.run_main <|
|
||||
Conversion_Spec.spec
|
||||
Deep_Export_Spec.spec
|
||||
Error_Spec.spec
|
||||
Examples_Spec.spec
|
||||
File_Spec.spec
|
||||
Reporting_Stream_Decoder_Spec.spec
|
||||
Http_Header_Spec.spec
|
||||
|
@ -3,6 +3,7 @@ from Standard.Base import all
|
||||
from Standard.Database import all
|
||||
import Standard.Database.Data.Table as Database_Table
|
||||
import Standard.Table.Data.Table as Dataframe_Table
|
||||
from Standard.Table.Data.Aggregate_Column import Group_By, Average
|
||||
import Standard.Visualization.Table.Visualization as Visualization
|
||||
import Standard.Test
|
||||
|
||||
@ -47,19 +48,9 @@ visualization_spec connection =
|
||||
json = make_json header=["A"] data=[['a', 'a']] all_rows=3 ixes_header=[] ixes=[]
|
||||
vis . should_equal json
|
||||
|
||||
g = t.group by=["A", "B"] . at "C" . mean
|
||||
g = t.aggregate [Group_By "A", Group_By "B", Average "C"] . at "Average C"
|
||||
vis2 = Visualization.prepare_visualization g 1
|
||||
json2 = make_json header=["C_mean"] data=[[4]] all_rows=2 ixes_header=["A", "B"] ixes=[['a'], [2]]
|
||||
vis2 . should_equal json2
|
||||
|
||||
Test.specify "should visualize database aggregates" <|
|
||||
agg = t.group by="A"
|
||||
vis = Visualization.prepare_visualization agg 1
|
||||
json = make_json header=["B", "C"] data=[[2], [3]] all_rows=3 ixes_header=["A"] ixes=[['a']]
|
||||
vis . should_equal json
|
||||
|
||||
vis2 = Visualization.prepare_visualization (agg.at "C") 1
|
||||
json2 = make_json header=["C"] data=[[3]] all_rows=3 ixes_header=["A"] ixes=[['a']]
|
||||
json2 = make_json header=["Average C"] data=[[4.0]] all_rows=2 ixes_header=[] ixes=[]
|
||||
vis2 . should_equal json2
|
||||
|
||||
t2 = Dataframe_Table.new [["A", [1, 2, 3]], ["B", [4, 5, 6]], ["C", [7, 8, 9]]]
|
||||
@ -78,21 +69,6 @@ visualization_spec connection =
|
||||
json = make_json header=["A"] data=[[1, 2]] all_rows=3 ixes_header=[""] ixes=[[0, 1]]
|
||||
vis . should_equal json
|
||||
|
||||
g = t2.group by="A" . at "C" . mean
|
||||
vis2 = Visualization.prepare_visualization g 1
|
||||
json2 = make_json header=["C_mean"] data=[[7.0]] all_rows=3 ixes_header=["A"] ixes=[[1]]
|
||||
vis2 . should_equal json2
|
||||
|
||||
Test.specify "should visualize dataframe aggregates" <|
|
||||
agg = t2.group by="A"
|
||||
vis = Visualization.prepare_visualization agg 1
|
||||
json = make_json header=["B", "C"] data=[[4], [7]] all_rows=3 ixes_header=["A"] ixes=[[1]]
|
||||
vis . should_equal json
|
||||
|
||||
vis2 = Visualization.prepare_visualization (agg.at "C") 1
|
||||
json2 = make_json header=["C"] data=[[7]] all_rows=3 ixes_header=["A"] ixes=[[1]]
|
||||
vis2 . should_equal json2
|
||||
|
||||
|
||||
Test.specify "should handle Vectors" <|
|
||||
vis = Visualization.prepare_visualization [1, 2, 3] 2
|
||||
|
Loading…
Reference in New Issue
Block a user