Add support for Blank_Columns to Table and Database (#3812)

Implements https://www.pivotaltracker.com/story/show/183390281 and https://www.pivotaltracker.com/story/show/183390394
This commit is contained in:
Radosław Waśko 2022-10-20 11:11:08 +02:00 committed by GitHub
parent 81e5e77ae8
commit cc76e7d36a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
24 changed files with 478 additions and 259 deletions

View File

@ -214,6 +214,9 @@
- [Extended `Filter_Condition` with `Is_In` and `Not_In`.][3790]
- [Replaced `Table.drop_missing_rows` with `filter_blank_rows` with an updated
API.][3805]
- [Replaced `Table.drop_missing_columns` with
`Table.remove_columns Column_Selector.Blank_Columns` by adding the new column
selector variant.][3812]
[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -344,6 +347,7 @@
[3793]: https://github.com/enso-org/enso/pull/3793
[3790]: https://github.com/enso-org/enso/pull/3790
[3805]: https://github.com/enso-org/enso/pull/3805
[3812]: https://github.com/enso-org/enso/pull/3812
#### Enso Compiler

View File

@ -259,8 +259,8 @@ type Any
from Standard.Base import all
example_catch =
error = Error.throw (Illegal_Argument_Error "My message")
error.catch Illegal_Argument_Error (err -> err.message)
error = Error.throw (Illegal_Argument_Error_Data "My message")
error.catch Illegal_Argument_Error_Data (err -> err.message)
> Example
Catching any dataflow error and turning it into a regular value.

View File

@ -160,7 +160,7 @@ type Utf_16_Span
Utility function taking a range pointing at grapheme clusters and converting
to a range on the underlying code units.
range_to_char_indices : Text -> Range -> Range ! (Index_Out_Of_Bounds_Error | Illegal_Argument_Error)
range_to_char_indices text range = if range.step != 1 then Error.throw (Illegal_Argument_Error "Text indexing only supports ranges with step equal to 1.") else
range_to_char_indices text range = if range.step != 1 then Error.throw (Illegal_Argument_Error_Data "Text indexing only supports ranges with step equal to 1.") else
len = text.length
start = if range.start < 0 then range.start + len else range.start
end = if range.end == Nothing then len else (if range.end < 0 then range.end + len else range.end)

View File

@ -212,7 +212,7 @@ type Date
## Returns the century of the date.
century : Integer
century self = if self.year > 0 then (self.year - 1).div 100 + 1 else
Error.throw (Illegal_Argument_Error "Century can only be given for AD years.")
Error.throw (Illegal_Argument_Error_Data "Century can only be given for AD years.")
## Returns the quarter of the year the date falls into.
quarter : Integer

View File

@ -15,7 +15,7 @@ Day_Of_Week.from (that : Integer) (first_day:Day_Of_Week=Day_Of_Week.Sunday) (st
True ->
valid_range = if start_at_zero then "0-6" else "1-7"
message = "Invalid day of week (must be " + valid_range + ")."
Error.throw (Illegal_Argument_Error message)
Error.throw (Illegal_Argument_Error_Data message)
False ->
day_number = if first_day == Day_Of_Week.Sunday then shifted else
(shifted + (first_day.to_integer start_at_zero=True)) % 7

View File

@ -74,8 +74,8 @@ type Error
from Standard.Base import all
example_catch =
error = Error.throw (Illegal_Argument_Error "My message")
error.catch Illegal_Argument_Error (err -> err.message)
error = Error.throw (Illegal_Argument_Error_Data "My message")
error.catch Illegal_Argument_Error_Data (err -> err.message)
> Example
Catching any dataflow error and turning it into a regular value.
@ -299,7 +299,7 @@ type Panic
and rethrow any others, without affecting their stacktraces.
Panic.catch Any (Panic.throw "foo") caught_panic-> case caught_panic.payload of
Illegal_Argument_Error message _ -> "Illegal arguments were provided: "+message
Illegal_Argument_Error_Data message _ -> "Illegal arguments were provided: "+message
other_panic -> Panic.throw other_panic
throw : Any -> Panic
throw payload = @Builtin_Method "Panic.throw"
@ -378,13 +378,13 @@ type Panic
> Example
Handling a specific type of panic.
Panic.catch Illegal_Argument_Error (Panic.throw (Illegal_Argument_Error "Oh no!" Nothing)) error->
Panic.catch Illegal_Argument_Error_Data (Panic.throw (Illegal_Argument_Error_Data "Oh no!" Nothing)) error->
"Caught an `Illegal_Argument_Error`: "+error.payload.message
> Example
Handling any panic.
Panic.catch Any (Panic.throw (Illegal_Argument_Error "Oh no!" Nothing)) error->
Panic.catch Any (Panic.throw (Illegal_Argument_Error_Data "Oh no!" Nothing)) error->
"Caught some panic!"
> Example
@ -395,7 +395,7 @@ type Panic
polyglot java import java.lang.NumberFormatException
parse str =
Panic.catch NumberFormatException (Long.parseLong str) caught_panic->
Error.throw (Illegal_Argument_Error "The provided string is not a valid number: "+caught_panic.payload.cause.getMessage)
Error.throw (Illegal_Argument_Error_Data "The provided string is not a valid number: "+caught_panic.payload.cause.getMessage)
catch : Any -> Any -> (Caught_Panic -> Any) -> Any
catch panic_type ~action handler =
Panic.catch_primitive action caught_panic->
@ -430,7 +430,7 @@ type Panic
polyglot java import java.lang.NumberFormatException
parse str =
Panic.catch_java NumberFormatException (Long.parseLong str) java_exception->
Error.throw (Illegal_Argument_Error "The provided string is not a valid number: "+java_exception.getMessage)
Error.throw (Illegal_Argument_Error_Data "The provided string is not a valid number: "+java_exception.getMessage)
catch_java : Any -> Any -> (Throwable -> Any) -> Any
catch_java panic_type ~action handler =
Panic.catch_primitive action caught_panic-> case caught_panic.payload of
@ -457,12 +457,12 @@ type Panic
> Example
Converting an expected panic to a dataflow error.
Panic.recover Illegal_Argument_Error (Panic.throw (Illegal_Argument_Error "Oh!" Nothing))
Panic.recover Illegal_Argument_Error_Data (Panic.throw (Illegal_Argument_Error_Data "Oh!" Nothing))
> Example
Converting one of many expected panic types to a dataflow error.
Panic.recover [Illegal_Argument_Error, Illegal_State_Error] (Panic.throw (Illegal_Argument_Error "Oh!" Nothing))
Panic.recover [Illegal_Argument_Error, Illegal_State_Error] (Panic.throw (Illegal_Argument_Error_Data "Oh!" Nothing))
recover : (Vector.Vector Any | Any) -> Any -> Any
recover expected_types ~action =
types_to_check = case expected_types of

View File

@ -454,6 +454,35 @@ type Column
not : Column
not self = self.make_unary_op "NOT"
## UNSTABLE
   Maps a boolean column onto two constants: `True` entries are replaced
   with `when_true` and `False` entries with `when_false`.
   Only meant for use with boolean columns.

   TODO: Currently `when_true` and `when_false` need to be a single value.
   In the future the API will also support row-based IIF if they are columns.
iif : Any -> Any -> Column
iif self when_true when_false =
    ## TODO The result type should be derived from the types of `when_true`
       and `when_false`, but that depends on the Value Types design which is
       still in progress. For now this is an internal prototype with
       simplified handling. Once Value Types exist, extend this to the full
       IIF: accept columns as well as plain values for both branches, map
       Enso base types to SQL types, and pick a common type for the result.
    sql_type_of value = case value of
        _ : Integer -> SQL_Type.integer
        _ : Decimal -> SQL_Type.real
        _ : Text -> SQL_Type.text
        _ : Boolean -> SQL_Type.boolean
        _ -> Error.throw (Illegal_Argument_Error_Data "Unsupported type.")
    true_type = sql_type_of when_true
    false_type = sql_type_of when_false
    # Both branches must map to the same SQL type; that type becomes the type of the result.
    if true_type != false_type then Error.throw (Illegal_Argument_Error_Data "when_true and when_false types do not match") else
        self.make_op "IIF" [when_true, when_false] new_type=true_type
## UNSTABLE
Returns a column of booleans, with `True` items at the positions where
@ -473,6 +502,25 @@ type Column
is_empty : Column
is_empty self = self.make_unary_op "IS_EMPTY" new_type=SQL_Type.boolean
## PRIVATE
   Returns a column of booleans with `True` at the positions where this
   column contains a blank value.

   Arguments:
   - treat_nans_as_blank: If `True`, then `Number.nan` is considered as
     blank.

   ? Blank values
     Blank values are `Nothing`, `""` and, depending on the
     `treat_nans_as_blank` setting, `Number.nan`.
is_blank : Boolean -> Column
is_blank self treat_nans_as_blank=False =
    # Columns that are definitely text use `is_empty`; all others use `is_missing`.
    base_blank = case self.sql_type.is_definitely_text of
        True -> self.is_empty
        False -> self.is_missing
    # The NaN check is only added for columns that are definitely doubles.
    case treat_nans_as_blank && self.sql_type.is_definitely_double of
        True -> base_blank || self.is_nan
        False -> base_blank
## UNSTABLE
Returns a new column where missing values have been replaced with the

View File

@ -145,7 +145,7 @@ type Table
Icon: select_column
select_columns : Vector Text | Column_Selector -> Boolean -> Problem_Behavior -> Table
select_columns self (columns = Column_Selector.By_Index [0]) (reorder = False) (on_problems = Report_Warning) =
new_columns = Table_Helpers.select_columns internal_columns=self.internal_columns selector=columns reorder=reorder on_problems=on_problems
new_columns = self.columns_helper.select_columns selector=columns reorder=reorder on_problems=on_problems
self.updated_columns new_columns
## Returns a new table with the chosen set of columns, as specified by the
@ -195,7 +195,7 @@ type Table
table.remove_columns (Column_Selector.By_Column [column1, column2])
remove_columns : Vector Text | Column_Selector -> Problem_Behavior -> Table
remove_columns self (columns = Column_Selector.By_Index [0]) (on_problems = Report_Warning) =
new_columns = Table_Helpers.remove_columns internal_columns=self.internal_columns selector=columns on_problems=on_problems
new_columns = self.columns_helper.remove_columns selector=columns on_problems=on_problems
self.updated_columns new_columns
## Returns a new table with the specified selection of columns moved to
@ -250,7 +250,7 @@ type Table
table.reorder_columns (Column_Selector.By_Column [column1, column2])
reorder_columns : Vector Text | Column_Selector -> Position.Position -> Problem_Behavior -> Table
reorder_columns self (columns = Column_Selector.By_Index [0]) (position = Position.Before_Other_Columns) (on_problems = Report_Warning) =
new_columns = Table_Helpers.reorder_columns internal_columns=self.internal_columns selector=columns position=position on_problems=on_problems
new_columns = self.columns_helper.reorder_columns selector=columns position=position on_problems=on_problems
self.updated_columns new_columns
## Returns a new table with the columns sorted by name according to the
@ -797,24 +797,7 @@ type Table
Blank values are `Nothing`, `""` and depending on setting `Number.nan`.
filter_blank_rows : Boolean -> Boolean -> Table
filter_blank_rows self when_any=False treat_nans_as_blank=False =
can_contain_text col = col.sql_type.is_definitely_text
can_contain_double col = col.sql_type.is_definitely_double
Table_Helpers.filter_blank_rows self can_contain_text can_contain_double when_any treat_nans_as_blank
## DEPRECATED Will be replaced with `Incomplete_Columns` selector (to be used with `remove_columns`).
drop_missing_columns : Table
drop_missing_columns self =
rows_expr = Expression.Operation "COUNT_ROWS" []
all_rows_column_name = "row_count"
make_count_expr expr = Expression.Operation "COUNT" [expr]
cols = self.internal_columns.map (c -> [c.name, make_count_expr c.expression])
query = Query.Select [[all_rows_column_name, rows_expr]]+cols self.context
sql = self.connection.dialect.generate_sql query
table = self.connection.read_statement sql
all_rows = table.at all_rows_column_name . at 0
kept_columns = self.internal_columns . filter c->
all_rows == table.at c.name . at 0
self.updated_columns kept_columns
Table_Helpers.filter_blank_rows self when_any treat_nans_as_blank
## Returns the amount of rows in this table.
row_count : Integer
@ -917,6 +900,11 @@ type Table
new_ctx = self.context.set_index ixes
Column.Value internal.name self.connection internal.sql_type internal.expression new_ctx
## PRIVATE
   Creates the `Table_Column_Helper` for this table, built from its internal
   columns, its `make_column` function, the table itself and `.read`.
columns_helper : Table_Column_Helper
columns_helper self =
    columns = self.internal_columns
    column_builder = self.make_column
    Table_Helpers.Table_Column_Helper.Value columns column_builder self .read
## PRIVATE
Returns a copy of this table with updated internal columns.

View File

@ -29,7 +29,7 @@ make_expression aggregate dialect =
case aggregate of
Group_By c _ -> c.expression
Count _ -> Expression.Operation "COUNT_ROWS" []
Count_Distinct columns _ ignore_nothing -> if columns.is_empty then Error.throw (Illegal_Argument_Error "Count_Distinct must have at least one column.") else
Count_Distinct columns _ ignore_nothing -> if columns.is_empty then Error.throw (Illegal_Argument_Error_Data "Count_Distinct must have at least one column.") else
case ignore_nothing of
True -> Expression.Operation "COUNT_DISTINCT" (columns.map .expression)
False -> Expression.Operation "COUNT_DISTINCT_INCLUDE_NULL" (columns.map .expression)

View File

@ -170,7 +170,7 @@ base_dialect =
fun = name -> [name, make_function name]
arith = [bin "+", bin "-", bin "*", bin "/", bin "%"]
logic = [bin "AND", bin "OR", unary "NOT"]
logic = [bin "AND", bin "OR", unary "NOT", ["IIF", make_iif]]
compare = [bin "=", bin "!=", bin "<", bin ">", bin "<=", bin ">=", ["BETWEEN", make_between]]
agg = [fun "MAX", fun "MIN", fun "AVG", fun "SUM"]
counts = [fun "COUNT", ["COUNT_ROWS", make_constant "COUNT(*)"]]
@ -186,6 +186,17 @@ is_empty = lift_unary_op "IS_EMPTY" arg->
is_empty = (arg ++ " = ''").paren
(is_null ++ " OR " ++ is_empty).paren
## PRIVATE
   Builds the SQL for the `IIF` operation as a parenthesised `CASE`
   expression.

   Arguments:
   - arguments: A vector of exactly three builders: the condition, the value
     used when the condition holds, and the value used otherwise.

   The generated expression yields `NULL` when the condition itself is
   `NULL`. Any other number of arguments results in an `Illegal_State_Error`.
make_iif : Vector Builder -> Builder
make_iif arguments = case arguments.length of
    3 ->
        expr = arguments.at 0
        when_true = arguments.at 1
        when_false = arguments.at 2
        # The trailing space after `CASE WHEN` keeps the keyword separated from the condition regardless of how `expr` is rendered.
        (code "CASE WHEN " ++ expr ++ " THEN " ++ when_true ++ " WHEN " ++ expr ++ " IS NULL THEN NULL ELSE " ++ when_false ++ " END").paren
    _ ->
        Error.throw <| Illegal_State_Error_Data ("Invalid amount of arguments for operation IIF")
## PRIVATE
make_between : Vector Builder -> Builder
make_between arguments = case arguments.length of

View File

@ -30,7 +30,7 @@
import Standard.Examples
example_drop_missing_cols =
Examples.inventory_table.drop_missing_columns
Examples.inventory_table.remove_columns (Column_Selector.Blank_Columns when_any=True)
> Example
Fill missing values in a column with the value 20.5.

View File

@ -473,6 +473,22 @@ type Column
not : Column
not self = run_vectorized_unary_op self "not" .not
## UNSTABLE
   Replaces `True` values with `when_true` and `False` with `when_false`.
   Only meant for use with boolean columns.

   Returns a new column named "Result" that keeps this column's index. If
   the storage type of this column is not boolean, an
   `Illegal_Argument_Error` is raised.

   TODO: Currently `when_true` and `when_false` need to be a single value.
   In the future the API will also support row-based IIF if they are columns.
iif : Any -> Any -> Column
iif self when_true when_false = case self.storage_type of
    Storage.Boolean ->
        s = self.java_column.getStorage
        ix = self.java_column.getIndex
        # The selection itself is delegated to the underlying Java storage.
        rs = s.iif when_true when_false
        Column.Column_Data (Java_Column.new "Result" ix rs)
    # Uses the `_Data` constructor, consistently with the other error sites.
    _ -> Error.throw (Illegal_Argument_Error_Data "`iif` can only be used with boolean columns.")
## Returns a column of booleans, with `True` items at the positions where
this column contains a `Nothing`.
@ -513,6 +529,24 @@ type Column
is_present : Column
is_present self = self.is_missing.not
## PRIVATE
   Returns a column of booleans with `True` at the positions where this
   column contains a blank value.

   Arguments:
   - treat_nans_as_blank: If `True`, then `Number.nan` is considered as
     blank.

   ? Blank values
     Blank values are `Nothing`, `""` and, depending on the
     `treat_nans_as_blank` setting, `Number.nan`.
is_blank : Boolean -> Column
is_blank self treat_nans_as_blank=False =
    # The check depends on the storage type: `Text` and `Any` use `is_empty`,
    # everything else uses `is_missing`; the NaN check is only added for
    # `Decimal` and `Any` storage.
    case self.storage_type of
        Storage.Text -> self.is_empty
        Storage.Decimal -> if treat_nans_as_blank then self.is_missing || self.is_nan else self.is_missing
        Storage.Any -> if treat_nans_as_blank then self.is_empty || self.is_nan else self.is_empty
        _ -> self.is_missing
## ALIAS Fill Missing
Returns a new column where missing values have been replaced with the

View File

@ -25,3 +25,24 @@ type Column_Selector
this approach can be used to match columns with the same names as a set
of columns of some other table, for example, when preparing for a join.
By_Column (columns : Vector Column)
## ALIAS dropna
ALIAS drop_missing_columns
Select columns which are either entirely blank or, when `when_any` is set, contain at least one blank value.
Arguments:
- when_any: By default, only columns consisting of all blank cells are
selected. If set to `True`, columns containing at least one blank value
will be selected too. If there are no rows, the column is treated as
blank regardless of this argument.
- treat_nans_as_blank: If `True`, then `Number.nan` is considered as
blank.
? Blank values
Blank values are `Nothing`, `""` and depending on setting `Number.nan`.
> Example
Remove completely blank columns from a table.
table.remove_columns Column_Selector.Blank_Columns
Blank_Columns when_any:Boolean=False treat_nans_as_blank:Boolean=False

View File

@ -271,7 +271,7 @@ type Table
Icon: select_column
select_columns : Vector Text | Column_Selector -> Boolean -> Problem_Behavior -> Table
select_columns self (columns = Column_Selector.By_Index [0]) (reorder = False) (on_problems = Report_Warning) =
new_columns = Table_Helpers.select_columns internal_columns=self.columns selector=columns reorder=reorder on_problems=on_problems
new_columns = self.columns_helper.select_columns selector=columns reorder=reorder on_problems=on_problems
Table.new new_columns
## Returns a new table with the chosen set of columns, as specified by the
@ -321,7 +321,7 @@ type Table
table.remove_columns (Column_Selector.By_Column [column1, column2])
remove_columns : Vector Text | Column_Selector -> Problem_Behavior -> Table
remove_columns self (columns = Column_Selector.By_Index [0]) (on_problems = Report_Warning) =
new_columns = Table_Helpers.remove_columns internal_columns=self.columns selector=columns on_problems=on_problems
new_columns = self.columns_helper.remove_columns selector=columns on_problems=on_problems
Table.new new_columns
## Returns a new table with the specified selection of columns moved to
@ -376,7 +376,7 @@ type Table
table.reorder_columns (Column_Selector.By_Column [column1, column2])
reorder_columns : Vector Text | Column_Selector -> Position.Position -> Problem_Behavior -> Table
reorder_columns self (columns = Column_Selector.By_Index [0]) (position = Position.Before_Other_Columns) (on_problems = Report_Warning) =
new_columns = Table_Helpers.reorder_columns internal_columns=self.columns selector=columns position=position on_problems=on_problems
new_columns = self.columns_helper.reorder_columns selector=columns position=position on_problems=on_problems
Table.new new_columns
## Returns a new table with the columns sorted by name according to the
@ -653,7 +653,7 @@ type Table
No_Output_Columns -> Maybe.Some No_Input_Columns_Selected
_ -> Nothing
key_columns = Warning.map_warnings_and_errors warning_mapper <|
Table_Helpers.select_columns internal_columns=self.columns selector=columns reorder=True on_problems=on_problems
self.columns_helper.select_columns selector=columns reorder=True on_problems=on_problems
java_columns = key_columns.map .java_column
text_folding_strategy = Case.folding_strategy case_sensitivity
java_table = Illegal_Argument_Error.handle_java_exception <|
@ -788,7 +788,7 @@ type Table
_ : Column_Selector -> columns
name : Text -> Column_Selector.By_Name [name]
index : Integer -> Column_Selector.By_Index [index]
selection = Table_Helpers.select_columns_helper self.columns selector reorder=False problem_builder
selection = self.columns_helper.select_columns_helper selector reorder=False problem_builder
selected_names = Map.from_vector (selection.map column-> [column.name, True])
map_preserve_name column f = column.map f . rename column.name
@ -1016,22 +1016,7 @@ type Table
Blank values are `Nothing`, `""` and depending on setting `Number.nan`.
filter_blank_rows : Boolean -> Boolean -> Table
filter_blank_rows self when_any=False treat_nans_as_blank=False =
can_contain_text col = case col.storage_type of
Storage.Text -> True
Storage.Any -> True
_ -> False
can_contain_double col = case col.storage_type of
Storage.Decimal -> True
Storage.Any -> True
_ -> False
Table_Helpers.filter_blank_rows self can_contain_text can_contain_double when_any treat_nans_as_blank
## DEPRECATED Will be replaced with `Incomplete_Columns` selector (to be used with `remove_columns`).
drop_missing_columns : Table
drop_missing_columns self =
non_missing = self.columns . filter (col -> col.count_missing == 0)
index = self.java_table.getIndex
Table.Table_Data (Java_Table.new (non_missing.map .java_column . to_array) index)
Table_Helpers.filter_blank_rows self when_any treat_nans_as_blank
## Returns the number of rows in this table.
@ -1200,6 +1185,11 @@ type Table
to_csv : Text
to_csv self = Text.from self (Delimited delimiter=",")
## PRIVATE
   Creates the `Table_Column_Helper` for this table. The columns are passed
   as they are, so both function arguments of the helper are the identity
   function.
columns_helper : Table_Column_Helper
columns_helper self =
    identity = x->x
    Table_Helpers.Table_Column_Helper.Value self.columns identity self identity
## UNSTABLE
An error returned when the table contains no rows.

View File

@ -63,6 +63,6 @@ type Excel_Format
format = should_treat_as_xls_format self.xls_format file
case self.section of
Sheet_Names -> Error.throw (Illegal_Argument_Error "Sheet_Names cannot be used for `write`.")
Range_Names -> Error.throw (Illegal_Argument_Error "Range_Names cannot be used for `write`.")
Sheet_Names -> Error.throw (Illegal_Argument_Error_Data "Sheet_Names cannot be used for `write`.")
Range_Names -> Error.throw (Illegal_Argument_Error_Data "Range_Names cannot be used for `write`.")
_ -> Excel_Writer.write_file file table on_existing_file self.section self.headers match_columns on_problems format

View File

@ -116,15 +116,16 @@ default_aggregate_column_name aggregate_column =
resolve_aggregate : Table -> Problem_Builder -> Aggregate_Column -> Aggregate_Column | Nothing
resolve_aggregate table problem_builder aggregate_column =
table_columns = table.columns
columns_helper = table.columns_helper
resolve : (Integer|Text|Column) -> Column ! Internal_Missing_Column_Error
resolve c =
res = Table_Helpers.resolve_column_helper table_columns c problem_builder
res = columns_helper.resolve_column_helper c problem_builder
res.if_nothing (Error.throw Internal_Missing_Column_Error)
resolve_selector_to_vector : Column_Selector -> [Column] ! Internal_Missing_Column_Error
resolve_selector_to_vector selector =
resolved = Table_Helpers.select_columns_helper table_columns selector reorder=True problem_builder
resolved = columns_helper.select_columns_helper selector reorder=True problem_builder
if resolved.is_empty then Error.throw Internal_Missing_Column_Error else resolved
resolve_order_by selector = case selector of
@ -140,10 +141,7 @@ resolve_aggregate table problem_builder aggregate_column =
Count new_name -> Count new_name
Count_Distinct c new_name ignore_nothing ->
new_c = case c of
## TODO once we have sum type pattern matching this could be replaced with a single branch
Column_Selector.By_Name _ _ -> resolve_selector_to_vector c
Column_Selector.By_Index _ -> resolve_selector_to_vector c
Column_Selector.By_Column _ -> resolve_selector_to_vector c
_ : Column_Selector -> resolve_selector_to_vector c
_ -> [resolve c]
Count_Distinct new_c new_name ignore_nothing
Count_Not_Nothing c new_name -> Count_Not_Nothing (resolve c) new_name

View File

@ -12,103 +12,199 @@ import project.Data.Sort_Column.Sort_Column
import project.Internal.Problem_Builder.Problem_Builder
import project.Internal.Unique_Name_Strategy.Unique_Name_Strategy
from project.Data.Aggregate_Column.Aggregate_Column import Minimum, Maximum
from project.Errors import Missing_Input_Columns_Data, No_Output_Columns, Too_Many_Column_Names_Provided_Data, Duplicate_Output_Column_Names_Data, Invalid_Output_Column_Names_Data, No_Input_Columns_Selected
polyglot java import java.util.HashSet
## PRIVATE
A helper function encapsulating shared code for `select_columns`
implementations of various Table variants. See the documentation for the
Table type for details.
type Table_Column_Helper
## PRIVATE
Helps managing table columns.
It takes a list of columns and returns the selected columns. It is the
responsibility of each implementation to reconstruct a proper table from the
resulting list of columns.
Arguments:
- internal_columns: A list of all columns in a table.
- make_column: A function which takes the internal column and creates a
fully fledged column from it.
- table: A reference to the table.
- materialize: A function which takes a table and materializes it to
in-memory.
Value internal_columns make_column table materialize
Arguments:
- internal_columns: A list of all columns in a table.
- selector: Column selection criteria or vector of column names.
- reorder: Specifies whether to reorder the matched columns according to the
order of the selection criteria.
If `False`, the matched entries are returned in the same order as in the
input.
If `True`, the matched entries are returned in the order of the criteria
matching them. If a single object has been matched by multiple criteria, it
is placed in the group belonging to the first matching criterion on the
list. If a single criterion's group has more than one element, their
relative order is the same as in the input.
- on_problems: Specifies the behavior when a problem occurs during the
operation. By default, a warning is issued, but the operation proceeds.
If set to `Report_Error`, the operation fails with a dataflow error.
If set to `Ignore`, the operation proceeds without errors or warnings.
select_columns : Vector -> Vector | Column_Selector -> Boolean -> Problem_Behavior -> Vector
select_columns internal_columns selector reorder on_problems =
problem_builder = Problem_Builder.new
result = select_columns_helper internal_columns selector reorder problem_builder
if result.is_empty then
problem_builder.report_other_warning No_Output_Columns
problem_builder.attach_problems_after on_problems result
## PRIVATE
A helper function encapsulating shared code for `select_columns`
implementations of various Table variants. See the documentation for the
Table type for details.
## PRIVATE
A helper function encapsulating shared code for `remove_columns`
implementations of various Table variants. See the documentation for the
Table type for details.
It returns the selected columns. It is the responsibility of each
implementation to reconstruct a proper table from the
resulting list of columns.
It takes a list of columns and returns the columns which should be kept. It
is the responsibility of each implementation to reconstruct a proper table
from the resulting list of columns.
Arguments:
- selector: Column selection criteria or vector of column names.
- reorder: Specifies whether to reorder the matched columns according to
the order of the selection criteria.
If `False`, the matched entries are returned in the same order as in
the input.
If `True`, the matched entries are returned in the order of the
criteria matching them. If a single object has been matched by multiple
criteria, it is placed in the group belonging to the first matching
criterion on the list. If a single criterion's group has more than one
element, their relative order is the same as in the input.
- on_problems: Specifies the behavior when a problem occurs during the
operation. By default, a warning is issued, but the operation proceeds.
If set to `Report_Error`, the operation fails with a dataflow error.
If set to `Ignore`, the operation proceeds without errors or warnings.
select_columns : Vector | Column_Selector -> Boolean -> Problem_Behavior -> Vector
select_columns self selector reorder on_problems =
problem_builder = Problem_Builder.new
result = self.select_columns_helper selector reorder problem_builder
if result.is_empty then
problem_builder.report_other_warning No_Output_Columns
problem_builder.attach_problems_after on_problems result
Arguments:
- internal_columns: A list of all columns in a table.
- selector: Column selection criteria or vector of column names.
- on_problems: Specifies the behavior when a problem occurs during the
operation. By default, a warning is issued, but the operation proceeds.
If set to `Report_Error`, the operation fails with a dataflow error.
If set to `Ignore`, the operation proceeds without errors or warnings.
remove_columns : Vector -> Vector | Column_Selector -> Problem_Behavior -> Vector
remove_columns internal_columns selector on_problems =
problem_builder = Problem_Builder.new
selection = select_columns_helper internal_columns selector reorder=False problem_builder
selected_names = Map.from_vector (selection.map column-> [column.name, True])
result = internal_columns.filter column->
should_be_removed = selected_names.get_or_else column.name False
should_be_removed.not
if result.is_empty then
problem_builder.report_other_warning No_Output_Columns
problem_builder.attach_problems_after on_problems result
## PRIVATE
A helper function encapsulating shared code for `remove_columns`
implementations of various Table variants. See the documentation for the
Table type for details.
## PRIVATE
A helper function encapsulating shared code for `reorder_columns`
implementations of various Table variants. See the documentation for the
Table type for details.
It returns the columns which should be kept. It is the responsibility of
each implementation to reconstruct a proper table from the resulting list
of columns.
It takes a list of columns and returns the columns which should be kept. It
is the responsibility of each implementation to reconstruct a proper table
from the resulting list of columns.
Arguments:
- internal_columns: A list of all columns in a table.
- selector: A selector specifying which columns should be moved and the order
in which they should appear in the result.
- position: Specifies how to place the selected columns in relation to the
columns which were not matched by the `selector` (if any).
- on_problems: Specifies the behavior when a problem occurs during the
operation. By default, a warning is issued, but the operation proceeds.
If set to `Report_Error`, the operation fails with a dataflow error.
If set to `Ignore`, the operation proceeds without errors or warnings.
reorder_columns : Vector -> Vector | Column_Selector -> Position.Position -> Problem_Behavior -> Vector
reorder_columns internal_columns selector position on_problems =
problem_builder = Problem_Builder.new
selection = select_columns_helper internal_columns selector reorder=True problem_builder
problem_builder.attach_problems_before on_problems <|
Arguments:
- selector: Column selection criteria or vector of column names.
- on_problems: Specifies the behavior when a problem occurs during the
operation. By default, a warning is issued, but the operation proceeds.
If set to `Report_Error`, the operation fails with a dataflow error.
If set to `Ignore`, the operation proceeds without errors or warnings.
remove_columns : Vector | Column_Selector -> Problem_Behavior -> Vector
remove_columns self selector on_problems =
problem_builder = Problem_Builder.new
selection = self.select_columns_helper selector reorder=False problem_builder
selected_names = Map.from_vector (selection.map column-> [column.name, True])
other_columns = internal_columns.filter column->
is_selected = selected_names.get_or_else column.name False
is_selected.not
result = case position of
Position.Before_Other_Columns -> selection + other_columns
Position.After_Other_Columns -> other_columns + selection
result
result = self.internal_columns.filter column->
should_be_removed = selected_names.get_or_else column.name False
should_be_removed.not
if result.is_empty then
problem_builder.report_other_warning No_Output_Columns
problem_builder.attach_problems_after on_problems result
## PRIVATE
A helper function encapsulating shared code for `reorder_columns`
implementations of various Table variants. See the documentation for the
Table type for details.
It returns the columns which should be kept. It is the responsibility of
each implementation to reconstruct a proper table from the resulting list
of columns.
Arguments:
- selector: A selector specifying which columns should be moved and the
order in which they should appear in the result.
- position: Specifies how to place the selected columns in relation to
the columns which were not matched by the `selector` (if any).
- on_problems: Specifies the behavior when a problem occurs during the
operation. By default, a warning is issued, but the operation proceeds.
If set to `Report_Error`, the operation fails with a dataflow error.
If set to `Ignore`, the operation proceeds without errors or warnings.
reorder_columns : Vector | Column_Selector -> Position.Position -> Problem_Behavior -> Vector
reorder_columns self selector position on_problems =
    problems = Problem_Builder.new
    # `reorder=True`: the moved columns follow the order of the selector.
    moved = self.select_columns_helper selector reorder=True problems
    problems.attach_problems_before on_problems <|
        moved_names = Map.from_vector (moved.map col-> [col.name, True])
        # Everything that the selector did not match.
        remaining = self.internal_columns.filter col-> (moved_names.get_or_else col.name False).not
        case position of
            Position.Before_Other_Columns -> moved + remaining
            Position.After_Other_Columns -> remaining + moved
## PRIVATE
A helper function which selects columns from the table based on the
provided selection criteria.
Arguments:
- selector: Column selection criteria or vector of column names.
- reorder: Specifies whether to reorder the matched columns according to
the order of the selection criteria.
If `False`, the matched entries are returned in the same order as in
the input.
If `True`, the matched entries are returned in the order of the
criteria matching them. If a single object has been matched by multiple
criteria, it is placed in the group belonging to the first matching
criterion on the list. If a single criterion's group has more than one
element, their relative order is the same as in the input.
- problem_builder: Encapsulates the aggregation of encountered problems.
select_columns_helper : Vector | Column_Selector -> Boolean -> Problem_Builder -> Vector
select_columns_helper self selector reorder problem_builder = case selector of
    # A plain vector is treated as a `By_Name` selection of its elements.
    _ : Vector.Vector ->
        self.select_columns_helper (Column_Selector.By_Name selector) reorder problem_builder
    Column_Selector.By_Name names matcher ->
        # Duplicated selectors are reported through the problem builder.
        unique_names = validate_unique names problem_builder.report_duplicate_column_selectors
        Matching.match_criteria_callback matcher self.internal_columns unique_names reorder=reorder name_mapper=(_.name) problem_callback=problem_builder.report_missing_input_columns
    Column_Selector.By_Index indices ->
        all_columns = self.internal_columns
        usable_indices = validate_indices all_columns.length indices problem_builder . map .first
        if reorder then select_indices_reordering all_columns usable_indices else
            select_indices_preserving_order all_columns usable_indices
    # Column objects are resolved through their names, matched case-sensitively.
    Column_Selector.By_Column columns ->
        by_name = Column_Selector.By_Name (columns.map .name) Text_Matcher.Case_Sensitive
        self.select_columns_helper by_name reorder=reorder problem_builder=problem_builder
    Column_Selector.Blank_Columns when_any treat_nans_as_blank -> if self.internal_columns.is_empty then [] else
        # One indicator column per input column, named `blanks_<index>`:
        # 1 where the cell is blank and 0 where it is not.
        indicators = self.internal_columns.map_with_index ix-> internal_column->
            is_blank = (self.make_column internal_column).is_blank treat_nans_as_blank
            is_blank.iif 1 0 . rename "blanks_"+ix.to_text
        # A custom column cannot simply be used in the aggregate because of
        # how the column selector works (this may need to be revisited), so
        # the indicators are first set on a table that starts with no columns.
        empty_table = self.table.select_columns [] on_problems=Problem_Behavior.Ignore
        indicator_table = indicators.fold empty_table acc-> indicator->
            acc.set indicator.name indicator
        # Maximum is equivalent to Exists and Minimum is equivalent to Forall.
        aggregate_kind = if when_any then Maximum else Minimum
        aggregates = indicators.map indicator-> aggregate_kind indicator.name
        aggregated = self.materialize <| Panic.rethrow <|
            indicator_table.aggregate aggregates on_problems=Problem_Behavior.Report_Error
        # The `reorder` argument has no meaning for the Blank_Columns selector
        # - either way all blank columns are selected in the order that they
        # are in the Table.
        self.internal_columns.filter_with_index i-> _->
            # TODO once we have the Row type it would be nice to use it here
            case aggregated.at i . first of
                # Nothing is returned if there were no rows; in that case the
                # column is treated as blank by convention, regardless of
                # `when_any`.
                Nothing -> True
                1 -> True
                0 -> False
                _ -> Panic.throw (Illegal_State_Error_Data "Unexpected result. Perhaps an implementation bug of Column_Selector.Blank_Columns.")
## PRIVATE
A helper function which selects a single column from the table. It is aligned
with `select_columns_helper`, except for the `Blank_Columns` selector, which is
handled only by `select_columns_helper` and not by this method.
resolve_column_helper : (Integer | Text | Column) -> Problem_Builder -> a | Nothing
resolve_column_helper self selector problem_builder = case selector of
    # A name: exact, case-sensitive match against the column names.
    _ : Text ->
        matches = Matching.match_criteria_callback Text_Matcher.Case_Sensitive self.internal_columns [selector] reorder=True name_mapper=(_.name) problem_callback=problem_builder.report_missing_input_columns
        case matches.length of
            1 -> matches.first
            0 -> Nothing
            _ -> Panic.throw (Illegal_State_Error_Data "A single exact match should never match more than one column. Perhaps the table breaks the invariant of unique column names?")
    # An index: an invalid one is reported and resolves to Nothing.
    _ : Integer ->
        in_bounds = is_index_valid self.internal_columns.length selector
        if in_bounds then self.internal_columns.at selector else
            problem_builder.report_oob_indices [selector]
            Nothing
    # A wildcard to match any backend's column; it is resolved by its name.
    _ ->
        self.resolve_column_helper selector.name problem_builder=problem_builder
## PRIVATE
A helper function encapsulating shared code for `rename_columns`
@ -215,61 +311,6 @@ sort_columns internal_columns direction text_ordering =
False -> .compare_to
internal_columns.sort on=mapper by=comparator order=direction
## PRIVATE
A helper function which selects columns from the table based on the provided
selection criteria.
Arguments:
- internal_columns: A list of all columns in a table.
- selector: Column selection criteria or vector of column names.
- reorder: Specifies whether to reorder the matched columns according to the
order of the selection criteria.
If `False`, the matched entries are returned in the same order as in the
input.
If `True`, the matched entries are returned in the order of the criteria
matching them. If a single object has been matched by multiple criteria, it
is placed in the group belonging to the first matching criterion on the
list. If a single criterion's group has more than one element, their
relative order is the same as in the input.
- problem_builder: Encapsulates the aggregation of encountered problems.
select_columns_helper : Vector -> Vector | Column_Selector -> Boolean -> Problem_Builder -> Vector
select_columns_helper internal_columns selector reorder problem_builder = case selector of
    # A plain vector is treated as a `By_Name` selection of its elements.
    _ : Vector.Vector ->
        select_columns_helper internal_columns (Column_Selector.By_Name selector) reorder problem_builder
    Column_Selector.By_Name names matcher ->
        # Duplicated selectors are reported through the problem builder.
        unique_names = validate_unique names problem_builder.report_duplicate_column_selectors
        Matching.match_criteria_callback matcher internal_columns unique_names reorder=reorder name_mapper=(_.name) problem_callback=problem_builder.report_missing_input_columns
    Column_Selector.By_Index indices ->
        usable_indices = validate_indices internal_columns.length indices problem_builder . map .first
        if reorder then select_indices_reordering internal_columns usable_indices else
            select_indices_preserving_order internal_columns usable_indices
    # Column objects are resolved through their names, matched case-sensitively.
    Column_Selector.By_Column columns ->
        by_name = Column_Selector.By_Name (columns.map .name) Text_Matcher.Case_Sensitive
        select_columns_helper internal_columns by_name reorder=reorder problem_builder=problem_builder
## PRIVATE
A helper function which selects a single column from the table. It is aligned with the
`select_columns_helper`.
resolve_column_helper : Vector a -> (Integer | Text | Column) -> Problem_Builder -> a | Nothing
resolve_column_helper internal_columns selector problem_builder = case selector of
    # A name: exact, case-sensitive match against the column names.
    _ : Text ->
        matches = Matching.match_criteria_callback Text_Matcher.Case_Sensitive internal_columns [selector] reorder=True name_mapper=(_.name) problem_callback=problem_builder.report_missing_input_columns
        case matches.length of
            1 -> matches.first
            0 -> Nothing
            _ -> Panic.throw (Illegal_State_Error_Data "A single exact match should never match more than one column. Perhaps the table breaks the invariant of unique column names?")
    # An index: an invalid one is reported and resolves to Nothing.
    _ : Integer ->
        in_bounds = is_index_valid internal_columns.length selector
        if in_bounds then internal_columns.at selector else
            problem_builder.report_oob_indices [selector]
            Nothing
    # A wildcard to match any backend's column; it is resolved by its name.
    _ ->
        resolve_column_helper internal_columns selector.name problem_builder=problem_builder
## PRIVATE
Converts the generic `No_Matches_Found` error to a more specific
`Missing_Input_Columns`. Any other errors are returned as-is.
@ -558,20 +599,13 @@ select_columns_by_column_reference internal_columns column_selectors problem_bui
## PRIVATE
A helper method gathering the common logic for constructing expressions that
can filter out blank rows.
filter_blank_rows : Table -> (Column -> Boolean) -> (Column -> Boolean) -> Boolean -> Boolean -> Table
filter_blank_rows table can_contain_text can_contain_double when_any treat_nans_as_blank =
filter_blank_rows : Table -> Boolean -> Boolean -> Table
filter_blank_rows table when_any treat_nans_as_blank =
cols = table.columns
case cols.not_empty of
True ->
handle col =
is_blank = case can_contain_text col of
True -> col.is_empty
False -> col.is_missing
case treat_nans_as_blank && (can_contain_double col) of
True -> is_blank || col.is_nan
False -> is_blank
merge = if when_any then (||) else (&&)
missing_mask = cols.map handle . reduce merge
missing_mask = cols.map (_.is_blank treat_nans_as_blank) . reduce merge
non_missing_mask = missing_mask.not
table.filter non_missing_mask
False -> table

View File

@ -10,6 +10,10 @@ public class Polyglot_Utils {
* type.
*/
public static Object convertPolyglotValue(Value item) {
if (item == null) {
return null;
}
if (item.isDate()) {
LocalDate d = item.asDate();
if (item.isTime()) {

View File

@ -2,6 +2,8 @@ package org.enso.table.data.column.storage;
import java.util.BitSet;
import java.util.List;
import org.enso.base.polyglot.Polyglot_Utils;
import org.enso.table.data.column.builder.object.InferredBuilder;
import org.enso.table.data.column.operation.map.MapOpStorage;
import org.enso.table.data.column.operation.map.MapOperation;
import org.enso.table.data.column.operation.map.UnaryMapOperation;
@ -165,6 +167,22 @@ public final class BoolStorage extends Storage<Boolean> {
return negated;
}
/**
 * Builds a new storage by choosing, for every row, one of two constant values
 * depending on this boolean storage.
 *
 * @param when_true the value used for rows that hold {@code true}
 * @param when_false the value used for rows that hold {@code false}
 * @return the sealed storage; rows marked as missing hold {@code null}
 */
public Storage<?> iif(Value when_true, Value when_false) {
  // Both branch values are converted once, before the loop.
  final Object onTrue = Polyglot_Utils.convertPolyglotValue(when_true);
  final Object onFalse = Polyglot_Utils.convertPolyglotValue(when_false);
  final InferredBuilder out = new InferredBuilder(size);
  for (int row = 0; row < size; row++) {
    // A missing row stays missing regardless of the branch values.
    if (isMissing.get(row)) {
      out.append(null);
      continue;
    }
    out.append(getItem(row) ? onTrue : onFalse);
  }
  return out.seal();
}
private static MapOpStorage<Boolean, BoolStorage> buildOps() {
MapOpStorage<Boolean, BoolStorage> ops = new MapOpStorage<>();
ops.add(

View File

@ -177,7 +177,7 @@ public abstract class Storage<T> {
if (name != null && isOpVectorized(name)) {
return runVectorizedMap(name, null);
}
Object missingValue = onMissing == null ? null : Polyglot_Utils.convertPolyglotValue(onMissing);
Object missingValue = Polyglot_Utils.convertPolyglotValue(onMissing);
Builder builder = new InferredBuilder(size());
for (int i = 0; i < size(); i++) {
Object it = getItemBoxed(i);

View File

@ -3,9 +3,10 @@ from Standard.Base.Data.Index_Sub_Range import While, Sample, Every
import Standard.Base.Data.Index_Sub_Range
from Standard.Base.Error.Problem_Behavior import Report_Error
from Standard.Table import Column_Name_Mapping, Sort_Column, Sort_Column_Selector, Position
from Standard.Table import Column_Name_Mapping, Sort_Column, Sort_Column_Selector, Position, Column_Selector
import Standard.Table.Data.Value_Type.Value_Type
from Standard.Table.Data.Column_Selector.Column_Selector import By_Name, By_Index, By_Column
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import Count_Distinct
from Standard.Table.Errors import all
from Standard.Database.Errors import SQL_Error_Data, Unsupported_Database_Operation_Error_Data
@ -1294,17 +1295,33 @@ spec prefix table_builder test_selection pending=Nothing =
problems = [Index_Out_Of_Bounds_Error_Data 4 1]
Problems.test_problem_handling action problems tester
Test.group prefix+"Column Operations" pending=pending <|
Test.specify "iif" <|
t = table_builder [["X", [True, False, Nothing, True]]]
t.at "X" . iif 22 33 . to_vector . should_equal [22, 33, Nothing, 22]
Test.specify "iif on Columns" pending="Not implemented yet." Nothing
Test.group prefix+"Dropping Missing Values" pending=pending <|
t4 = table_builder [["a", [0, 1, Nothing, 42, Nothing, 5]], ["b", [True, Nothing, True, False, Nothing, False]], ["c", ["", "foo", "bar", Nothing, Nothing, " "]]]
t0 = table_builder [["a", [0, 1, Nothing, 42, Nothing, 5]], ["b", [True, Nothing, True, False, Nothing, False]], ["c", ["", "foo", "bar", Nothing, Nothing, " "]]]
t1 =
a = ["a", [1, Nothing, 3, 4]]
b = ["b", ["a", "b", Nothing, " "]]
c = ["c", [10, 20, 30, 40]]
d = ["d", [Nothing, True, False, True]]
e = ["e", ["", "", "foo", "bar"]]
f = ["f", [Nothing, "", Nothing, ""]]
table_builder [a, b, c, d, e, f]
Test.specify "filter_blank_rows should drop rows that contain at least one missing cell" <|
d = t4.filter_blank_rows when_any=True
d = t0.filter_blank_rows when_any=True
d.row_count . should_equal 1
d.at "a" . to_vector . should_equal [5]
d.at "b" . to_vector . should_equal [False]
d.at "c" . to_vector . should_equal [" "]
Test.specify "filter_blank_rows should drop rows that are all blank" <|
d2 = t4.filter_blank_rows when_any=False
d2 = t0.filter_blank_rows when_any=False
d2.at "a" . to_vector . should_equal [0, 1, Nothing, 42, 5]
d2.at "b" . to_vector . should_equal [True, Nothing, True, False, False]
d2.at "c" . to_vector . should_equal ["", "foo", "bar", Nothing, " "]
@ -1331,29 +1348,86 @@ spec prefix table_builder test_selection pending=Nothing =
t6.row_count . should_equal 0
t6.columns . should_equal []
t = table_builder [["X", [2.0, 1.5, Number.nan, Number.nan]], ["Y", [Nothing, 2.0, Nothing, 5.0]]]
Test.specify "should allow to remove blank columns" <|
r1 = t1.remove_columns (Column_Selector.Blank_Columns when_any=False)
r1.columns.map .name . should_equal ["a", "b", "c", "d", "e"]
r1.at "a" . to_vector . should_equal [1, Nothing, 3, 4]
r2 = t1.remove_columns (Column_Selector.Blank_Columns when_any=True)
r2.columns.map .name . should_equal ["c"]
r2.at "c" . to_vector . should_equal [10, 20, 30, 40]
t3 = table_builder [["X", [2.0, 1.5, Number.nan, Number.nan]], ["Y", [Nothing, 2.0, Nothing, 5.0]]]
t4 =
c = ["c", [10, 20, 40, 30]]
g = ["g", [Number.nan, 1, 2, 3.4]]
h = ["h", [Number.nan, Nothing, Number.nan, Nothing]]
table_builder [c, g, h]
if test_selection.is_nan_and_nothing_distinct then
Test.specify "should not treat NaNs as blank by default" <|
t1 = t.filter_blank_rows when_any=True
r1 = t3.filter_blank_rows when_any=True
# We cannot use `Vector.==` because `NaN != NaN`.
t1.at "X" . to_vector . to_text . should_equal "[1.5, NaN]"
t1.at "Y" . to_vector . should_equal [2.0, 5.0]
r1.at "X" . to_vector . to_text . should_equal "[1.5, NaN]"
r1.at "Y" . to_vector . should_equal [2.0, 5.0]
t2 = t.filter_blank_rows when_any=False
t2.at "X" . to_vector . to_text . should_equal "[2.0, 1.5, NaN, NaN]"
t2.at "Y" . to_vector . should_equal [Nothing, 2.0, Nothing, 5.0]
r2 = t3.filter_blank_rows when_any=False
r2.at "X" . to_vector . to_text . should_equal "[2.0, 1.5, NaN, NaN]"
r2.at "Y" . to_vector . should_equal [Nothing, 2.0, Nothing, 5.0]
r3 = t4.remove_columns (Column_Selector.Blank_Columns when_any=False)
r3.columns.map .name . should_equal ["c", "g", "h"]
r3.at "g" . to_vector . to_text . should_equal "[NaN, 1.0, 2.0, 3.4]"
r4 = t4.remove_columns (Column_Selector.Blank_Columns when_any=True)
r4.columns.map .name . should_equal ["c", "g"]
r4.at "g" . to_vector . to_text . should_equal "[NaN, 1.0, 2.0, 3.4]"
Test.specify "should allow to treat NaNs as blank if asked" <|
t1 = t.filter_blank_rows when_any=True treat_nans_as_blank=True
r1 = t3.filter_blank_rows when_any=True treat_nans_as_blank=True
# We cannot use `Vector.==` because `NaN != NaN`.
t1.at "X" . to_vector . should_equal [1.5]
t1.at "Y" . to_vector . should_equal [2.0]
r1.at "X" . to_vector . should_equal [1.5]
r1.at "Y" . to_vector . should_equal [2.0]
t2 = t.filter_blank_rows when_any=False treat_nans_as_blank=True
t2.at "X" . to_vector . to_text . should_equal "[2.0, 1.5, NaN]"
t2.at "Y" . to_vector . should_equal [Nothing, 2.0, 5.0]
r2 = t3.filter_blank_rows when_any=False treat_nans_as_blank=True
r2.at "X" . to_vector . to_text . should_equal "[2.0, 1.5, NaN]"
r2.at "Y" . to_vector . should_equal [Nothing, 2.0, 5.0]
r3 = t4.remove_columns (Column_Selector.Blank_Columns when_any=False treat_nans_as_blank=True)
r3.columns.map .name . should_equal ["c", "g"]
r3.at "g" . to_vector . to_text . should_equal "[NaN, 1.0, 2.0, 3.4]"
r4 = t4.remove_columns (Column_Selector.Blank_Columns when_any=True treat_nans_as_blank=True)
r4.columns.map .name . should_equal ["c"]
r4.at "c" . to_vector . should_equal [10, 20, 40, 30]
if test_selection.is_nan_and_nothing_distinct.not then
Test.specify "this backend treats NaN as Nothing" <|
t.at "X" . to_vector . should_equal [2.0, 1.5, Nothing, Nothing]
t.at "X" . is_nan . to_vector . should_fail_with Unsupported_Database_Operation_Error_Data
t3.at "X" . to_vector . should_equal [2.0, 1.5, Nothing, Nothing]
t3.at "X" . is_nan . to_vector . should_fail_with Unsupported_Database_Operation_Error_Data
Test.specify "Blank_Columns selector should work for all kinds of methods accepting Column_Selector" <|
t = table_builder [["X", [1, 2, 3, 4]], ["Y", [Nothing, "", Nothing, Nothing]], ["Z", [Nothing, True, False, Nothing]]]
t.select_columns Column_Selector.Blank_Columns . columns . map .name . should_equal ["Y"]
t.select_columns (Column_Selector.Blank_Columns when_any=True) . columns . map .name . should_equal ["Y", "Z"]
t.reorder_columns Column_Selector.Blank_Columns . columns . map .name . should_equal ["Y", "X", "Z"]
r1 = t.aggregate [Count_Distinct Column_Selector.Blank_Columns]
r1.columns . map .name . should_equal ["Count Distinct Y"]
r1.at "Count Distinct Y" . to_vector . should_equal [2]
r2 = t.aggregate [(Count_Distinct Column_Selector.Blank_Columns ignore_nothing=True)]
r2.columns . map .name . should_equal ["Count Distinct Y"]
r2.at "Count Distinct Y" . to_vector . should_equal [1]
Test.specify "Blank_Columns selector should deal with edge cases" <|
t = table_builder [["X", [1, 2, 3, 4]]]
no_cols = t.select_columns []
no_rows = t.filter "X" (Filter_Condition.Equal to=0)
no_cols.columns . should_equal []
no_rows.row_count . should_equal 0
no_rows.at "X" . to_vector . should_equal []
no_cols.select_columns Column_Selector.Blank_Columns . columns . map .name . should_equal []
no_rows.select_columns Column_Selector.Blank_Columns . columns . map .name . should_equal ["X"]
no_rows.remove_columns Column_Selector.Blank_Columns . columns . map .name . should_equal []

View File

@ -185,18 +185,6 @@ spec prefix connection pending=Nothing =
col.count . should_equal 3
col.count_missing . should_equal 2
Test.specify "drop_missing_columns should drop columns that contain at least one missing row in a Table" <|
t5 = upload "T5" <|
Table.new [["a", [1, 2, 3]], ["b", [True, False, Nothing]], ["c", ["foo", Nothing, "aaa"]]]
r = t5.drop_missing_columns
r.columns.map .name . should_equal ["a"]
r.at "a" . to_vector . should_equal [1, 2, 3]
empty = t4.drop_missing_columns
empty.column_count . should_equal 0
empty.read.column_count . should_equal 0
Test.group prefix+"Column-wide statistics" pending=pending <|
Test.specify 'should allow computing basic column-wide stats' <|
t7 = upload "T7" <|

View File

@ -21,7 +21,7 @@ type Fake_Test_Connection
query self query alias="" = case query of
Text -> self.query (SQL_Query.Table_Name query) alias
SQL_Query.Raw_SQL _ ->
Error.throw (Illegal_Argument_Error "Cannot query a fake connection with raw SQL")
Error.throw (Illegal_Argument_Error_Data "Cannot query a fake connection with raw SQL")
SQL_Query.Table_Name name ->
columns = self.tables.get name
Database_Table.make_table self name columns (Context.for_table name)

View File

@ -396,15 +396,22 @@ spec =
t2.at "X" . to_vector . should_equal [1, 5]
t2.at "Y" . to_vector . should_equal ['A', 0]
# TODO move these to Common_Table_Spec
Test.specify "should drop columns that contain at least one missing row in a Table" <|
a = ["a", [1, Nothing, 3, 4]]
b = ["b", [1, 2, Nothing, "x"]]
c = ["c", [10, 20, 30, 40]]
t = Table.new [a, b, c]
d = t.drop_missing_columns
d.columns.map .name . should_equal ["c"]
d.at "c" . to_vector . should_equal [10, 20, 30, 40]
t3 = Table.new [["X", [1, 2, 3]], ["Y", ["", Nothing, Number.nan]]]
t4 = t3.remove_columns (Column_Selector.Blank_Columns treat_nans_as_blank=True)
t4.columns . map .name . should_equal ["X"]
Test.specify "Blank_Columns selector should work for all kinds of methods accepting Column_Selector" <|
t = Table.new [["X", [1, 2, 3, 4, 5]], ["Y", ["", Nothing, Nothing, Number.nan, ""]]]
r1 = t.distinct (Column_Selector.Blank_Columns treat_nans_as_blank=True)
r1.at "Y" . to_vector . to_text . should_equal "['', Nothing, NaN]"
r1.at "X" . to_vector . should_equal [1, 2, 4]
# TODO this could be moved to Common_Table_Spec once replace_text is implemented for Database too
t2 = Table.new [["X", [1, 2, 3, 4, 5]], ["Y", ["", Nothing, Nothing, Nothing, ""]], ["Z", ["", "---", "-1", "", "foobar"]]]
r2 = t2.replace_text (Column_Selector.Blank_Columns when_any=True) "-" "A"
r2.at "X" . to_vector . should_equal [1, 2, 3, 4, 5]
r2.at "Y" . to_vector . should_equal ["", Nothing, Nothing, Nothing, ""]
r2.at "Z" . to_vector . should_equal ["", "AAA", "A1", "", "foobar"]
Test.group "Info" <|
Test.specify "should return Table information" <|