mirror of
https://github.com/enso-org/enso.git
synced 2024-11-26 17:06:48 +03:00
Add support for Blank_Columns
to Table and Database (#3812)
Implements https://www.pivotaltracker.com/story/show/183390281 and https://www.pivotaltracker.com/story/show/183390394
This commit is contained in:
parent
81e5e77ae8
commit
cc76e7d36a
@ -214,6 +214,9 @@
|
||||
- [Extended `Filter_Condition` with `Is_In` and `Not_In`.][3790]
|
||||
- [Replaced `Table.drop_missing_rows` with `filter_blank_rows` with an updated
|
||||
API.][3805]
|
||||
- [Replaced `Table.drop_missing_columns` with
|
||||
`Table.remove_columns Column_Selector.Blank_Columns` by adding the new column
|
||||
selector variant.][3812]
|
||||
|
||||
[debug-shortcuts]:
|
||||
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
||||
@ -344,6 +347,7 @@
|
||||
[3793]: https://github.com/enso-org/enso/pull/3793
|
||||
[3790]: https://github.com/enso-org/enso/pull/3790
|
||||
[3805]: https://github.com/enso-org/enso/pull/3805
|
||||
[3812]: https://github.com/enso-org/enso/pull/3812
|
||||
|
||||
#### Enso Compiler
|
||||
|
||||
|
@ -259,8 +259,8 @@ type Any
|
||||
from Standard.Base import all
|
||||
|
||||
example_catch =
|
||||
error = Error.throw (Illegal_Argument_Error "My message")
|
||||
error.catch Illegal_Argument_Error (err -> err.message)
|
||||
error = Error.throw (Illegal_Argument_Error_Data "My message")
|
||||
error.catch Illegal_Argument_Error_Data (err -> err.message)
|
||||
|
||||
> Example
|
||||
Catching any dataflow error and turning it into a regular value.
|
||||
|
@ -160,7 +160,7 @@ type Utf_16_Span
|
||||
Utility function taking a range pointing at grapheme clusters and converting
|
||||
to a range on the underlying code units.
|
||||
range_to_char_indices : Text -> Range -> Range ! (Index_Out_Of_Bounds_Error | Illegal_Argument_Error)
|
||||
range_to_char_indices text range = if range.step != 1 then Error.throw (Illegal_Argument_Error "Text indexing only supports ranges with step equal to 1.") else
|
||||
range_to_char_indices text range = if range.step != 1 then Error.throw (Illegal_Argument_Error_Data "Text indexing only supports ranges with step equal to 1.") else
|
||||
len = text.length
|
||||
start = if range.start < 0 then range.start + len else range.start
|
||||
end = if range.end == Nothing then len else (if range.end < 0 then range.end + len else range.end)
|
||||
|
@ -212,7 +212,7 @@ type Date
|
||||
## Returns the century of the date.
|
||||
century : Integer
|
||||
century self = if self.year > 0 then (self.year - 1).div 100 + 1 else
|
||||
Error.throw (Illegal_Argument_Error "Century can only be given for AD years.")
|
||||
Error.throw (Illegal_Argument_Error_Data "Century can only be given for AD years.")
|
||||
|
||||
## Returns the quarter of the year the date falls into.
|
||||
quarter : Integer
|
||||
|
@ -15,7 +15,7 @@ Day_Of_Week.from (that : Integer) (first_day:Day_Of_Week=Day_Of_Week.Sunday) (st
|
||||
True ->
|
||||
valid_range = if start_at_zero then "0-6" else "1-7"
|
||||
message = "Invalid day of week (must be " + valid_range + ")."
|
||||
Error.throw (Illegal_Argument_Error message)
|
||||
Error.throw (Illegal_Argument_Error_Data message)
|
||||
False ->
|
||||
day_number = if first_day == Day_Of_Week.Sunday then shifted else
|
||||
(shifted + (first_day.to_integer start_at_zero=True)) % 7
|
||||
|
@ -74,8 +74,8 @@ type Error
|
||||
from Standard.Base import all
|
||||
|
||||
example_catch =
|
||||
error = Error.throw (Illegal_Argument_Error "My message")
|
||||
error.catch Illegal_Argument_Error (err -> err.message)
|
||||
error = Error.throw (Illegal_Argument_Error_Data "My message")
|
||||
error.catch Illegal_Argument_Error_Data (err -> err.message)
|
||||
|
||||
> Example
|
||||
Catching any dataflow error and turning it into a regular value.
|
||||
@ -299,7 +299,7 @@ type Panic
|
||||
and rethrow any others, without affecting their stacktraces.
|
||||
|
||||
Panic.catch Any (Panic.throw "foo") caught_panic-> case caught_panic.payload of
|
||||
Illegal_Argument_Error message _ -> "Illegal arguments were provided: "+message
|
||||
Illegal_Argument_Error_Data message _ -> "Illegal arguments were provided: "+message
|
||||
other_panic -> Panic.throw other_panic
|
||||
throw : Any -> Panic
|
||||
throw payload = @Builtin_Method "Panic.throw"
|
||||
@ -378,13 +378,13 @@ type Panic
|
||||
> Example
|
||||
Handling a specific type of panic.
|
||||
|
||||
Panic.catch Illegal_Argument_Error (Panic.throw (Illegal_Argument_Error "Oh no!" Nothing)) error->
|
||||
Panic.catch Illegal_Argument_Error_Data (Panic.throw (Illegal_Argument_Error_Data "Oh no!" Nothing)) error->
|
||||
"Caught an `Illegal_Argument_Error`: "+error.payload.message
|
||||
|
||||
> Example
|
||||
Handling any panic.
|
||||
|
||||
Panic.catch Any (Panic.throw (Illegal_Argument_Error "Oh no!" Nothing)) error->
|
||||
Panic.catch Any (Panic.throw (Illegal_Argument_Error_Data "Oh no!" Nothing)) error->
|
||||
"Caught some panic!"
|
||||
|
||||
> Example
|
||||
@ -395,7 +395,7 @@ type Panic
|
||||
polyglot java import java.lang.NumberFormatException
|
||||
parse str =
|
||||
Panic.catch NumberFormatException (Long.parseLong str) caught_panic->
|
||||
Error.throw (Illegal_Argument_Error "The provided string is not a valid number: "+caught_panic.payload.cause.getMessage)
|
||||
Error.throw (Illegal_Argument_Error_Data "The provided string is not a valid number: "+caught_panic.payload.cause.getMessage)
|
||||
catch : Any -> Any -> (Caught_Panic -> Any) -> Any
|
||||
catch panic_type ~action handler =
|
||||
Panic.catch_primitive action caught_panic->
|
||||
@ -430,7 +430,7 @@ type Panic
|
||||
polyglot java import java.lang.NumberFormatException
|
||||
parse str =
|
||||
Panic.catch_java NumberFormatException (Long.parseLong str) java_exception->
|
||||
Error.throw (Illegal_Argument_Error "The provided string is not a valid number: "+java_exception.getMessage)
|
||||
Error.throw (Illegal_Argument_Error_Data "The provided string is not a valid number: "+java_exception.getMessage)
|
||||
catch_java : Any -> Any -> (Throwable -> Any) -> Any
|
||||
catch_java panic_type ~action handler =
|
||||
Panic.catch_primitive action caught_panic-> case caught_panic.payload of
|
||||
@ -457,12 +457,12 @@ type Panic
|
||||
> Example
|
||||
Converting an expected panic to a dataflow error.
|
||||
|
||||
Panic.recover Illegal_Argument_Error (Panic.throw (Illegal_Argument_Error "Oh!" Nothing))
|
||||
Panic.recover Illegal_Argument_Error_Data (Panic.throw (Illegal_Argument_Error_Data "Oh!" Nothing))
|
||||
|
||||
> Example
|
||||
Converting one of many expected panic types to a dataflow error.
|
||||
|
||||
Panic.recover [Illegal_Argument_Error, Illegal_State_Error] (Panic.throw (Illegal_Argument_Error "Oh!" Nothing))
|
||||
Panic.recover [Illegal_Argument_Error, Illegal_State_Error] (Panic.throw (Illegal_Argument_Error_Data "Oh!" Nothing))
|
||||
recover : (Vector.Vector Any | Any) -> Any -> Any
|
||||
recover expected_types ~action =
|
||||
types_to_check = case expected_types of
|
||||
|
@ -454,6 +454,35 @@ type Column
|
||||
not : Column
|
||||
not self = self.make_unary_op "NOT"
|
||||
|
||||
## UNSTABLE
|
||||
Replaces `True` values with `when_true` and `False` with `when_false`.
|
||||
Only meant for use with boolean columns.
|
||||
|
||||
TODO: Currently `when_true` and `when_false` need to be a single value.
|
||||
In the future the API will also support row-based IIF if they are columns.
|
||||
iif : Any -> Any -> Column
|
||||
iif self when_true when_false =
|
||||
## TODO we should adjust new_type based on types when_true and
|
||||
when_false, but this relies on the Value Types design which is still
|
||||
in progress. This function has status of an internal prototype for
|
||||
now, so we just rely on a simplified handling. Once Value Types are
|
||||
properly implemented, this should be accordingly extended for the
|
||||
full implementation of IIF. We will need to handle when_true and
|
||||
when_false being either columns or regular values and rely on a
|
||||
mapping of Enso base types to SQL types, and a rule for extracting a
|
||||
common type.
|
||||
approximate_type x = case x of
|
||||
_ : Integer -> SQL_Type.integer
|
||||
_ : Decimal -> SQL_Type.real
|
||||
_ : Text -> SQL_Type.text
|
||||
_ : Boolean -> SQL_Type.boolean
|
||||
_ -> Error.throw (Illegal_Argument_Error_Data "Unsupported type.")
|
||||
left_type = approximate_type when_true
|
||||
right_type = approximate_type when_false
|
||||
if left_type != right_type then Error.throw (Illegal_Argument_Error_Data "when_true and when_false types do not match") else
|
||||
self.make_op "IIF" [when_true, when_false] new_type=left_type
|
||||
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Returns a column of booleans, with `True` items at the positions where
|
||||
@ -473,6 +502,25 @@ type Column
|
||||
is_empty : Column
|
||||
is_empty self = self.make_unary_op "IS_EMPTY" new_type=SQL_Type.boolean
|
||||
|
||||
## PRIVATE
|
||||
Returns a column of booleans with `True` at the positions where this
|
||||
column contains a blank value.
|
||||
|
||||
Arguments:
|
||||
- treat_nans_as_blank: If `True`, then `Number.nan` is considered as
|
||||
blank.
|
||||
|
||||
? Blank values
|
||||
Blank values are `Nothing`, `""` and, depending on the `treat_nans_as_blank` setting, `Number.nan`.
|
||||
is_blank : Boolean -> Boolean -> Column
|
||||
is_blank self treat_nans_as_blank=False =
|
||||
is_blank = case self.sql_type.is_definitely_text of
|
||||
True -> self.is_empty
|
||||
False -> self.is_missing
|
||||
case treat_nans_as_blank && self.sql_type.is_definitely_double of
|
||||
True -> is_blank || self.is_nan
|
||||
False -> is_blank
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Returns a new column where missing values have been replaced with the
|
||||
|
@ -145,7 +145,7 @@ type Table
|
||||
Icon: select_column
|
||||
select_columns : Vector Text | Column_Selector -> Boolean -> Problem_Behavior -> Table
|
||||
select_columns self (columns = Column_Selector.By_Index [0]) (reorder = False) (on_problems = Report_Warning) =
|
||||
new_columns = Table_Helpers.select_columns internal_columns=self.internal_columns selector=columns reorder=reorder on_problems=on_problems
|
||||
new_columns = self.columns_helper.select_columns selector=columns reorder=reorder on_problems=on_problems
|
||||
self.updated_columns new_columns
|
||||
|
||||
## Returns a new table with the chosen set of columns, as specified by the
|
||||
@ -195,7 +195,7 @@ type Table
|
||||
table.remove_columns (Column_Selector.By_Column [column1, column2])
|
||||
remove_columns : Vector Text | Column_Selector -> Problem_Behavior -> Table
|
||||
remove_columns self (columns = Column_Selector.By_Index [0]) (on_problems = Report_Warning) =
|
||||
new_columns = Table_Helpers.remove_columns internal_columns=self.internal_columns selector=columns on_problems=on_problems
|
||||
new_columns = self.columns_helper.remove_columns selector=columns on_problems=on_problems
|
||||
self.updated_columns new_columns
|
||||
|
||||
## Returns a new table with the specified selection of columns moved to
|
||||
@ -250,7 +250,7 @@ type Table
|
||||
table.reorder_columns (Column_Selector.By_Column [column1, column2])
|
||||
reorder_columns : Vector Text | Column_Selector -> Position.Position -> Problem_Behavior -> Table
|
||||
reorder_columns self (columns = Column_Selector.By_Index [0]) (position = Position.Before_Other_Columns) (on_problems = Report_Warning) =
|
||||
new_columns = Table_Helpers.reorder_columns internal_columns=self.internal_columns selector=columns position=position on_problems=on_problems
|
||||
new_columns = self.columns_helper.reorder_columns selector=columns position=position on_problems=on_problems
|
||||
self.updated_columns new_columns
|
||||
|
||||
## Returns a new table with the columns sorted by name according to the
|
||||
@ -797,24 +797,7 @@ type Table
|
||||
Blank values are `Nothing`, `""` and, depending on the `treat_nans_as_blank` setting, `Number.nan`.
|
||||
filter_blank_rows : Boolean -> Boolean -> Table
|
||||
filter_blank_rows self when_any=False treat_nans_as_blank=False =
|
||||
can_contain_text col = col.sql_type.is_definitely_text
|
||||
can_contain_double col = col.sql_type.is_definitely_double
|
||||
Table_Helpers.filter_blank_rows self can_contain_text can_contain_double when_any treat_nans_as_blank
|
||||
|
||||
## DEPRECATED Will be replaced with `Incomplete_Columns` selector (to be used with `remove_columns`).
|
||||
drop_missing_columns : Table
|
||||
drop_missing_columns self =
|
||||
rows_expr = Expression.Operation "COUNT_ROWS" []
|
||||
all_rows_column_name = "row_count"
|
||||
make_count_expr expr = Expression.Operation "COUNT" [expr]
|
||||
cols = self.internal_columns.map (c -> [c.name, make_count_expr c.expression])
|
||||
query = Query.Select [[all_rows_column_name, rows_expr]]+cols self.context
|
||||
sql = self.connection.dialect.generate_sql query
|
||||
table = self.connection.read_statement sql
|
||||
all_rows = table.at all_rows_column_name . at 0
|
||||
kept_columns = self.internal_columns . filter c->
|
||||
all_rows == table.at c.name . at 0
|
||||
self.updated_columns kept_columns
|
||||
Table_Helpers.filter_blank_rows self when_any treat_nans_as_blank
|
||||
|
||||
## Returns the number of rows in this table.
|
||||
row_count : Integer
|
||||
@ -917,6 +900,11 @@ type Table
|
||||
new_ctx = self.context.set_index ixes
|
||||
Column.Value internal.name self.connection internal.sql_type internal.expression new_ctx
|
||||
|
||||
## PRIVATE
|
||||
columns_helper : Table_Column_Helper
|
||||
columns_helper self =
|
||||
Table_Helpers.Table_Column_Helper.Value self.internal_columns self.make_column self .read
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Returns a copy of this table with updated internal columns.
|
||||
|
@ -29,7 +29,7 @@ make_expression aggregate dialect =
|
||||
case aggregate of
|
||||
Group_By c _ -> c.expression
|
||||
Count _ -> Expression.Operation "COUNT_ROWS" []
|
||||
Count_Distinct columns _ ignore_nothing -> if columns.is_empty then Error.throw (Illegal_Argument_Error "Count_Distinct must have at least one column.") else
|
||||
Count_Distinct columns _ ignore_nothing -> if columns.is_empty then Error.throw (Illegal_Argument_Error_Data "Count_Distinct must have at least one column.") else
|
||||
case ignore_nothing of
|
||||
True -> Expression.Operation "COUNT_DISTINCT" (columns.map .expression)
|
||||
False -> Expression.Operation "COUNT_DISTINCT_INCLUDE_NULL" (columns.map .expression)
|
||||
|
@ -170,7 +170,7 @@ base_dialect =
|
||||
fun = name -> [name, make_function name]
|
||||
|
||||
arith = [bin "+", bin "-", bin "*", bin "/", bin "%"]
|
||||
logic = [bin "AND", bin "OR", unary "NOT"]
|
||||
logic = [bin "AND", bin "OR", unary "NOT", ["IIF", make_iif]]
|
||||
compare = [bin "=", bin "!=", bin "<", bin ">", bin "<=", bin ">=", ["BETWEEN", make_between]]
|
||||
agg = [fun "MAX", fun "MIN", fun "AVG", fun "SUM"]
|
||||
counts = [fun "COUNT", ["COUNT_ROWS", make_constant "COUNT(*)"]]
|
||||
@ -186,6 +186,17 @@ is_empty = lift_unary_op "IS_EMPTY" arg->
|
||||
is_empty = (arg ++ " = ''").paren
|
||||
(is_null ++ " OR " ++ is_empty).paren
|
||||
|
||||
## PRIVATE
|
||||
make_iif : Vector Builder -> Builder
|
||||
make_iif arguments = case arguments.length of
|
||||
3 ->
|
||||
expr = arguments.at 0
|
||||
when_true = arguments.at 1
|
||||
when_false = arguments.at 2
|
||||
(code "CASE WHEN" ++ expr ++ " THEN " ++ when_true ++ " WHEN " ++ expr ++ " IS NULL THEN NULL ELSE " ++ when_false ++ " END").paren
|
||||
_ ->
|
||||
Error.throw <| Illegal_State_Error_Data ("Invalid amount of arguments for operation IIF")
|
||||
|
||||
## PRIVATE
|
||||
make_between : Vector Builder -> Builder
|
||||
make_between arguments = case arguments.length of
|
||||
|
@ -30,7 +30,7 @@
|
||||
import Standard.Examples
|
||||
|
||||
example_drop_missing_cols =
|
||||
Examples.inventory_table.drop_missing_columns
|
||||
Examples.inventory_table.remove_columns (Column_Selector.Blank_Columns when_any=True)
|
||||
|
||||
> Example
|
||||
Fill missing values in a column with the value 20.5.
|
||||
|
@ -473,6 +473,22 @@ type Column
|
||||
not : Column
|
||||
not self = run_vectorized_unary_op self "not" .not
|
||||
|
||||
## UNSTABLE
|
||||
Replaces `True` values with `when_true` and `False` with `when_false`.
|
||||
Only meant for use with boolean columns.
|
||||
|
||||
TODO: Currently `when_true` and `when_false` need to be a single value.
|
||||
In the future the API will also support row-based IIF if they are columns.
|
||||
iif : Any -> Any -> Column
|
||||
iif self when_true when_false = case self.storage_type of
|
||||
Storage.Boolean ->
|
||||
s = self.java_column.getStorage
|
||||
ix = self.java_column.getIndex
|
||||
rs = s.iif when_true when_false
|
||||
Column.Column_Data (Java_Column.new "Result" ix rs)
|
||||
_ -> Error.throw (Illegal_Argument_Error "`iif` can only be used with boolean columns.")
|
||||
|
||||
|
||||
## Returns a column of booleans, with `True` items at the positions where
|
||||
this column contains a `Nothing`.
|
||||
|
||||
@ -513,6 +529,24 @@ type Column
|
||||
is_present : Column
|
||||
is_present self = self.is_missing.not
|
||||
|
||||
## PRIVATE
|
||||
Returns a column of booleans with `True` at the positions where this
|
||||
column contains a blank value.
|
||||
|
||||
Arguments:
|
||||
- treat_nans_as_blank: If `True`, then `Number.nan` is considered as
|
||||
blank.
|
||||
|
||||
? Blank values
|
||||
Blank values are `Nothing`, `""` and, depending on the `treat_nans_as_blank` setting, `Number.nan`.
|
||||
is_blank : Boolean -> Boolean -> Column
|
||||
is_blank self treat_nans_as_blank=False =
|
||||
case self.storage_type of
|
||||
Storage.Text -> self.is_empty
|
||||
Storage.Decimal -> if treat_nans_as_blank then self.is_missing || self.is_nan else self.is_missing
|
||||
Storage.Any -> if treat_nans_as_blank then self.is_empty || self.is_nan else self.is_empty
|
||||
_ -> self.is_missing
|
||||
|
||||
## ALIAS Fill Missing
|
||||
|
||||
Returns a new column where missing values have been replaced with the
|
||||
|
@ -25,3 +25,24 @@ type Column_Selector
|
||||
this approach can be used to match columns with the same names as a set
|
||||
of columns of some other table, for example, when preparing for a join.
|
||||
By_Column (columns : Vector Column)
|
||||
|
||||
## ALIAS dropna
|
||||
ALIAS drop_missing_columns
|
||||
Select columns which are entirely blank or, when `when_any` is set, contain at least one blank value.
|
||||
|
||||
Arguments:
|
||||
- when_any: By default, only columns consisting of all blank cells are
|
||||
selected. If set to `True`, columns containing at least one blank value
|
||||
will be selected too. If there are no rows, the column is treated as
|
||||
blank regardless of this argument.
|
||||
- treat_nans_as_blank: If `True`, then `Number.nan` is considered as
|
||||
blank.
|
||||
|
||||
? Blank values
|
||||
Blank values are `Nothing`, `""` and, depending on the `treat_nans_as_blank` setting, `Number.nan`.
|
||||
|
||||
> Example
|
||||
Remove completely blank columns from a table.
|
||||
|
||||
table.remove_columns Column_Selector.Blank_Columns
|
||||
Blank_Columns when_any:Boolean=False treat_nans_as_blank:Boolean=False
|
||||
|
@ -271,7 +271,7 @@ type Table
|
||||
Icon: select_column
|
||||
select_columns : Vector Text | Column_Selector -> Boolean -> Problem_Behavior -> Table
|
||||
select_columns self (columns = Column_Selector.By_Index [0]) (reorder = False) (on_problems = Report_Warning) =
|
||||
new_columns = Table_Helpers.select_columns internal_columns=self.columns selector=columns reorder=reorder on_problems=on_problems
|
||||
new_columns = self.columns_helper.select_columns selector=columns reorder=reorder on_problems=on_problems
|
||||
Table.new new_columns
|
||||
|
||||
## Returns a new table with the chosen set of columns, as specified by the
|
||||
@ -321,7 +321,7 @@ type Table
|
||||
table.remove_columns (Column_Selector.By_Column [column1, column2])
|
||||
remove_columns : Vector Text | Column_Selector -> Problem_Behavior -> Table
|
||||
remove_columns self (columns = Column_Selector.By_Index [0]) (on_problems = Report_Warning) =
|
||||
new_columns = Table_Helpers.remove_columns internal_columns=self.columns selector=columns on_problems=on_problems
|
||||
new_columns = self.columns_helper.remove_columns selector=columns on_problems=on_problems
|
||||
Table.new new_columns
|
||||
|
||||
## Returns a new table with the specified selection of columns moved to
|
||||
@ -376,7 +376,7 @@ type Table
|
||||
table.reorder_columns (Column_Selector.By_Column [column1, column2])
|
||||
reorder_columns : Vector Text | Column_Selector -> Position.Position -> Problem_Behavior -> Table
|
||||
reorder_columns self (columns = Column_Selector.By_Index [0]) (position = Position.Before_Other_Columns) (on_problems = Report_Warning) =
|
||||
new_columns = Table_Helpers.reorder_columns internal_columns=self.columns selector=columns position=position on_problems=on_problems
|
||||
new_columns = self.columns_helper.reorder_columns selector=columns position=position on_problems=on_problems
|
||||
Table.new new_columns
|
||||
|
||||
## Returns a new table with the columns sorted by name according to the
|
||||
@ -653,7 +653,7 @@ type Table
|
||||
No_Output_Columns -> Maybe.Some No_Input_Columns_Selected
|
||||
_ -> Nothing
|
||||
key_columns = Warning.map_warnings_and_errors warning_mapper <|
|
||||
Table_Helpers.select_columns internal_columns=self.columns selector=columns reorder=True on_problems=on_problems
|
||||
self.columns_helper.select_columns selector=columns reorder=True on_problems=on_problems
|
||||
java_columns = key_columns.map .java_column
|
||||
text_folding_strategy = Case.folding_strategy case_sensitivity
|
||||
java_table = Illegal_Argument_Error.handle_java_exception <|
|
||||
@ -788,7 +788,7 @@ type Table
|
||||
_ : Column_Selector -> columns
|
||||
name : Text -> Column_Selector.By_Name [name]
|
||||
index : Integer -> Column_Selector.By_Index [index]
|
||||
selection = Table_Helpers.select_columns_helper self.columns selector reorder=False problem_builder
|
||||
selection = self.columns_helper.select_columns_helper selector reorder=False problem_builder
|
||||
selected_names = Map.from_vector (selection.map column-> [column.name, True])
|
||||
|
||||
map_preserve_name column f = column.map f . rename column.name
|
||||
@ -1016,22 +1016,7 @@ type Table
|
||||
Blank values are `Nothing`, `""` and, depending on the `treat_nans_as_blank` setting, `Number.nan`.
|
||||
filter_blank_rows : Boolean -> Boolean -> Table
|
||||
filter_blank_rows self when_any=False treat_nans_as_blank=False =
|
||||
can_contain_text col = case col.storage_type of
|
||||
Storage.Text -> True
|
||||
Storage.Any -> True
|
||||
_ -> False
|
||||
can_contain_double col = case col.storage_type of
|
||||
Storage.Decimal -> True
|
||||
Storage.Any -> True
|
||||
_ -> False
|
||||
Table_Helpers.filter_blank_rows self can_contain_text can_contain_double when_any treat_nans_as_blank
|
||||
|
||||
## DEPRECATED Will be replaced with `Incomplete_Columns` selector (to be used with `remove_columns`).
|
||||
drop_missing_columns : Table
|
||||
drop_missing_columns self =
|
||||
non_missing = self.columns . filter (col -> col.count_missing == 0)
|
||||
index = self.java_table.getIndex
|
||||
Table.Table_Data (Java_Table.new (non_missing.map .java_column . to_array) index)
|
||||
Table_Helpers.filter_blank_rows self when_any treat_nans_as_blank
|
||||
|
||||
## Returns the number of rows in this table.
|
||||
|
||||
@ -1200,6 +1185,11 @@ type Table
|
||||
to_csv : Text
|
||||
to_csv self = Text.from self (Delimited delimiter=",")
|
||||
|
||||
## PRIVATE
|
||||
columns_helper : Table_Column_Helper
|
||||
columns_helper self =
|
||||
Table_Helpers.Table_Column_Helper.Value self.columns (x->x) self (x->x)
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
An error returned when the table contains no rows.
|
||||
|
@ -63,6 +63,6 @@ type Excel_Format
|
||||
format = should_treat_as_xls_format self.xls_format file
|
||||
|
||||
case self.section of
|
||||
Sheet_Names -> Error.throw (Illegal_Argument_Error "Sheet_Names cannot be used for `write`.")
|
||||
Range_Names -> Error.throw (Illegal_Argument_Error "Range_Names cannot be used for `write`.")
|
||||
Sheet_Names -> Error.throw (Illegal_Argument_Error_Data "Sheet_Names cannot be used for `write`.")
|
||||
Range_Names -> Error.throw (Illegal_Argument_Error_Data "Range_Names cannot be used for `write`.")
|
||||
_ -> Excel_Writer.write_file file table on_existing_file self.section self.headers match_columns on_problems format
|
||||
|
@ -116,15 +116,16 @@ default_aggregate_column_name aggregate_column =
|
||||
resolve_aggregate : Table -> Problem_Builder -> Aggregate_Column -> Aggregate_Column | Nothing
|
||||
resolve_aggregate table problem_builder aggregate_column =
|
||||
table_columns = table.columns
|
||||
columns_helper = table.columns_helper
|
||||
|
||||
resolve : (Integer|Text|Column) -> Column ! Internal_Missing_Column_Error
|
||||
resolve c =
|
||||
res = Table_Helpers.resolve_column_helper table_columns c problem_builder
|
||||
res = columns_helper.resolve_column_helper c problem_builder
|
||||
res.if_nothing (Error.throw Internal_Missing_Column_Error)
|
||||
|
||||
resolve_selector_to_vector : Column_Selector -> [Column] ! Internal_Missing_Column_Error
|
||||
resolve_selector_to_vector selector =
|
||||
resolved = Table_Helpers.select_columns_helper table_columns selector reorder=True problem_builder
|
||||
resolved = columns_helper.select_columns_helper selector reorder=True problem_builder
|
||||
if resolved.is_empty then Error.throw Internal_Missing_Column_Error else resolved
|
||||
|
||||
resolve_order_by selector = case selector of
|
||||
@ -140,10 +141,7 @@ resolve_aggregate table problem_builder aggregate_column =
|
||||
Count new_name -> Count new_name
|
||||
Count_Distinct c new_name ignore_nothing ->
|
||||
new_c = case c of
|
||||
## TODO once we have sum type pattern matching this could be replaced with a single branch
|
||||
Column_Selector.By_Name _ _ -> resolve_selector_to_vector c
|
||||
Column_Selector.By_Index _ -> resolve_selector_to_vector c
|
||||
Column_Selector.By_Column _ -> resolve_selector_to_vector c
|
||||
_ : Column_Selector -> resolve_selector_to_vector c
|
||||
_ -> [resolve c]
|
||||
Count_Distinct new_c new_name ignore_nothing
|
||||
Count_Not_Nothing c new_name -> Count_Not_Nothing (resolve c) new_name
|
||||
|
@ -12,103 +12,199 @@ import project.Data.Sort_Column.Sort_Column
|
||||
import project.Internal.Problem_Builder.Problem_Builder
|
||||
import project.Internal.Unique_Name_Strategy.Unique_Name_Strategy
|
||||
|
||||
from project.Data.Aggregate_Column.Aggregate_Column import Minimum, Maximum
|
||||
from project.Errors import Missing_Input_Columns_Data, No_Output_Columns, Too_Many_Column_Names_Provided_Data, Duplicate_Output_Column_Names_Data, Invalid_Output_Column_Names_Data, No_Input_Columns_Selected
|
||||
|
||||
polyglot java import java.util.HashSet
|
||||
|
||||
## PRIVATE
|
||||
A helper function encapsulating shared code for `select_columns`
|
||||
implementations of various Table variants. See the documentation for the
|
||||
Table type for details.
|
||||
type Table_Column_Helper
|
||||
## PRIVATE
|
||||
Helps manage table columns.
|
||||
|
||||
It takes a list of columns and returns the selected columns. It is the
|
||||
responsibility of each implementation to reconstruct a proper table from the
|
||||
resulting list of columns.
|
||||
Arguments:
|
||||
- internal_columns: A list of all columns in a table.
|
||||
- make_column: A function which takes the internal column and creates a
|
||||
fully fledged column from it.
|
||||
- table: A reference to the table.
|
||||
- materialize: A function which takes a table and materializes it to
|
||||
an in-memory table.
|
||||
Value internal_columns make_column table materialize
|
||||
|
||||
Arguments:
|
||||
- internal_columns: A list of all columns in a table.
|
||||
- selector: Column selection criteria or vector of column names.
|
||||
- reorder: Specifies whether to reorder the matched columns according to the
|
||||
order of the selection criteria.
|
||||
If `False`, the matched entries are returned in the same order as in the
|
||||
input.
|
||||
If `True`, the matched entries are returned in the order of the criteria
|
||||
matching them. If a single object has been matched by multiple criteria, it
|
||||
is placed in the group belonging to the first matching criterion on the
|
||||
list. If a single criterion's group has more than one element, their
|
||||
relative order is the same as in the input.
|
||||
- on_problems: Specifies the behavior when a problem occurs during the
|
||||
operation. By default, a warning is issued, but the operation proceeds.
|
||||
If set to `Report_Error`, the operation fails with a dataflow error.
|
||||
If set to `Ignore`, the operation proceeds without errors or warnings.
|
||||
select_columns : Vector -> Vector | Column_Selector -> Boolean -> Problem_Behavior -> Vector
|
||||
select_columns internal_columns selector reorder on_problems =
|
||||
problem_builder = Problem_Builder.new
|
||||
result = select_columns_helper internal_columns selector reorder problem_builder
|
||||
if result.is_empty then
|
||||
problem_builder.report_other_warning No_Output_Columns
|
||||
problem_builder.attach_problems_after on_problems result
|
||||
## PRIVATE
|
||||
A helper function encapsulating shared code for `select_columns`
|
||||
implementations of various Table variants. See the documentation for the
|
||||
Table type for details.
|
||||
|
||||
## PRIVATE
|
||||
A helper function encapsulating shared code for `remove_columns`
|
||||
implementations of various Table variants. See the documentation for the
|
||||
Table type for details.
|
||||
It returns the selected columns. It is the responsibility of each
|
||||
implementation to reconstruct a proper table from the
|
||||
resulting list of columns.
|
||||
|
||||
It takes a list of columns and returns the columns which should be kept. It
|
||||
is the responsibility of each implementation to reconstruct a proper table
|
||||
from the resulting list of columns.
|
||||
Arguments:
|
||||
- selector: Column selection criteria or vector of column names.
|
||||
- reorder: Specifies whether to reorder the matched columns according to
|
||||
the order of the selection criteria.
|
||||
If `False`, the matched entries are returned in the same order as in
|
||||
the input.
|
||||
If `True`, the matched entries are returned in the order of the
|
||||
criteria matching them. If a single object has been matched by multiple
|
||||
criteria, it is placed in the group belonging to the first matching
|
||||
criterion on the list. If a single criterion's group has more than one
|
||||
element, their relative order is the same as in the input.
|
||||
- on_problems: Specifies the behavior when a problem occurs during the
|
||||
operation. By default, a warning is issued, but the operation proceeds.
|
||||
If set to `Report_Error`, the operation fails with a dataflow error.
|
||||
If set to `Ignore`, the operation proceeds without errors or warnings.
|
||||
select_columns : Vector | Column_Selector -> Boolean -> Problem_Behavior -> Vector
|
||||
select_columns self selector reorder on_problems =
|
||||
problem_builder = Problem_Builder.new
|
||||
result = self.select_columns_helper selector reorder problem_builder
|
||||
if result.is_empty then
|
||||
problem_builder.report_other_warning No_Output_Columns
|
||||
problem_builder.attach_problems_after on_problems result
|
||||
|
||||
Arguments:
|
||||
- internal_columns: A list of all columns in a table.
|
||||
- selector: Column selection criteria or vector of column names.
|
||||
- on_problems: Specifies the behavior when a problem occurs during the
|
||||
operation. By default, a warning is issued, but the operation proceeds.
|
||||
If set to `Report_Error`, the operation fails with a dataflow error.
|
||||
If set to `Ignore`, the operation proceeds without errors or warnings.
|
||||
remove_columns : Vector -> Vector | Column_Selector -> Problem_Behavior -> Vector
|
||||
remove_columns internal_columns selector on_problems =
|
||||
problem_builder = Problem_Builder.new
|
||||
selection = select_columns_helper internal_columns selector reorder=False problem_builder
|
||||
selected_names = Map.from_vector (selection.map column-> [column.name, True])
|
||||
result = internal_columns.filter column->
|
||||
should_be_removed = selected_names.get_or_else column.name False
|
||||
should_be_removed.not
|
||||
if result.is_empty then
|
||||
problem_builder.report_other_warning No_Output_Columns
|
||||
problem_builder.attach_problems_after on_problems result
|
||||
## PRIVATE
|
||||
A helper function encapsulating shared code for `remove_columns`
|
||||
implementations of various Table variants. See the documentation for the
|
||||
Table type for details.
|
||||
|
||||
## PRIVATE
|
||||
A helper function encapsulating shared code for `reorder_columns`
|
||||
implementations of various Table variants. See the documentation for the
|
||||
Table type for details.
|
||||
It returns the columns which should be kept. It is the responsibility of
|
||||
each implementation to reconstruct a proper table from the resulting list
|
||||
of columns.
|
||||
|
||||
It takes a list of columns and returns the columns which should be kept. It
|
||||
is the responsibility of each implementation to reconstruct a proper table
|
||||
from the resulting list of columns.
|
||||
|
||||
Arguments:
|
||||
- internal_columns: A list of all columns in a table.
|
||||
- selector: A selector specifying which columns should be moved and the order
|
||||
in which they should appear in the result.
|
||||
- position: Specifies how to place the selected columns in relation to the
|
||||
columns which were not matched by the `selector` (if any).
|
||||
- on_problems: Specifies the behavior when a problem occurs during the
|
||||
operation. By default, a warning is issued, but the operation proceeds.
|
||||
If set to `Report_Error`, the operation fails with a dataflow error.
|
||||
If set to `Ignore`, the operation proceeds without errors or warnings.
|
||||
reorder_columns : Vector -> Vector | Column_Selector -> Position.Position -> Problem_Behavior -> Vector
|
||||
reorder_columns internal_columns selector position on_problems =
|
||||
problem_builder = Problem_Builder.new
|
||||
selection = select_columns_helper internal_columns selector reorder=True problem_builder
|
||||
problem_builder.attach_problems_before on_problems <|
|
||||
Arguments:
|
||||
- selector: Column selection criteria or vector of column names.
|
||||
- on_problems: Specifies the behavior when a problem occurs during the
|
||||
operation. By default, a warning is issued, but the operation proceeds.
|
||||
If set to `Report_Error`, the operation fails with a dataflow error.
|
||||
If set to `Ignore`, the operation proceeds without errors or warnings.
|
||||
remove_columns : Vector | Column_Selector -> Problem_Behavior -> Vector
|
||||
remove_columns self selector on_problems =
|
||||
problem_builder = Problem_Builder.new
|
||||
selection = self.select_columns_helper selector reorder=False problem_builder
|
||||
selected_names = Map.from_vector (selection.map column-> [column.name, True])
|
||||
other_columns = internal_columns.filter column->
|
||||
is_selected = selected_names.get_or_else column.name False
|
||||
is_selected.not
|
||||
result = case position of
|
||||
Position.Before_Other_Columns -> selection + other_columns
|
||||
Position.After_Other_Columns -> other_columns + selection
|
||||
result
|
||||
result = self.internal_columns.filter column->
|
||||
should_be_removed = selected_names.get_or_else column.name False
|
||||
should_be_removed.not
|
||||
if result.is_empty then
|
||||
problem_builder.report_other_warning No_Output_Columns
|
||||
problem_builder.attach_problems_after on_problems result
|
||||
|
||||
## PRIVATE
|
||||
A helper function encapsulating shared code for `reorder_columns`
|
||||
implementations of various Table variants. See the documentation for the
|
||||
Table type for details.
|
||||
|
||||
It returns the columns which should be kept. It is the responsibility of
|
||||
each implementation to reconstruct a proper table from the resulting list
|
||||
of columns.
|
||||
|
||||
Arguments:
|
||||
- selector: A selector specifying which columns should be moved and the
|
||||
order in which they should appear in the result.
|
||||
- position: Specifies how to place the selected columns in relation to
|
||||
the columns which were not matched by the `selector` (if any).
|
||||
- on_problems: Specifies the behavior when a problem occurs during the
|
||||
operation. By default, a warning is issued, but the operation proceeds.
|
||||
If set to `Report_Error`, the operation fails with a dataflow error.
|
||||
If set to `Ignore`, the operation proceeds without errors or warnings.
|
||||
reorder_columns : Vector | Column_Selector -> Position.Position -> Problem_Behavior -> Vector
|
||||
reorder_columns self selector position on_problems =
|
||||
problem_builder = Problem_Builder.new
|
||||
selection = self.select_columns_helper selector reorder=True problem_builder
|
||||
problem_builder.attach_problems_before on_problems <|
|
||||
selected_names = Map.from_vector (selection.map column-> [column.name, True])
|
||||
other_columns = self.internal_columns.filter column->
|
||||
is_selected = selected_names.get_or_else column.name False
|
||||
is_selected.not
|
||||
result = case position of
|
||||
Position.Before_Other_Columns -> selection + other_columns
|
||||
Position.After_Other_Columns -> other_columns + selection
|
||||
result
|
||||
|
||||
## PRIVATE
|
||||
A helper function which selects columns from the table based on the
|
||||
provided selection criteria.
|
||||
|
||||
Arguments:
|
||||
- selector: Column selection criteria or vector of column names.
|
||||
- reorder: Specifies whether to reorder the matched columns according to
|
||||
the order of the selection criteria.
|
||||
If `False`, the matched entries are returned in the same order as in
|
||||
the input.
|
||||
If `True`, the matched entries are returned in the order of the
|
||||
criteria matching them. If a single object has been matched by multiple
|
||||
criteria, it is placed in the group belonging to the first matching
|
||||
criterion on the list. If a single criterion's group has more than one
|
||||
element, their relative order is the same as in the input.
|
||||
- problem_builder: Encapsulates the aggregation of encountered problems.
|
||||
select_columns_helper : Vector | Column_Selector -> Boolean -> Problem_Builder -> Vector
|
||||
select_columns_helper self selector reorder problem_builder = case selector of
|
||||
# A plain vector is shorthand for a by-name selection: wrap it in `By_Name` and dispatch again.
_ : Vector.Vector ->
|
||||
self.select_columns_helper (Column_Selector.By_Name selector) reorder problem_builder
|
||||
Column_Selector.By_Name names matcher ->
|
||||
# Duplicated selectors are reported through the problem builder; matching continues with whatever `validate_unique` returns.
valid_names = validate_unique names problem_builder.report_duplicate_column_selectors
|
||||
# Problems raised by the matcher are forwarded to `report_missing_input_columns`.
Matching.match_criteria_callback matcher self.internal_columns valid_names reorder=reorder name_mapper=(_.name) problem_callback=problem_builder.report_missing_input_columns
|
||||
Column_Selector.By_Index indices ->
|
||||
# Only the first element of each entry returned by `validate_indices` is used as the index.
good_indices = validate_indices self.internal_columns.length indices problem_builder . map .first
|
||||
case reorder of
|
||||
True ->
|
||||
select_indices_reordering self.internal_columns good_indices
|
||||
False ->
|
||||
select_indices_preserving_order self.internal_columns good_indices
|
||||
Column_Selector.By_Column columns ->
|
||||
# Columns are resolved by name, using case-sensitive matching, via the `By_Name` branch.
column_names = columns.map .name
|
||||
new_selector = Column_Selector.By_Name column_names Text_Matcher.Case_Sensitive
|
||||
self.select_columns_helper new_selector reorder=reorder problem_builder=problem_builder
|
||||
# With no columns there is nothing to inspect, so return early without building any aggregate.
Column_Selector.Blank_Columns when_any treat_nans_as_blank -> if self.internal_columns.is_empty then [] else
|
||||
# Build one indicator column per input column, named "blanks_<index>": 1 where `is_blank` holds, 0 where it does not.
blanks = self.internal_columns.map_with_index ix-> internal_column->
|
||||
column = self.make_column internal_column
|
||||
blank_indicator = column.is_blank treat_nans_as_blank
|
||||
blank_indicator.iif 1 0 . rename "blanks_"+ix.to_text
|
||||
## We cannot just use a custom_column in the aggregate because of
|
||||
how the column selector works. We may need to revisit this. For
|
||||
now we need to use tricks like that:
|
||||
# Start from a table with no columns and add only the indicator columns to it.
modified_table = blanks.fold (self.table.select_columns [] on_problems=Problem_Behavior.Ignore) table-> blanks_col->
|
||||
table.set blanks_col.name blanks_col
|
||||
# Maximum is equivalent to Exists and Minimum is equivalent to Forall.
|
||||
col_aggregate = if when_any then Maximum else Minimum
|
||||
aggregates = blanks.map blanks_col-> col_aggregate blanks_col.name
|
||||
# Aggregation problems are escalated to errors (`Report_Error`) and rethrown as panics before materializing.
result = self.materialize <| Panic.rethrow <|
|
||||
modified_table.aggregate aggregates on_problems=Problem_Behavior.Report_Error
|
||||
|
||||
## The `reorder` argument has no meaning for Blank_Columns selector
|
||||
- either way all blank columns are selected in the order that
|
||||
they are in the Table.
|
||||
# Column `i` is selected when the first value of the i-th aggregate result marks it as blank.
self.internal_columns.filter_with_index i-> _->
|
||||
## TODO once we have the Row type it would be nice to use it here
|
||||
case result.at i . first of
|
||||
## Nothing is returned if there were no rows, in that case
|
||||
we treat the column as blank by convention, regardless of
|
||||
`when_any`.
|
||||
Nothing -> True
|
||||
1 -> True
|
||||
0 -> False
|
||||
_ -> Panic.throw (Illegal_State_Error_Data "Unexpected result. Perhaps an implementation bug of Column_Selector.Blank_Columns.")
|
||||
|
||||
## PRIVATE
|
||||
A helper function which selects a single column from the table. It is aligned
|
||||
with the `select_columns_helper`, apart from Blank_Columns selector which is
|
||||
only handled by the other method but not this one.
|
||||
resolve_column_helper : (Integer | Text | Column) -> Problem_Builder -> a | Nothing
|
||||
resolve_column_helper self selector problem_builder = case selector of
|
||||
_ : Text ->
|
||||
# Match the single name case-sensitively; matcher problems are forwarded to `report_missing_input_columns`.
matched_columns = Matching.match_criteria_callback Text_Matcher.Case_Sensitive self.internal_columns [selector] reorder=True name_mapper=(_.name) problem_callback=problem_builder.report_missing_input_columns
|
||||
# Exactly one match is returned as-is, no match yields `Nothing`, and more than one match is treated as a broken invariant.
if matched_columns.length == 1 then matched_columns.first else
|
||||
if matched_columns.length == 0 then Nothing else
|
||||
Panic.throw (Illegal_State_Error_Data "A single exact match should never match more than one column. Perhaps the table breaks the invariant of unique column names?")
|
||||
# Integer selectors are looked up by position; an invalid index is reported to the problem builder and yields `Nothing`.
_ : Integer -> case is_index_valid self.internal_columns.length selector of
|
||||
True -> self.internal_columns.at selector
|
||||
False ->
|
||||
problem_builder.report_oob_indices [selector]
|
||||
Nothing
|
||||
## A wildcard to match any backend's column.
|
||||
_ ->
|
||||
# Any other selector is resolved through its `name`, re-using the Text branch above.
self.resolve_column_helper selector.name problem_builder=problem_builder
|
||||
|
||||
|
||||
## PRIVATE
|
||||
A helper function encapsulating shared code for `rename_columns`
|
||||
@ -215,61 +311,6 @@ sort_columns internal_columns direction text_ordering =
|
||||
False -> .compare_to
|
||||
internal_columns.sort on=mapper by=comparator order=direction
|
||||
|
||||
## PRIVATE
|
||||
A helper function which selects columns from the table based on the provided
|
||||
selection criteria.
|
||||
|
||||
Arguments:
|
||||
- internal_columns: A list of all columns in a table.
|
||||
- selector: Column selection criteria or vector of column names.
|
||||
- reorder: Specifies whether to reorder the matched columns according to the
|
||||
order of the selection criteria.
|
||||
If `False`, the matched entries are returned in the same order as in the
|
||||
input.
|
||||
If `True`, the matched entries are returned in the order of the criteria
|
||||
matching them. If a single object has been matched by multiple criteria, it
|
||||
is placed in the group belonging to the first matching criterion on the
|
||||
list. If a single criterion's group has more than one element, their
|
||||
relative order is the same as in the input.
|
||||
- problem_builder: Encapsulates the aggregation of encountered problems.
|
||||
select_columns_helper : Vector -> Vector | Column_Selector -> Boolean -> Problem_Builder -> Vector
|
||||
select_columns_helper internal_columns selector reorder problem_builder = case selector of
|
||||
_ : Vector.Vector ->
|
||||
select_columns_helper internal_columns (Column_Selector.By_Name selector) reorder problem_builder
|
||||
Column_Selector.By_Name names matcher ->
|
||||
valid_names = validate_unique names problem_builder.report_duplicate_column_selectors
|
||||
Matching.match_criteria_callback matcher internal_columns valid_names reorder=reorder name_mapper=(_.name) problem_callback=problem_builder.report_missing_input_columns
|
||||
Column_Selector.By_Index indices ->
|
||||
good_indices = validate_indices internal_columns.length indices problem_builder . map .first
|
||||
case reorder of
|
||||
True ->
|
||||
select_indices_reordering internal_columns good_indices
|
||||
False ->
|
||||
select_indices_preserving_order internal_columns good_indices
|
||||
Column_Selector.By_Column columns ->
|
||||
column_names = columns.map .name
|
||||
new_selector = Column_Selector.By_Name column_names Text_Matcher.Case_Sensitive
|
||||
select_columns_helper internal_columns new_selector reorder=reorder problem_builder=problem_builder
|
||||
|
||||
## PRIVATE
|
||||
A helper function which selects a single column from the table. It is aligned with the
|
||||
`select_columns_helper`.
|
||||
resolve_column_helper : Vector a -> (Integer | Text | Column) -> Problem_Builder -> a | Nothing
|
||||
resolve_column_helper internal_columns selector problem_builder = case selector of
|
||||
_ : Text ->
|
||||
matched_columns = Matching.match_criteria_callback Text_Matcher.Case_Sensitive internal_columns [selector] reorder=True name_mapper=(_.name) problem_callback=problem_builder.report_missing_input_columns
|
||||
if matched_columns.length == 1 then matched_columns.first else
|
||||
if matched_columns.length == 0 then Nothing else
|
||||
Panic.throw (Illegal_State_Error_Data "A single exact match should never match more than one column. Perhaps the table breaks the invariant of unique column names?")
|
||||
_ : Integer -> case is_index_valid internal_columns.length selector of
|
||||
True -> internal_columns.at selector
|
||||
False ->
|
||||
problem_builder.report_oob_indices [selector]
|
||||
Nothing
|
||||
## A wildcard to match any backend's column.
|
||||
_ ->
|
||||
resolve_column_helper internal_columns selector.name problem_builder=problem_builder
|
||||
|
||||
## PRIVATE
|
||||
Converts the generic `No_Matches_Found` error to a more specific
|
||||
`Missing_Input_Columns`. Any other errors are returned as-is.
|
||||
@ -558,20 +599,13 @@ select_columns_by_column_reference internal_columns column_selectors problem_bui
|
||||
## PRIVATE
|
||||
A helper method gathering the common logic for constructing expressions that
|
||||
can filter out blank rows.
|
||||
filter_blank_rows : Table -> (Column -> Boolean) -> (Column -> Boolean) -> Boolean -> Boolean -> Table
|
||||
filter_blank_rows table can_contain_text can_contain_double when_any treat_nans_as_blank =
|
||||
filter_blank_rows : Table -> Boolean -> Boolean -> Table
|
||||
filter_blank_rows table when_any treat_nans_as_blank =
|
||||
cols = table.columns
|
||||
case cols.not_empty of
|
||||
True ->
|
||||
handle col =
|
||||
is_blank = case can_contain_text col of
|
||||
True -> col.is_empty
|
||||
False -> col.is_missing
|
||||
case treat_nans_as_blank && (can_contain_double col) of
|
||||
True -> is_blank || col.is_nan
|
||||
False -> is_blank
|
||||
merge = if when_any then (||) else (&&)
|
||||
missing_mask = cols.map handle . reduce merge
|
||||
missing_mask = cols.map (_.is_blank treat_nans_as_blank) . reduce merge
|
||||
non_missing_mask = missing_mask.not
|
||||
table.filter non_missing_mask
|
||||
False -> table
|
||||
|
@ -10,6 +10,10 @@ public class Polyglot_Utils {
|
||||
* type.
|
||||
*/
|
||||
public static Object convertPolyglotValue(Value item) {
|
||||
if (item == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (item.isDate()) {
|
||||
LocalDate d = item.asDate();
|
||||
if (item.isTime()) {
|
||||
|
@ -2,6 +2,8 @@ package org.enso.table.data.column.storage;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.List;
|
||||
import org.enso.base.polyglot.Polyglot_Utils;
|
||||
import org.enso.table.data.column.builder.object.InferredBuilder;
|
||||
import org.enso.table.data.column.operation.map.MapOpStorage;
|
||||
import org.enso.table.data.column.operation.map.MapOperation;
|
||||
import org.enso.table.data.column.operation.map.UnaryMapOperation;
|
||||
@ -165,6 +167,22 @@ public final class BoolStorage extends Storage<Boolean> {
|
||||
return negated;
|
||||
}
|
||||
|
||||
/**
 * Builds a new storage by choosing, for each row, one of two constant values based on this
 * boolean storage.
 *
 * <p>Both arguments are converted with {@code Polyglot_Utils.convertPolyglotValue} once, before
 * the loop. Rows that are missing in this storage get {@code null} appended, so they stay
 * missing in the result.
 *
 * @param when_true value appended for rows where {@code getItem(i)} is true
 * @param when_false value appended for rows where {@code getItem(i)} is false
 * @return the storage obtained by sealing an {@code InferredBuilder} created with this
 *     storage's size
 */
public Storage<?> iif(Value when_true, Value when_false) {
|
||||
Object on_true = Polyglot_Utils.convertPolyglotValue(when_true);
|
||||
Object on_false = Polyglot_Utils.convertPolyglotValue(when_false);
|
||||
InferredBuilder builder = new InferredBuilder(size);
|
||||
for (int i = 0; i < size; i++) {
|
||||
// The missing check comes first so that a missing row never falls through to getItem.
if (isMissing.get(i)) {
|
||||
builder.append(null);
|
||||
} else if (getItem(i)) {
|
||||
builder.append(on_true);
|
||||
} else {
|
||||
builder.append(on_false);
|
||||
}
|
||||
}
|
||||
return builder.seal();
|
||||
}
|
||||
|
||||
private static MapOpStorage<Boolean, BoolStorage> buildOps() {
|
||||
MapOpStorage<Boolean, BoolStorage> ops = new MapOpStorage<>();
|
||||
ops.add(
|
||||
|
@ -177,7 +177,7 @@ public abstract class Storage<T> {
|
||||
if (name != null && isOpVectorized(name)) {
|
||||
return runVectorizedMap(name, null);
|
||||
}
|
||||
Object missingValue = onMissing == null ? null : Polyglot_Utils.convertPolyglotValue(onMissing);
|
||||
Object missingValue = Polyglot_Utils.convertPolyglotValue(onMissing);
|
||||
Builder builder = new InferredBuilder(size());
|
||||
for (int i = 0; i < size(); i++) {
|
||||
Object it = getItemBoxed(i);
|
||||
|
@ -3,9 +3,10 @@ from Standard.Base.Data.Index_Sub_Range import While, Sample, Every
|
||||
import Standard.Base.Data.Index_Sub_Range
|
||||
from Standard.Base.Error.Problem_Behavior import Report_Error
|
||||
|
||||
from Standard.Table import Column_Name_Mapping, Sort_Column, Sort_Column_Selector, Position
|
||||
from Standard.Table import Column_Name_Mapping, Sort_Column, Sort_Column_Selector, Position, Column_Selector
|
||||
import Standard.Table.Data.Value_Type.Value_Type
|
||||
from Standard.Table.Data.Column_Selector.Column_Selector import By_Name, By_Index, By_Column
|
||||
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import Count_Distinct
|
||||
from Standard.Table.Errors import all
|
||||
|
||||
from Standard.Database.Errors import SQL_Error_Data, Unsupported_Database_Operation_Error_Data
|
||||
@ -1294,17 +1295,33 @@ spec prefix table_builder test_selection pending=Nothing =
|
||||
problems = [Index_Out_Of_Bounds_Error_Data 4 1]
|
||||
Problems.test_problem_handling action problems tester
|
||||
|
||||
Test.group prefix+"Column Operations" pending=pending <|
|
||||
Test.specify "iif" <|
|
||||
t = table_builder [["X", [True, False, Nothing, True]]]
|
||||
t.at "X" . iif 22 33 . to_vector . should_equal [22, 33, Nothing, 22]
|
||||
|
||||
Test.specify "iif on Columns" pending="Not implemented yet." Nothing
|
||||
|
||||
Test.group prefix+"Dropping Missing Values" pending=pending <|
|
||||
t4 = table_builder [["a", [0, 1, Nothing, 42, Nothing, 5]], ["b", [True, Nothing, True, False, Nothing, False]], ["c", ["", "foo", "bar", Nothing, Nothing, " "]]]
|
||||
t0 = table_builder [["a", [0, 1, Nothing, 42, Nothing, 5]], ["b", [True, Nothing, True, False, Nothing, False]], ["c", ["", "foo", "bar", Nothing, Nothing, " "]]]
|
||||
t1 =
|
||||
a = ["a", [1, Nothing, 3, 4]]
|
||||
b = ["b", ["a", "b", Nothing, " "]]
|
||||
c = ["c", [10, 20, 30, 40]]
|
||||
d = ["d", [Nothing, True, False, True]]
|
||||
e = ["e", ["", "", "foo", "bar"]]
|
||||
f = ["f", [Nothing, "", Nothing, ""]]
|
||||
table_builder [a, b, c, d, e, f]
|
||||
|
||||
Test.specify "filter_blank_rows should drop rows that contain at least one missing cell" <|
|
||||
d = t4.filter_blank_rows when_any=True
|
||||
d = t0.filter_blank_rows when_any=True
|
||||
d.row_count . should_equal 1
|
||||
d.at "a" . to_vector . should_equal [5]
|
||||
d.at "b" . to_vector . should_equal [False]
|
||||
d.at "c" . to_vector . should_equal [" "]
|
||||
|
||||
Test.specify "filter_blank_rows should drop rows that are all blank" <|
|
||||
d2 = t4.filter_blank_rows when_any=False
|
||||
d2 = t0.filter_blank_rows when_any=False
|
||||
d2.at "a" . to_vector . should_equal [0, 1, Nothing, 42, 5]
|
||||
d2.at "b" . to_vector . should_equal [True, Nothing, True, False, False]
|
||||
d2.at "c" . to_vector . should_equal ["", "foo", "bar", Nothing, " "]
|
||||
@ -1331,29 +1348,86 @@ spec prefix table_builder test_selection pending=Nothing =
|
||||
t6.row_count . should_equal 0
|
||||
t6.columns . should_equal []
|
||||
|
||||
t = table_builder [["X", [2.0, 1.5, Number.nan, Number.nan]], ["Y", [Nothing, 2.0, Nothing, 5.0]]]
|
||||
Test.specify "should allow to remove blank columns" <|
|
||||
r1 = t1.remove_columns (Column_Selector.Blank_Columns when_any=False)
|
||||
r1.columns.map .name . should_equal ["a", "b", "c", "d", "e"]
|
||||
r1.at "a" . to_vector . should_equal [1, Nothing, 3, 4]
|
||||
|
||||
r2 = t1.remove_columns (Column_Selector.Blank_Columns when_any=True)
|
||||
r2.columns.map .name . should_equal ["c"]
|
||||
r2.at "c" . to_vector . should_equal [10, 20, 30, 40]
|
||||
|
||||
t3 = table_builder [["X", [2.0, 1.5, Number.nan, Number.nan]], ["Y", [Nothing, 2.0, Nothing, 5.0]]]
|
||||
t4 =
|
||||
c = ["c", [10, 20, 40, 30]]
|
||||
g = ["g", [Number.nan, 1, 2, 3.4]]
|
||||
h = ["h", [Number.nan, Nothing, Number.nan, Nothing]]
|
||||
table_builder [c, g, h]
|
||||
if test_selection.is_nan_and_nothing_distinct then
|
||||
Test.specify "should not treat NaNs as blank by default" <|
|
||||
t1 = t.filter_blank_rows when_any=True
|
||||
r1 = t3.filter_blank_rows when_any=True
|
||||
# We cannot use `Vector.==` because `NaN != NaN`.
|
||||
t1.at "X" . to_vector . to_text . should_equal "[1.5, NaN]"
|
||||
t1.at "Y" . to_vector . should_equal [2.0, 5.0]
|
||||
r1.at "X" . to_vector . to_text . should_equal "[1.5, NaN]"
|
||||
r1.at "Y" . to_vector . should_equal [2.0, 5.0]
|
||||
|
||||
t2 = t.filter_blank_rows when_any=False
|
||||
t2.at "X" . to_vector . to_text . should_equal "[2.0, 1.5, NaN, NaN]"
|
||||
t2.at "Y" . to_vector . should_equal [Nothing, 2.0, Nothing, 5.0]
|
||||
r2 = t3.filter_blank_rows when_any=False
|
||||
r2.at "X" . to_vector . to_text . should_equal "[2.0, 1.5, NaN, NaN]"
|
||||
r2.at "Y" . to_vector . should_equal [Nothing, 2.0, Nothing, 5.0]
|
||||
|
||||
r3 = t4.remove_columns (Column_Selector.Blank_Columns when_any=False)
|
||||
r3.columns.map .name . should_equal ["c", "g", "h"]
|
||||
r3.at "g" . to_vector . to_text . should_equal "[NaN, 1.0, 2.0, 3.4]"
|
||||
|
||||
r4 = t4.remove_columns (Column_Selector.Blank_Columns when_any=True)
|
||||
r4.columns.map .name . should_equal ["c", "g"]
|
||||
r4.at "g" . to_vector . to_text . should_equal "[NaN, 1.0, 2.0, 3.4]"
|
||||
|
||||
Test.specify "should allow to treat NaNs as blank if asked" <|
|
||||
t1 = t.filter_blank_rows when_any=True treat_nans_as_blank=True
|
||||
r1 = t3.filter_blank_rows when_any=True treat_nans_as_blank=True
|
||||
# We cannot use `Vector.==` because `NaN != NaN`.
|
||||
t1.at "X" . to_vector . should_equal [1.5]
|
||||
t1.at "Y" . to_vector . should_equal [2.0]
|
||||
r1.at "X" . to_vector . should_equal [1.5]
|
||||
r1.at "Y" . to_vector . should_equal [2.0]
|
||||
|
||||
t2 = t.filter_blank_rows when_any=False treat_nans_as_blank=True
|
||||
t2.at "X" . to_vector . to_text . should_equal "[2.0, 1.5, NaN]"
|
||||
t2.at "Y" . to_vector . should_equal [Nothing, 2.0, 5.0]
|
||||
r2 = t3.filter_blank_rows when_any=False treat_nans_as_blank=True
|
||||
r2.at "X" . to_vector . to_text . should_equal "[2.0, 1.5, NaN]"
|
||||
r2.at "Y" . to_vector . should_equal [Nothing, 2.0, 5.0]
|
||||
|
||||
r3 = t4.remove_columns (Column_Selector.Blank_Columns when_any=False treat_nans_as_blank=True)
|
||||
r3.columns.map .name . should_equal ["c", "g"]
|
||||
r3.at "g" . to_vector . to_text . should_equal "[NaN, 1.0, 2.0, 3.4]"
|
||||
|
||||
r4 = t4.remove_columns (Column_Selector.Blank_Columns when_any=True treat_nans_as_blank=True)
|
||||
r4.columns.map .name . should_equal ["c"]
|
||||
r4.at "c" . to_vector . should_equal [10, 20, 40, 30]
|
||||
|
||||
if test_selection.is_nan_and_nothing_distinct.not then
|
||||
Test.specify "this backend treats NaN as Nothing" <|
|
||||
t.at "X" . to_vector . should_equal [2.0, 1.5, Nothing, Nothing]
|
||||
t.at "X" . is_nan . to_vector . should_fail_with Unsupported_Database_Operation_Error_Data
|
||||
t3.at "X" . to_vector . should_equal [2.0, 1.5, Nothing, Nothing]
|
||||
t3.at "X" . is_nan . to_vector . should_fail_with Unsupported_Database_Operation_Error_Data
|
||||
|
||||
Test.specify "Blank_Columns selector should work for all kinds of methods accepting Column_Selector" <|
|
||||
t = table_builder [["X", [1, 2, 3, 4]], ["Y", [Nothing, "", Nothing, Nothing]], ["Z", [Nothing, True, False, Nothing]]]
|
||||
|
||||
t.select_columns Column_Selector.Blank_Columns . columns . map .name . should_equal ["Y"]
|
||||
t.select_columns (Column_Selector.Blank_Columns when_any=True) . columns . map .name . should_equal ["Y", "Z"]
|
||||
|
||||
t.reorder_columns Column_Selector.Blank_Columns . columns . map .name . should_equal ["Y", "X", "Z"]
|
||||
|
||||
r1 = t.aggregate [Count_Distinct Column_Selector.Blank_Columns]
|
||||
r1.columns . map .name . should_equal ["Count Distinct Y"]
|
||||
r1.at "Count Distinct Y" . to_vector . should_equal [2]
|
||||
r2 = t.aggregate [(Count_Distinct Column_Selector.Blank_Columns ignore_nothing=True)]
|
||||
r2.columns . map .name . should_equal ["Count Distinct Y"]
|
||||
r2.at "Count Distinct Y" . to_vector . should_equal [1]
|
||||
|
||||
Test.specify "Blank_Columns selector should deal with edge cases" <|
|
||||
t = table_builder [["X", [1, 2, 3, 4]]]
|
||||
no_cols = t.select_columns []
|
||||
no_rows = t.filter "X" (Filter_Condition.Equal to=0)
|
||||
no_cols.columns . should_equal []
|
||||
no_rows.row_count . should_equal 0
|
||||
no_rows.at "X" . to_vector . should_equal []
|
||||
|
||||
no_cols.select_columns Column_Selector.Blank_Columns . columns . map .name . should_equal []
|
||||
no_rows.select_columns Column_Selector.Blank_Columns . columns . map .name . should_equal ["X"]
|
||||
no_rows.remove_columns Column_Selector.Blank_Columns . columns . map .name . should_equal []
|
||||
|
@ -185,18 +185,6 @@ spec prefix connection pending=Nothing =
|
||||
col.count . should_equal 3
|
||||
col.count_missing . should_equal 2
|
||||
|
||||
Test.specify "drop_missing_columns should drop columns that contain at least one missing row in a Table" <|
|
||||
t5 = upload "T5" <|
|
||||
Table.new [["a", [1, 2, 3]], ["b", [True, False, Nothing]], ["c", ["foo", Nothing, "aaa"]]]
|
||||
|
||||
r = t5.drop_missing_columns
|
||||
r.columns.map .name . should_equal ["a"]
|
||||
r.at "a" . to_vector . should_equal [1, 2, 3]
|
||||
|
||||
empty = t4.drop_missing_columns
|
||||
empty.column_count . should_equal 0
|
||||
empty.read.column_count . should_equal 0
|
||||
|
||||
Test.group prefix+"Column-wide statistics" pending=pending <|
|
||||
Test.specify 'should allow computing basic column-wide stats' <|
|
||||
t7 = upload "T7" <|
|
||||
|
@ -21,7 +21,7 @@ type Fake_Test_Connection
|
||||
query self query alias="" = case query of
|
||||
Text -> self.query (SQL_Query.Table_Name query) alias
|
||||
SQL_Query.Raw_SQL _ ->
|
||||
Error.throw (Illegal_Argument_Error "Cannot query a fake connection with raw SQL")
|
||||
Error.throw (Illegal_Argument_Error_Data "Cannot query a fake connection with raw SQL")
|
||||
SQL_Query.Table_Name name ->
|
||||
columns = self.tables.get name
|
||||
Database_Table.make_table self name columns (Context.for_table name)
|
||||
|
@ -396,15 +396,22 @@ spec =
|
||||
t2.at "X" . to_vector . should_equal [1, 5]
|
||||
t2.at "Y" . to_vector . should_equal ['A', 0]
|
||||
|
||||
# TODO move these to Common_Table_Spec
|
||||
Test.specify "should drop columns that contain at least one missing row in a Table" <|
|
||||
a = ["a", [1, Nothing, 3, 4]]
|
||||
b = ["b", [1, 2, Nothing, "x"]]
|
||||
c = ["c", [10, 20, 30, 40]]
|
||||
t = Table.new [a, b, c]
|
||||
d = t.drop_missing_columns
|
||||
d.columns.map .name . should_equal ["c"]
|
||||
d.at "c" . to_vector . should_equal [10, 20, 30, 40]
|
||||
t3 = Table.new [["X", [1, 2, 3]], ["Y", ["", Nothing, Number.nan]]]
|
||||
t4 = t3.remove_columns (Column_Selector.Blank_Columns treat_nans_as_blank=True)
|
||||
t4.columns . map .name . should_equal ["X"]
|
||||
|
||||
Test.specify "Blank_Columns selector should work for all kinds of methods accepting Column_Selector" <|
|
||||
t = Table.new [["X", [1, 2, 3, 4, 5]], ["Y", ["", Nothing, Nothing, Number.nan, ""]]]
|
||||
r1 = t.distinct (Column_Selector.Blank_Columns treat_nans_as_blank=True)
|
||||
r1.at "Y" . to_vector . to_text . should_equal "['', Nothing, NaN]"
|
||||
r1.at "X" . to_vector . should_equal [1, 2, 4]
|
||||
|
||||
# TODO this could be moved to Common_Table_Spec once replace_text is implemented for Database too
|
||||
t2 = Table.new [["X", [1, 2, 3, 4, 5]], ["Y", ["", Nothing, Nothing, Nothing, ""]], ["Z", ["", "---", "-1", "", "foobar"]]]
|
||||
r2 = t2.replace_text (Column_Selector.Blank_Columns when_any=True) "-" "A"
|
||||
r2.at "X" . to_vector . should_equal [1, 2, 3, 4, 5]
|
||||
r2.at "Y" . to_vector . should_equal ["", Nothing, Nothing, Nothing, ""]
|
||||
r2.at "Z" . to_vector . should_equal ["", "AAA", "A1", "", "foobar"]
|
||||
|
||||
Test.group "Info" <|
|
||||
Test.specify "should return Table information" <|
|
||||
|
Loading…
Reference in New Issue
Block a user