mirror of
https://github.com/enso-org/enso.git
synced 2024-12-23 13:02:07 +03:00
Add drop down for replace, remove Column_Selector (#7295)
- Add dropdowns for `replace` functions. - Retire `Column_Selector` type. - Add `select_blank_columns` and `remove_blank_columns` functions to table types. - Allow Regex to be used to pick columns.
This commit is contained in:
parent
3273ab654d
commit
aaa235fbad
@ -520,6 +520,7 @@
|
||||
- [Improving date/time support in Table - added `date_diff`, `date_add`,
|
||||
`date_part` and some shorthands. Extended `Time_Period` with milli-, micro-
|
||||
and nanosecond periods.][7221]
|
||||
- [Retire `Column_Selector` and allow regex based selection of columns.][7295]
|
||||
|
||||
[debug-shortcuts]:
|
||||
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
||||
@ -749,6 +750,7 @@
|
||||
[7223]: https://github.com/enso-org/enso/pull/7223
|
||||
[7234]: https://github.com/enso-org/enso/pull/7234
|
||||
[7221]: https://github.com/enso-org/enso/pull/7221
|
||||
[7295]: https://github.com/enso-org/enso/pull/7295
|
||||
|
||||
#### Enso Compiler
|
||||
|
||||
|
@ -60,13 +60,3 @@ type Case_Sensitivity
|
||||
to_explicit_sensitivity_in_memory self = case self of
|
||||
Case_Sensitivity.Default -> Case_Sensitivity.Sensitive
|
||||
_ -> self
|
||||
|
||||
## PRIVATE
|
||||
Create matcher function
|
||||
create_match_function : Boolean -> (Text -> Text -> Boolean)
|
||||
create_match_function self use_regex=False = case use_regex of
|
||||
True -> (name-> pattern-> Regex.compile pattern case_insensitive=self.is_case_insensitive_in_memory . matches name)
|
||||
False -> case self of
|
||||
Case_Sensitivity.Default -> (==)
|
||||
Case_Sensitivity.Sensitive -> (==)
|
||||
Case_Sensitivity.Insensitive locale -> (name-> criterion-> name.equals_ignore_case criterion locale)
|
||||
|
@ -35,7 +35,7 @@ from project.Data.Boolean import Boolean, False, True
|
||||
from project.Data.Json import Invalid_JSON, JS_Object, Json
|
||||
from project.Data.Numbers import Decimal, Integer, Number, Number_Parse_Error
|
||||
from project.Data.Range.Extensions import all
|
||||
from project.Widget_Helpers import make_date_format_selector, make_date_time_format_selector, make_delimiter_selector, make_time_format_selector
|
||||
from project.Widget_Helpers import make_date_format_selector, make_date_time_format_selector, make_delimiter_selector, make_regex_text_widget, make_time_format_selector
|
||||
|
||||
polyglot java import com.ibm.icu.lang.UCharacter
|
||||
polyglot java import com.ibm.icu.text.BreakIterator
|
||||
@ -477,8 +477,9 @@ Text.tokenize self pattern="." case_sensitivity=Case_Sensitivity.Sensitive =
|
||||
Regexp replace.
|
||||
|
||||
'<a href="url">content</a>'.replace '<a href="(.*?)">(.*?)</a>'.to_regex '$2 is at $1' == 'content is at url'
|
||||
@term make_regex_text_widget
|
||||
Text.replace : Text | Regex -> Text -> Case_Sensitivity -> Boolean -> Text ! Illegal_Argument
|
||||
Text.replace self term replacement case_sensitivity=Case_Sensitivity.Default only_first=False =
|
||||
Text.replace self term:(Text | Regex) replacement:Text (case_sensitivity:Case_Sensitivity=Case_Sensitivity.Default) only_first:Boolean=False =
|
||||
case term of
|
||||
_ : Text -> if term.is_empty then self else
|
||||
array_from_single_result result = case result of
|
||||
@ -496,11 +497,7 @@ Text.replace self term replacement case_sensitivity=Case_Sensitivity.Default onl
|
||||
Text_Utils.span_of_case_insensitive self term locale.java_locale False
|
||||
Text_Utils.replace_spans self spans_array replacement
|
||||
_ : Regex ->
|
||||
updated_regex = case case_sensitivity of
|
||||
Case_Sensitivity.Default -> term
|
||||
_ ->
|
||||
case_insensitive = case_sensitivity.is_case_insensitive_in_memory
|
||||
term.recompile case_insensitive
|
||||
updated_regex = term.recompile case_sensitivity
|
||||
updated_regex.replace self replacement only_first
|
||||
|
||||
## ALIAS Get Words
|
||||
|
@ -3,6 +3,7 @@ import project.Data.Filter_Condition.Filter_Condition
|
||||
import project.Data.Map.Map
|
||||
import project.Data.Numbers.Integer
|
||||
import project.Data.Range.Range
|
||||
import project.Data.Text.Case_Sensitivity.Case_Sensitivity
|
||||
import project.Data.Text.Helpers
|
||||
import project.Data.Text.Prim_Text_Helper
|
||||
import project.Data.Text.Regex.Internal.Match_Iterator.Match_Iterator
|
||||
@ -370,8 +371,11 @@ type Regex
|
||||
|
||||
Recompile the underlying regex string; used to change the
|
||||
case-sensitivity of a compiled Regex.
|
||||
recompile : Boolean | Nothing -> Regex ! Regex_Syntax_Error | Illegal_Argument
|
||||
recompile self case_insensitive=False =
|
||||
recompile : Case_Sensitivity -> Regex ! Regex_Syntax_Error | Illegal_Argument
|
||||
recompile self case_sensitivity:Case_Sensitivity = case case_sensitivity of
|
||||
Case_Sensitivity.Default -> self
|
||||
_ ->
|
||||
case_insensitive = case_sensitivity.is_case_insensitive_in_memory
|
||||
should_recompile = self.case_insensitive != case_insensitive
|
||||
if should_recompile.not then self else
|
||||
Regex.compile self.internal_regex_object.pattern case_insensitive
|
||||
|
@ -10,9 +10,9 @@ polyglot java import java.time.temporal.ChronoUnit
|
||||
polyglot java import java.time.temporal.TemporalAdjuster
|
||||
polyglot java import java.time.temporal.TemporalAdjusters
|
||||
polyglot java import java.time.temporal.TemporalUnit
|
||||
polyglot java import org.enso.base.Time_Utils
|
||||
polyglot java import org.enso.base.time.Date_Period_Utils
|
||||
polyglot java import org.enso.base.time.CustomTemporalUnits
|
||||
polyglot java import org.enso.base.time.Date_Period_Utils
|
||||
polyglot java import org.enso.base.Time_Utils
|
||||
|
||||
## Represents a unit of time on the scale of days (longer than a day).
|
||||
type Date_Period
|
||||
|
@ -5,8 +5,8 @@ from project.Data.Boolean import Boolean, False, True
|
||||
|
||||
polyglot java import java.time.temporal.ChronoUnit
|
||||
polyglot java import java.time.temporal.TemporalUnit
|
||||
polyglot java import org.enso.base.Time_Utils
|
||||
polyglot java import org.enso.base.time.CustomTemporalUnits
|
||||
polyglot java import org.enso.base.Time_Utils
|
||||
|
||||
## Represents a unit of time of a day or shorter.
|
||||
type Time_Period
|
||||
|
@ -5,6 +5,12 @@ import project.Data.Time.Time_Of_Day.Time_Of_Day
|
||||
import project.Metadata.Widget
|
||||
from project.Metadata import make_single_choice
|
||||
|
||||
## PRIVATE
|
||||
Creates a Regex / Text Widget for search and replace.
|
||||
make_regex_text_widget : Widget
|
||||
make_regex_text_widget =
|
||||
make_single_choice [["Text", '""'], ["Regular Expression", '(Regex.compile "^$")']]
|
||||
|
||||
## PRIVATE
|
||||
Creates a Single_Choice Widget for delimiters.
|
||||
make_delimiter_selector : Widget
|
||||
|
@ -1,8 +1,8 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
import Standard.Base.Errors.Illegal_State.Illegal_State
|
||||
import Standard.Base.Internal.Rounding_Helpers
|
||||
from Standard.Base.Widget_Helpers import make_regex_text_widget
|
||||
|
||||
import Standard.Table.Data.Column.Column as Materialized_Column
|
||||
import Standard.Table.Data.Type.Enso_Types
|
||||
@ -1124,7 +1124,6 @@ type Column
|
||||
- case_sensitivity: Specifies if the text values should be compared case
|
||||
sensitively.
|
||||
- only_first: If True, only replace the first match.
|
||||
- use_regex: If true, the term is used as a regular expression.
|
||||
|
||||
> Example
|
||||
Replace dashes with underscores.
|
||||
@ -1140,7 +1139,8 @@ type Column
|
||||
Replace texts in quotes with parentheses.
|
||||
|
||||
column.replace '"(.*?)"'.to_regex '($1)'
|
||||
replace : Text | Column | Regex -> Text | Column -> Case_Sensitivity -> Boolean -> Column
|
||||
@term make_regex_text_widget
|
||||
replace : Text | Regex | Column -> Text | Column -> Case_Sensitivity -> Boolean -> Column
|
||||
replace self term="" new_text="" case_sensitivity=Case_Sensitivity.Sensitive only_first=False =
|
||||
_ = [term, new_text, case_sensitivity, only_first]
|
||||
msg = "`Column.replace` is not yet implemented."
|
||||
|
@ -3,7 +3,7 @@ import Standard.Base.Errors.Unimplemented.Unimplemented
|
||||
|
||||
import Standard.Table.Internal.Naming_Helpers.Naming_Helpers
|
||||
import Standard.Table.Internal.Problem_Builder.Problem_Builder
|
||||
from Standard.Table import Aggregate_Column, Column_Selector, Join_Kind, Value_Type
|
||||
from Standard.Table import Aggregate_Column, Join_Kind, Value_Type
|
||||
|
||||
import project.Connection.Connection.Connection
|
||||
import project.Data.SQL.Builder
|
||||
@ -255,6 +255,6 @@ default_fetch_primary_key connection table_name =
|
||||
rs = metadata.getPrimaryKeys Nothing Nothing table_name
|
||||
keys_table = result_set_to_table rs connection.dialect.make_column_fetcher_for_type
|
||||
# The names of the columns are sometimes lowercase and sometimes uppercase, so we do a case insensitive select first.
|
||||
selected = keys_table.select_columns [Column_Selector.By_Name "COLUMN_NAME", Column_Selector.By_Name "KEY_SEQ"] reorder=True
|
||||
selected = keys_table.select_columns ["COLUMN_NAME", "KEY_SEQ"] case_sensitivity=Case_Sensitivity.Insensitive reorder=True
|
||||
key_column_names = selected.order_by 1 . at 0 . to_vector
|
||||
if key_column_names.is_empty then Nothing else key_column_names
|
||||
|
@ -28,7 +28,7 @@ import Standard.Table.Internal.Table_Helpers
|
||||
import Standard.Table.Internal.Table_Helpers.Table_Column_Helper
|
||||
import Standard.Table.Internal.Unique_Name_Strategy.Unique_Name_Strategy
|
||||
import Standard.Table.Internal.Widget_Helpers
|
||||
from Standard.Table import Aggregate_Column, Auto, Column_Selector, Data_Formatter, Match_Columns, Position, Set_Mode, Sort_Column, Value_Type
|
||||
from Standard.Table import Aggregate_Column, Auto, Data_Formatter, Match_Columns, Position, Set_Mode, Sort_Column, Value_Type
|
||||
from Standard.Table.Data.Column import get_item_string, normalize_string_for_display
|
||||
from Standard.Table.Data.Table import print_table
|
||||
from Standard.Table.Errors import all
|
||||
@ -147,8 +147,10 @@ type Table
|
||||
dropped from the output.
|
||||
|
||||
Arguments:
|
||||
- columns: Column selection criteria - a single instance or Vector of
|
||||
names, indexes or `Column_Selector`.
|
||||
- columns: Specifies columns by a single instance or Vector of names,
|
||||
indexes or regular expressions to match names.
|
||||
- case_sensitivity: Controls whether to be case sensitive when matching
|
||||
column names.
|
||||
- reorder: By default, or if set to `False`, columns in the output will
|
||||
be in the same order as in the input table. If `True`, the order in the
|
||||
output table will match the order in the columns list. If a column is
|
||||
@ -182,16 +184,16 @@ type Table
|
||||
> Example
|
||||
Select columns matching a regular expression.
|
||||
|
||||
table.select_columns [Column_Selector.By_Name "foo.+" Case_Sensitivity.Insensitive use_regex=True]
|
||||
table.select_columns "foo.+".to_regex case_sensitivity=Case_Sensitivity.Insensitive
|
||||
|
||||
> Example
|
||||
Select the first two columns and the last column, moving the last one to front.
|
||||
|
||||
table.select_columns [-1, 0, 1] reorder=True
|
||||
@columns Widget_Helpers.make_column_name_vector_selector
|
||||
select_columns : Vector (Integer | Text | Column_Selector) | Text | Integer -> Boolean -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns
|
||||
select_columns self (columns = [self.columns.first.name]) (reorder = False) (error_on_missing_columns = True) (on_problems = Report_Warning) =
|
||||
new_columns = self.columns_helper.select_columns selectors=columns reorder=reorder error_on_missing_columns=error_on_missing_columns on_problems=on_problems
|
||||
select_columns : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Case_Sensitivity -> Boolean -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns
|
||||
select_columns self (columns : (Vector | Text | Integer | Regex) = [self.columns.first.name]) (case_sensitivity=Case_Sensitivity.Default) (reorder:Boolean=False) (error_on_missing_columns:Boolean=True) (on_problems:Problem_Behavior=Report_Warning) =
|
||||
new_columns = self.columns_helper.select_columns columns case_sensitivity reorder error_on_missing_columns on_problems
|
||||
self.updated_columns new_columns
|
||||
|
||||
## ALIAS drop_columns
|
||||
@ -201,8 +203,10 @@ type Table
|
||||
input.
|
||||
|
||||
Arguments:
|
||||
- columns: Column selection criteria - a single instance or Vector of
|
||||
names, indexes or `Column_Selector`, which are to be removed.
|
||||
- columns: Specifies columns by a name, index or regular expression to
|
||||
match names, or a Vector of these.
|
||||
- case_sensitivity: Controls whether to be case sensitive when matching
|
||||
column names.
|
||||
- error_on_missing_columns: Specifies if a missing input column should
|
||||
result in an error regardless of the `on_problems` settings. Defaults
|
||||
to `False`.
|
||||
@ -232,27 +236,80 @@ type Table
|
||||
> Example
|
||||
Remove columns matching a regular expression.
|
||||
|
||||
table.remove_columns [Column_Selector.By_Name "foo.+" Case_Sensitivity.Insensitive use_regex=True]
|
||||
table.remove_columns "foo.+".to_regex Case_Sensitivity.Insensitive
|
||||
|
||||
> Example
|
||||
Remove the first two columns and the last column.
|
||||
|
||||
table.remove_columns [-1, 0, 1]
|
||||
@columns Widget_Helpers.make_column_name_vector_selector
|
||||
remove_columns : Vector (Integer | Text | Column_Selector) | Text | Integer -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns
|
||||
remove_columns self (columns = [self.columns.first.name]) (error_on_missing_columns = False) (on_problems = Report_Warning) =
|
||||
new_columns = self.columns_helper.remove_columns selectors=columns error_on_missing_columns=error_on_missing_columns on_problems=on_problems
|
||||
remove_columns : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns
|
||||
remove_columns self (columns : (Vector | Text | Integer | Regex) = [self.columns.first.name]) (case_sensitivity=Case_Sensitivity.Default) (error_on_missing_columns:Boolean=False) (on_problems:Problem_Behavior=Report_Warning) =
|
||||
new_columns = self.columns_helper.remove_columns columns case_sensitivity error_on_missing_columns=error_on_missing_columns on_problems=on_problems
|
||||
self.updated_columns new_columns
|
||||
|
||||
## ALIAS select_na
|
||||
ALIAS select_missing_columns
|
||||
|
||||
Select columns which are either all blank or contain blank values. If no
|
||||
rows are present, all columns are considered blank.
|
||||
|
||||
Arguments:
|
||||
- when_any: By default, only columns consisting of all blank cells are
|
||||
selected. If set to `True`, columns with one or more blank values are
|
||||
selected.
|
||||
- treat_nans_as_blank: specifies whether `Number.nan` is considered as
|
||||
blank. By default, it is not.
|
||||
|
||||
? Blank values
|
||||
Blank values are `Nothing`, `""` and, depending on the `treat_nans_as_blank` setting, `Number.nan`.
|
||||
|
||||
> Example
|
||||
Select completely blank columns from a table.
|
||||
|
||||
table.select_blank_columns
|
||||
select_blank_columns : Boolean -> Boolean -> Table
|
||||
select_blank_columns self (when_any : Boolean = False) (treat_nans_as_blank : Boolean = False) =
|
||||
new_columns = self.columns_helper.select_blank_columns_helper when_any treat_nans_as_blank
|
||||
if new_columns.length == 0 then Error.throw (No_Output_Columns) else
|
||||
self.updated_columns new_columns
|
||||
|
||||
## ALIAS drop_na
|
||||
ALIAS drop_missing_columns
|
||||
|
||||
Remove columns which are either all blank or contain blank values. If no
|
||||
rows are present, all columns are considered blank.
|
||||
|
||||
Arguments:
|
||||
- when_any: By default, only columns consisting of all blank cells are
|
||||
removed. If set to `True`, columns with one or more blank values are
|
||||
removed.
|
||||
- treat_nans_as_blank: specifies whether `Number.nan` is considered as
|
||||
blank. By default, it is not.
|
||||
|
||||
? Blank values
|
||||
Blank values are `Nothing`, `""` and, depending on the `treat_nans_as_blank` setting, `Number.nan`.
|
||||
|
||||
> Example
|
||||
Remove completely blank columns from a table.
|
||||
|
||||
table.remove_blank_columns
|
||||
remove_blank_columns : Boolean -> Boolean -> Table
|
||||
remove_blank_columns self (when_any : Boolean = False) (treat_nans_as_blank : Boolean = False) =
|
||||
new_columns = self.columns_helper.select_blank_columns_helper when_any treat_nans_as_blank invert_selection=True
|
||||
if new_columns.length == 0 then Error.throw (No_Output_Columns) else
|
||||
self.updated_columns new_columns
|
||||
|
||||
## Returns a new table with the specified selection of columns moved to
|
||||
either the start or the end in the specified order.
|
||||
|
||||
Arguments:
|
||||
- columns: Column selection criteria - a single instance or Vector of
|
||||
names, indexes or `Column_Selector`, which should be reordered and
|
||||
specifying their order.
|
||||
- columns: Specifies columns by a name, index or regular expression to
|
||||
match names, or a Vector of these.
|
||||
- position: Specifies how to place the selected columns in relation to
|
||||
the remaining columns which were not matched by `columns` (if any).
|
||||
- case_sensitivity: Controls whether to be case sensitive when matching
|
||||
column names.
|
||||
- error_on_missing_columns: Specifies if a missing input column should
|
||||
result in an error regardless of the `on_problems` settings. Defaults
|
||||
to `False`.
|
||||
@ -279,7 +336,7 @@ type Table
|
||||
> Example
|
||||
Move columns matching a regular expression to front, keeping columns matching "foo.+" before columns matching "b.*".
|
||||
|
||||
table.reorder_columns [Column_Selector.By_Name "foo.+" Case_Sensitivity.Insensitive use_regex=True]
|
||||
table.reorder_columns "foo.+".to_regex case_sensitivity=Case_Sensitivity.Insensitive
|
||||
|
||||
> Example
|
||||
Swap the first two columns.
|
||||
@ -291,9 +348,9 @@ type Table
|
||||
|
||||
table.reorder_columns [0] position=Position.After_Other_Columns
|
||||
@columns Widget_Helpers.make_column_name_vector_selector
|
||||
reorder_columns : Vector (Integer | Text | Column_Selector) | Text | Integer -> Position -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns
|
||||
reorder_columns self (columns = [self.columns.first.name]) (position = Position.Before_Other_Columns) (error_on_missing_columns = False) (on_problems = Report_Warning) =
|
||||
new_columns = self.columns_helper.reorder_columns selectors=columns position=position error_on_missing_columns on_problems=on_problems
|
||||
reorder_columns : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Position -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns
|
||||
reorder_columns self (columns : (Vector | Text | Integer | Regex) = [self.columns.first.name]) (position:Position=Position.Before_Other_Columns) (case_sensitivity=Case_Sensitivity.Default) (error_on_missing_columns:Boolean=False) (on_problems:Problem_Behavior=Report_Warning) =
|
||||
new_columns = self.columns_helper.reorder_columns columns position case_sensitivity error_on_missing_columns on_problems
|
||||
self.updated_columns new_columns
|
||||
|
||||
## Returns a new table with the columns sorted by name according to the
|
||||
@ -328,7 +385,10 @@ type Table
|
||||
|
||||
Arguments:
|
||||
- column_map: Mapping from old column names to new or a vector of new
|
||||
column names to apply by position.
|
||||
column names to apply by position. `Regex` objects can be used
|
||||
within the mapping to do pattern based renaming.
|
||||
- case_sensitivity: Controls whether to be case sensitive when matching
|
||||
column names.
|
||||
- error_on_missing_columns: Specifies if a missing input column should
|
||||
result in an error regardless of the `on_problems` settings. Defaults
|
||||
to `True`.
|
||||
@ -379,12 +439,11 @@ type Table
|
||||
> Example
|
||||
For all columns starting with the prefix `name=`, replace it with `key:`.
|
||||
|
||||
by_name = Column_Selector.By_Name "name=(.*)" Case_Sensitivity.Sensitive use_regex=True
|
||||
table.rename_columns (Map.from_vector [[by_name, "key:$1"]])
|
||||
table.rename_columns (Map.from_vector [["name=(.*)".to_regex, "key:$1"]])
|
||||
@column_map Widget_Helpers.make_rename_name_vector_selector
|
||||
rename_columns : Map (Text | Integer | Column_Selector) Text | Vector Text | Vector Vector -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns | Ambiguous_Column_Rename | Too_Many_Column_Names_Provided | Invalid_Output_Column_Names | Duplicate_Output_Column_Names
|
||||
rename_columns self column_map=["Column"] (error_on_missing_columns=True) (on_problems=Report_Warning) =
|
||||
new_names = Table_Helpers.rename_columns internal_columns=self.internal_columns mapping=column_map error_on_missing_columns=error_on_missing_columns on_problems=on_problems
|
||||
rename_columns : Map (Text | Integer | Regex) Text | Vector Text | Vector Vector -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns | Ambiguous_Column_Rename | Too_Many_Column_Names_Provided | Invalid_Output_Column_Names | Duplicate_Output_Column_Names
|
||||
rename_columns self (column_map:(Map | Vector)=["Column"]) (case_sensitivity:Case_Sensitivity=Case_Sensitivity.Default) (error_on_missing_columns:Boolean=True) (on_problems:Problem_Behavior=Report_Warning) =
|
||||
new_names = Table_Helpers.rename_columns self.internal_columns column_map case_sensitivity error_on_missing_columns on_problems
|
||||
Warning.with_suspended new_names names->
|
||||
self.updated_columns (self.internal_columns.map c-> c.rename (names.at c.name))
|
||||
|
||||
@ -566,10 +625,10 @@ type Table
|
||||
problem is reported.
|
||||
@group_by Widget_Helpers.make_column_name_vector_selector
|
||||
@order_by Widget_Helpers.make_order_by_selector
|
||||
add_row_number : Text -> Integer -> Integer -> Vector (Text | Integer | Column_Selector) | Text | Integer -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
|
||||
add_row_number self (name:Text = "Row") (from:Integer = 1) (step:Integer = 1) group_by=[] order_by=[] on_problems=Problem_Behavior.Report_Warning =
|
||||
add_row_number : Text -> Integer -> Integer -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
|
||||
add_row_number self (name:Text="Row") (from:Integer=1) (step:Integer=1) (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=Problem_Behavior.Report_Warning) =
|
||||
problem_builder = Problem_Builder.new error_on_missing_columns=True
|
||||
grouping_columns = self.columns_helper.select_columns_helper group_by True problem_builder
|
||||
grouping_columns = self.columns_helper.select_columns_helper group_by Case_Sensitivity.Default True problem_builder
|
||||
grouping_columns.each internal_column->
|
||||
column = self.make_column internal_column
|
||||
if column.value_type.is_floating_point then
|
||||
@ -944,9 +1003,9 @@ type Table
|
||||
`Floating_Point_Equality` is reported according to the `on_problems`
|
||||
setting.
|
||||
@columns Widget_Helpers.make_column_name_vector_selector
|
||||
distinct : Vector (Integer | Text | Column_Selector) | Text | Integer -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns | No_Input_Columns_Selected | Floating_Point_Equality
|
||||
distinct : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns | No_Input_Columns_Selected | Floating_Point_Equality
|
||||
distinct self columns=self.column_names case_sensitivity=Case_Sensitivity.Default error_on_missing_columns=True on_problems=Report_Warning =
|
||||
key_columns = self.columns_helper.select_columns selectors=columns reorder=True error_on_missing_columns=error_on_missing_columns on_problems=on_problems . catch No_Output_Columns _->
|
||||
key_columns = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=True error_on_missing_columns=error_on_missing_columns on_problems=on_problems . catch No_Output_Columns _->
|
||||
Error.throw No_Input_Columns_Selected
|
||||
problem_builder = Problem_Builder.new
|
||||
new_table = self.connection.dialect.prepare_distinct self key_columns case_sensitivity problem_builder
|
||||
@ -1494,7 +1553,7 @@ type Table
|
||||
B | Name | Another
|
||||
B | Country | Germany
|
||||
@key_columns Widget_Helpers.make_column_name_vector_selector
|
||||
transpose : Vector (Integer | Text | Column_Selector) | Text | Integer -> Text -> Text -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns | Duplicate_Output_Column_Names
|
||||
transpose : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Text -> Text -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns | Duplicate_Output_Column_Names
|
||||
transpose self key_columns=[] (attribute_column_name="Name") (value_column_name="Value") (error_on_missing_columns=True) (on_problems = Report_Warning) =
|
||||
## Avoid unused arguments warning. We cannot rename arguments to `_`,
|
||||
because we need to keep the API consistent with the in-memory table.
|
||||
@ -1552,7 +1611,7 @@ type Table
|
||||
@group_by Widget_Helpers.make_column_name_vector_selector
|
||||
@name_column Widget_Helpers.make_column_name_selector
|
||||
@values (Widget_Helpers.make_aggregate_column_selector include_group_by=False)
|
||||
cross_tab : Vector (Integer | Text | Column_Selector | Aggregate_Column) | Text | Integer -> (Text | Integer) -> Aggregate_Column | Vector Aggregate_Column -> Problem_Behavior -> Table ! Missing_Input_Columns | Invalid_Aggregate_Column | Floating_Point_Equality | Invalid_Aggregation | Unquoted_Delimiter | Additional_Warnings
|
||||
cross_tab : Vector (Integer | Text | Regex | Aggregate_Column) | Text | Integer | Regex -> (Text | Integer) -> Aggregate_Column | Vector Aggregate_Column -> Problem_Behavior -> Table ! Missing_Input_Columns | Invalid_Aggregate_Column | Floating_Point_Equality | Invalid_Aggregation | Unquoted_Delimiter | Additional_Warnings
|
||||
cross_tab self group_by name_column values=Aggregate_Column.Count (on_problems=Report_Warning) =
|
||||
## Avoid unused arguments warning. We cannot rename arguments to `_`,
|
||||
because we need to keep the API consistent with the in-memory table.
|
||||
@ -1610,9 +1669,9 @@ type Table
|
||||
table.parse "birthday" Value_Type.Date
|
||||
@type (Widget_Helpers.parse_type_selector include_auto=False)
|
||||
@columns Widget_Helpers.make_column_name_vector_selector
|
||||
parse : Vector (Text | Integer | Column_Selector) | Text | Integer -> Value_Type | Auto -> Text | Data_Formatter | Nothing -> Boolean -> Problem_Behavior -> Table
|
||||
parse : Vector (Text | Integer | Regex) | Text | Integer | Regex -> Value_Type | Auto -> Text | Data_Formatter | Nothing -> Boolean -> Problem_Behavior -> Table
|
||||
parse self columns=(self.columns . filter (c-> c.value_type.is_text) . map .name) type format=Nothing error_on_missing_columns=True on_problems=Report_Warning =
|
||||
selected = self.columns_helper.resolve_columns columns error_on_missing_columns=error_on_missing_columns on_problems=on_problems
|
||||
selected = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=False error_on_missing_columns=error_on_missing_columns on_problems=on_problems error_on_empty=False . map self.make_column
|
||||
selected.fold self table-> column_to_parse->
|
||||
new_column = column_to_parse.parse type format on_problems
|
||||
table.set new_column new_name=column_to_parse.name set_mode=Set_Mode.Update
|
||||
@ -1779,9 +1838,9 @@ type Table
|
||||
types. Due to this, a Mixed column containing values `[2, "3"]` will
|
||||
actually be converted into `[2, Nothing]` when casting to Integer type.
|
||||
@columns Widget_Helpers.make_column_name_vector_selector
|
||||
cast : Vector (Text | Integer | Column_Selector) | Text | Integer -> Value_Type -> Boolean -> Problem_Behavior -> Table ! Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure
|
||||
cast : Vector (Text | Integer | Regex) | Text | Integer | Regex -> Value_Type -> Boolean -> Problem_Behavior -> Table ! Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure
|
||||
cast self columns=[0] value_type error_on_missing_columns=True on_problems=Problem_Behavior.Report_Warning =
|
||||
selected = self.columns_helper.resolve_columns columns error_on_missing_columns=error_on_missing_columns on_problems=on_problems
|
||||
selected = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=False error_on_missing_columns=error_on_missing_columns on_problems=on_problems error_on_empty=False . map self.make_column
|
||||
selected.fold self table-> column_to_cast->
|
||||
new_column = column_to_cast.cast value_type on_problems
|
||||
table.set new_column new_name=column_to_cast.name set_mode=Set_Mode.Update
|
||||
@ -2022,7 +2081,8 @@ type Table
|
||||
been replaced with the provided default(s).
|
||||
|
||||
Arguments:
|
||||
- columns: The column(s) to fill missing values of.
|
||||
- columns: Specifies columns by a name, index or regular expression to
|
||||
match names, or a Vector of these.
|
||||
- default: The value to replace missing values with. If this argument
|
||||
is a column, the value from `default` at the corresponding position
|
||||
will be used.
|
||||
@ -2032,7 +2092,7 @@ type Table
|
||||
|
||||
fill_nothing = table.fill_nothing ["col0", "col1"] 20.5
|
||||
@columns Widget_Helpers.make_column_name_vector_selector
|
||||
fill_nothing : Vector (Integer | Text | Column_Selector) | Text | Integer -> Column | Any -> Table
|
||||
fill_nothing : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Any -> Table
|
||||
fill_nothing self columns default =
|
||||
transformer col = col.fill_nothing default
|
||||
Table_Helpers.replace_columns_with_transformed_columns self columns transformer
|
||||
@ -2040,10 +2100,11 @@ type Table
|
||||
## ALIAS Fill Empty, if_empty
|
||||
|
||||
Returns a new table where empty Text values have been replaced with the
|
||||
provided default(s).
|
||||
provided default.
|
||||
|
||||
Arguments:
|
||||
- columns: The column(s) to fill empty values.
|
||||
- columns: Specifies columns by a name, index or regular expression to
|
||||
match names, or a Vector of these.
|
||||
- default: The value to replace empty values with. If this argument
|
||||
is a column, the value from `default` at the corresponding position
|
||||
will be used.
|
||||
@ -2053,7 +2114,7 @@ type Table
|
||||
|
||||
fill_empty = table.fill_empty ["col0", "col1"] "hello"
|
||||
@columns Widget_Helpers.make_column_name_vector_selector
|
||||
fill_empty : Vector (Integer | Text | Column_Selector) | Text | Integer -> Column | Any -> Table
|
||||
fill_empty : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Any -> Table
|
||||
fill_empty self columns default =
|
||||
transformer col = col.fill_empty default
|
||||
Table_Helpers.replace_columns_with_transformed_columns self columns transformer
|
||||
@ -2062,11 +2123,11 @@ type Table
|
||||
row of the specified column. If `term` is empty, the function returns the
|
||||
table unchanged.
|
||||
|
||||
This method follows the exact replacement semantics of the
|
||||
`Text.replace` method.
|
||||
This method follows the exact replacement semantics of `Text.replace`.
|
||||
|
||||
Arguments:
|
||||
- columns: The column(s) to replace values on.
|
||||
- columns: Specifies columns by a name, index or regular expression to
|
||||
match names, or a Vector of these.
|
||||
- term: The term to find. Can be `Text`, `Regex`, or a `Column` of
|
||||
strings.
|
||||
- replacement: The text to replace matches with.
|
||||
@ -2077,19 +2138,19 @@ type Table
|
||||
> Example
|
||||
Replace dashes with underscores.
|
||||
|
||||
table.replace "-" "_"
|
||||
table.replace ["col0", "col1"] "-" "_"
|
||||
|
||||
> Example
|
||||
Remove leading and trailing spaces from cells.
|
||||
|
||||
column.replace "^\s*(.*?)\s*$".to_regex "$1"
|
||||
table.replace ["col.*".to_regex] "^\s*(.*?)\s*$".to_regex "$1"
|
||||
|
||||
> Example
|
||||
Replace texts in quotes with parentheses.
|
||||
|
||||
column.replace '"(.*?)"'.to_regex '($1)'
|
||||
table.replace ["col0"] '"(.*?)"'.to_regex '($1)'
|
||||
@columns Widget_Helpers.make_column_name_vector_selector
|
||||
replace : Vector (Integer | Text | Column_Selector) | Text | Integer -> Text | Column | Regex -> Text | Column -> Case_Sensitivity -> Boolean -> Column
|
||||
replace : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Text | Column | Regex -> Text | Column -> Case_Sensitivity -> Boolean -> Column
|
||||
replace self columns term="" new_text="" case_sensitivity=Case_Sensitivity.Sensitive only_first=False =
|
||||
_ = [columns, term, new_text, case_sensitivity, only_first]
|
||||
Error.throw (Unsupported_Database_Operation.Error "Text replace is currently not supported in the database backend.")
|
||||
@ -2101,7 +2162,8 @@ type Table
|
||||
Arguments:
|
||||
- connection: The connection to a database.
|
||||
- table_name: The name of the table to get.
|
||||
- columns: List of columns to fetch. Each column is represented by a pair of column name and its expected SQL Type.
|
||||
- columns: List of columns to fetch. Each column is represented by a pair of
|
||||
column name and its expected SQL Type.
|
||||
- ctx: The context to use for the table.
|
||||
make_table : Connection -> Text -> Vector -> Context -> Table
|
||||
make_table connection table_name columns ctx =
|
||||
|
@ -2,7 +2,6 @@ from Standard.Base import all
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
|
||||
import Standard.Table.Internal.Widget_Helpers
|
||||
from Standard.Table import Column_Selector
|
||||
from Standard.Table.Errors import all
|
||||
|
||||
import project.Connection.Connection.Connection
|
||||
|
@ -3,7 +3,6 @@ import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
|
||||
import Standard.Table.Data.Table.Table
|
||||
import Standard.Table.Internal.Widget_Helpers
|
||||
from Standard.Table import Column_Selector
|
||||
from Standard.Table.Errors import all
|
||||
|
||||
import project.Connection.Connection.Connection
|
||||
|
@ -268,8 +268,8 @@ make_is_in_column arguments = case arguments.length of
|
||||
_ -> Error.throw <| Illegal_State.Error ("The operation IS_IN_COLUMN requires at exactly 3 arguments: the expression, the IN subquery, the subquery checking for nulls.")
|
||||
|
||||
## PRIVATE
|
||||
make_row_number : Vector Builder -> Builder
|
||||
make_row_number arguments (metadata : Row_Number_Metadata) = if arguments.length < 3 then Error.throw (Illegal_State.Error "Wrong amount of parameters in ROW_NUMBER IR. This is a bug in the Database library.") else
|
||||
make_row_number : Vector Builder -> Row_Number_Metadata -> Builder
|
||||
make_row_number (arguments : Vector) (metadata : Row_Number_Metadata) = if arguments.length < 3 then Error.throw (Illegal_State.Error "Wrong amount of parameters in ROW_NUMBER IR. This is a bug in the Database library.") else
|
||||
offset = arguments.at 0
|
||||
step = arguments.at 1
|
||||
|
||||
|
@ -6,7 +6,7 @@ import Standard.Base.Errors.Illegal_State.Illegal_State
|
||||
import Standard.Base.Runtime.Context
|
||||
|
||||
import Standard.Table.Data.Table.Table as In_Memory_Table
|
||||
from Standard.Table import Aggregate_Column, Column_Selector, Join_Kind, Value_Type
|
||||
from Standard.Table import Aggregate_Column, Join_Kind, Value_Type
|
||||
from Standard.Table.Errors import all
|
||||
|
||||
import project.Connection.Connection.Connection
|
||||
|
@ -29,7 +29,7 @@
|
||||
import Standard.Examples
|
||||
|
||||
example_drop_missing_cols =
|
||||
Examples.inventory_table.remove (Column_Selector.Blank_Columns when_any=True)
|
||||
Examples.inventory_table.remove_blank_columns when_any=True
|
||||
|
||||
> Example
|
||||
Fill missing values in a column with the value 20.5.
|
||||
|
@ -1,7 +1,6 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import project.Data.Column.Column
|
||||
import project.Data.Column_Selector.Column_Selector
|
||||
import project.Data.Sort_Column.Sort_Column
|
||||
|
||||
## Defines an Aggregate Column
|
||||
@ -32,7 +31,7 @@ type Aggregate_Column
|
||||
multiple selection.
|
||||
- new_name: name of new column.
|
||||
- ignore_nothing: if all values are Nothing won't be included.
|
||||
Count_Distinct (columns:(Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector | Column))=0) (new_name:Text="") (ignore_nothing:Boolean=False) # Column needed because of 6866
|
||||
Count_Distinct (columns:(Text | Integer | Regex | Vector (Integer | Text | Regex | Column))=0) (new_name:Text="") (ignore_nothing:Boolean=False) # Column needed because of 6866
|
||||
|
||||
## ALIAS Count_Not_Null
|
||||
|
||||
|
@ -5,6 +5,7 @@ import Standard.Base.Errors.Common.Index_Out_Of_Bounds
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
import Standard.Base.Errors.Illegal_State.Illegal_State
|
||||
import Standard.Base.Internal.Rounding_Helpers
|
||||
from Standard.Base.Widget_Helpers import make_regex_text_widget
|
||||
|
||||
import project.Data.Data_Formatter.Data_Formatter
|
||||
import project.Data.Table.Table
|
||||
@ -1212,7 +1213,8 @@ type Column
|
||||
Replace texts in quotes with parentheses.
|
||||
|
||||
column.replace '"(.*?)"'.to_regex '($1)'
|
||||
replace : Text | Column | Regex -> Text | Column -> Case_Sensitivity -> Boolean -> Column
|
||||
@term make_regex_text_widget
|
||||
replace : Text | Regex | Column -> Text | Column -> Case_Sensitivity -> Boolean -> Column
|
||||
replace self term="" new_text="" case_sensitivity=Case_Sensitivity.Sensitive only_first=False =
|
||||
Value_Type.expect_text self <|
|
||||
term_fn = wrap_text_or_regex_argument_as_value_provider term
|
||||
|
@ -1,32 +0,0 @@
|
||||
from Standard.Base import all
|
||||
|
||||
## Specifies a selection of columns from the table on which an operation is
|
||||
going to be performed.
|
||||
type Column_Selector
|
||||
## Selects columns based on their names.
|
||||
|
||||
It can do regex-based and case insensitive matching if requested.
|
||||
It is possible for it to match multiple columns, in which case all the
|
||||
matched ones will be included in the same relative order as in the table.
|
||||
By_Name name:Text case_sensitivity:Case_Sensitivity=Case_Sensitivity.Insensitive use_regex:Boolean=False
|
||||
|
||||
## ALIAS dropna
|
||||
ALIAS drop_missing_columns
|
||||
Select columns which are either all blank or contain blank values.
|
||||
|
||||
Arguments:
|
||||
- when_any: By default, only columns consisting of all blank cells are
|
||||
selected. If set to `True`, columns containing at least one blank value
|
||||
will be selected too. If there are no rows, the column is treated as
|
||||
blank regardless of this argument.
|
||||
- treat_nans_as_blank: If `True`, then `Number.nan` is considered as
|
||||
blank.
|
||||
|
||||
? Blank values
|
||||
Blank values are `Nothing`, `""` and depending on setting `Number.nan`.
|
||||
|
||||
> Example
|
||||
Remove completely blank columns from a table.
|
||||
|
||||
table.remove_columns Column_Selector.Blank_Columns
|
||||
Blank_Columns when_any:Boolean=False treat_nans_as_blank:Boolean=False
|
@ -15,7 +15,6 @@ from Standard.Base.Widget_Helpers import make_delimiter_selector
|
||||
import project.Data.Aggregate_Column.Aggregate_Column
|
||||
import project.Data.Column as Column_Module
|
||||
import project.Data.Column.Column
|
||||
import project.Data.Column_Selector.Column_Selector
|
||||
import project.Data.Data_Formatter.Data_Formatter
|
||||
import project.Data.Expression.Expression
|
||||
import project.Data.Expression.Expression_Error
|
||||
@ -267,8 +266,10 @@ type Table
|
||||
dropped from the output.
|
||||
|
||||
Arguments:
|
||||
- columns: Column selection criteria - a single instance or Vector of
|
||||
names, indexes or `Column_Selector`.
|
||||
- columns: Specifies columns by a name, index or regular expression to
|
||||
match names, or a Vector of these.
|
||||
- case_sensitivity: Controls whether to be case sensitive when matching
|
||||
column names.
|
||||
- reorder: By default, or if set to `False`, columns in the output will
|
||||
be in the same order as in the input table. If `True`, the order in the
|
||||
output table will match the order in the columns list. If a column is
|
||||
@ -303,27 +304,29 @@ type Table
|
||||
> Example
|
||||
Select columns matching a regular expression.
|
||||
|
||||
table.select_columns [Column_Selector.By_Name "foo.+" Case_Sensitivity.Insensitive use_regex=True]
|
||||
table.select_columns "foo.+".to_regex case_sensitivity=Case_Sensitivity.Insensitive
|
||||
|
||||
> Example
|
||||
Select the first two columns and the last column, moving the last one to front.
|
||||
|
||||
table.select_columns [-1, 0, 1] reorder=True
|
||||
@columns Widget_Helpers.make_column_name_vector_selector
|
||||
select_columns : Vector (Integer | Text | Column_Selector) | Text | Integer -> Boolean -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns
|
||||
select_columns self columns=[self.columns.first.name] (reorder = False) (error_on_missing_columns = True) (on_problems = Report_Warning) =
|
||||
new_columns = self.columns_helper.select_columns selectors=columns reorder=reorder error_on_missing_columns=error_on_missing_columns on_problems=on_problems
|
||||
select_columns : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Case_Sensitivity -> Boolean -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns
|
||||
select_columns self (columns : (Vector | Text | Integer | Regex) = [self.columns.first.name]) (case_sensitivity=Case_Sensitivity.Default) (reorder:Boolean=False) (error_on_missing_columns:Boolean=True) (on_problems:Problem_Behavior=Report_Warning) =
|
||||
new_columns = self.columns_helper.select_columns columns case_sensitivity reorder error_on_missing_columns on_problems
|
||||
Table.new new_columns
|
||||
|
||||
## ALIAS drop_columns
|
||||
Returns a new table with the chosen set of columns, as specified by thez
|
||||
Returns a new table with the chosen set of columns, as specified by the
|
||||
`columns`, removed from the input table. Any unmatched input columns will
|
||||
be kept in the output. Columns are returned in the same order as in the
|
||||
input.
|
||||
|
||||
Arguments:
|
||||
- columns: Column selection criteria - a single instance or Vector of
|
||||
names, indexes or `Column_Selector`, which are to be removed.
|
||||
- columns: Specifies columns by a name, index or regular expression to
|
||||
match names, or a Vector of these.
|
||||
- case_sensitivity: Controls whether to be case sensitive when matching
|
||||
column names.
|
||||
- error_on_missing_columns: Specifies if a missing input column should
|
||||
result in an error regardless of the `on_problems` settings. Defaults
|
||||
to `False`.
|
||||
@ -353,27 +356,80 @@ type Table
|
||||
> Example
|
||||
Remove columns matching a regular expression.
|
||||
|
||||
table.remove_columns [Column_Selector.By_Name "foo.+" Case_Sensitivity.Insensitive use_regex=True]
|
||||
table.remove_columns "foo.+".to_regex case_sensitivity=Case_Sensitivity.Insensitive
|
||||
|
||||
> Example
|
||||
Remove the first two columns and the last column.
|
||||
|
||||
table.remove_columns [-1, 0, 1]
|
||||
@columns Widget_Helpers.make_column_name_vector_selector
|
||||
remove_columns : Vector (Integer | Text | Column_Selector) | Text | Integer -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns
|
||||
remove_columns self (columns=[self.columns.first.name]) (error_on_missing_columns = False) (on_problems = Report_Warning) =
|
||||
new_columns = self.columns_helper.remove_columns selectors=columns error_on_missing_columns=error_on_missing_columns on_problems=on_problems
|
||||
remove_columns : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns
|
||||
remove_columns self (columns : (Vector | Text | Integer | Regex) = [self.columns.first.name]) (case_sensitivity=Case_Sensitivity.Default) (error_on_missing_columns:Boolean=False) (on_problems:Problem_Behavior=Report_Warning) =
|
||||
new_columns = self.columns_helper.remove_columns columns case_sensitivity error_on_missing_columns=error_on_missing_columns on_problems=on_problems
|
||||
Table.new new_columns
|
||||
|
||||
## ALIAS select_na
|
||||
ALIAS select_missing_columns
|
||||
|
||||
Select columns which are either all blank or contain blank values. If no
|
||||
rows are present, all columns are considered blank.
|
||||
|
||||
Arguments:
|
||||
- when_any: By default, only columns consisting of all blank cells are
|
||||
selected. If set to `True`, columns with one or more blank values are
|
||||
selected.
|
||||
- treat_nans_as_blank: specifies whether `Number.nan` is considered as
|
||||
blank. By default, it is not.
|
||||
|
||||
? Blank values
|
||||
Blank values are `Nothing`, `""` and depending on setting `Number.nan`.
|
||||
|
||||
> Example
|
||||
Select completely blank columns from a table.
|
||||
|
||||
table.select_blank_columns
|
||||
select_blank_columns : Boolean -> Boolean -> Table ! No_Output_Columns
|
||||
select_blank_columns self (when_any : Boolean = False) (treat_nans_as_blank : Boolean = False) =
|
||||
new_columns = self.columns_helper.select_blank_columns_helper when_any treat_nans_as_blank
|
||||
if new_columns.length == 0 then Error.throw (No_Output_Columns) else
|
||||
Table.new new_columns
|
||||
|
||||
## ALIAS drop_na
|
||||
ALIAS drop_missing_columns
|
||||
|
||||
Remove columns which are either all blank or contain blank values. If no
|
||||
rows are present, all columns are considered blank.
|
||||
|
||||
Arguments:
|
||||
- when_any: By default, only columns consisting of all blank cells are
|
||||
selected. If set to `True`, columns with one or more blank values are
|
||||
selected.
|
||||
- treat_nans_as_blank: specifies whether `Number.nan` is considered as
|
||||
blank. By default, it is not.
|
||||
|
||||
? Blank values
|
||||
Blank values are `Nothing`, `""` and depending on setting `Number.nan`.
|
||||
|
||||
> Example
|
||||
Remove completely blank columns from a table.
|
||||
|
||||
table.remove_blank_columns
|
||||
remove_blank_columns : Boolean -> Boolean -> Table ! No_Output_Columns
|
||||
remove_blank_columns self (when_any : Boolean = False) (treat_nans_as_blank : Boolean = False) =
|
||||
new_columns = self.columns_helper.select_blank_columns_helper when_any treat_nans_as_blank invert_selection=True
|
||||
if new_columns.length == 0 then Error.throw (No_Output_Columns) else
|
||||
Table.new new_columns
|
||||
|
||||
## Returns a new table with the specified selection of columns moved to
|
||||
either the start or the end in the specified order.
|
||||
|
||||
Arguments:
|
||||
- columns: Column selection criteria - a single instance or Vector of
|
||||
names, indexes or `Column_Selector`, which should be reordered and
|
||||
specifying their order.
|
||||
- columns: Specifies columns by a name, index or regular expression to
|
||||
match names, or a Vector of these.
|
||||
- position: Specifies how to place the selected columns in relation to
|
||||
the remaining columns which were not matched by `columns` (if any).
|
||||
- case_sensitivity: Controls whether to be case sensitive when matching
|
||||
column names.
|
||||
- error_on_missing_columns: Specifies if a missing input column should
|
||||
result in an error regardless of the `on_problems` settings. Defaults
|
||||
to `False`.
|
||||
@ -400,7 +456,7 @@ type Table
|
||||
> Example
|
||||
Move columns matching a regular expression to front, keeping columns matching "foo.+" before columns matching "b.*".
|
||||
|
||||
table.reorder_columns [Column_Selector.By_Name "foo.+" Case_Sensitivity.Insensitive use_regex=True]
|
||||
table.reorder_columns "foo.+".to_regex case_sensitivity=Case_Sensitivity.Insensitive
|
||||
|
||||
> Example
|
||||
Swap the first two columns.
|
||||
@ -412,9 +468,9 @@ type Table
|
||||
|
||||
table.reorder_columns [0] position=Position.After_Other_Columns
|
||||
@columns Widget_Helpers.make_column_name_vector_selector
|
||||
reorder_columns : Vector (Integer | Text | Column_Selector) | Text | Integer -> Position -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns
|
||||
reorder_columns self (columns = [self.columns.first.name]) (position = Position.Before_Other_Columns) (error_on_missing_columns = False) (on_problems = Report_Warning) =
|
||||
new_columns = self.columns_helper.reorder_columns selectors=columns position=position error_on_missing_columns=error_on_missing_columns on_problems=on_problems
|
||||
reorder_columns : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Position -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns
|
||||
reorder_columns self (columns : (Vector | Text | Integer | Regex) = [self.columns.first.name]) (position:Position=Position.Before_Other_Columns) (case_sensitivity=Case_Sensitivity.Default) (error_on_missing_columns:Boolean=False) (on_problems:Problem_Behavior=Report_Warning) =
|
||||
new_columns = self.columns_helper.reorder_columns columns position case_sensitivity error_on_missing_columns on_problems
|
||||
Table.new new_columns
|
||||
|
||||
## Returns a new table with the columns sorted by name according to the
|
||||
@ -441,7 +497,7 @@ type Table
|
||||
table.sort_columns Sort_Direction.Descending
|
||||
sort_columns : Sort_Direction -> Text_Ordering -> Table
|
||||
sort_columns self order=Sort_Direction.Ascending text_ordering=Text_Ordering.Default =
|
||||
new_columns = Table_Helpers.sort_columns internal_columns=self.columns order text_ordering
|
||||
new_columns = Table_Helpers.sort_columns self.columns order text_ordering
|
||||
Table.new new_columns
|
||||
|
||||
## Returns a new table with the columns renamed based on either a mapping
|
||||
@ -449,7 +505,10 @@ type Table
|
||||
|
||||
Arguments:
|
||||
- column_map: Mapping from old column names to new or a vector of new
|
||||
column names to apply by position.
|
||||
column names to apply by position. `Regex` objects can be used
|
||||
within the mapping to do pattern based renaming.
|
||||
- case_sensitivity: Controls whether to be case sensitive when matching
|
||||
column names.
|
||||
- error_on_missing_columns: Specifies if a missing input column should
|
||||
result in an error regardless of the `on_problems` settings. Defaults
|
||||
to `True`.
|
||||
@ -500,12 +559,11 @@ type Table
|
||||
> Example
|
||||
For all columns starting with the prefix `name=`, replace it with `key:`.
|
||||
|
||||
by_name = Column_Selector.By_Name "name=(.*)" Case_Sensitivity.Sensitive use_regex=True
|
||||
table.rename_columns (Map.from_vector [[by_name, "key:$1"]])
|
||||
table.rename_columns (Map.from_vector [["name=(.*)".to_regex, "key:$1"]])
|
||||
@column_map Widget_Helpers.make_rename_name_vector_selector
|
||||
rename_columns : Map (Text | Integer | Column_Selector) Text | Vector Text | Vector Vector -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns | Ambiguous_Column_Rename | Too_Many_Column_Names_Provided | Invalid_Output_Column_Names | Duplicate_Output_Column_Names
|
||||
rename_columns self column_map=["Column"] (error_on_missing_columns=True) (on_problems=Report_Warning) =
|
||||
new_names = Table_Helpers.rename_columns internal_columns=self.columns mapping=column_map error_on_missing_columns=error_on_missing_columns on_problems=on_problems
|
||||
rename_columns : Map (Text | Integer | Regex) Text | Vector Text | Vector Vector -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns | Ambiguous_Column_Rename | Too_Many_Column_Names_Provided | Invalid_Output_Column_Names | Duplicate_Output_Column_Names
|
||||
rename_columns self (column_map:(Map | Vector)=["Column"]) (case_sensitivity:Case_Sensitivity=Case_Sensitivity.Default) (error_on_missing_columns:Boolean=True) (on_problems:Problem_Behavior=Report_Warning) =
|
||||
new_names = Table_Helpers.rename_columns self.columns column_map case_sensitivity error_on_missing_columns on_problems
|
||||
Warning.with_suspended new_names names->
|
||||
Table.new (self.columns.map c-> c.rename (names.at c.name))
|
||||
|
||||
@ -673,10 +731,8 @@ type Table
|
||||
problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns types_to_always_throw=[No_Input_Columns_Selected]
|
||||
columns_for_ordering = Table_Helpers.prepare_order_by self.columns columns problem_builder
|
||||
problem_builder.attach_problems_before on_problems <|
|
||||
java_columns = columns_for_ordering.map c->
|
||||
c.column.java_column
|
||||
directions = columns_for_ordering.map c->
|
||||
c.associated_selector.direction.to_sign
|
||||
java_columns = columns_for_ordering.map c->c.column.java_column
|
||||
directions = columns_for_ordering.map c->c.associated_selector.direction.to_sign
|
||||
|
||||
comparator = case text_ordering.sort_digits_as_numbers of
|
||||
True ->
|
||||
@ -727,9 +783,9 @@ type Table
|
||||
`Floating_Point_Equality` is reported according to the `on_problems`
|
||||
setting.
|
||||
@columns Widget_Helpers.make_column_name_vector_selector
|
||||
distinct : Vector (Integer | Text | Column_Selector) | Text | Integer -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns | No_Input_Columns_Selected | Floating_Point_Equality
|
||||
distinct : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns | No_Input_Columns_Selected | Floating_Point_Equality
|
||||
distinct self (columns = self.column_names) case_sensitivity=Case_Sensitivity.Default error_on_missing_columns=True on_problems=Report_Warning =
|
||||
key_columns = self.columns_helper.select_columns selectors=columns reorder=True error_on_missing_columns=error_on_missing_columns on_problems=on_problems . catch No_Output_Columns _->
|
||||
key_columns = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=True error_on_missing_columns=error_on_missing_columns on_problems=on_problems . catch No_Output_Columns _->
|
||||
Error.throw No_Input_Columns_Selected
|
||||
java_columns = key_columns.map .java_column
|
||||
text_folding_strategy = Case_Sensitivity.folding_strategy case_sensitivity
|
||||
@ -827,7 +883,7 @@ type Table
|
||||
table.parse format=(Data_Formatter.Value.with_number_formatting decimal_point=',')
|
||||
@columns Widget_Helpers.make_column_name_vector_selector
|
||||
@type Widget_Helpers.parse_type_selector
|
||||
parse : Vector (Text | Integer | Column_Selector) | Text | Integer -> Value_Type | Auto -> Text | Data_Formatter | Nothing -> Boolean -> Problem_Behavior -> Table
|
||||
parse : Vector (Text | Integer | Regex) | Text | Integer | Regex -> Value_Type | Auto -> Text | Data_Formatter | Nothing -> Boolean -> Problem_Behavior -> Table
|
||||
parse self columns=(self.columns . filter (c-> c.value_type.is_text) . map .name) type=Auto format=Data_Formatter.Value error_on_missing_columns=True on_problems=Report_Warning =
|
||||
formatter = case format of
|
||||
_ : Text ->
|
||||
@ -839,7 +895,7 @@ type Table
|
||||
parser = formatter.make_value_type_parser type
|
||||
|
||||
select_problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns
|
||||
selected_columns = self.columns_helper.select_columns_helper columns reorder=True select_problem_builder
|
||||
selected_columns = self.columns_helper.select_columns_helper columns Case_Sensitivity.Default True select_problem_builder
|
||||
select_problem_builder.attach_problems_before on_problems <|
|
||||
selected_column_names = case selected_columns.is_empty of
|
||||
True ->
|
||||
@ -914,9 +970,9 @@ type Table
|
||||
types. Due to this, a Mixed column containing values `[2, "3"]` will
|
||||
actually be converted into `[2, Nothing]` when casting to Integer type.
|
||||
@columns Widget_Helpers.make_column_name_vector_selector
|
||||
cast : Vector (Text | Integer | Column_Selector) | Text | Integer -> Value_Type -> Boolean -> Problem_Behavior -> Table ! Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure
|
||||
cast : Vector (Text | Integer | Regex) | Text | Integer | Regex -> Value_Type -> Boolean -> Problem_Behavior -> Table ! Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure
|
||||
cast self columns=[0] value_type error_on_missing_columns=True on_problems=Problem_Behavior.Report_Warning =
|
||||
selected = self.columns_helper.resolve_columns columns error_on_missing_columns=error_on_missing_columns on_problems=on_problems
|
||||
selected = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=False error_on_missing_columns=error_on_missing_columns on_problems=on_problems error_on_empty=False
|
||||
selected.fold self table-> column_to_cast->
|
||||
new_column = column_to_cast.cast value_type on_problems
|
||||
table.set new_column new_name=column_to_cast.name set_mode=Set_Mode.Update
|
||||
@ -1201,8 +1257,8 @@ type Table
|
||||
problem is reported.
|
||||
@group_by Widget_Helpers.make_column_name_vector_selector
|
||||
@order_by Widget_Helpers.make_order_by_selector
|
||||
add_row_number : Text -> Integer -> Integer -> Vector (Text | Integer | Column_Selector) | Text | Integer -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
|
||||
add_row_number self (name:Text = "Row") (from:Integer = 1) (step:Integer = 1) group_by=[] order_by=[] on_problems=Problem_Behavior.Report_Warning =
|
||||
add_row_number : Text -> Integer -> Integer -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
|
||||
add_row_number self (name:Text="Row") (from:Integer=1) (step:Integer=1) (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=Problem_Behavior.Report_Warning) =
|
||||
Add_Row_Number.add_row_number self name from step group_by order_by on_problems
|
||||
|
||||
## ALIAS Add Column, Update Column, New Column
|
||||
@ -1784,13 +1840,13 @@ type Table
|
||||
B | Name | Another
|
||||
B | Country | Germany
|
||||
@key_columns Widget_Helpers.make_column_name_vector_selector
|
||||
transpose : Vector (Integer | Text | Column_Selector) | Text | Integer -> Text -> Text -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns | Duplicate_Output_Column_Names
|
||||
transpose : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Text -> Text -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns | Duplicate_Output_Column_Names
|
||||
transpose self (key_columns = []) (attribute_column_name="Name") (value_column_name="Value") (error_on_missing_columns=True) (on_problems = Report_Warning) =
|
||||
columns_helper = self.columns_helper
|
||||
unique = Unique_Name_Strategy.new
|
||||
problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns
|
||||
|
||||
id_columns = columns_helper.select_columns_helper key_columns False problem_builder
|
||||
id_columns = columns_helper.select_columns_helper key_columns Case_Sensitivity.Default False problem_builder
|
||||
|
||||
selected_names = Map.from_vector (id_columns.map column-> [column.name, True])
|
||||
|
||||
@ -1855,7 +1911,7 @@ type Table
|
||||
@group_by Widget_Helpers.make_column_name_vector_selector
|
||||
@name_column Widget_Helpers.make_column_name_selector
|
||||
@values (Widget_Helpers.make_aggregate_column_selector include_group_by=False)
|
||||
cross_tab : Vector (Integer | Text | Column_Selector | Aggregate_Column) | Text | Integer -> (Text | Integer) -> Aggregate_Column | Vector Aggregate_Column -> Problem_Behavior -> Table ! Missing_Input_Columns | Invalid_Aggregate_Column | Floating_Point_Equality | Invalid_Aggregation | Unquoted_Delimiter | Additional_Warnings
|
||||
cross_tab : Vector (Integer | Text | Regex | Aggregate_Column) | Text | Integer | Regex -> (Text | Integer) -> Aggregate_Column | Vector Aggregate_Column -> Problem_Behavior -> Table ! Missing_Input_Columns | Invalid_Aggregate_Column | Floating_Point_Equality | Invalid_Aggregation | Unquoted_Delimiter | Additional_Warnings
|
||||
cross_tab self group_by name_column values=Aggregate_Column.Count (on_problems=Report_Warning) =
|
||||
columns_helper = self.columns_helper
|
||||
problem_builder = Problem_Builder.new error_on_missing_columns=True
|
||||
@ -1872,8 +1928,8 @@ type Table
|
||||
ix : Integer -> [ix]
|
||||
name : Text -> [name]
|
||||
_ -> Error.throw (Illegal_Argument.Error "name_column must be a column index or name.")
|
||||
matched_name = columns_helper.select_columns_helper name_column_selector True problem_builder
|
||||
grouping = columns_helper.select_columns_helper (normalize_group_by group_by) True problem_builder
|
||||
matched_name = columns_helper.select_columns_helper name_column_selector Case_Sensitivity.Default True problem_builder
|
||||
grouping = columns_helper.select_columns_helper (normalize_group_by group_by) Case_Sensitivity.Default True problem_builder
|
||||
|
||||
## Validate the values
|
||||
values_vector = case values of
|
||||
@ -2037,7 +2093,8 @@ type Table
|
||||
been replaced with the provided default(s).
|
||||
|
||||
Arguments:
|
||||
- columns: The column(s) to fill Nothing values.
|
||||
- columns: Specifies columns by a name, index or regular expression to
|
||||
match names, or a Vector of these.
|
||||
- default: The value to replace missing values with. If this argument
|
||||
is a column, the value from `default` at the corresponding position
|
||||
will be used.
|
||||
@ -2047,7 +2104,7 @@ type Table
|
||||
|
||||
fill_nothing = table.fill_nothing ["col0", "col1"] 20.5
|
||||
@columns Widget_Helpers.make_column_name_vector_selector
|
||||
fill_nothing : Vector (Integer | Text | Column_Selector) | Text | Integer -> Column | Any -> Table
|
||||
fill_nothing : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Any -> Table
|
||||
fill_nothing self columns default =
|
||||
transformer col = col.fill_nothing default
|
||||
Table_Helpers.replace_columns_with_transformed_columns self columns transformer
|
||||
@ -2058,7 +2115,8 @@ type Table
|
||||
provided default(s).
|
||||
|
||||
Arguments:
|
||||
- columns: The column(s) to fill empty values.
|
||||
- columns: Specifies columns by a name, index or regular expression to
|
||||
match names, or a Vector of these.
|
||||
- default: The value to replace empty values with. If this argument
|
||||
is a column, the value from `default` at the corresponding position
|
||||
will be used.
|
||||
@ -2068,7 +2126,7 @@ type Table
|
||||
|
||||
fill_empty = table.fill_empty ["col0", "col1"] "hello"
|
||||
@columns Widget_Helpers.make_column_name_vector_selector
|
||||
fill_empty : Vector (Integer | Text | Column_Selector) | Text | Integer -> Column | Any -> Table
|
||||
fill_empty : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Any -> Table
|
||||
fill_empty self columns default =
|
||||
transformer col = col.fill_empty default
|
||||
Table_Helpers.replace_columns_with_transformed_columns self columns transformer
|
||||
@ -2104,7 +2162,7 @@ type Table
|
||||
|
||||
column.replace '"(.*?)"'.to_regex '($1)'
|
||||
@columns Widget_Helpers.make_column_name_vector_selector
|
||||
replace : Vector (Integer | Text | Column_Selector) | Text | Integer -> Text | Column | Regex -> Text | Column -> Case_Sensitivity -> Boolean -> Column
|
||||
replace : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Text | Column | Regex -> Text | Column -> Case_Sensitivity -> Boolean -> Column
|
||||
replace self columns term="" new_text="" case_sensitivity=Case_Sensitivity.Sensitive only_first=False =
|
||||
transformer col = col.replace term new_text case_sensitivity only_first
|
||||
Table_Helpers.replace_columns_with_transformed_columns self columns transformer
|
||||
|
@ -1,7 +1,6 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import project.Data.Column.Column
|
||||
import project.Data.Column_Selector.Column_Selector
|
||||
import project.Data.Set_Mode.Set_Mode
|
||||
import project.Data.Sort_Column.Sort_Column
|
||||
import project.Data.Table.Table
|
||||
@ -18,10 +17,10 @@ polyglot java import org.enso.table.data.column.storage.numeric.LongRangeStorage
|
||||
polyglot java import org.enso.table.data.column.storage.numeric.LongStorage
|
||||
|
||||
## PRIVATE
|
||||
add_row_number : Table -> Text -> Integer -> Integer -> (Column_Selector | Vector Text) -> Vector (Text | Sort_Column) | Text | Sort_Column -> Problem_Behavior -> Table
|
||||
add_row_number : Table -> Text -> Integer -> Integer -> Text | Integer | Regex | Vector (Integer | Text | Regex) -> Vector (Text | Sort_Column) | Text | Sort_Column -> Problem_Behavior -> Table
|
||||
add_row_number table name from step group_by order_by on_problems =
|
||||
problem_builder = Problem_Builder.new error_on_missing_columns=True
|
||||
grouping_columns = table.columns_helper.select_columns_helper group_by True problem_builder
|
||||
grouping_columns = table.columns_helper.select_columns_helper group_by Case_Sensitivity.Default True problem_builder
|
||||
Unordered_Multi_Value_Key.validate_grouping_columns grouping_columns problem_builder
|
||||
ordering = Table_Helpers.resolve_order_by table.columns order_by problem_builder
|
||||
problem_builder.attach_problems_before on_problems <|
|
||||
|
@ -2,7 +2,6 @@ from Standard.Base import all hiding First, Last
|
||||
|
||||
import project.Data.Aggregate_Column.Aggregate_Column
|
||||
import project.Data.Column.Column
|
||||
import project.Data.Column_Selector.Column_Selector
|
||||
import project.Data.Sort_Column.Sort_Column
|
||||
import project.Data.Table.Table
|
||||
import project.Data.Type.Value_Type.Value_Type
|
||||
@ -149,9 +148,9 @@ resolve_aggregate table problem_builder aggregate_column =
|
||||
res = columns_helper.resolve_column_or_expression c problem_builder
|
||||
res.if_nothing (Error.throw Internal_Missing_Column_Error)
|
||||
|
||||
resolve_selector_to_vector : Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> Vector Column ! Internal_Missing_Column_Error
|
||||
resolve_selector_to_vector : Text | Integer | Vector (Integer | Text) -> Vector Column ! Internal_Missing_Column_Error
|
||||
resolve_selector_to_vector selector =
|
||||
resolved = columns_helper.select_columns_helper selector reorder=True problem_builder
|
||||
resolved = columns_helper.select_columns_helper selector Case_Sensitivity.Default True problem_builder
|
||||
if resolved.is_empty then Error.throw Internal_Missing_Column_Error else resolved
|
||||
|
||||
resolve_order_by selector = case selector of
|
||||
|
@ -2,8 +2,8 @@ from Standard.Base import all
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
import Standard.Base.Errors.Illegal_State.Illegal_State
|
||||
|
||||
import project.Data.Aggregate_Column.Aggregate_Column
|
||||
import project.Data.Column.Column
|
||||
import project.Data.Column_Selector.Column_Selector
|
||||
import project.Data.Position.Position
|
||||
import project.Data.Set_Mode.Set_Mode
|
||||
import project.Data.Sort_Column.Sort_Column
|
||||
@ -12,7 +12,6 @@ import project.Data.Type.Value_Type.Value_Type
|
||||
import project.Data.Type.Value_Type_Helpers
|
||||
import project.Internal.Problem_Builder.Problem_Builder
|
||||
import project.Internal.Unique_Name_Strategy.Unique_Name_Strategy
|
||||
from project.Data.Aggregate_Column.Aggregate_Column import Maximum, Minimum
|
||||
from project.Errors import Ambiguous_Column_Rename, Column_Type_Mismatch, Invalid_Aggregate_Column, Missing_Input_Columns, No_Common_Type, No_Input_Columns_Selected, No_Output_Columns, Too_Many_Column_Names_Provided
|
||||
|
||||
polyglot java import java.util.HashSet
|
||||
@ -41,7 +40,9 @@ type Table_Column_Helper
|
||||
|
||||
Arguments:
|
||||
- selectors: Single instance or a Vector of names, indexes or
|
||||
`Column_Selector`s.
|
||||
regular expressions.
|
||||
- case_sensitivity: Specifies whether the column name matching should be
|
||||
case sensitive.
|
||||
- reorder: Specifies whether to reorder the matched columns according to
|
||||
the order of the selection criteria.
|
||||
If `False`, the matched entries are returned in the same order as in
|
||||
@ -57,23 +58,14 @@ type Table_Column_Helper
|
||||
operation. By default, a warning is issued, but the operation proceeds.
|
||||
If set to `Report_Error`, the operation fails with a dataflow error.
|
||||
If set to `Ignore`, the operation proceeds without errors or warnings.
|
||||
select_columns : Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> Boolean -> Boolean -> Problem_Behavior -> Vector
|
||||
select_columns self selectors reorder error_on_missing_columns on_problems =
|
||||
- error_on_empty: Specifies if the operation should fail if no columns
|
||||
are selected.
|
||||
select_columns : Text | Integer | Regex | Vector (Integer | Text | Regex) -> Case_Sensitivity -> Boolean -> Boolean -> Problem_Behavior -> Boolean -> Vector
|
||||
select_columns self (selectors:(Text | Integer | Regex | Vector)) (case_sensitivity:Case_Sensitivity) (reorder:Boolean) (error_on_missing_columns:Boolean) (on_problems:Problem_Behavior) (error_on_empty:Boolean=True) =
|
||||
problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns
|
||||
result = self.select_columns_helper selectors reorder problem_builder
|
||||
result = self.select_columns_helper selectors case_sensitivity reorder problem_builder
|
||||
problem_builder.attach_problems_before on_problems <|
|
||||
if result.is_empty then Error.throw No_Output_Columns else result
|
||||
|
||||
## PRIVATE
|
||||
Works like `select_columns` but will not throw `No_Output_Columns` error
|
||||
and will return proper columns instead of internal columns.
|
||||
Useful, when selecting a subset of columns to transform.
|
||||
resolve_columns : Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> Boolean -> Problem_Behavior -> Boolean -> Vector
|
||||
resolve_columns self selectors error_on_missing_columns on_problems reorder=False =
|
||||
problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns
|
||||
result = self.select_columns_helper selectors reorder problem_builder
|
||||
problem_builder.attach_problems_before on_problems <|
|
||||
result.map self.make_column
|
||||
if error_on_empty && result.is_empty then Error.throw No_Output_Columns else result
|
||||
|
||||
## PRIVATE
|
||||
A helper function encapsulating shared code for `remove_columns`
|
||||
@ -86,17 +78,19 @@ type Table_Column_Helper
|
||||
|
||||
Arguments:
|
||||
- selectors: Single instance or a Vector of names, indexes or
|
||||
`Column_Selector`s.
|
||||
regular expressions.
|
||||
- case_sensitivity: Specifies whether the column name matching should be
|
||||
case sensitive.
|
||||
- error_on_missing_columns: Specifies if missing columns should be raised
|
||||
as error regardless of `on_problems`.
|
||||
- on_problems: Specifies the behavior when a problem occurs during the
|
||||
operation. By default, a warning is issued, but the operation proceeds.
|
||||
If set to `Report_Error`, the operation fails with a dataflow error.
|
||||
If set to `Ignore`, the operation proceeds without errors or warnings.
|
||||
remove_columns : Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> Boolean -> Problem_Behavior -> Vector
|
||||
remove_columns self selectors error_on_missing_columns on_problems =
|
||||
remove_columns : Text | Integer | Regex | Vector (Integer | Text | Regex) -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Vector
|
||||
remove_columns self (selectors:(Text | Integer | Regex | Vector)) (case_sensitivity:Case_Sensitivity) (error_on_missing_columns:Boolean) (on_problems:Problem_Behavior) =
|
||||
problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns
|
||||
selection = self.select_columns_helper selectors reorder=False problem_builder
|
||||
selection = self.select_columns_helper selectors case_sensitivity False problem_builder
|
||||
selected_names = Map.from_vector (selection.map column-> [column.name, True])
|
||||
result = self.internal_columns.filter column->
|
||||
should_be_removed = selected_names.get column.name False
|
||||
@ -115,19 +109,21 @@ type Table_Column_Helper
|
||||
|
||||
Arguments:
|
||||
- selectors: Single instance or a Vector of names, indexes or
|
||||
`Column_Selector`s.
|
||||
regular expressions.
|
||||
- position: Specifies how to place the selected columns in relation to
|
||||
the columns which were not matched by the `selectors` (if any).
|
||||
- case_sensitivity: Specifies whether the column name matching should be
|
||||
case sensitive.
|
||||
- error_on_missing_columns: Specifies if missing columns should be raised
|
||||
as error regardless of `on_problems`.
|
||||
- on_problems: Specifies the behavior when a problem occurs during the
|
||||
operation. By default, a warning is issued, but the operation proceeds.
|
||||
If set to `Report_Error`, the operation fails with a dataflow error.
|
||||
If set to `Ignore`, the operation proceeds without errors or warnings.
|
||||
reorder_columns : Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> Position -> Boolean -> Problem_Behavior -> Vector
|
||||
reorder_columns self selectors position error_on_missing_columns on_problems =
|
||||
reorder_columns : Text | Integer | Regex | Vector (Integer | Text | Regex) -> Position -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Vector
|
||||
reorder_columns self (selectors:(Text | Integer | Regex | Vector)) (position:Position) (case_sensitivity:Case_Sensitivity) (error_on_missing_columns:Boolean) (on_problems:Problem_Behavior) =
|
||||
problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns
|
||||
selection = self.select_columns_helper selectors reorder=True problem_builder
|
||||
selection = self.select_columns_helper selectors case_sensitivity True problem_builder
|
||||
problem_builder.attach_problems_before on_problems <|
|
||||
selected_names = Map.from_vector (selection.map column-> [column.name, True])
|
||||
other_columns = self.internal_columns.filter column->
|
||||
@ -143,31 +139,28 @@ type Table_Column_Helper
|
||||
provided selection criteria.
|
||||
|
||||
Arguments:
|
||||
- selectors: Single instance or a Vector of names, indexes or
|
||||
`Column_Selector`s.
|
||||
- selectors: Single instance or a Vector of names, indexes or regular
|
||||
expressions.
|
||||
- case_sensitivity: Specifies whether to match the column names ignoring
|
||||
case.
|
||||
- reorder: Specifies whether to reorder the matched columns according to
|
||||
the order of the selection criteria.
|
||||
If `False`, the matched entries are returned in the same order as in
|
||||
the input.
|
||||
If `True`, the matched entries are returned in the order of the
|
||||
criteria matching them. If a single object has been matched by multiple
|
||||
criteria, it is placed in the group belonging to the first matching
|
||||
criterion on the list. If a single criterion's group has more than one
|
||||
element, their relative order is the same as in the input.
|
||||
the order of the selection criteria. If `False`, the matched entries
|
||||
are returned in the same order as in the input. If `True`, the matched
|
||||
entries are returned in the order of the first criterion matching them.
|
||||
If a single criterion matches more than one column, their order is the
|
||||
same as in the input table.
|
||||
- problem_builder: Encapsulates the aggregation of encountered problems.
|
||||
select_columns_helper : Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> Boolean -> Problem_Builder -> Vector
|
||||
select_columns_helper self selectors reorder problem_builder =
|
||||
resolve_selector selector = case selector of
|
||||
name : Text -> resolve_selector (Column_Selector.By_Name name Case_Sensitivity.Sensitive False)
|
||||
select_columns_helper : Text | Integer | Regex | Vector (Integer | Text | Regex) -> Case_Sensitivity -> Boolean -> Problem_Builder -> Vector
|
||||
select_columns_helper self (selectors:(Text | Integer | Regex | Vector)) (case_sensitivity:Case_Sensitivity) (reorder:Boolean) (problem_builder:Problem_Builder) =
|
||||
resolve_selector selector =
|
||||
case selector of
|
||||
ix : Integer -> if is_index_valid self.internal_columns.length ix then [self.internal_columns.at ix] else
|
||||
problem_builder.report_oob_indices [ix]
|
||||
[]
|
||||
Column_Selector.By_Name name case_sensitivity use_regex ->
|
||||
matches = match_columns name case_sensitivity use_regex self.internal_columns
|
||||
if matches.is_empty then problem_builder.report_missing_input_columns [name]
|
||||
_ ->
|
||||
matches = match_columns selector case_sensitivity self.internal_columns
|
||||
if matches.is_empty then problem_builder.report_missing_input_columns [selector]
|
||||
matches
|
||||
Column_Selector.Blank_Columns when_any treat_nans_as_blank ->
|
||||
get_blank_columns when_any treat_nans_as_blank self.internal_columns self.make_column self.table self.materialize
|
||||
|
||||
vector = case selectors of
|
||||
_ : Vector -> selectors
|
||||
@ -198,6 +191,56 @@ type Table_Column_Helper
|
||||
problem_builder.report_oob_indices [selector]
|
||||
Nothing
|
||||
|
||||
## PRIVATE
|
||||
A helper method that gets the columns from the provided table that are
|
||||
completely blank or have some blanks.
|
||||
|
||||
Arguments:
|
||||
- when_any: If `False`, only columns consisting of all blank cells are
|
||||
selected. If set to `True`, columns with one or more blank values are
|
||||
selected.
|
||||
- treat_nans_as_blank: If `True`, then `Number.nan` is considered as
|
||||
blank.
|
||||
- invert_selection: If `True`, then the selection is inverted.
|
||||
select_blank_columns_helper : Boolean -> Boolean -> Boolean -> Vector
|
||||
select_blank_columns_helper self when_any:Boolean treat_nans_as_blank:Boolean invert_selection:Boolean=False =
|
||||
blanks = self.internal_columns.map_with_index ix-> internal_column->
|
||||
column = self.make_column internal_column
|
||||
blank_indicator = column.is_blank treat_nans_as_blank
|
||||
blank_indicator.iif 1 0 . rename "blanks_"+ix.to_text
|
||||
|
||||
## We cannot just use a custom_column in the aggregate because of
|
||||
how the column selector works. We may need to revisit this. For
|
||||
now we need to use tricks like that:
|
||||
|
||||
To be backend agnostic, we cannot create a new table with the
|
||||
columns above. Instead, we add our blank columns to the table
|
||||
and then remove any other columns. We do not have to deal
|
||||
with name conflicts, as adding a new column with a clashing
|
||||
name does not affect the old column or derived columns.
|
||||
table_with_blank_indicators = blanks.fold self.table tbl-> blanks_col-> tbl.set blanks_col
|
||||
just_indicators = table_with_blank_indicators.select_columns (blanks.map .name) on_problems=Problem_Behavior.Report_Error
|
||||
|
||||
# Maximum is equivalent to Exists and Minimum is equivalent to Forall.
|
||||
col_aggregate = if when_any then Aggregate_Column.Maximum _ else Aggregate_Column.Minimum _
|
||||
aggregates = blanks.map blanks_col-> col_aggregate blanks_col.name
|
||||
|
||||
aggregate_result = just_indicators.aggregate aggregates on_problems=Problem_Behavior.Report_Error
|
||||
materialized_result = self.materialize <| aggregate_result.catch Any error->
|
||||
msg = "Unexpected dataflow error has been thrown in an `select_blank_columns_helper`. This is a bug in the Table library. The unexpected error was: "+error.to_display_text
|
||||
Panic.throw (Illegal_State.Error message=msg cause=error)
|
||||
|
||||
counts = materialized_result.rows.first
|
||||
self.internal_columns.filter_with_index i-> _->
|
||||
include = case counts.at i of
|
||||
## No rows in input, so treating as blank by convention.
|
||||
Nothing -> True
|
||||
1 -> True
|
||||
0 -> False
|
||||
unexpected ->
|
||||
Panic.throw (Illegal_State.Error "Unexpected result: "+unexpected.to_display_text+". Perhaps an implementation bug of `select_blank_columns_helper`.")
|
||||
if invert_selection then include.not else include
|
||||
|
||||
## PRIVATE
|
||||
A helper function encapsulating shared code for `rename_columns`
|
||||
implementations of various Table variants. See the documentation for the
|
||||
@ -212,14 +255,16 @@ type Table_Column_Helper
|
||||
- internal_columns: A list of all columns in a table.
|
||||
- mapping: A Map from selectors (name, index or regular expression) to new
|
||||
names, or a Vector of new names (or of selector and new name pairs).
|
||||
- case_sensitivity: Specifies whether to match the column names ignoring
|
||||
case.
|
||||
- error_on_missing_columns: If set to `True`, missing columns are treated as
|
||||
error regardless of `on_problems`.
|
||||
- on_problems: Specifies the behavior when a problem occurs during the
|
||||
operation. By default, a warning is issued, but the operation proceeds.
|
||||
If set to `Report_Error`, the operation fails with a dataflow error.
|
||||
If set to `Ignore`, the operation proceeds without errors or warnings.
|
||||
rename_columns : Vector -> Map (Text | Integer | Column_Selector) Text | Vector Text -> Boolean -> Problem_Behavior -> Map Text Text
|
||||
rename_columns internal_columns mapping error_on_missing_columns on_problems =
|
||||
rename_columns : Vector -> Map (Text | Integer | Regex) Text | Vector Text -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Map Text Text
|
||||
rename_columns (internal_columns:Vector) (mapping:(Map | Vector)) (case_sensitivity:Case_Sensitivity) (error_on_missing_columns:Boolean) (on_problems:Problem_Behavior) =
|
||||
## Convert Vector of Pairs to Map
|
||||
is_vec_pairs = mapping.is_a Vector && mapping.length > 0 && (mapping.first.is_a Text . not)
|
||||
case is_vec_pairs of
|
||||
@ -246,22 +291,18 @@ rename_columns internal_columns mapping error_on_missing_columns on_problems =
|
||||
internal_columns.take good_names.length . zip good_names
|
||||
_ : Map ->
|
||||
resolve_rename selector replacement = case selector of
|
||||
name : Text -> resolve_rename (Column_Selector.By_Name name Case_Sensitivity.Sensitive False) replacement
|
||||
ix : Integer -> if is_index_valid internal_columns.length ix then [Pair.new (internal_columns.at ix) replacement] else
|
||||
problem_builder.report_oob_indices [ix]
|
||||
[]
|
||||
Column_Selector.By_Name name case_sensitivity use_regex ->
|
||||
matches = match_columns name case_sensitivity use_regex internal_columns
|
||||
_ ->
|
||||
matches = match_columns selector case_sensitivity internal_columns
|
||||
case matches.is_empty of
|
||||
True ->
|
||||
problem_builder.report_missing_input_columns [name]
|
||||
problem_builder.report_missing_input_columns [selector]
|
||||
[]
|
||||
False ->
|
||||
if use_regex.not then matches.map c-> Pair.new c replacement else
|
||||
pattern = Regex.compile name case_insensitive=case_sensitivity.is_case_insensitive_in_memory
|
||||
matches.map c-> Pair.new c (pattern.replace c.name replacement)
|
||||
Column_Selector.Blank_Columns _ _ ->
|
||||
Error.throw (Illegal_Argument.Error "Cannot use `Blank_Columns` to rename.")
|
||||
if selector.is_a Regex . not then matches.map c-> Pair.new c replacement else
|
||||
matches.map c-> Pair.new c (selector.replace c.name replacement)
|
||||
builder = mapping.to_vector.fold Vector.new_builder builder-> pair-> builder.append_vector_range (resolve_rename pair.first pair.second)
|
||||
builder.to_vector
|
||||
|
||||
@ -344,10 +385,15 @@ is_index_valid length ix =
|
||||
|
||||
## PRIVATE
|
||||
A helper method to match columns by name
|
||||
match_columns : Text -> Case_Sensitivity -> Boolean -> Vector -> Vector
|
||||
match_columns name case_sensitivity use_regex columns =
|
||||
match = case_sensitivity.create_match_function use_regex
|
||||
columns.filter c-> match c.name name
|
||||
match_columns : Text | Regex -> Case_Sensitivity -> Vector -> Vector
|
||||
match_columns (name : Text | Regex) (case_sensitivity : Case_Sensitivity) (columns : Vector) =
|
||||
match = case name of
|
||||
_ : Text -> case case_sensitivity of
|
||||
Case_Sensitivity.Default -> (== name)
|
||||
Case_Sensitivity.Sensitive -> (== name)
|
||||
Case_Sensitivity.Insensitive locale -> name.equals_ignore_case locale=locale
|
||||
_ : Regex -> name.recompile case_sensitivity . matches
|
||||
columns.filter c-> match c.name
|
||||
|
||||
## PRIVATE
|
||||
A helper type used by transform helpers.
|
||||
@ -379,7 +425,8 @@ resolve_order_by internal_columns column_selectors problem_builder =
|
||||
problem_builder.report_oob_indices [ix]
|
||||
[]
|
||||
Sort_Column.Select_By_Name name _ case_sensitivity use_regex ->
|
||||
matches = match_columns name case_sensitivity use_regex internal_columns
|
||||
regex_or_text = if use_regex then name.to_regex else name
|
||||
matches = match_columns regex_or_text case_sensitivity internal_columns
|
||||
if matches.is_empty then problem_builder.report_missing_input_columns [name]
|
||||
matches.map c-> Column_Transform_Element.Value c selector
|
||||
selectors_vec = case column_selectors of
|
||||
@ -447,46 +494,6 @@ unify_result_type_for_union column_set all_tables allow_type_widening problem_bu
|
||||
problem_builder.report_other_warning (Column_Type_Mismatch.Error column_set.name first_type got_type)
|
||||
Nothing
|
||||
|
||||
## PRIVATE
|
||||
A helper method that gets the columns from the provided table that are
|
||||
complete blank or have some blanks.
|
||||
get_blank_columns when_any treat_nans_as_blank internal_columns make_column table materialize =
|
||||
blanks = internal_columns.map_with_index ix-> internal_column->
|
||||
column = make_column internal_column
|
||||
blank_indicator = column.is_blank treat_nans_as_blank
|
||||
blank_indicator.iif 1 0 . rename "blanks_"+ix.to_text
|
||||
|
||||
## We cannot just use a custom_column in the aggregate because of
|
||||
how the column selector works. We may need to revisit this. For
|
||||
now we need to use tricks like that:
|
||||
|
||||
To be backend agnostic, we cannot create a new table with the
|
||||
columns above. Instead, we add our blank columns to the table
|
||||
and then remove any other columns we. We do not have to deal
|
||||
with name conflicts, as adding a new column with a clashing
|
||||
name does not affect the old column or derived columns.
|
||||
table_with_blank_indicators = blanks.fold table table-> blanks_col-> table.set blanks_col
|
||||
just_indicators = table_with_blank_indicators.select_columns (blanks.map .name) on_problems=Problem_Behavior.Report_Error
|
||||
|
||||
# Maximum is equivalent to Exists and Minimum is equivalent to Forall.
|
||||
col_aggregate = if when_any then Maximum _ else Minimum _
|
||||
aggregates = blanks.map blanks_col-> col_aggregate blanks_col.name
|
||||
|
||||
aggregate_result = just_indicators.aggregate aggregates on_problems=Problem_Behavior.Report_Error
|
||||
materialized_result = materialize <| aggregate_result.catch Any error->
|
||||
msg = "Unexpected dataflow error has been thrown in an `select_columns_helper`. This is a bug in the Table library. The unexpected error was: "+error.to_display_text
|
||||
Panic.throw (Illegal_State.Error message=msg cause=error)
|
||||
|
||||
counts = materialized_result.rows.first
|
||||
internal_columns.filter_with_index i-> _->
|
||||
case counts.at i of
|
||||
## No rows in input, so treating as blank by convention.
|
||||
Nothing -> True
|
||||
1 -> True
|
||||
0 -> False
|
||||
unexpected ->
|
||||
Panic.throw (Illegal_State.Error "Unexpected result: "+unexpected.to_display_text+". Perhaps an implementation bug of Column_Selector.Blank_Columns.")
|
||||
|
||||
## PRIVATE
|
||||
Replace a set of columns in the table with a new set of columns. The old
|
||||
columns must all exist in the table, and the list of new columns must be the
|
||||
@ -503,9 +510,9 @@ replace_columns_with_columns table old_columns new_columns =
|
||||
Replace a set of columns in the table with a new set of columns produced by
|
||||
transforming the old columns. The old columns must all exist in the table,
|
||||
and the new columns must all have the same name.
|
||||
replace_columns_with_transformed_columns : Table -> Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> (Column -> Column) -> Boolean -> Problem_Behavior -> Table
|
||||
replace_columns_with_transformed_columns : Table -> Text | Integer | Regex | Vector (Integer | Text | Regex) -> (Column -> Column) -> Boolean -> Problem_Behavior -> Table
|
||||
replace_columns_with_transformed_columns table selectors transformer error_on_missing_columns=True on_problems=Report_Warning =
|
||||
internal_columns = table.columns_helper.select_columns selectors reorder=False error_on_missing_columns=error_on_missing_columns on_problems=on_problems
|
||||
internal_columns = table.columns_helper.select_columns selectors Case_Sensitivity.Default reorder=False error_on_missing_columns=error_on_missing_columns on_problems=on_problems
|
||||
columns = internal_columns.map table.columns_helper.make_column
|
||||
new_columns = columns.map transformer
|
||||
replace_columns_with_columns table columns new_columns
|
||||
|
@ -2,7 +2,6 @@ from Standard.Base import all
|
||||
|
||||
import project.Data.Aggregate_Column.Aggregate_Column
|
||||
import project.Data.Column.Column
|
||||
import project.Data.Column_Selector.Column_Selector
|
||||
import project.Data.Column_Vector_Extensions
|
||||
import project.Data.Data_Formatter.Data_Formatter
|
||||
import project.Data.Join_Condition.Join_Condition
|
||||
@ -28,7 +27,6 @@ from project.Excel.Excel_Section.Excel_Section import Cell_Range, Range_Names, S
|
||||
|
||||
export project.Data.Aggregate_Column.Aggregate_Column
|
||||
export project.Data.Column.Column
|
||||
export project.Data.Column_Selector.Column_Selector
|
||||
export project.Data.Column_Vector_Extensions
|
||||
export project.Data.Data_Formatter.Data_Formatter
|
||||
export project.Data.Join_Condition.Join_Condition
|
||||
|
@ -1,6 +1,6 @@
|
||||
from Standard.Base import all
|
||||
|
||||
from Standard.Table import Column_Selector, Value_Type
|
||||
from Standard.Table import Value_Type
|
||||
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import Count_Distinct
|
||||
from Standard.Table.Errors import all
|
||||
|
||||
@ -58,12 +58,21 @@ spec setup =
|
||||
t4.row_count . should_equal 0
|
||||
t4.at "X" . to_vector . should_equal []
|
||||
|
||||
Test.specify "should allow to select blank columns" <|
|
||||
r1 = t1.select_blank_columns
|
||||
r1.columns.map .name . should_equal ["f"]
|
||||
r1.at "f" . to_vector . should_equal [Nothing, "", Nothing, ""]
|
||||
|
||||
r2 = t1.select_blank_columns when_any=True
|
||||
r2.columns.map .name . should_equal ["a", "b", "d", "e", "f"]
|
||||
r2.at "d" . to_vector . should_equal [Nothing, True, False, True]
|
||||
|
||||
Test.specify "should allow to remove blank columns" <|
|
||||
r1 = t1.remove_columns (Column_Selector.Blank_Columns when_any=False)
|
||||
r1 = t1.remove_blank_columns
|
||||
r1.columns.map .name . should_equal ["a", "b", "c", "d", "e"]
|
||||
r1.at "a" . to_vector . should_equal [1, Nothing, 3, 4]
|
||||
|
||||
r2 = t1.remove_columns (Column_Selector.Blank_Columns when_any=True)
|
||||
r2 = t1.remove_blank_columns when_any=True
|
||||
r2.columns.map .name . should_equal ["c"]
|
||||
r2.at "c" . to_vector . should_equal [10, 20, 30, 40]
|
||||
|
||||
@ -84,14 +93,18 @@ spec setup =
|
||||
r2.at "X" . to_vector . to_text . should_equal "[2.0, 1.5, NaN, NaN]"
|
||||
r2.at "Y" . to_vector . should_equal [Nothing, 2.0, Nothing, 5.0]
|
||||
|
||||
r3 = t4.remove_columns (Column_Selector.Blank_Columns when_any=False)
|
||||
r3 = t4.remove_blank_columns
|
||||
r3.columns.map .name . should_equal ["c", "g", "h"]
|
||||
r3.at "g" . to_vector . to_text . should_equal "[NaN, 1.0, 2.0, 3.4]"
|
||||
|
||||
r4 = t4.remove_columns (Column_Selector.Blank_Columns when_any=True)
|
||||
r4 = t4.remove_blank_columns when_any=True
|
||||
r4.columns.map .name . should_equal ["c", "g"]
|
||||
r4.at "g" . to_vector . to_text . should_equal "[NaN, 1.0, 2.0, 3.4]"
|
||||
|
||||
r5 = t4.select_blank_columns when_any=True
|
||||
r5.columns.map .name . should_equal ["h"]
|
||||
r5.at "h" . to_vector . to_text . should_equal "[NaN, Nothing, NaN, Nothing]"
|
||||
|
||||
Test.specify "should allow to treat NaNs as blank if asked" <|
|
||||
r1 = t3.filter_blank_rows when_any=True treat_nans_as_blank=True
|
||||
# We cannot use `Vector.==` because `NaN != NaN`.
|
||||
@ -102,42 +115,35 @@ spec setup =
|
||||
r2.at "X" . to_vector . to_text . should_equal "[2.0, 1.5, NaN]"
|
||||
r2.at "Y" . to_vector . should_equal [Nothing, 2.0, 5.0]
|
||||
|
||||
r3 = t4.remove_columns (Column_Selector.Blank_Columns when_any=False treat_nans_as_blank=True)
|
||||
r3 = t4.remove_blank_columns when_any=False treat_nans_as_blank=True
|
||||
r3.columns.map .name . should_equal ["c", "g"]
|
||||
r3.at "g" . to_vector . to_text . should_equal "[NaN, 1.0, 2.0, 3.4]"
|
||||
|
||||
r4 = t4.remove_columns (Column_Selector.Blank_Columns when_any=True treat_nans_as_blank=True)
|
||||
r4.columns.map .name . should_equal ["c"]
|
||||
r4.at "c" . to_vector . should_equal [10, 20, 40, 30]
|
||||
r4 = t4.select_blank_columns when_any=False treat_nans_as_blank=True
|
||||
r4.columns.map .name . should_equal ["h"]
|
||||
r4.at "h" . to_vector . to_text . should_equal "[NaN, Nothing, NaN, Nothing]"
|
||||
|
||||
r5 = t4.remove_blank_columns when_any=True treat_nans_as_blank=True
|
||||
r5.columns.map .name . should_equal ["c"]
|
||||
r5.at "c" . to_vector . should_equal [10, 20, 40, 30]
|
||||
|
||||
r6 = t4.select_blank_columns when_any=True treat_nans_as_blank=True
|
||||
r6.columns.map .name . should_equal ["g", "h"]
|
||||
r6.at "h" . to_vector . to_text . should_equal "[NaN, Nothing, NaN, Nothing]"
|
||||
|
||||
if test_selection.is_nan_and_nothing_distinct.not then
|
||||
Test.specify "this backend treats NaN as Nothing" <|
|
||||
t3.at "X" . to_vector . should_equal [2.0, 1.5, Nothing, Nothing]
|
||||
t3.at "X" . is_nan . to_vector . should_fail_with Unsupported_Database_Operation
|
||||
|
||||
Test.specify "Blank_Columns selector should work for all kinds of methods accepting Column_Selector" <|
|
||||
t = table_builder [["X", [1, 2, 3, 4]], ["Y", [Nothing, "", Nothing, Nothing]], ["Z", [Nothing, True, False, Nothing]]]
|
||||
|
||||
t.select_columns Column_Selector.Blank_Columns . columns . map .name . should_equal ["Y"]
|
||||
t.select_columns (Column_Selector.Blank_Columns when_any=True) . columns . map .name . should_equal ["Y", "Z"]
|
||||
|
||||
t.reorder_columns Column_Selector.Blank_Columns . columns . map .name . should_equal ["Y", "X", "Z"]
|
||||
|
||||
r1 = t.aggregate [Count_Distinct Column_Selector.Blank_Columns]
|
||||
r1.columns . map .name . should_equal ["Count Distinct Y"]
|
||||
r1.at "Count Distinct Y" . to_vector . should_equal [2]
|
||||
r2 = t.aggregate [(Count_Distinct Column_Selector.Blank_Columns ignore_nothing=True)]
|
||||
r2.columns . map .name . should_equal ["Count Distinct Y"]
|
||||
r2.at "Count Distinct Y" . to_vector . should_equal [1]
|
||||
|
||||
Test.specify "Blank_Columns selector should deal with edge cases" <|
|
||||
Test.specify "select_blank_columns and remove_blank_columns should deal with edge cases" <|
|
||||
t = table_builder [["X", [1, 2, 3, 4]]]
|
||||
no_rows = t.filter "X" (Filter_Condition.Equal to=0)
|
||||
no_rows.row_count . should_equal 0
|
||||
no_rows.at "X" . to_vector . should_equal []
|
||||
|
||||
no_rows.select_columns Column_Selector.Blank_Columns . columns . map .name . should_equal ["X"]
|
||||
no_rows.remove_columns Column_Selector.Blank_Columns . columns . map .name . should_fail_with No_Output_Columns
|
||||
no_rows.select_blank_columns . columns . map .name . should_equal ["X"]
|
||||
no_rows.remove_blank_columns . columns . map .name . should_fail_with No_Output_Columns
|
||||
|
||||
Test.group prefix+"Filling Missing Values" <|
|
||||
Test.specify "should coerce long and double types to double" <|
|
||||
|
@ -1,7 +1,6 @@
|
||||
from Standard.Base import all
|
||||
|
||||
from Standard.Table import Position
|
||||
from Standard.Table.Data.Column_Selector.Column_Selector import By_Name
|
||||
from Standard.Table.Errors import all
|
||||
|
||||
from Standard.Test import Test, Problems
|
||||
@ -28,7 +27,7 @@ spec setup =
|
||||
Test.group prefix+"Table.select_columns" <|
|
||||
Test.specify "should work as shown in the doc examples" <|
|
||||
expect_column_names ["foo", "bar"] <| table.select_columns ["bar", "foo"]
|
||||
expect_column_names ["bar", "Baz", "foo 1", "foo 2"] <| table.select_columns [By_Name "foo.+" use_regex=True, By_Name "b.*" use_regex=True]
|
||||
expect_column_names ["bar", "Baz", "foo 1", "foo 2"] <| table.select_columns ["foo.+".to_regex, "b.*".to_regex True]
|
||||
expect_column_names ["abcd123", "foo", "bar"] <| table.select_columns [-1, 0, 1] reorder=True
|
||||
|
||||
Test.specify "should allow to reorder columns if asked to" <|
|
||||
@ -38,11 +37,11 @@ spec setup =
|
||||
table_2 . at "foo" . to_vector . should_equal [1,2,3]
|
||||
|
||||
Test.specify "should correctly handle regex matching" <|
|
||||
expect_column_names ["foo"] <| table.select_columns [By_Name "foo" Case_Sensitivity.Sensitive use_regex=True]
|
||||
expect_column_names ["ab.+123", "abcd123"] <| table.select_columns [By_Name "a.*" Case_Sensitivity.Sensitive use_regex=True]
|
||||
expect_column_names ["ab.+123", "abcd123"] <| table.select_columns [By_Name "ab.+123" Case_Sensitivity.Sensitive use_regex=True]
|
||||
expect_column_names ["foo"] <| table.select_columns ["foo".to_regex]
|
||||
expect_column_names ["ab.+123", "abcd123"] <| table.select_columns ["a.*".to_regex]
|
||||
expect_column_names ["ab.+123", "abcd123"] <| table.select_columns ["ab.+123".to_regex]
|
||||
expect_column_names ["ab.+123"] <| table.select_columns ["ab.+123"]
|
||||
expect_column_names ["abcd123"] <| table.select_columns [By_Name "abcd123" Case_Sensitivity.Sensitive use_regex=True]
|
||||
expect_column_names ["abcd123"] <| table.select_columns ["abcd123".to_regex]
|
||||
|
||||
Test.specify "should allow negative indices" <|
|
||||
expect_column_names ["foo", "bar", "foo 2"] <| table.select_columns [-3, 0, 1]
|
||||
@ -50,8 +49,8 @@ spec setup =
|
||||
Test.specify "should allow mixed names and indexes" <|
|
||||
expect_column_names ["foo", "bar", "foo 2"] <| table.select_columns [-3, "bar", 0]
|
||||
expect_column_names ["foo 2", "bar", "foo"] <| table.select_columns [-3, "bar", 0] reorder=True
|
||||
expect_column_names ["foo", "bar", "foo 1", "foo 2", "abcd123"] <| table.select_columns [-1, "bar", By_Name "foo.*" Case_Sensitivity.Sensitive use_regex=True]
|
||||
expect_column_names ["foo", "foo 1", "foo 2", "bar", "abcd123"] <| table.select_columns [By_Name "foo.*" Case_Sensitivity.Sensitive use_regex=True, "bar", "foo", -1] reorder=True
|
||||
expect_column_names ["foo", "bar", "foo 1", "foo 2", "abcd123"] <| table.select_columns [-1, "bar", "foo.*".to_regex]
|
||||
expect_column_names ["foo", "foo 1", "foo 2", "bar", "abcd123"] <| table.select_columns ["foo.*".to_regex, "bar", "foo", -1] reorder=True
|
||||
|
||||
if test_selection.supports_case_sensitive_columns then
|
||||
Test.specify "should correctly handle exact matches matching multiple names due to case insensitivity" <|
|
||||
@ -60,11 +59,11 @@ spec setup =
|
||||
col2 = ["bar", [4,5,6]]
|
||||
col3 = ["Bar", [7,8,9]]
|
||||
table_builder [col1, col2, col3]
|
||||
expect_column_names ["bar", "Bar"] <| table.select_columns [By_Name "bar"]
|
||||
expect_column_names ["bar", "Bar"] <| table.select_columns ["bar"] Case_Sensitivity.Insensitive
|
||||
|
||||
Test.specify "should correctly handle regexes matching multiple names" <|
|
||||
expect_column_names ["foo", "bar", "foo 1", "foo 2"] <| table.select_columns [By_Name "b.*" Case_Sensitivity.Sensitive use_regex=True, By_Name "f.+" Case_Sensitivity.Sensitive use_regex=True]
|
||||
expect_column_names ["bar", "foo", "foo 1", "foo 2"] <| table.select_columns [By_Name "b.*" Case_Sensitivity.Sensitive use_regex=True, By_Name "f.+" Case_Sensitivity.Sensitive use_regex=True] reorder=True
|
||||
expect_column_names ["foo", "bar", "foo 1", "foo 2"] <| table.select_columns ["b.*".to_regex, "f.+".to_regex]
|
||||
expect_column_names ["bar", "foo", "foo 1", "foo 2"] <| table.select_columns ["b.*".to_regex, "f.+".to_regex] reorder=True
|
||||
|
||||
Test.specify "should correctly handle problems: out of bounds indices" <|
|
||||
selector = [1, 0, 100, -200, 300]
|
||||
@ -104,12 +103,12 @@ spec setup =
|
||||
table.select_columns ["bar", "foo", "foo", "bar"] reorder=False
|
||||
|
||||
Test.specify "should correctly handle edge-cases: duplicate matches due to case insensitivity" <|
|
||||
selector = [By_Name "FOO", By_Name "foo"]
|
||||
t = table.select_columns selector on_problems=Problem_Behavior.Report_Error
|
||||
selector = ["FOO", "foo"]
|
||||
t = table.select_columns selector Case_Sensitivity.Insensitive on_problems=Problem_Behavior.Report_Error
|
||||
expect_column_names ["foo"] t
|
||||
|
||||
expect_column_names ["bar", "foo"] <|
|
||||
table.select_columns [By_Name "BAR", By_Name "foo", By_Name "bar"] reorder=True
|
||||
table.select_columns ["BAR", "foo", "bar"] Case_Sensitivity.Insensitive reorder=True
|
||||
|
||||
Test.specify "should correctly handle problems: unmatched names" <|
|
||||
weird_name = '.*?-!@#!"'
|
||||
@ -139,17 +138,17 @@ spec setup =
|
||||
Test.group prefix+"Table.remove_columns" <|
|
||||
Test.specify "should work as shown in the doc examples" <|
|
||||
expect_column_names ["Baz", "foo 1", "foo 2", "ab.+123", "abcd123"] <| table.remove_columns ["bar", "foo"]
|
||||
expect_column_names ["foo", "ab.+123", "abcd123"] <| table.remove_columns [By_Name "foo.+" Case_Sensitivity.Insensitive use_regex=True, By_Name "b.*" Case_Sensitivity.Insensitive use_regex=True]
|
||||
expect_column_names ["foo", "ab.+123", "abcd123"] <| table.remove_columns ["foo.+".to_regex, "b.*".to_regex] Case_Sensitivity.Insensitive
|
||||
expect_column_names ["Baz", "foo 1", "foo 2", "ab.+123"] <| table.remove_columns [-1, 0, 1]
|
||||
|
||||
Test.specify "should correctly handle regex matching" <|
|
||||
last_ones = table.columns.drop 1 . map .name
|
||||
expect_column_names last_ones <| table.remove_columns [By_Name "foo" Case_Sensitivity.Sensitive use_regex=True]
|
||||
expect_column_names last_ones <| table.remove_columns ["foo".to_regex]
|
||||
first_ones = ["foo", "bar", "Baz", "foo 1", "foo 2"]
|
||||
expect_column_names first_ones <| table.remove_columns [By_Name "a.*" Case_Sensitivity.Sensitive use_regex=True]
|
||||
expect_column_names first_ones <| table.remove_columns [By_Name "ab.+123" Case_Sensitivity.Sensitive use_regex=True]
|
||||
expect_column_names first_ones+["abcd123"] <| table.remove_columns [By_Name "ab.+123"]
|
||||
expect_column_names first_ones+["ab.+123"] <| table.remove_columns [By_Name "abcd123" Case_Sensitivity.Sensitive use_regex=True]
|
||||
expect_column_names first_ones <| table.remove_columns ["a.*".to_regex]
|
||||
expect_column_names first_ones <| table.remove_columns ["ab.+123".to_regex]
|
||||
expect_column_names first_ones+["abcd123"] <| table.remove_columns ["ab.+123"] Case_Sensitivity.Insensitive
|
||||
expect_column_names first_ones+["ab.+123"] <| table.remove_columns ["abcd123".to_regex]
|
||||
|
||||
Test.specify "should allow negative indices" <|
|
||||
expect_column_names ["Baz", "foo 1", "ab.+123"] <| table.remove_columns [-1, -3, 0, 1]
|
||||
@ -161,10 +160,10 @@ spec setup =
|
||||
col2 = ["bar", [4,5,6]]
|
||||
col3 = ["Bar", [7,8,9]]
|
||||
table_builder [col1, col2, col3]
|
||||
expect_column_names ["foo"] <| table.remove_columns (By_Name "bar")
|
||||
expect_column_names ["foo"] <| table.remove_columns "bar" Case_Sensitivity.Insensitive
|
||||
|
||||
Test.specify "should correctly handle regexes matching multiple names" <|
|
||||
expect_column_names ["Baz", "ab.+123", "abcd123"] <| table.remove_columns [By_Name "f.+" Case_Sensitivity.Sensitive use_regex=True, By_Name "b.*" Case_Sensitivity.Sensitive use_regex=True]
|
||||
expect_column_names ["Baz", "ab.+123", "abcd123"] <| table.remove_columns ["f.+".to_regex, "b.*".to_regex]
|
||||
|
||||
Test.specify "should correctly handle problems: out of bounds indices" <|
|
||||
selector = [1, 0, 100, -200, 300]
|
||||
@ -192,8 +191,8 @@ spec setup =
|
||||
expect_column_names ["bar", "Baz", "foo 1", "foo 2", "ab.+123", "abcd123"] t
|
||||
|
||||
Test.specify "should correctly handle edge-cases: duplicate matches due to case insensitivity" <|
|
||||
selector = [By_Name "FOO", By_Name "foo"]
|
||||
t = table.remove_columns selector on_problems=Problem_Behavior.Report_Error
|
||||
selector = ["FOO", "foo"]
|
||||
t = table.remove_columns selector Case_Sensitivity.Insensitive on_problems=Problem_Behavior.Report_Error
|
||||
expect_column_names ["bar", "Baz", "foo 1", "foo 2", "ab.+123", "abcd123"] t
|
||||
|
||||
Test.specify "should correctly handle problems: unmatched names" <|
|
||||
@ -209,11 +208,11 @@ spec setup =
|
||||
|
||||
Test.specify "should correctly handle problems: no columns in the output" <|
|
||||
[Problem_Behavior.Ignore, Problem_Behavior.Report_Warning, Problem_Behavior.Report_Error].each pb->
|
||||
selector = [By_Name ".*" Case_Sensitivity.Sensitive use_regex=True]
|
||||
selector = [".*".to_regex]
|
||||
t = table.remove_columns selector on_problems=pb
|
||||
t.should_fail_with No_Output_Columns
|
||||
|
||||
selector_2 = [By_Name ".*" Case_Sensitivity.Sensitive use_regex=True, By_Name "hmmm" Case_Sensitivity.Sensitive use_regex=True]
|
||||
selector_2 = [".*".to_regex, "hmmm".to_regex]
|
||||
t1 = table.remove_columns selector_2
|
||||
t1.should_fail_with No_Output_Columns
|
||||
|
||||
@ -221,17 +220,17 @@ spec setup =
|
||||
Test.specify "should work as shown in the doc examples" <|
|
||||
expect_column_names ["bar", "Baz", "foo 1", "foo 2", "ab.+123", "abcd123", "foo"] <| table.reorder_columns "foo" Position.After_Other_Columns
|
||||
expect_column_names ["Baz", "foo 1", "foo 2", "ab.+123", "abcd123", "foo", "bar"] <| table.reorder_columns ["foo", "bar"] Position.After_Other_Columns
|
||||
expect_column_names ["foo 1", "foo 2", "bar", "Baz", "foo", "ab.+123", "abcd123"] <| table.reorder_columns [By_Name "foo.+" Case_Sensitivity.Insensitive use_regex=True, By_Name "b.*" Case_Sensitivity.Insensitive use_regex=True]
|
||||
expect_column_names ["foo 1", "foo 2", "bar", "Baz", "foo", "ab.+123", "abcd123"] <| table.reorder_columns ["foo.+".to_regex, "b.*".to_regex] case_sensitivity=Case_Sensitivity.Insensitive
|
||||
expect_column_names ["bar", "foo", "Baz", "foo 1", "foo 2", "ab.+123", "abcd123"] <| table.reorder_columns [1, 0] Position.Before_Other_Columns
|
||||
expect_column_names ["bar", "Baz", "foo 1", "foo 2", "ab.+123", "abcd123", "foo"] <| table.reorder_columns [0] Position.After_Other_Columns
|
||||
|
||||
Test.specify "should correctly handle regex matching" <|
|
||||
expect_column_names ["bar", "Baz", "foo 1", "foo 2", "ab.+123", "abcd123", "foo"] <| table.reorder_columns [By_Name "foo" Case_Sensitivity.Sensitive use_regex=True] Position.After_Other_Columns
|
||||
expect_column_names ["bar", "Baz", "foo 1", "foo 2", "ab.+123", "abcd123", "foo"] <| table.reorder_columns ["foo".to_regex] Position.After_Other_Columns
|
||||
rest = ["foo", "bar", "Baz", "foo 1", "foo 2"]
|
||||
expect_column_names ["ab.+123", "abcd123"]+rest <| table.reorder_columns [By_Name "a.*" Case_Sensitivity.Sensitive use_regex=True]
|
||||
expect_column_names ["ab.+123", "abcd123"]+rest <| table.reorder_columns [By_Name "ab.+123" Case_Sensitivity.Sensitive use_regex=True]
|
||||
expect_column_names ["ab.+123", "abcd123"]+rest <| table.reorder_columns ["a.*".to_regex]
|
||||
expect_column_names ["ab.+123", "abcd123"]+rest <| table.reorder_columns ["ab.+123".to_regex]
|
||||
expect_column_names ["ab.+123"]+rest+["abcd123"] <| table.reorder_columns ["ab.+123"]
|
||||
expect_column_names ["abcd123"]+rest+["ab.+123"] <| table.reorder_columns [By_Name "abcd123" Case_Sensitivity.Sensitive use_regex=True]
|
||||
expect_column_names ["abcd123"]+rest+["ab.+123"] <| table.reorder_columns ["abcd123".to_regex]
|
||||
|
||||
Test.specify "should allow negative indices" <|
|
||||
expect_column_names ["abcd123", "foo 2", "foo", "bar", "Baz", "foo 1", "ab.+123"] <| table.reorder_columns [-1, -3, 0, 1]
|
||||
@ -243,10 +242,10 @@ spec setup =
|
||||
col2 = ["bar", [4,5,6]]
|
||||
col3 = ["Bar", [7,8,9]]
|
||||
table_builder [col1, col2, col3]
|
||||
expect_column_names ["bar", "Bar", "foo"] <| table.reorder_columns [By_Name "bar"]
|
||||
expect_column_names ["bar", "Bar", "foo"] <| table.reorder_columns ["bar"] case_sensitivity=Case_Sensitivity.Insensitive
|
||||
|
||||
Test.specify "should correctly handle regexes matching multiple names" <|
|
||||
expect_column_names ["bar", "foo", "foo 1", "foo 2", "Baz", "ab.+123", "abcd123"] <| table.reorder_columns [By_Name "b.*" Case_Sensitivity.Sensitive use_regex=True, By_Name "f.+" Case_Sensitivity.Sensitive use_regex=True]
|
||||
expect_column_names ["bar", "foo", "foo 1", "foo 2", "Baz", "ab.+123", "abcd123"] <| table.reorder_columns ["b.*".to_regex, "f.+".to_regex]
|
||||
|
||||
Test.specify "should correctly handle problems: out of bounds indices" <|
|
||||
selector = [1, 0, 100, -200, 300]
|
||||
@ -329,7 +328,7 @@ spec setup =
|
||||
|
||||
t1 = table_builder [["alpha", [1]], ["name=123", [2]], ["name= foo bar", [3]]]
|
||||
expect_column_names ["alpha", "key:123", "key: foo bar"] <|
|
||||
t1.rename_columns (Map.from_vector [[By_Name "name=(.*)" Case_Sensitivity.Sensitive use_regex=True, "key:$1"]])
|
||||
t1.rename_columns (Map.from_vector [["name=(.*)".to_regex, "key:$1"]])
|
||||
|
||||
Test.specify "should work by index" <|
|
||||
map = Map.from_vector [[0, "FirstColumn"], [-2, "Another"]]
|
||||
@ -357,17 +356,17 @@ spec setup =
|
||||
table.rename_columns map
|
||||
|
||||
Test.specify "should work by name case-insensitively" <|
|
||||
map = Map.from_vector [[By_Name "ALPHA", "FirstColumn"], [By_Name "DELTA", "Another"]]
|
||||
map = Map.from_vector [["ALPHA", "FirstColumn"], ["DELTA", "Another"]]
|
||||
expect_column_names ["FirstColumn", "beta", "gamma", "Another"] <|
|
||||
table.rename_columns map
|
||||
table.rename_columns map Case_Sensitivity.Insensitive
|
||||
|
||||
Test.specify "should work by name using regex" <|
|
||||
map = Map.from_vector [[By_Name "a.*" Case_Sensitivity.Sensitive use_regex=True, "FirstColumn"]]
|
||||
map = Map.from_vector [["a.*".to_regex, "FirstColumn"]]
|
||||
expect_column_names ["FirstColumn", "beta", "gamma", "delta"] <|
|
||||
table.rename_columns map
|
||||
|
||||
Test.specify "should work by name using regex substitution" <|
|
||||
map = Map.from_vector [[By_Name "a(.*)" Case_Sensitivity.Sensitive use_regex=True, "$1"]]
|
||||
map = Map.from_vector [["a(.*)".to_regex, "$1"]]
|
||||
expect_column_names ["lpha", "beta", "gamma", "delta"] <|
|
||||
table.rename_columns map
|
||||
|
||||
@ -411,12 +410,12 @@ spec setup =
|
||||
|
||||
Test.specify "should correctly handle edge-cases: aliased selectors" <|
|
||||
t = table_builder [["alpha", [1,2,3]], ["bet", [4,5,6]]]
|
||||
map1 = Map.from_vector [[By_Name "a.*" Case_Sensitivity.Sensitive use_regex=True, "AA"], [By_Name ".*a" Case_Sensitivity.Sensitive use_regex=True, "AA"]]
|
||||
map1 = Map.from_vector [["a.*".to_regex, "AA"], [".*a".to_regex, "AA"]]
|
||||
t1 = t.rename_columns map1 on_problems=Problem_Behavior.Report_Error
|
||||
Problems.assume_no_problems t1
|
||||
expect_column_names ["AA", "bet"] t1
|
||||
|
||||
map2 = Map.from_vector [[By_Name "a.*" Case_Sensitivity.Sensitive use_regex=True, "StartsWithA"], [By_Name ".*a" Case_Sensitivity.Sensitive use_regex=True, "EndsWithA"]]
|
||||
map2 = Map.from_vector [["a.*".to_regex, "StartsWithA"], [".*a".to_regex, "EndsWithA"]]
|
||||
t2 = t.rename_columns map2 on_problems=Problem_Behavior.Report_Error
|
||||
t2.should_fail_with Ambiguous_Column_Rename
|
||||
err = t2.catch
|
||||
@ -429,7 +428,7 @@ spec setup =
|
||||
This is to show that even if distinct rename patterns match the
|
||||
same column, if the resulting rename is unambiguous, no error is
|
||||
raised.
|
||||
map3 = Map.from_vector [[By_Name "a(.*)" Case_Sensitivity.Sensitive use_regex=True, "$1A"], [By_Name "(.*)aa" Case_Sensitivity.Sensitive use_regex=True, "$1aA"]]
|
||||
map3 = Map.from_vector [["a(.*)".to_regex, "$1A"], ["(.*)aa".to_regex, "$1aA"]]
|
||||
t4 = t3.rename_columns map3 on_problems=Problem_Behavior.Report_Error
|
||||
Problems.assume_no_problems t4
|
||||
expect_column_names ["aaA", "bbb"] t4
|
||||
|
@ -1,7 +1,7 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
|
||||
from Standard.Table import Table, Data_Formatter, Column, Column_Selector
|
||||
from Standard.Table import Table, Data_Formatter, Column
|
||||
from Standard.Table.Data.Type.Value_Type import Value_Type, Auto
|
||||
from Standard.Table.Errors import all
|
||||
|
||||
@ -309,7 +309,7 @@ spec =
|
||||
|
||||
Test.specify "should allow selecting columns by regex" <|
|
||||
t1 = Table.new [["An", ["1", "2", "3"]], ["Am", ["4", "5", "6"]], ["C", ["7", "8", "9"]], ["D", ["10", "11", "12"]]]
|
||||
r1 = t1.parse columns=[Column_Selector.By_Name "A.*" use_regex=True]
|
||||
r1 = t1.parse columns="A.*".to_regex
|
||||
r1.at "An" . to_vector . should_equal [1, 2, 3]
|
||||
r1.at "Am" . to_vector . should_equal [4, 5, 6]
|
||||
r1.at "C" . to_vector . should_equal ["7", "8", "9"]
|
||||
@ -321,9 +321,10 @@ spec =
|
||||
r1.should_fail_with Missing_Input_Columns
|
||||
r1.catch.criteria . should_equal ["B", "C", "E"]
|
||||
|
||||
r2 = t1.parse columns=[Column_Selector.By_Name "A.+" use_regex=True]
|
||||
regex = "A.+".to_regex case_insensitive=True
|
||||
r2 = t1.parse columns=regex
|
||||
r2.should_fail_with Missing_Input_Columns
|
||||
r2.catch.criteria . should_equal ["A.+"]
|
||||
r2.catch.criteria . should_equal [regex]
|
||||
|
||||
action = t1.parse columns=["A", "B", "C", "E"] error_on_missing_columns=False on_problems=_
|
||||
tester table =
|
||||
@ -345,7 +346,7 @@ spec =
|
||||
|
||||
Test.specify "should allow mixed column selectors" <|
|
||||
t1 = Table.new [["Am", ["1", "2", "3"]], ["B", ["4", "5", "6"]], ["C", ["7", "8", "9"]], ["D", ["10", "11", "12"]]]
|
||||
r1 = t1.parse columns=[(Column_Selector.By_Name "A.*" use_regex=True), -2, "D"]
|
||||
r1 = t1.parse columns=["A.*".to_regex, -2, "D"]
|
||||
r1.at "Am" . to_vector . should_equal [1, 2, 3]
|
||||
r1.at "B" . to_vector . should_equal ["4", "5", "6"]
|
||||
r1.at "C" . to_vector . should_equal [7, 8, 9]
|
||||
@ -353,7 +354,7 @@ spec =
|
||||
|
||||
Test.specify "should handle edge-cases: overlapping selectors" <|
|
||||
t1 = Table.new [["Am", ["1", "2", "3"]], ["B", ["4", "5", "6"]], ["C", ["7", "8", "9"]], ["D", ["10", "11", "12"]]]
|
||||
r1 = t1.parse columns=[(Column_Selector.By_Name "A.*" use_regex=True), 0, "D", -1, -1, 0, 3]
|
||||
r1 = t1.parse columns=["A.*".to_regex, 0, "D", -1, -1, 0, 3]
|
||||
r1.at "Am" . to_vector . should_equal [1, 2, 3]
|
||||
r1.at "B" . to_vector . should_equal ["4", "5", "6"]
|
||||
r1.at "C" . to_vector . should_equal ["7", "8", "9"]
|
||||
|
@ -4,7 +4,7 @@ import Standard.Base.Errors.Common.Index_Out_Of_Bounds
|
||||
import Standard.Base.Errors.Common.Type_Error
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
|
||||
from Standard.Table import Table, Column, Sort_Column, Column_Selector, Aggregate_Column
|
||||
from Standard.Table import Table, Column, Sort_Column, Aggregate_Column
|
||||
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all hiding First, Last
|
||||
import Standard.Table.Data.Type.Value_Type.Value_Type
|
||||
from Standard.Table.Errors import Invalid_Output_Column_Names, Duplicate_Output_Column_Names, No_Input_Columns_Selected, Missing_Input_Columns, No_Such_Column, Floating_Point_Equality, Invalid_Value_Type, Row_Count_Mismatch
|
||||
@ -315,15 +315,9 @@ spec =
|
||||
t2.at "Y" . to_vector . should_equal ['A', 0]
|
||||
|
||||
t3 = Table.new [["X", [1, 2, 3]], ["Y", ["", Nothing, Number.nan]]]
|
||||
t4 = t3.remove_columns (Column_Selector.Blank_Columns treat_nans_as_blank=True)
|
||||
t4 = t3.remove_blank_columns treat_nans_as_blank=True
|
||||
t4.columns . map .name . should_equal ["X"]
|
||||
|
||||
Test.specify "Blank_Columns selector should work for all kinds of methods accepting Column_Selector" <|
|
||||
t = Table.new [["X", [1, 2, 3, 4, 5]], ["Y", ["", Nothing, Nothing, Number.nan, ""]]]
|
||||
r1 = t.distinct (Column_Selector.Blank_Columns treat_nans_as_blank=True)
|
||||
r1.at "Y" . to_vector . to_text . should_equal "[, Nothing, NaN]"
|
||||
r1.at "X" . to_vector . should_equal [1, 2, 4]
|
||||
|
||||
Test.group "Info" <|
|
||||
Test.specify "should return Table information" <|
|
||||
a = ["strs", ["a", "b", Nothing, "a"]]
|
||||
|
Loading…
Reference in New Issue
Block a user