Add drop down for replace, remove Column_Selector (#7295)

- Add dropdowns for `replace` functions.
- Retire `Column_Selector` type.
- Add `select_blank_columns` and `remove_blank_columns` functions to table types.
- Allow Regex to be used to pick columns.
This commit is contained in:
James Dunkerley 2023-07-14 17:30:52 +00:00 committed by GitHub
parent 3273ab654d
commit aaa235fbad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
27 changed files with 458 additions and 369 deletions

View File

@ -520,6 +520,7 @@
- [Improving date/time support in Table - added `date_diff`, `date_add`,
`date_part` and some shorthands. Extended `Time_Period` with milli-, micro-
and nanosecond periods.][7221]
- [Retire `Column_Selector` and allow regex based selection of columns.][7295]
[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -749,6 +750,7 @@
[7223]: https://github.com/enso-org/enso/pull/7223
[7234]: https://github.com/enso-org/enso/pull/7234
[7221]: https://github.com/enso-org/enso/pull/7221
[7295]: https://github.com/enso-org/enso/pull/7295
#### Enso Compiler

View File

@ -60,13 +60,3 @@ type Case_Sensitivity
to_explicit_sensitivity_in_memory self = case self of
Case_Sensitivity.Default -> Case_Sensitivity.Sensitive
_ -> self
## PRIVATE
Create matcher function
create_match_function : Boolean -> (Text -> Text -> Boolean)
create_match_function self use_regex=False = case use_regex of
True -> (name-> pattern-> Regex.compile pattern case_insensitive=self.is_case_insensitive_in_memory . matches name)
False -> case self of
Case_Sensitivity.Default -> (==)
Case_Sensitivity.Sensitive -> (==)
Case_Sensitivity.Insensitive locale -> (name-> criterion-> name.equals_ignore_case criterion locale)

View File

@ -35,7 +35,7 @@ from project.Data.Boolean import Boolean, False, True
from project.Data.Json import Invalid_JSON, JS_Object, Json
from project.Data.Numbers import Decimal, Integer, Number, Number_Parse_Error
from project.Data.Range.Extensions import all
from project.Widget_Helpers import make_date_format_selector, make_date_time_format_selector, make_delimiter_selector, make_time_format_selector
from project.Widget_Helpers import make_date_format_selector, make_date_time_format_selector, make_delimiter_selector, make_regex_text_widget, make_time_format_selector
polyglot java import com.ibm.icu.lang.UCharacter
polyglot java import com.ibm.icu.text.BreakIterator
@ -477,8 +477,9 @@ Text.tokenize self pattern="." case_sensitivity=Case_Sensitivity.Sensitive =
Regexp replace.
'<a href="url">content</a>'.replace '<a href="(.*?)">(.*?)</a>'.to_regex '$2 is at $1' == 'content is at url'
Text.replace : Text | Regex -> Text-> Case_Sensitivity -> Boolean -> Text ! Illegal_Argument
Text.replace self term replacement case_sensitivity=Case_Sensitivity.Default only_first=False =
@term make_regex_text_widget
Text.replace : Text | Regex -> Text -> Case_Sensitivity -> Boolean -> Text ! Illegal_Argument
Text.replace self term:(Text | Regex) replacement:Text (case_sensitivity:Case_Sensitivity=Case_Sensitivity.Default) only_first:Boolean=False =
case term of
_ : Text -> if term.is_empty then self else
array_from_single_result result = case result of
@ -496,11 +497,7 @@ Text.replace self term replacement case_sensitivity=Case_Sensitivity.Default onl
Text_Utils.span_of_case_insensitive self term locale.java_locale False
Text_Utils.replace_spans self spans_array replacement
_ : Regex ->
updated_regex = case case_sensitivity of
Case_Sensitivity.Default -> term
_ ->
case_insensitive = case_sensitivity.is_case_insensitive_in_memory
term.recompile case_insensitive
updated_regex = term.recompile case_sensitivity
updated_regex.replace self replacement only_first
## ALIAS Get Words

View File

@ -3,6 +3,7 @@ import project.Data.Filter_Condition.Filter_Condition
import project.Data.Map.Map
import project.Data.Numbers.Integer
import project.Data.Range.Range
import project.Data.Text.Case_Sensitivity.Case_Sensitivity
import project.Data.Text.Helpers
import project.Data.Text.Prim_Text_Helper
import project.Data.Text.Regex.Internal.Match_Iterator.Match_Iterator
@ -370,11 +371,14 @@ type Regex
Recompile the underlying regex string; used to change the
case-sensitivity of a compiled Regex.
recompile : Boolean | Nothing -> Regex ! Regex_Syntax_Error | Illegal_Argument
recompile self case_insensitive=False =
should_recompile = self.case_insensitive != case_insensitive
if should_recompile.not then self else
Regex.compile self.internal_regex_object.pattern case_insensitive
recompile : Case_Sensitivity -> Regex ! Regex_Syntax_Error | Illegal_Argument
recompile self case_sensitivity:Case_Sensitivity = case case_sensitivity of
Case_Sensitivity.Default -> self
_ ->
case_insensitive = case_sensitivity.is_case_insensitive_in_memory
should_recompile = self.case_insensitive != case_insensitive
if should_recompile.not then self else
Regex.compile self.internal_regex_object.pattern case_insensitive
## PRIVATE
Convert the polyglot map to a Map.

View File

@ -10,9 +10,9 @@ polyglot java import java.time.temporal.ChronoUnit
polyglot java import java.time.temporal.TemporalAdjuster
polyglot java import java.time.temporal.TemporalAdjusters
polyglot java import java.time.temporal.TemporalUnit
polyglot java import org.enso.base.Time_Utils
polyglot java import org.enso.base.time.Date_Period_Utils
polyglot java import org.enso.base.time.CustomTemporalUnits
polyglot java import org.enso.base.time.Date_Period_Utils
polyglot java import org.enso.base.Time_Utils
## Represents a unit of time longer on the scale of days (longer than a day).
type Date_Period

View File

@ -5,8 +5,8 @@ from project.Data.Boolean import Boolean, False, True
polyglot java import java.time.temporal.ChronoUnit
polyglot java import java.time.temporal.TemporalUnit
polyglot java import org.enso.base.Time_Utils
polyglot java import org.enso.base.time.CustomTemporalUnits
polyglot java import org.enso.base.Time_Utils
## Represents a unit of time of a day or shorter.
type Time_Period

View File

@ -5,6 +5,12 @@ import project.Data.Time.Time_Of_Day.Time_Of_Day
import project.Metadata.Widget
from project.Metadata import make_single_choice
## PRIVATE
Creates a Regex / Text Widget for search and replace.
make_regex_text_widget : Widget
make_regex_text_widget =
make_single_choice [["Text", '""'], ["Regular Expression", '(Regex.compile "^$")']]
## PRIVATE
Creates a Single_Choice Widget for delimiters.
make_delimiter_selector : Widget

View File

@ -1,8 +1,8 @@
from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.Errors.Illegal_State.Illegal_State
import Standard.Base.Internal.Rounding_Helpers
from Standard.Base.Widget_Helpers import make_regex_text_widget
import Standard.Table.Data.Column.Column as Materialized_Column
import Standard.Table.Data.Type.Enso_Types
@ -1124,7 +1124,6 @@ type Column
- case_sensitivity: Specifies if the text values should be compared case
sensitively.
- only_first: If True, only replace the first match.
- use_regex: If true, the term is used as a regular expression.
> Example
Replace dashes with underscores.
@ -1140,7 +1139,8 @@ type Column
Replace texts in quotes with parentheses.
column.replace '"(.*?)"'.to_regex '($1)'
replace : Text | Column | Regex -> Text | Column -> Case_Sensitivity -> Boolean -> Column
@term make_regex_text_widget
replace : Text | Regex | Column -> Text | Column -> Case_Sensitivity -> Boolean -> Column
replace self term="" new_text="" case_sensitivity=Case_Sensitivity.Sensitive only_first=False =
_ = [term, new_text, case_sensitivity, only_first]
msg = "`Column.replace` is not yet implemented."

View File

@ -3,7 +3,7 @@ import Standard.Base.Errors.Unimplemented.Unimplemented
import Standard.Table.Internal.Naming_Helpers.Naming_Helpers
import Standard.Table.Internal.Problem_Builder.Problem_Builder
from Standard.Table import Aggregate_Column, Column_Selector, Join_Kind, Value_Type
from Standard.Table import Aggregate_Column, Join_Kind, Value_Type
import project.Connection.Connection.Connection
import project.Data.SQL.Builder
@ -255,6 +255,6 @@ default_fetch_primary_key connection table_name =
rs = metadata.getPrimaryKeys Nothing Nothing table_name
keys_table = result_set_to_table rs connection.dialect.make_column_fetcher_for_type
# The names of the columns are sometimes lowercase and sometimes uppercase, so we do a case insensitive select first.
selected = keys_table.select_columns [Column_Selector.By_Name "COLUMN_NAME", Column_Selector.By_Name "KEY_SEQ"] reorder=True
selected = keys_table.select_columns ["COLUMN_NAME", "KEY_SEQ"] case_sensitivity=Case_Sensitivity.Insensitive reorder=True
key_column_names = selected.order_by 1 . at 0 . to_vector
if key_column_names.is_empty then Nothing else key_column_names

View File

@ -28,7 +28,7 @@ import Standard.Table.Internal.Table_Helpers
import Standard.Table.Internal.Table_Helpers.Table_Column_Helper
import Standard.Table.Internal.Unique_Name_Strategy.Unique_Name_Strategy
import Standard.Table.Internal.Widget_Helpers
from Standard.Table import Aggregate_Column, Auto, Column_Selector, Data_Formatter, Match_Columns, Position, Set_Mode, Sort_Column, Value_Type
from Standard.Table import Aggregate_Column, Auto, Data_Formatter, Match_Columns, Position, Set_Mode, Sort_Column, Value_Type
from Standard.Table.Data.Column import get_item_string, normalize_string_for_display
from Standard.Table.Data.Table import print_table
from Standard.Table.Errors import all
@ -147,8 +147,10 @@ type Table
dropped from the output.
Arguments:
- columns: Column selection criteria - a single instance or Vector of
names, indexes or `Column_Selector`.
- columns: Specifies columns by a single instance or Vector of names,
indexes or regular expressions to match names.
- case_sensitivity: Controls whether to be case sensitive when matching
column names.
- reorder: By default, or if set to `False`, columns in the output will
be in the same order as in the input table. If `True`, the order in the
output table will match the order in the columns list. If a column is
@ -182,16 +184,16 @@ type Table
> Example
Select columns matching a regular expression.
table.select_columns [Column_Selector.By_Name "foo.+" Case_Sensitivity.Insensitive use_regex=True]
table.select_columns "foo.+".to_regex case_sensitivity=Case_Sensitivity.Insensitive
> Example
Select the first two columns and the last column, moving the last one to front.
table.select_columns [-1, 0, 1] reorder=True
@columns Widget_Helpers.make_column_name_vector_selector
select_columns : Vector (Integer | Text | Column_Selector) | Text | Integer -> Boolean -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns
select_columns self (columns = [self.columns.first.name]) (reorder = False) (error_on_missing_columns = True) (on_problems = Report_Warning) =
new_columns = self.columns_helper.select_columns selectors=columns reorder=reorder error_on_missing_columns=error_on_missing_columns on_problems=on_problems
select_columns : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Case_Sensitivity -> Boolean -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns
select_columns self (columns : (Vector | Text | Integer | Regex) = [self.columns.first.name]) (case_sensitivity=Case_Sensitivity.Default) (reorder:Boolean=False) (error_on_missing_columns:Boolean=True) (on_problems:Problem_Behavior=Report_Warning) =
new_columns = self.columns_helper.select_columns columns case_sensitivity reorder error_on_missing_columns on_problems
self.updated_columns new_columns
## ALIAS drop_columns
@ -201,8 +203,10 @@ type Table
input.
Arguments:
- columns: Column selection criteria - a single instance or Vector of
names, indexes or `Column_Selector`, which are to be removed.
- columns: Specifies columns by a name, index or regular expression to
match names, or a Vector of these.
- case_sensitivity: Controls whether to be case sensitive when matching
column names.
- error_on_missing_columns: Specifies if a missing input column should
result in an error regardless of the `on_problems` settings. Defaults
to `False`.
@ -232,27 +236,80 @@ type Table
> Example
Remove columns matching a regular expression.
table.remove_columns [Column_Selector.By_Name "foo.+" Case_Sensitivity.Insensitive use_regex=True]
table.remove_columns "foo.+".to_regex Case_Sensitivity.Insensitive
> Example
Remove the first two columns and the last column.
table.remove_columns [-1, 0, 1]
@columns Widget_Helpers.make_column_name_vector_selector
remove_columns : Vector (Integer | Text | Column_Selector) | Text | Integer -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns
remove_columns self (columns = [self.columns.first.name]) (error_on_missing_columns = False) (on_problems = Report_Warning) =
new_columns = self.columns_helper.remove_columns selectors=columns error_on_missing_columns=error_on_missing_columns on_problems=on_problems
remove_columns : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns
remove_columns self (columns : (Vector | Text | Integer | Regex) = [self.columns.first.name]) (case_sensitivity=Case_Sensitivity.Default) (error_on_missing_columns:Boolean=False) (on_problems:Problem_Behavior=Report_Warning) =
new_columns = self.columns_helper.remove_columns columns case_sensitivity error_on_missing_columns=error_on_missing_columns on_problems=on_problems
self.updated_columns new_columns
## ALIAS select_na
ALIAS select_missing_columns
Select columns which are either all blank or contain blank values. If no
rows are present, all columns are considered blank.
Arguments:
- when_any: By default, only columns consisting of all blank cells are
selected. If set to `True`, columns with one or more blank values are
selected.
- treat_nans_as_blank: specifies whether `Number.nan` is considered as
blank. By default, it is not.
? Blank values
Blank values are `Nothing`, `""` and depending on setting `Number.nan`.
> Example
Select completely blank columns from a table.
table.select_blank_columns
select_blank_columns : Boolean -> Boolean -> Table
select_blank_columns self (when_any : Boolean = False) (treat_nans_as_blank : Boolean = False) =
new_columns = self.columns_helper.select_blank_columns_helper when_any treat_nans_as_blank
if new_columns.length == 0 then Error.throw (No_Output_Columns) else
self.updated_columns new_columns
## ALIAS drop_na
ALIAS drop_missing_columns
Remove columns which are either all blank or contain blank values. If no
rows are present, all columns are considered blank.
Arguments:
- when_any: By default, only columns consisting of all blank cells are
removed. If set to `True`, columns with one or more blank values are
removed.
- treat_nans_as_blank: specifies whether `Number.nan` is considered as
blank. By default, it is not.
? Blank values
Blank values are `Nothing`, `""` and depending on setting `Number.nan`.
> Example
Remove completely blank columns from a table.
table.remove_blank_columns
remove_blank_columns : Boolean -> Boolean -> Table
remove_blank_columns self (when_any : Boolean = False) (treat_nans_as_blank : Boolean = False) =
new_columns = self.columns_helper.select_blank_columns_helper when_any treat_nans_as_blank invert_selection=True
if new_columns.length == 0 then Error.throw (No_Output_Columns) else
self.updated_columns new_columns
## Returns a new table with the specified selection of columns moved to
either the start or the end in the specified order.
Arguments:
- columns: Column selection criteria - a single instance or Vector of
names, indexes or `Column_Selector`, which should be reordered and
specifying their order.
- columns: Specifies columns by a name, index or regular expression to
match names, or a Vector of these.
- position: Specifies how to place the selected columns in relation to
the remaining columns which were not matched by `columns` (if any).
- case_sensitivity: Controls whether to be case sensitive when matching
column names.
- error_on_missing_columns: Specifies if a missing input column should
result in an error regardless of the `on_problems` settings. Defaults
to `False`.
@ -279,7 +336,7 @@ type Table
> Example
Move columns matching a regular expression to front, keeping columns matching "foo.+" before columns matching "b.*".
table.reorder_columns [Column_Selector.By_Name "foo.+" Case_Sensitivity.Insensitive use_regex=True]
table.reorder_columns ["foo.+".to_regex, "b.*".to_regex] case_sensitivity=Case_Sensitivity.Insensitive
> Example
Swap the first two columns.
@ -291,9 +348,9 @@ type Table
table.reorder_columns [0] position=Position.After_Other_Columns
@columns Widget_Helpers.make_column_name_vector_selector
reorder_columns : Vector (Integer | Text | Column_Selector) | Text | Integer -> Position -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns
reorder_columns self (columns = [self.columns.first.name]) (position = Position.Before_Other_Columns) (error_on_missing_columns = False) (on_problems = Report_Warning) =
new_columns = self.columns_helper.reorder_columns selectors=columns position=position error_on_missing_columns on_problems=on_problems
reorder_columns : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Position -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns
reorder_columns self (columns : (Vector | Text | Integer | Regex) = [self.columns.first.name]) (position:Position=Position.Before_Other_Columns) (case_sensitivity=Case_Sensitivity.Default) (error_on_missing_columns:Boolean=False) (on_problems:Problem_Behavior=Report_Warning) =
new_columns = self.columns_helper.reorder_columns columns position case_sensitivity error_on_missing_columns on_problems
self.updated_columns new_columns
## Returns a new table with the columns sorted by name according to the
@ -328,7 +385,10 @@ type Table
Arguments:
- column_map: Mapping from old column names to new or a vector of new
column names to apply by position.
column names to apply by position. `Regex` objects can be used
within the mapping to do pattern based renaming.
- case_sensitivity: Controls whether to be case sensitive when matching
column names.
- error_on_missing_columns: Specifies if a missing input column should
result in an error regardless of the `on_problems` settings. Defaults
to `True`.
@ -379,12 +439,11 @@ type Table
> Example
For all columns starting with the prefix `name=`, replace it with `key:`.
by_name = Column_Selector.By_Name "name=(.*)" Case_Sensitivity.Sensitive use_regex=True
table.rename_columns (Map.from_vector [[by_name, "key:$1"]])
table.rename_columns (Map.from_vector [["name=(.*)".to_regex, "key:$1"]])
@column_map Widget_Helpers.make_rename_name_vector_selector
rename_columns : Map (Text | Integer | Column_Selector) Text | Vector Text | Vector Vector -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns | Ambiguous_Column_Rename | Too_Many_Column_Names_Provided | Invalid_Output_Column_Names | Duplicate_Output_Column_Names
rename_columns self column_map=["Column"] (error_on_missing_columns=True) (on_problems=Report_Warning) =
new_names = Table_Helpers.rename_columns internal_columns=self.internal_columns mapping=column_map error_on_missing_columns=error_on_missing_columns on_problems=on_problems
rename_columns : Map (Text | Integer | Regex) Text | Vector Text | Vector Vector -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns | Ambiguous_Column_Rename | Too_Many_Column_Names_Provided | Invalid_Output_Column_Names | Duplicate_Output_Column_Names
rename_columns self (column_map:(Map | Vector)=["Column"]) (case_sensitivity:Case_Sensitivity=Case_Sensitivity.Default) (error_on_missing_columns:Boolean=True) (on_problems:Problem_Behavior=Report_Warning) =
new_names = Table_Helpers.rename_columns self.internal_columns column_map case_sensitivity error_on_missing_columns on_problems
Warning.with_suspended new_names names->
self.updated_columns (self.internal_columns.map c-> c.rename (names.at c.name))
@ -566,10 +625,10 @@ type Table
problem is reported.
@group_by Widget_Helpers.make_column_name_vector_selector
@order_by Widget_Helpers.make_order_by_selector
add_row_number : Text -> Integer -> Integer -> Vector (Text | Integer | Column_Selector) | Text | Integer -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
add_row_number self (name:Text = "Row") (from:Integer = 1) (step:Integer = 1) group_by=[] order_by=[] on_problems=Problem_Behavior.Report_Warning =
add_row_number : Text -> Integer -> Integer -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
add_row_number self (name:Text="Row") (from:Integer=1) (step:Integer=1) (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=Problem_Behavior.Report_Warning) =
problem_builder = Problem_Builder.new error_on_missing_columns=True
grouping_columns = self.columns_helper.select_columns_helper group_by True problem_builder
grouping_columns = self.columns_helper.select_columns_helper group_by Case_Sensitivity.Default True problem_builder
grouping_columns.each internal_column->
column = self.make_column internal_column
if column.value_type.is_floating_point then
@ -944,9 +1003,9 @@ type Table
`Floating_Point_Equality` is reported according to the `on_problems`
setting.
@columns Widget_Helpers.make_column_name_vector_selector
distinct : Vector (Integer | Text | Column_Selector) | Text | Integer -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns | No_Input_Columns_Selected | Floating_Point_Equality
distinct : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns | No_Input_Columns_Selected | Floating_Point_Equality
distinct self columns=self.column_names case_sensitivity=Case_Sensitivity.Default error_on_missing_columns=True on_problems=Report_Warning =
key_columns = self.columns_helper.select_columns selectors=columns reorder=True error_on_missing_columns=error_on_missing_columns on_problems=on_problems . catch No_Output_Columns _->
key_columns = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=True error_on_missing_columns=error_on_missing_columns on_problems=on_problems . catch No_Output_Columns _->
Error.throw No_Input_Columns_Selected
problem_builder = Problem_Builder.new
new_table = self.connection.dialect.prepare_distinct self key_columns case_sensitivity problem_builder
@ -1494,7 +1553,7 @@ type Table
B | Name | Another
B | Country | Germany
@key_columns Widget_Helpers.make_column_name_vector_selector
transpose : Vector (Integer | Text | Column_Selector) | Text | Integer -> Text -> Text -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns | Duplicate_Output_Column_Names
transpose : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Text -> Text -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns | Duplicate_Output_Column_Names
transpose self key_columns=[] (attribute_column_name="Name") (value_column_name="Value") (error_on_missing_columns=True) (on_problems = Report_Warning) =
## Avoid unused arguments warning. We cannot rename arguments to `_`,
because we need to keep the API consistent with the in-memory table.
@ -1552,7 +1611,7 @@ type Table
@group_by Widget_Helpers.make_column_name_vector_selector
@name_column Widget_Helpers.make_column_name_selector
@values (Widget_Helpers.make_aggregate_column_selector include_group_by=False)
cross_tab : Vector (Integer | Text | Column_Selector | Aggregate_Column) | Text | Integer -> (Text | Integer) -> Aggregate_Column | Vector Aggregate_Column -> Problem_Behavior -> Table ! Missing_Input_Columns | Invalid_Aggregate_Column | Floating_Point_Equality | Invalid_Aggregation | Unquoted_Delimiter | Additional_Warnings
cross_tab : Vector (Integer | Text | Regex | Aggregate_Column) | Text | Integer | Regex -> (Text | Integer) -> Aggregate_Column | Vector Aggregate_Column -> Problem_Behavior -> Table ! Missing_Input_Columns | Invalid_Aggregate_Column | Floating_Point_Equality | Invalid_Aggregation | Unquoted_Delimiter | Additional_Warnings
cross_tab self group_by name_column values=Aggregate_Column.Count (on_problems=Report_Warning) =
## Avoid unused arguments warning. We cannot rename arguments to `_`,
because we need to keep the API consistent with the in-memory table.
@ -1610,9 +1669,9 @@ type Table
table.parse "birthday" Value_Type.Date
@type (Widget_Helpers.parse_type_selector include_auto=False)
@columns Widget_Helpers.make_column_name_vector_selector
parse : Vector (Text | Integer | Column_Selector) | Text | Integer -> Value_Type | Auto -> Text | Data_Formatter | Nothing -> Boolean -> Problem_Behavior -> Table
parse : Vector (Text | Integer | Regex) | Text | Integer | Regex -> Value_Type | Auto -> Text | Data_Formatter | Nothing -> Boolean -> Problem_Behavior -> Table
parse self columns=(self.columns . filter (c-> c.value_type.is_text) . map .name) type format=Nothing error_on_missing_columns=True on_problems=Report_Warning =
selected = self.columns_helper.resolve_columns columns error_on_missing_columns=error_on_missing_columns on_problems=on_problems
selected = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=False error_on_missing_columns=error_on_missing_columns on_problems=on_problems error_on_empty=False . map self.make_column
selected.fold self table-> column_to_parse->
new_column = column_to_parse.parse type format on_problems
table.set new_column new_name=column_to_parse.name set_mode=Set_Mode.Update
@ -1779,9 +1838,9 @@ type Table
types. Due to this, a Mixed column containing values `[2, "3"]` will
actually be converted into `[2, Nothing]` when casting to Integer type.
@columns Widget_Helpers.make_column_name_vector_selector
cast : Vector (Text | Integer | Column_Selector) | Text | Integer -> Value_Type -> Boolean -> Problem_Behavior -> Table ! Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure
cast : Vector (Text | Integer | Regex) | Text | Integer | Regex -> Value_Type -> Boolean -> Problem_Behavior -> Table ! Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure
cast self columns=[0] value_type error_on_missing_columns=True on_problems=Problem_Behavior.Report_Warning =
selected = self.columns_helper.resolve_columns columns error_on_missing_columns=error_on_missing_columns on_problems=on_problems
selected = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=False error_on_missing_columns=error_on_missing_columns on_problems=on_problems error_on_empty=False . map self.make_column
selected.fold self table-> column_to_cast->
new_column = column_to_cast.cast value_type on_problems
table.set new_column new_name=column_to_cast.name set_mode=Set_Mode.Update
@ -2022,7 +2081,8 @@ type Table
been replaced with the provided default(s).
Arguments:
- columns: The column(s) to fill missing values of.
- columns: Specifies columns by a name, index or regular expression to
match names, or a Vector of these.
- default: The value to replace missing values with. If this argument
is a column, the value from `default` at the corresponding position
will be used.
@ -2032,7 +2092,7 @@ type Table
fill_nothing = table.fill_nothing ["col0", "col1"] 20.5
@columns Widget_Helpers.make_column_name_vector_selector
fill_nothing : Vector (Integer | Text | Column_Selector) | Text | Integer -> Column | Any -> Table
fill_nothing : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Any -> Table
fill_nothing self columns default =
transformer col = col.fill_nothing default
Table_Helpers.replace_columns_with_transformed_columns self columns transformer
@ -2040,10 +2100,11 @@ type Table
## ALIAS Fill Empty, if_empty
Returns a new table where empty Text values have been replaced with the
provided default(s).
provided default.
Arguments:
- columns: The column(s) to fill empty values.
- columns: Specifies columns by a name, index or regular expression to
match names, or a Vector of these.
- default: The value to replace empty values with. If this argument
is a column, the value from `default` at the corresponding position
will be used.
@ -2053,7 +2114,7 @@ type Table
fill_empty = table.fill_empty ["col0", "col1"] "hello"
@columns Widget_Helpers.make_column_name_vector_selector
fill_empty : Vector (Integer | Text | Column_Selector) | Text | Integer -> Column | Any -> Table
fill_empty : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Any -> Table
fill_empty self columns default =
transformer col = col.fill_empty default
Table_Helpers.replace_columns_with_transformed_columns self columns transformer
@ -2062,11 +2123,11 @@ type Table
row of the specified column. If `term` is empty, the function returns the
table unchanged.
This method follows the exact replacement semantics of the
`Text.replace` method.
This method follows the exact replacement semantics of `Text.replace`.
Arguments:
- columns: The column(s) to replace values on.
- columns: Specifies columns by a name, index or regular expression to
match names, or a Vector of these.
- term: The term to find. Can be `Text`, `Regex`, or a `Column` of
strings.
- replacement: The text to replace matches with.
@ -2077,19 +2138,19 @@ type Table
> Example
Replace dashes with underscores.
table.replace "-" "_"
table.replace ["col0", "col1"] "-" "_"
> Example
Remove leading and trailing spaces from cells.
column.replace "^\s*(.*?)\s*$".to_regex "$1"
table.replace ["col.*".to_regex] "^\s*(.*?)\s*$".to_regex "$1"
> Example
Replace texts in quotes with parentheses.
column.replace '"(.*?)"'.to_regex '($1)'
table.replace ["col0"] '"(.*?)"'.to_regex '($1)'
@columns Widget_Helpers.make_column_name_vector_selector
replace : Vector (Integer | Text | Column_Selector) | Text | Integer -> Text | Column | Regex -> Text | Column -> Case_Sensitivity -> Boolean -> Column
replace : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Text | Column | Regex -> Text | Column -> Case_Sensitivity -> Boolean -> Column
replace self columns term="" new_text="" case_sensitivity=Case_Sensitivity.Sensitive only_first=False =
_ = [columns, term, new_text, case_sensitivity, only_first]
Error.throw (Unsupported_Database_Operation.Error "Text replace is currently not supported in the database backend.")
@ -2101,7 +2162,8 @@ type Table
Arguments:
- connection: The connection to a database.
- table_name: The name of the table to get.
- columns: List of columns to fetch. Each column is represented by a pair of column name and its expected SQL Type.
- columns: List of columns to fetch. Each column is represented by a pair of
column name and its expected SQL Type.
- ctx: The context to use for the table.
make_table : Connection -> Text -> Vector -> Context -> Table
make_table connection table_name columns ctx =

View File

@ -2,7 +2,6 @@ from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Table.Internal.Widget_Helpers
from Standard.Table import Column_Selector
from Standard.Table.Errors import all
import project.Connection.Connection.Connection

View File

@ -3,7 +3,6 @@ import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Table.Data.Table.Table
import Standard.Table.Internal.Widget_Helpers
from Standard.Table import Column_Selector
from Standard.Table.Errors import all
import project.Connection.Connection.Connection

View File

@ -268,8 +268,8 @@ make_is_in_column arguments = case arguments.length of
_ -> Error.throw <| Illegal_State.Error ("The operation IS_IN_COLUMN requires at exactly 3 arguments: the expression, the IN subquery, the subquery checking for nulls.")
## PRIVATE
make_row_number : Vector Builder -> Builder
make_row_number arguments (metadata : Row_Number_Metadata) = if arguments.length < 3 then Error.throw (Illegal_State.Error "Wrong amount of parameters in ROW_NUMBER IR. This is a bug in the Database library.") else
make_row_number : Vector Builder -> Row_Number_Metadata -> Builder
make_row_number (arguments : Vector) (metadata : Row_Number_Metadata) = if arguments.length < 3 then Error.throw (Illegal_State.Error "Wrong amount of parameters in ROW_NUMBER IR. This is a bug in the Database library.") else
offset = arguments.at 0
step = arguments.at 1

View File

@ -6,7 +6,7 @@ import Standard.Base.Errors.Illegal_State.Illegal_State
import Standard.Base.Runtime.Context
import Standard.Table.Data.Table.Table as In_Memory_Table
from Standard.Table import Aggregate_Column, Column_Selector, Join_Kind, Value_Type
from Standard.Table import Aggregate_Column, Join_Kind, Value_Type
from Standard.Table.Errors import all
import project.Connection.Connection.Connection

View File

@ -29,7 +29,7 @@
import Standard.Examples
example_drop_missing_cols =
Examples.inventory_table.remove (Column_Selector.Blank_Columns when_any=True)
Examples.inventory_table.remove_blank_columns when_any=True
> Example
Fill missing values in a column with the value 20.5.

View File

@ -1,7 +1,6 @@
from Standard.Base import all
import project.Data.Column.Column
import project.Data.Column_Selector.Column_Selector
import project.Data.Sort_Column.Sort_Column
## Defines an Aggregate Column
@ -32,7 +31,7 @@ type Aggregate_Column
multiple selection.
- new_name: name of new column.
- ignore_nothing: if all values are Nothing won't be included.
Count_Distinct (columns:(Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector | Column))=0) (new_name:Text="") (ignore_nothing:Boolean=False) # Column needed because of 6866
Count_Distinct (columns:(Text | Integer | Regex | Vector (Integer | Text | Regex | Column))=0) (new_name:Text="") (ignore_nothing:Boolean=False) # Column needed because of 6866
## ALIAS Count_Not_Null

View File

@ -5,6 +5,7 @@ import Standard.Base.Errors.Common.Index_Out_Of_Bounds
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.Errors.Illegal_State.Illegal_State
import Standard.Base.Internal.Rounding_Helpers
from Standard.Base.Widget_Helpers import make_regex_text_widget
import project.Data.Data_Formatter.Data_Formatter
import project.Data.Table.Table
@ -1212,7 +1213,8 @@ type Column
Replace texts in quotes with parentheses.
column.replace '"(.*?)"'.to_regex '($1)'
replace : Text | Column | Regex -> Text | Column -> Case_Sensitivity -> Boolean -> Column
@term make_regex_text_widget
replace : Text | Regex | Column -> Text | Column -> Case_Sensitivity -> Boolean -> Column
replace self term="" new_text="" case_sensitivity=Case_Sensitivity.Sensitive only_first=False =
Value_Type.expect_text self <|
term_fn = wrap_text_or_regex_argument_as_value_provider term

View File

@ -1,32 +0,0 @@
from Standard.Base import all
## Specifies a selection of columns from the table on which an operation is
going to be performed.
type Column_Selector
## Selects columns based on their names.
It can do regex-based and case insensitive matching if requested.
It is possible for it to match multiple columns, in which case all the
matched ones will be included in the same relative order as in the table.
By_Name name:Text case_sensitivity:Case_Sensitivity=Case_Sensitivity.Insensitive use_regex:Boolean=False
## ALIAS dropna
ALIAS drop_missing_columns
Select columns which are either all blank or contain blank values.
Arguments:
- when_any: By default, only columns consisting of all blank cells are
selected. If set to `True`, columns containing at least one blank value
will be selected too. If there are no rows, the column is treated as
blank regardless of this argument.
- treat_nans_as_blank: If `True`, then `Number.nan` is considered as
blank.
? Blank values
Blank values are `Nothing`, `""` and depending on setting `Number.nan`.
> Example
Remove completely blank columns from a table.
table.remove_columns Column_Selector.Blank_Columns
Blank_Columns when_any:Boolean=False treat_nans_as_blank:Boolean=False

View File

@ -15,7 +15,6 @@ from Standard.Base.Widget_Helpers import make_delimiter_selector
import project.Data.Aggregate_Column.Aggregate_Column
import project.Data.Column as Column_Module
import project.Data.Column.Column
import project.Data.Column_Selector.Column_Selector
import project.Data.Data_Formatter.Data_Formatter
import project.Data.Expression.Expression
import project.Data.Expression.Expression_Error
@ -267,8 +266,10 @@ type Table
dropped from the output.
Arguments:
- columns: Column selection criteria - a single instance or Vector of
names, indexes or `Column_Selector`.
- columns: Specifies columns by a name, index or regular expression to
match names, or a Vector of these.
- case_sensitivity: Controls whether to be case sensitive when matching
column names.
- reorder: By default, or if set to `False`, columns in the output will
be in the same order as in the input table. If `True`, the order in the
output table will match the order in the columns list. If a column is
@ -303,27 +304,29 @@ type Table
> Example
Select columns matching a regular expression.
table.select_columns [Column_Selector.By_Name "foo.+" Case_Sensitivity.Insensitive use_regex=True]
table.select_columns "foo.+".to_regex case_sensitivity=Case_Sensitivity.Insensitive
> Example
Select the first two columns and the last column, moving the last one to front.
table.select_columns [-1, 0, 1] reorder=True
@columns Widget_Helpers.make_column_name_vector_selector
select_columns : Vector (Integer | Text | Column_Selector) | Text | Integer -> Boolean -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns
select_columns self columns=[self.columns.first.name] (reorder = False) (error_on_missing_columns = True) (on_problems = Report_Warning) =
new_columns = self.columns_helper.select_columns selectors=columns reorder=reorder error_on_missing_columns=error_on_missing_columns on_problems=on_problems
select_columns : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Case_Sensitivity -> Boolean -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns
select_columns self (columns : (Vector | Text | Integer | Regex) = [self.columns.first.name]) (case_sensitivity=Case_Sensitivity.Default) (reorder:Boolean=False) (error_on_missing_columns:Boolean=True) (on_problems:Problem_Behavior=Report_Warning) =
new_columns = self.columns_helper.select_columns columns case_sensitivity reorder error_on_missing_columns on_problems
Table.new new_columns
## ALIAS drop_columns
Returns a new table with the chosen set of columns, as specified by thez
Returns a new table with the chosen set of columns, as specified by the
`columns`, removed from the input table. Any unmatched input columns will
be kept in the output. Columns are returned in the same order as in the
input.
Arguments:
- columns: Column selection criteria - a single instance or Vector of
names, indexes or `Column_Selector`, which are to be removed.
- columns: Specifies columns by a name, index or regular expression to
match names, or a Vector of these.
- case_sensitivity: Controls whether to be case sensitive when matching
column names.
- error_on_missing_columns: Specifies if a missing input column should
result in an error regardless of the `on_problems` settings. Defaults
to `False`.
@ -353,27 +356,80 @@ type Table
> Example
Remove columns matching a regular expression.
table.remove_columns [Column_Selector.By_Name "foo.+" Case_Sensitivity.Insensitive use_regex=True]
table.remove_columns "foo.+".to_regex case_sensitivity=Case_Sensitivity.Insensitive
> Example
Remove the first two columns and the last column.
table.remove_columns [-1, 0, 1]
@columns Widget_Helpers.make_column_name_vector_selector
remove_columns : Vector (Integer | Text | Column_Selector) | Text | Integer -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns
remove_columns self (columns=[self.columns.first.name]) (error_on_missing_columns = False) (on_problems = Report_Warning) =
new_columns = self.columns_helper.remove_columns selectors=columns error_on_missing_columns=error_on_missing_columns on_problems=on_problems
remove_columns : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns
remove_columns self (columns : (Vector | Text | Integer | Regex) = [self.columns.first.name]) (case_sensitivity=Case_Sensitivity.Default) (error_on_missing_columns:Boolean=False) (on_problems:Problem_Behavior=Report_Warning) =
new_columns = self.columns_helper.remove_columns columns case_sensitivity error_on_missing_columns=error_on_missing_columns on_problems=on_problems
Table.new new_columns
## ALIAS select_na
ALIAS select_missing_columns
Select columns which are either all blank or contain blank values. If no
rows are present, all columns are considered blank.
Arguments:
- when_any: By default, only columns consisting of all blank cells are
selected. If set to `True`, columns with one or more blank values are
selected.
- treat_nans_as_blank: specifies whether `Number.nan` is considered as
blank. By default, it is not.
? Blank values
Blank values are `Nothing`, `""` and depending on setting `Number.nan`.
> Example
Select completely blank columns from a table.
table.select_blank_columns
select_blank_columns : Boolean -> Boolean -> Table ! No_Output_Columns
select_blank_columns self (when_any : Boolean = False) (treat_nans_as_blank : Boolean = False) =
new_columns = self.columns_helper.select_blank_columns_helper when_any treat_nans_as_blank
if new_columns.length == 0 then Error.throw (No_Output_Columns) else
Table.new new_columns
## ALIAS drop_na
ALIAS drop_missing_columns
Remove columns which are either all blank or contain blank values. If no
rows are present, all columns are considered blank.
Arguments:
- when_any: By default, only columns consisting of all blank cells are
selected. If set to `True`, columns with one or more blank values are
selected.
- treat_nans_as_blank: specifies whether `Number.nan` is considered as
blank. By default, it is not.
? Blank values
Blank values are `Nothing`, `""` and depending on setting `Number.nan`.
> Example
Remove completely blank columns from a table.
table.remove_blank_columns
remove_blank_columns : Boolean -> Boolean -> Table ! No_Output_Columns
remove_blank_columns self (when_any : Boolean = False) (treat_nans_as_blank : Boolean = False) =
new_columns = self.columns_helper.select_blank_columns_helper when_any treat_nans_as_blank invert_selection=True
if new_columns.length == 0 then Error.throw (No_Output_Columns) else
Table.new new_columns
## Returns a new table with the specified selection of columns moved to
either the start or the end in the specified order.
Arguments:
- columns: Column selection criteria - a single instance or Vector of
names, indexes or `Column_Selector`, which should be reordered and
specifying their order.
- columns: Specifies columns by a name, index or regular expression to
match names, or a Vector of these.
- position: Specifies how to place the selected columns in relation to
the remaining columns which were not matched by `columns` (if any).
- case_sensitivity: Controls whether to be case sensitive when matching
column names.
- error_on_missing_columns: Specifies if a missing input column should
result in an error regardless of the `on_problems` settings. Defaults
to `False`.
@ -400,7 +456,7 @@ type Table
> Example
Move columns matching a regular expression to front, keeping columns matching "foo.+" before columns matching "b.*".
table.reorder_columns [Column_Selector.By_Name "foo.+" Case_Sensitivity.Insensitive use_regex=True]
table.reorder_columns ["foo.+".to_regex, "b.*".to_regex] case_sensitivity=Case_Sensitivity.Insensitive
> Example
Swap the first two columns.
@ -412,9 +468,9 @@ type Table
table.reorder_columns [0] position=Position.After_Other_Columns
@columns Widget_Helpers.make_column_name_vector_selector
reorder_columns : Vector (Integer | Text | Column_Selector) | Text | Integer -> Position -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns
reorder_columns self (columns = [self.columns.first.name]) (position = Position.Before_Other_Columns) (error_on_missing_columns = False) (on_problems = Report_Warning) =
new_columns = self.columns_helper.reorder_columns selectors=columns position=position error_on_missing_columns=error_on_missing_columns on_problems=on_problems
reorder_columns : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Position -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns
reorder_columns self (columns : (Vector | Text | Integer | Regex) = [self.columns.first.name]) (position:Position=Position.Before_Other_Columns) (case_sensitivity=Case_Sensitivity.Default) (error_on_missing_columns:Boolean=False) (on_problems:Problem_Behavior=Report_Warning) =
new_columns = self.columns_helper.reorder_columns columns position case_sensitivity error_on_missing_columns on_problems
Table.new new_columns
## Returns a new table with the columns sorted by name according to the
@ -441,7 +497,7 @@ type Table
table.sort_columns Sort_Direction.Descending
sort_columns : Sort_Direction -> Text_Ordering -> Table
sort_columns self order=Sort_Direction.Ascending text_ordering=Text_Ordering.Default =
new_columns = Table_Helpers.sort_columns internal_columns=self.columns order text_ordering
new_columns = Table_Helpers.sort_columns self.columns order text_ordering
Table.new new_columns
## Returns a new table with the columns renamed based on either a mapping
@ -449,7 +505,10 @@ type Table
Arguments:
- column_map: Mapping from old column names to new or a vector of new
column names to apply by position.
column names to apply by position. `Regex` objects can be used
within the mapping to do pattern based renaming.
- case_sensitivity: Controls whether to be case sensitive when matching
column names.
- error_on_missing_columns: Specifies if a missing input column should
result in an error regardless of the `on_problems` settings. Defaults
to `True`.
@ -500,12 +559,11 @@ type Table
> Example
For all columns starting with the prefix `name=`, replace it with `key:`.
by_name = Column_Selector.By_Name "name=(.*)" Case_Sensitivity.Sensitive use_regex=True
table.rename_columns (Map.from_vector [[by_name, "key:$1"]])
table.rename_columns (Map.from_vector [["name=(.*)".to_regex, "key:$1"]])
@column_map Widget_Helpers.make_rename_name_vector_selector
rename_columns : Map (Text | Integer | Column_Selector) Text | Vector Text | Vector Vector -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns | Ambiguous_Column_Rename | Too_Many_Column_Names_Provided | Invalid_Output_Column_Names | Duplicate_Output_Column_Names
rename_columns self column_map=["Column"] (error_on_missing_columns=True) (on_problems=Report_Warning) =
new_names = Table_Helpers.rename_columns internal_columns=self.columns mapping=column_map error_on_missing_columns=error_on_missing_columns on_problems=on_problems
rename_columns : Map (Text | Integer | Regex) Text | Vector Text | Vector Vector -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns | Ambiguous_Column_Rename | Too_Many_Column_Names_Provided | Invalid_Output_Column_Names | Duplicate_Output_Column_Names
rename_columns self (column_map:(Map | Vector)=["Column"]) (case_sensitivity:Case_Sensitivity=Case_Sensitivity.Default) (error_on_missing_columns:Boolean=True) (on_problems:Problem_Behavior=Report_Warning) =
new_names = Table_Helpers.rename_columns self.columns column_map case_sensitivity error_on_missing_columns on_problems
Warning.with_suspended new_names names->
Table.new (self.columns.map c-> c.rename (names.at c.name))
@ -673,10 +731,8 @@ type Table
problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns types_to_always_throw=[No_Input_Columns_Selected]
columns_for_ordering = Table_Helpers.prepare_order_by self.columns columns problem_builder
problem_builder.attach_problems_before on_problems <|
java_columns = columns_for_ordering.map c->
c.column.java_column
directions = columns_for_ordering.map c->
c.associated_selector.direction.to_sign
java_columns = columns_for_ordering.map c->c.column.java_column
directions = columns_for_ordering.map c->c.associated_selector.direction.to_sign
comparator = case text_ordering.sort_digits_as_numbers of
True ->
@ -727,9 +783,9 @@ type Table
`Floating_Point_Equality` is reported according to the `on_problems`
setting.
@columns Widget_Helpers.make_column_name_vector_selector
distinct : Vector (Integer | Text | Column_Selector) | Text | Integer -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns | No_Input_Columns_Selected | Floating_Point_Equality
distinct : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns | No_Input_Columns_Selected | Floating_Point_Equality
distinct self (columns = self.column_names) case_sensitivity=Case_Sensitivity.Default error_on_missing_columns=True on_problems=Report_Warning =
key_columns = self.columns_helper.select_columns selectors=columns reorder=True error_on_missing_columns=error_on_missing_columns on_problems=on_problems . catch No_Output_Columns _->
key_columns = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=True error_on_missing_columns=error_on_missing_columns on_problems=on_problems . catch No_Output_Columns _->
Error.throw No_Input_Columns_Selected
java_columns = key_columns.map .java_column
text_folding_strategy = Case_Sensitivity.folding_strategy case_sensitivity
@ -827,7 +883,7 @@ type Table
table.parse format=(Data_Formatter.Value.with_number_formatting decimal_point=',')
@columns Widget_Helpers.make_column_name_vector_selector
@type Widget_Helpers.parse_type_selector
parse : Vector (Text | Integer | Column_Selector) | Text | Integer -> Value_Type | Auto -> Text | Data_Formatter | Nothing -> Boolean -> Problem_Behavior -> Table
parse : Vector (Text | Integer | Regex) | Text | Integer | Regex -> Value_Type | Auto -> Text | Data_Formatter | Nothing -> Boolean -> Problem_Behavior -> Table
parse self columns=(self.columns . filter (c-> c.value_type.is_text) . map .name) type=Auto format=Data_Formatter.Value error_on_missing_columns=True on_problems=Report_Warning =
formatter = case format of
_ : Text ->
@ -839,7 +895,7 @@ type Table
parser = formatter.make_value_type_parser type
select_problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns
selected_columns = self.columns_helper.select_columns_helper columns reorder=True select_problem_builder
selected_columns = self.columns_helper.select_columns_helper columns Case_Sensitivity.Default True select_problem_builder
select_problem_builder.attach_problems_before on_problems <|
selected_column_names = case selected_columns.is_empty of
True ->
@ -914,9 +970,9 @@ type Table
types. Due to this, a Mixed column containing values `[2, "3"]` will
actually be converted into `[2, Nothing]` when casting to Integer type.
@columns Widget_Helpers.make_column_name_vector_selector
cast : Vector (Text | Integer | Column_Selector) | Text | Integer -> Value_Type -> Boolean -> Problem_Behavior -> Table ! Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure
cast : Vector (Text | Integer | Regex) | Text | Integer | Regex -> Value_Type -> Boolean -> Problem_Behavior -> Table ! Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure
cast self columns=[0] value_type error_on_missing_columns=True on_problems=Problem_Behavior.Report_Warning =
selected = self.columns_helper.resolve_columns columns error_on_missing_columns=error_on_missing_columns on_problems=on_problems
selected = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=False error_on_missing_columns=error_on_missing_columns on_problems=on_problems error_on_empty=False
selected.fold self table-> column_to_cast->
new_column = column_to_cast.cast value_type on_problems
table.set new_column new_name=column_to_cast.name set_mode=Set_Mode.Update
@ -1201,8 +1257,8 @@ type Table
problem is reported.
@group_by Widget_Helpers.make_column_name_vector_selector
@order_by Widget_Helpers.make_order_by_selector
add_row_number : Text -> Integer -> Integer -> Vector (Text | Integer | Column_Selector) | Text | Integer -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
add_row_number self (name:Text = "Row") (from:Integer = 1) (step:Integer = 1) group_by=[] order_by=[] on_problems=Problem_Behavior.Report_Warning =
add_row_number : Text -> Integer -> Integer -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
add_row_number self (name:Text="Row") (from:Integer=1) (step:Integer=1) (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=Problem_Behavior.Report_Warning) =
Add_Row_Number.add_row_number self name from step group_by order_by on_problems
## ALIAS Add Column, Update Column, New Column
@ -1784,13 +1840,13 @@ type Table
B | Name | Another
B | Country | Germany
@key_columns Widget_Helpers.make_column_name_vector_selector
transpose : Vector (Integer | Text | Column_Selector) | Text | Integer -> Text -> Text -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns | Duplicate_Output_Column_Names
transpose : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Text -> Text -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns | Duplicate_Output_Column_Names
transpose self (key_columns = []) (attribute_column_name="Name") (value_column_name="Value") (error_on_missing_columns=True) (on_problems = Report_Warning) =
columns_helper = self.columns_helper
unique = Unique_Name_Strategy.new
problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns
id_columns = columns_helper.select_columns_helper key_columns False problem_builder
id_columns = columns_helper.select_columns_helper key_columns Case_Sensitivity.Default False problem_builder
selected_names = Map.from_vector (id_columns.map column-> [column.name, True])
@ -1855,7 +1911,7 @@ type Table
@group_by Widget_Helpers.make_column_name_vector_selector
@name_column Widget_Helpers.make_column_name_selector
@values (Widget_Helpers.make_aggregate_column_selector include_group_by=False)
cross_tab : Vector (Integer | Text | Column_Selector | Aggregate_Column) | Text | Integer -> (Text | Integer) -> Aggregate_Column | Vector Aggregate_Column -> Problem_Behavior -> Table ! Missing_Input_Columns | Invalid_Aggregate_Column | Floating_Point_Equality | Invalid_Aggregation | Unquoted_Delimiter | Additional_Warnings
cross_tab : Vector (Integer | Text | Regex | Aggregate_Column) | Text | Integer | Regex -> (Text | Integer) -> Aggregate_Column | Vector Aggregate_Column -> Problem_Behavior -> Table ! Missing_Input_Columns | Invalid_Aggregate_Column | Floating_Point_Equality | Invalid_Aggregation | Unquoted_Delimiter | Additional_Warnings
cross_tab self group_by name_column values=Aggregate_Column.Count (on_problems=Report_Warning) =
columns_helper = self.columns_helper
problem_builder = Problem_Builder.new error_on_missing_columns=True
@ -1872,8 +1928,8 @@ type Table
ix : Integer -> [ix]
name : Text -> [name]
_ -> Error.throw (Illegal_Argument.Error "name_column must be a column index or name.")
matched_name = columns_helper.select_columns_helper name_column_selector True problem_builder
grouping = columns_helper.select_columns_helper (normalize_group_by group_by) True problem_builder
matched_name = columns_helper.select_columns_helper name_column_selector Case_Sensitivity.Default True problem_builder
grouping = columns_helper.select_columns_helper (normalize_group_by group_by) Case_Sensitivity.Default True problem_builder
## Validate the values
values_vector = case values of
@ -2037,7 +2093,8 @@ type Table
been replaced with the provided default(s).
Arguments:
- columns: The column(s) to fill Nothing values.
- columns: Specifies columns by a name, index or regular expression to
match names, or a Vector of these.
- default: The value to replace missing values with. If this argument
is a column, the value from `default` at the corresponding position
will be used.
@ -2047,7 +2104,7 @@ type Table
fill_nothing = table.fill_nothing ["col0", "col1"] 20.5
@columns Widget_Helpers.make_column_name_vector_selector
fill_nothing : Vector (Integer | Text | Column_Selector) | Text | Integer -> Column | Any -> Table
fill_nothing : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Any -> Table
fill_nothing self columns default =
transformer col = col.fill_nothing default
Table_Helpers.replace_columns_with_transformed_columns self columns transformer
@ -2058,7 +2115,8 @@ type Table
provided default(s).
Arguments:
- columns: The column(s) to fill empty values.
- columns: Specifies columns by a name, index or regular expression to
match names, or a Vector of these.
- default: The value to replace empty values with. If this argument
is a column, the value from `default` at the corresponding position
will be used.
@ -2068,7 +2126,7 @@ type Table
fill_empty = table.fill_empty ["col0", "col1"] "hello"
@columns Widget_Helpers.make_column_name_vector_selector
fill_empty : Vector (Integer | Text | Column_Selector) | Text | Integer -> Column | Any -> Table
fill_empty : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Any -> Table
fill_empty self columns default =
transformer col = col.fill_empty default
Table_Helpers.replace_columns_with_transformed_columns self columns transformer
@ -2104,7 +2162,7 @@ type Table
column.replace '"(.*?)"'.to_regex '($1)'
@columns Widget_Helpers.make_column_name_vector_selector
replace : Vector (Integer | Text | Column_Selector) | Text | Integer -> Text | Column | Regex -> Text | Column -> Case_Sensitivity -> Boolean -> Column
replace : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Text | Column | Regex -> Text | Column -> Case_Sensitivity -> Boolean -> Table
replace self columns term="" new_text="" case_sensitivity=Case_Sensitivity.Sensitive only_first=False =
    # Per-column operation: delegate to `Column.replace` with the same
    # term, replacement text, case sensitivity and `only_first` settings.
    transformer col = col.replace term new_text case_sensitivity only_first
    # Apply the transformer to the columns chosen by `columns`; the result type
    # is declared as `Table`, as for `fill_nothing` and `fill_empty`, which use
    # the same helper.
    Table_Helpers.replace_columns_with_transformed_columns self columns transformer

View File

@ -1,7 +1,6 @@
from Standard.Base import all
import project.Data.Column.Column
import project.Data.Column_Selector.Column_Selector
import project.Data.Set_Mode.Set_Mode
import project.Data.Sort_Column.Sort_Column
import project.Data.Table.Table
@ -18,10 +17,10 @@ polyglot java import org.enso.table.data.column.storage.numeric.LongRangeStorage
polyglot java import org.enso.table.data.column.storage.numeric.LongStorage
## PRIVATE
add_row_number : Table -> Text -> Integer -> Integer -> (Column_Selector | Vector Text) -> Vector (Text | Sort_Column) | Text | Sort_Column -> Problem_Behavior -> Table
add_row_number : Table -> Text -> Integer -> Integer -> Text | Integer | Regex | Vector (Integer | Text | Regex) -> Vector (Text | Sort_Column) | Text | Sort_Column -> Problem_Behavior -> Table
add_row_number table name from step group_by order_by on_problems =
problem_builder = Problem_Builder.new error_on_missing_columns=True
grouping_columns = table.columns_helper.select_columns_helper group_by True problem_builder
grouping_columns = table.columns_helper.select_columns_helper group_by Case_Sensitivity.Default True problem_builder
Unordered_Multi_Value_Key.validate_grouping_columns grouping_columns problem_builder
ordering = Table_Helpers.resolve_order_by table.columns order_by problem_builder
problem_builder.attach_problems_before on_problems <|

View File

@ -2,7 +2,6 @@ from Standard.Base import all hiding First, Last
import project.Data.Aggregate_Column.Aggregate_Column
import project.Data.Column.Column
import project.Data.Column_Selector.Column_Selector
import project.Data.Sort_Column.Sort_Column
import project.Data.Table.Table
import project.Data.Type.Value_Type.Value_Type
@ -149,9 +148,9 @@ resolve_aggregate table problem_builder aggregate_column =
res = columns_helper.resolve_column_or_expression c problem_builder
res.if_nothing (Error.throw Internal_Missing_Column_Error)
resolve_selector_to_vector : Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> Vector Column ! Internal_Missing_Column_Error
resolve_selector_to_vector : Text | Integer | Vector (Integer | Text) -> Vector Column ! Internal_Missing_Column_Error
resolve_selector_to_vector selector =
resolved = columns_helper.select_columns_helper selector reorder=True problem_builder
resolved = columns_helper.select_columns_helper selector Case_Sensitivity.Default True problem_builder
if resolved.is_empty then Error.throw Internal_Missing_Column_Error else resolved
resolve_order_by selector = case selector of

View File

@ -2,8 +2,8 @@ from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.Errors.Illegal_State.Illegal_State
import project.Data.Aggregate_Column.Aggregate_Column
import project.Data.Column.Column
import project.Data.Column_Selector.Column_Selector
import project.Data.Position.Position
import project.Data.Set_Mode.Set_Mode
import project.Data.Sort_Column.Sort_Column
@ -12,7 +12,6 @@ import project.Data.Type.Value_Type.Value_Type
import project.Data.Type.Value_Type_Helpers
import project.Internal.Problem_Builder.Problem_Builder
import project.Internal.Unique_Name_Strategy.Unique_Name_Strategy
from project.Data.Aggregate_Column.Aggregate_Column import Maximum, Minimum
from project.Errors import Ambiguous_Column_Rename, Column_Type_Mismatch, Invalid_Aggregate_Column, Missing_Input_Columns, No_Common_Type, No_Input_Columns_Selected, No_Output_Columns, Too_Many_Column_Names_Provided
polyglot java import java.util.HashSet
@ -41,7 +40,9 @@ type Table_Column_Helper
Arguments:
- selectors: Single instance or a Vector of names, indexes or
`Column_Selector`s.
regular expressions.
- case_sensitivity: Specifies whether the column name matching should be
case sensitive.
- reorder: Specifies whether to reorder the matched columns according to
the order of the selection criteria.
If `False`, the matched entries are returned in the same order as in
@ -57,23 +58,14 @@ type Table_Column_Helper
operation. By default, a warning is issued, but the operation proceeds.
If set to `Report_Error`, the operation fails with a dataflow error.
If set to `Ignore`, the operation proceeds without errors or warnings.
select_columns : Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> Boolean -> Boolean -> Problem_Behavior -> Vector
select_columns self selectors reorder error_on_missing_columns on_problems =
- error_on_empty: Specifies if the operation should fail if no columns
are selected.
select_columns : Text | Integer | Regex | Vector (Integer | Text | Regex) -> Case_Sensitivity -> Boolean -> Boolean -> Problem_Behavior -> Boolean -> Vector
select_columns self (selectors:(Text | Integer | Regex | Vector)) (case_sensitivity:Case_Sensitivity) (reorder:Boolean) (error_on_missing_columns:Boolean) (on_problems:Problem_Behavior) (error_on_empty:Boolean=True) =
problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns
result = self.select_columns_helper selectors reorder problem_builder
result = self.select_columns_helper selectors case_sensitivity reorder problem_builder
problem_builder.attach_problems_before on_problems <|
if result.is_empty then Error.throw No_Output_Columns else result
## PRIVATE
Works like `select_columns` but will not throw `No_Output_Columns` error
and will return proper columns instead of internal columns.
Useful, when selecting a subset of columns to transform.
resolve_columns : Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> Boolean -> Problem_Behavior -> Boolean -> Vector
resolve_columns self selectors error_on_missing_columns on_problems reorder=False =
problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns
result = self.select_columns_helper selectors reorder problem_builder
problem_builder.attach_problems_before on_problems <|
result.map self.make_column
if error_on_empty && result.is_empty then Error.throw No_Output_Columns else result
## PRIVATE
A helper function encapsulating shared code for `remove_columns`
@ -86,17 +78,19 @@ type Table_Column_Helper
Arguments:
- selectors: Single instance or a Vector of names, indexes or
`Column_Selector`s.
regular expressions.
- case_sensitivity: Specifies whether the column name matching should be
case sensitive.
- error_on_missing_columns: Specifies if missing columns should be raised
as error regardless of `on_problems`.
- on_problems: Specifies the behavior when a problem occurs during the
operation. By default, a warning is issued, but the operation proceeds.
If set to `Report_Error`, the operation fails with a dataflow error.
If set to `Ignore`, the operation proceeds without errors or warnings.
remove_columns : Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> Boolean -> Problem_Behavior -> Vector
remove_columns self selectors error_on_missing_columns on_problems =
remove_columns : Text | Integer | Regex | Vector (Integer | Text | Regex) -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Vector
remove_columns self (selectors:(Text | Integer | Regex | Vector)) (case_sensitivity:Case_Sensitivity) (error_on_missing_columns:Boolean) (on_problems:Problem_Behavior) =
problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns
selection = self.select_columns_helper selectors reorder=False problem_builder
selection = self.select_columns_helper selectors case_sensitivity False problem_builder
selected_names = Map.from_vector (selection.map column-> [column.name, True])
result = self.internal_columns.filter column->
should_be_removed = selected_names.get column.name False
@ -115,19 +109,21 @@ type Table_Column_Helper
Arguments:
- selectors: Single instance or a Vector of names, indexes or
`Column_Selector`s.
regular expressions.
- position: Specifies how to place the selected columns in relation to
the columns which were not matched by the `selector` (if any).
- case_sensitivity: Specifies whether the column name matching should be
case sensitive.
- error_on_missing_columns: Specifies if missing columns should be raised
as error regardless of `on_problems`.
- on_problems: Specifies the behavior when a problem occurs during the
operation. By default, a warning is issued, but the operation proceeds.
If set to `Report_Error`, the operation fails with a dataflow error.
If set to `Ignore`, the operation proceeds without errors or warnings.
reorder_columns : Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> Position -> Boolean -> Problem_Behavior -> Vector
reorder_columns self selectors position error_on_missing_columns on_problems =
reorder_columns : Text | Integer | Regex | Vector (Integer | Text | Regex) -> Position -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Vector
reorder_columns self (selectors:(Text | Integer | Regex | Vector)) (position:Position) (case_sensitivity:Case_Sensitivity) (error_on_missing_columns:Boolean) (on_problems:Problem_Behavior) =
problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns
selection = self.select_columns_helper selectors reorder=True problem_builder
selection = self.select_columns_helper selectors case_sensitivity True problem_builder
problem_builder.attach_problems_before on_problems <|
selected_names = Map.from_vector (selection.map column-> [column.name, True])
other_columns = self.internal_columns.filter column->
@ -143,31 +139,28 @@ type Table_Column_Helper
provided selection criteria.
Arguments:
- selectors: Single instance or a Vector of names, indexes or
`Column_Selector`s.
- selectors: Single instance or a Vector of names, indexes or regular
expressions.
- case_sensitivity: Specifies whether to match the column names ignoring
case.
- reorder: Specifies whether to reorder the matched columns according to
the order of the selection criteria.
If `False`, the matched entries are returned in the same order as in
the input.
If `True`, the matched entries are returned in the order of the
criteria matching them. If a single object has been matched by multiple
criteria, it is placed in the group belonging to the first matching
criterion on the list. If a single criterion's group has more than one
element, their relative order is the same as in the input.
the order of the selection criteria. If `False`, the matched entries
are returned in the same order as in the input. If `True`, the matched
entries are returned in the order of the first criterion matching them.
If a single criterion matches more than one column, their order is the
same as in the input table.
- problem_builder: Encapsulates the aggregation of encountered problems.
select_columns_helper : Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> Boolean -> Problem_Builder -> Vector
select_columns_helper self selectors reorder problem_builder =
resolve_selector selector = case selector of
name : Text -> resolve_selector (Column_Selector.By_Name name Case_Sensitivity.Sensitive False)
ix : Integer -> if is_index_valid self.internal_columns.length ix then [self.internal_columns.at ix] else
problem_builder.report_oob_indices [ix]
[]
Column_Selector.By_Name name case_sensitivity use_regex ->
matches = match_columns name case_sensitivity use_regex self.internal_columns
if matches.is_empty then problem_builder.report_missing_input_columns [name]
matches
Column_Selector.Blank_Columns when_any treat_nans_as_blank ->
get_blank_columns when_any treat_nans_as_blank self.internal_columns self.make_column self.table self.materialize
select_columns_helper : Text | Integer | Regex | Vector (Integer | Text | Regex) -> Case_Sensitivity -> Boolean -> Problem_Builder -> Vector
select_columns_helper self (selectors:(Text | Integer | Regex | Vector)) (case_sensitivity:Case_Sensitivity) (reorder:Boolean) (problem_builder:Problem_Builder) =
resolve_selector selector =
case selector of
ix : Integer -> if is_index_valid self.internal_columns.length ix then [self.internal_columns.at ix] else
problem_builder.report_oob_indices [ix]
[]
_ ->
matches = match_columns selector case_sensitivity self.internal_columns
if matches.is_empty then problem_builder.report_missing_input_columns [selector]
matches
vector = case selectors of
_ : Vector -> selectors
@ -198,6 +191,56 @@ type Table_Column_Helper
problem_builder.report_oob_indices [selector]
Nothing
## PRIVATE
A helper method that gets the columns from the provided table that are
completely blank or have some blanks.
Arguments:
- when_any: By default, only columns consisting of all blank cells are
selected. If set to `True`, columns with one or more blank values are
selected.
- treat_nans_as_blank: If `True`, then `Number.nan` is considered as
blank.
- invert_selection: If `True`, then the selection is inverted.
select_blank_columns_helper : Boolean -> Boolean -> Boolean -> Vector
select_blank_columns_helper self when_any:Boolean treat_nans_as_blank:Boolean invert_selection:Boolean=False =
# For every internal column build a 1/0 indicator column (1 where the cell
# is blank), named "blanks_<index>" after the position of the column.
blanks = self.internal_columns.map_with_index ix-> internal_column->
column = self.make_column internal_column
blank_indicator = column.is_blank treat_nans_as_blank
blank_indicator.iif 1 0 . rename "blanks_"+ix.to_text
## We cannot just use a custom_column in the aggregate because of
how the column selector works. We may need to revisit this. For
now we need to use tricks like that:
To be backend agnostic, we cannot create a new table with the
columns above. Instead, we add our blank columns to the table
and then remove any other columns. We do not have to deal
with name conflicts, as adding a new column with a clashing
name does not affect the old column or derived columns.
table_with_blank_indicators = blanks.fold self.table tbl-> blanks_col-> tbl.set blanks_col
just_indicators = table_with_blank_indicators.select_columns (blanks.map .name) on_problems=Problem_Behavior.Report_Error
# Maximum is equivalent to Exists and Minimum is equivalent to Forall.
col_aggregate = if when_any then Aggregate_Column.Maximum _ else Aggregate_Column.Minimum _
aggregates = blanks.map blanks_col-> col_aggregate blanks_col.name
aggregate_result = just_indicators.aggregate aggregates on_problems=Problem_Behavior.Report_Error
# A dataflow error coming out of the aggregation is escalated to an
# `Illegal_State` panic; otherwise the result is passed to `self.materialize`.
materialized_result = self.materialize <| aggregate_result.catch Any error->
msg = "Unexpected dataflow error has been thrown in an `select_blank_columns_helper`. This is a bug in the Table library. The unexpected error was: "+error.to_display_text
Panic.throw (Illegal_State.Error message=msg cause=error)
# The aggregated indicators are read from the first row; position `i`
# corresponds to the `i`-th entry of `self.internal_columns`.
counts = materialized_result.rows.first
# Keep the internal columns flagged as blank, or the remaining ones when
# `invert_selection` is set.
self.internal_columns.filter_with_index i-> _->
include = case counts.at i of
## No rows in input, so treating as blank by convention.
Nothing -> True
1 -> True
0 -> False
unexpected ->
Panic.throw (Illegal_State.Error "Unexpected result: "+unexpected.to_display_text+". Perhaps an implementation bug of `select_blank_columns_helper`.")
if invert_selection then include.not else include
## PRIVATE
A helper function encapsulating shared code for `rename_columns`
implementations of various Table variants. See the documentation for the
@ -212,14 +255,16 @@ type Table_Column_Helper
- internal_columns: A list of all columns in a table.
- mapping: A `Map` from column selectors (names, indexes or regular
expressions) to new names, or a `Vector` of new names applied to the columns in order.
- case_sensitivity: Specifies whether to match the column names ignoring
case.
- error_on_missing_columns: If set to `True`, missing columns are treated as
error regardless of `on_problems`.
- on_problems: Specifies the behavior when a problem occurs during the
operation. By default, a warning is issued, but the operation proceeds.
If set to `Report_Error`, the operation fails with a dataflow error.
If set to `Ignore`, the operation proceeds without errors or warnings.
rename_columns : Vector -> Map (Text | Integer | Column_Selector) Text | Vector Text -> Boolean -> Problem_Behavior -> Map Text Text
rename_columns internal_columns mapping error_on_missing_columns on_problems =
rename_columns : Vector -> Map (Text | Integer | Regex) Text | Vector Text -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Map Text Text
rename_columns (internal_columns:Vector) (mapping:(Map | Vector)) (case_sensitivity:Case_Sensitivity) (error_on_missing_columns:Boolean) (on_problems:Problem_Behavior) =
## Convert Vector of Pairs to Map
is_vec_pairs = mapping.is_a Vector && mapping.length > 0 && (mapping.first.is_a Text . not)
case is_vec_pairs of
@ -246,22 +291,18 @@ rename_columns internal_columns mapping error_on_missing_columns on_problems =
internal_columns.take good_names.length . zip good_names
_ : Map ->
resolve_rename selector replacement = case selector of
name : Text -> resolve_rename (Column_Selector.By_Name name Case_Sensitivity.Sensitive False) replacement
ix : Integer -> if is_index_valid internal_columns.length ix then [Pair.new (internal_columns.at ix) replacement] else
problem_builder.report_oob_indices [ix]
[]
Column_Selector.By_Name name case_sensitivity use_regex ->
matches = match_columns name case_sensitivity use_regex internal_columns
_ ->
matches = match_columns selector case_sensitivity internal_columns
case matches.is_empty of
True ->
problem_builder.report_missing_input_columns [name]
problem_builder.report_missing_input_columns [selector]
[]
False ->
if use_regex.not then matches.map c-> Pair.new c replacement else
pattern = Regex.compile name case_insensitive=case_sensitivity.is_case_insensitive_in_memory
matches.map c-> Pair.new c (pattern.replace c.name replacement)
Column_Selector.Blank_Columns _ _ ->
Error.throw (Illegal_Argument.Error "Cannot use `Blank_Columns` to rename.")
if selector.is_a Regex . not then matches.map c-> Pair.new c replacement else
matches.map c-> Pair.new c (selector.replace c.name replacement)
builder = mapping.to_vector.fold Vector.new_builder builder-> pair-> builder.append_vector_range (resolve_rename pair.first pair.second)
builder.to_vector
@ -344,10 +385,15 @@ is_index_valid length ix =
## PRIVATE
A helper method to match columns by name
match_columns : Text -> Case_Sensitivity -> Boolean -> Vector -> Vector
match_columns name case_sensitivity use_regex columns =
match = case_sensitivity.create_match_function use_regex
columns.filter c-> match c.name name
match_columns : Text | Regex -> Case_Sensitivity -> Vector -> Vector
match_columns (name : Text | Regex) (case_sensitivity : Case_Sensitivity) (columns : Vector) =
    # Build a predicate over a column name: a regular expression is recompiled
    # with the requested case sensitivity, plain text is compared directly.
    is_match = case name of
        _ : Regex -> name.recompile case_sensitivity . matches
        _ : Text -> case case_sensitivity of
            Case_Sensitivity.Insensitive locale -> name.equals_ignore_case locale=locale
            Case_Sensitivity.Sensitive -> (== name)
            Case_Sensitivity.Default -> (== name)
    columns.filter column-> is_match column.name
## PRIVATE
A helper type used by transform helpers.
@ -379,7 +425,8 @@ resolve_order_by internal_columns column_selectors problem_builder =
problem_builder.report_oob_indices [ix]
[]
Sort_Column.Select_By_Name name _ case_sensitivity use_regex ->
matches = match_columns name case_sensitivity use_regex internal_columns
regex_or_text = if use_regex then name.to_regex else name
matches = match_columns regex_or_text case_sensitivity internal_columns
if matches.is_empty then problem_builder.report_missing_input_columns [name]
matches.map c-> Column_Transform_Element.Value c selector
selectors_vec = case column_selectors of
@ -447,46 +494,6 @@ unify_result_type_for_union column_set all_tables allow_type_widening problem_bu
problem_builder.report_other_warning (Column_Type_Mismatch.Error column_set.name first_type got_type)
Nothing
## PRIVATE
A helper method that gets the columns from the provided table that are
complete blank or have some blanks.
get_blank_columns when_any treat_nans_as_blank internal_columns make_column table materialize =
blanks = internal_columns.map_with_index ix-> internal_column->
column = make_column internal_column
blank_indicator = column.is_blank treat_nans_as_blank
blank_indicator.iif 1 0 . rename "blanks_"+ix.to_text
## We cannot just use a custom_column in the aggregate because of
how the column selector works. We may need to revisit this. For
now we need to use tricks like that:
To be backend agnostic, we cannot create a new table with the
columns above. Instead, we add our blank columns to the table
and then remove any other columns we. We do not have to deal
with name conflicts, as adding a new column with a clashing
name does not affect the old column or derived columns.
table_with_blank_indicators = blanks.fold table table-> blanks_col-> table.set blanks_col
just_indicators = table_with_blank_indicators.select_columns (blanks.map .name) on_problems=Problem_Behavior.Report_Error
# Maximum is equivalent to Exists and Minimum is equivalent to Forall.
col_aggregate = if when_any then Maximum _ else Minimum _
aggregates = blanks.map blanks_col-> col_aggregate blanks_col.name
aggregate_result = just_indicators.aggregate aggregates on_problems=Problem_Behavior.Report_Error
materialized_result = materialize <| aggregate_result.catch Any error->
msg = "Unexpected dataflow error has been thrown in an `select_columns_helper`. This is a bug in the Table library. The unexpected error was: "+error.to_display_text
Panic.throw (Illegal_State.Error message=msg cause=error)
counts = materialized_result.rows.first
internal_columns.filter_with_index i-> _->
case counts.at i of
## No rows in input, so treating as blank by convention.
Nothing -> True
1 -> True
0 -> False
unexpected ->
Panic.throw (Illegal_State.Error "Unexpected result: "+unexpected.to_display_text+". Perhaps an implementation bug of Column_Selector.Blank_Columns.")
## PRIVATE
Replace a set of columns in the table with a new set of columns. The old
columns must all exist in the table, and the list of new columns must be the
@ -503,9 +510,9 @@ replace_columns_with_columns table old_columns new_columns =
Replace a set of columns in the table with a new set of columns produced by
transforming the old columns. The old columns must all exist in the table,
and the new columns must all have the same name.
replace_columns_with_transformed_columns : Table -> Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> (Column -> Column) -> Boolean -> Problem_Behavior -> Table
replace_columns_with_transformed_columns : Table -> Text | Integer | Regex | Vector (Integer | Text | Regex) -> (Column -> Column) -> Boolean -> Problem_Behavior -> Table
replace_columns_with_transformed_columns table selectors transformer error_on_missing_columns=True on_problems=Report_Warning =
internal_columns = table.columns_helper.select_columns selectors reorder=False error_on_missing_columns=error_on_missing_columns on_problems=on_problems
internal_columns = table.columns_helper.select_columns selectors Case_Sensitivity.Default reorder=False error_on_missing_columns=error_on_missing_columns on_problems=on_problems
columns = internal_columns.map table.columns_helper.make_column
new_columns = columns.map transformer
replace_columns_with_columns table columns new_columns

View File

@ -2,7 +2,6 @@ from Standard.Base import all
import project.Data.Aggregate_Column.Aggregate_Column
import project.Data.Column.Column
import project.Data.Column_Selector.Column_Selector
import project.Data.Column_Vector_Extensions
import project.Data.Data_Formatter.Data_Formatter
import project.Data.Join_Condition.Join_Condition
@ -28,7 +27,6 @@ from project.Excel.Excel_Section.Excel_Section import Cell_Range, Range_Names, S
export project.Data.Aggregate_Column.Aggregate_Column
export project.Data.Column.Column
export project.Data.Column_Selector.Column_Selector
export project.Data.Column_Vector_Extensions
export project.Data.Data_Formatter.Data_Formatter
export project.Data.Join_Condition.Join_Condition

View File

@ -1,6 +1,6 @@
from Standard.Base import all
from Standard.Table import Column_Selector, Value_Type
from Standard.Table import Value_Type
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import Count_Distinct
from Standard.Table.Errors import all
@ -58,12 +58,21 @@ spec setup =
t4.row_count . should_equal 0
t4.at "X" . to_vector . should_equal []
Test.specify "should allow to select blank columns" <|
r1 = t1.select_blank_columns
r1.columns.map .name . should_equal ["f"]
r1.at "f" . to_vector . should_equal [Nothing, "", Nothing, ""]
r2 = t1.select_blank_columns when_any=True
r2.columns.map .name . should_equal ["a", "b", "d", "e", "f"]
r2.at "d" . to_vector . should_equal [Nothing, True, False, True]
Test.specify "should allow to remove blank columns" <|
r1 = t1.remove_columns (Column_Selector.Blank_Columns when_any=False)
r1 = t1.remove_blank_columns
r1.columns.map .name . should_equal ["a", "b", "c", "d", "e"]
r1.at "a" . to_vector . should_equal [1, Nothing, 3, 4]
r2 = t1.remove_columns (Column_Selector.Blank_Columns when_any=True)
r2 = t1.remove_blank_columns when_any=True
r2.columns.map .name . should_equal ["c"]
r2.at "c" . to_vector . should_equal [10, 20, 30, 40]
@ -84,14 +93,18 @@ spec setup =
r2.at "X" . to_vector . to_text . should_equal "[2.0, 1.5, NaN, NaN]"
r2.at "Y" . to_vector . should_equal [Nothing, 2.0, Nothing, 5.0]
r3 = t4.remove_columns (Column_Selector.Blank_Columns when_any=False)
r3 = t4.remove_blank_columns
r3.columns.map .name . should_equal ["c", "g", "h"]
r3.at "g" . to_vector . to_text . should_equal "[NaN, 1.0, 2.0, 3.4]"
r4 = t4.remove_columns (Column_Selector.Blank_Columns when_any=True)
r4 = t4.remove_blank_columns when_any=True
r4.columns.map .name . should_equal ["c", "g"]
r4.at "g" . to_vector . to_text . should_equal "[NaN, 1.0, 2.0, 3.4]"
r5 = t4.select_blank_columns when_any=True
r5.columns.map .name . should_equal ["h"]
r5.at "h" . to_vector . to_text . should_equal "[NaN, Nothing, NaN, Nothing]"
Test.specify "should allow to treat NaNs as blank if asked" <|
r1 = t3.filter_blank_rows when_any=True treat_nans_as_blank=True
# We cannot use `Vector.==` because `NaN != NaN`.
@ -102,42 +115,35 @@ spec setup =
r2.at "X" . to_vector . to_text . should_equal "[2.0, 1.5, NaN]"
r2.at "Y" . to_vector . should_equal [Nothing, 2.0, 5.0]
r3 = t4.remove_columns (Column_Selector.Blank_Columns when_any=False treat_nans_as_blank=True)
r3 = t4.remove_blank_columns when_any=False treat_nans_as_blank=True
r3.columns.map .name . should_equal ["c", "g"]
r3.at "g" . to_vector . to_text . should_equal "[NaN, 1.0, 2.0, 3.4]"
r4 = t4.remove_columns (Column_Selector.Blank_Columns when_any=True treat_nans_as_blank=True)
r4.columns.map .name . should_equal ["c"]
r4.at "c" . to_vector . should_equal [10, 20, 40, 30]
r4 = t4.select_blank_columns when_any=False treat_nans_as_blank=True
r4.columns.map .name . should_equal ["h"]
r4.at "h" . to_vector . to_text . should_equal "[NaN, Nothing, NaN, Nothing]"
r5 = t4.remove_blank_columns when_any=True treat_nans_as_blank=True
r5.columns.map .name . should_equal ["c"]
r5.at "c" . to_vector . should_equal [10, 20, 40, 30]
r6 = t4.select_blank_columns when_any=True treat_nans_as_blank=True
r6.columns.map .name . should_equal ["g", "h"]
r6.at "h" . to_vector . to_text . should_equal "[NaN, Nothing, NaN, Nothing]"
if test_selection.is_nan_and_nothing_distinct.not then
Test.specify "this backend treats NaN as Nothing" <|
t3.at "X" . to_vector . should_equal [2.0, 1.5, Nothing, Nothing]
t3.at "X" . is_nan . to_vector . should_fail_with Unsupported_Database_Operation
Test.specify "Blank_Columns selector should work for all kinds of methods accepting Column_Selector" <|
t = table_builder [["X", [1, 2, 3, 4]], ["Y", [Nothing, "", Nothing, Nothing]], ["Z", [Nothing, True, False, Nothing]]]
t.select_columns Column_Selector.Blank_Columns . columns . map .name . should_equal ["Y"]
t.select_columns (Column_Selector.Blank_Columns when_any=True) . columns . map .name . should_equal ["Y", "Z"]
t.reorder_columns Column_Selector.Blank_Columns . columns . map .name . should_equal ["Y", "X", "Z"]
r1 = t.aggregate [Count_Distinct Column_Selector.Blank_Columns]
r1.columns . map .name . should_equal ["Count Distinct Y"]
r1.at "Count Distinct Y" . to_vector . should_equal [2]
r2 = t.aggregate [(Count_Distinct Column_Selector.Blank_Columns ignore_nothing=True)]
r2.columns . map .name . should_equal ["Count Distinct Y"]
r2.at "Count Distinct Y" . to_vector . should_equal [1]
Test.specify "Blank_Columns selector should deal with edge cases" <|
Test.specify "select_blank_columns and remove_blank_columns should deal with edge cases" <|
t = table_builder [["X", [1, 2, 3, 4]]]
no_rows = t.filter "X" (Filter_Condition.Equal to=0)
no_rows.row_count . should_equal 0
no_rows.at "X" . to_vector . should_equal []
no_rows.select_columns Column_Selector.Blank_Columns . columns . map .name . should_equal ["X"]
no_rows.remove_columns Column_Selector.Blank_Columns . columns . map .name . should_fail_with No_Output_Columns
no_rows.select_blank_columns . columns . map .name . should_equal ["X"]
no_rows.remove_blank_columns . columns . map .name . should_fail_with No_Output_Columns
Test.group prefix+"Filling Missing Values" <|
Test.specify "should coerce long and double types to double" <|

View File

@ -1,7 +1,6 @@
from Standard.Base import all
from Standard.Table import Position
from Standard.Table.Data.Column_Selector.Column_Selector import By_Name
from Standard.Table.Errors import all
from Standard.Test import Test, Problems
@ -28,7 +27,7 @@ spec setup =
Test.group prefix+"Table.select_columns" <|
Test.specify "should work as shown in the doc examples" <|
expect_column_names ["foo", "bar"] <| table.select_columns ["bar", "foo"]
expect_column_names ["bar", "Baz", "foo 1", "foo 2"] <| table.select_columns [By_Name "foo.+" use_regex=True, By_Name "b.*" use_regex=True]
expect_column_names ["bar", "Baz", "foo 1", "foo 2"] <| table.select_columns ["foo.+".to_regex, "b.*".to_regex True]
expect_column_names ["abcd123", "foo", "bar"] <| table.select_columns [-1, 0, 1] reorder=True
Test.specify "should allow to reorder columns if asked to" <|
@ -38,11 +37,11 @@ spec setup =
table_2 . at "foo" . to_vector . should_equal [1,2,3]
Test.specify "should correctly handle regex matching" <|
expect_column_names ["foo"] <| table.select_columns [By_Name "foo" Case_Sensitivity.Sensitive use_regex=True]
expect_column_names ["ab.+123", "abcd123"] <| table.select_columns [By_Name "a.*" Case_Sensitivity.Sensitive use_regex=True]
expect_column_names ["ab.+123", "abcd123"] <| table.select_columns [By_Name "ab.+123" Case_Sensitivity.Sensitive use_regex=True]
expect_column_names ["foo"] <| table.select_columns ["foo".to_regex]
expect_column_names ["ab.+123", "abcd123"] <| table.select_columns ["a.*".to_regex]
expect_column_names ["ab.+123", "abcd123"] <| table.select_columns ["ab.+123".to_regex]
expect_column_names ["ab.+123"] <| table.select_columns ["ab.+123"]
expect_column_names ["abcd123"] <| table.select_columns [By_Name "abcd123" Case_Sensitivity.Sensitive use_regex=True]
expect_column_names ["abcd123"] <| table.select_columns ["abcd123".to_regex]
Test.specify "should allow negative indices" <|
expect_column_names ["foo", "bar", "foo 2"] <| table.select_columns [-3, 0, 1]
@ -50,8 +49,8 @@ spec setup =
Test.specify "should allow mixed names and indexes" <|
expect_column_names ["foo", "bar", "foo 2"] <| table.select_columns [-3, "bar", 0]
expect_column_names ["foo 2", "bar", "foo"] <| table.select_columns [-3, "bar", 0] reorder=True
expect_column_names ["foo", "bar", "foo 1", "foo 2", "abcd123"] <| table.select_columns [-1, "bar", By_Name "foo.*" Case_Sensitivity.Sensitive use_regex=True]
expect_column_names ["foo", "foo 1", "foo 2", "bar", "abcd123"] <| table.select_columns [By_Name "foo.*" Case_Sensitivity.Sensitive use_regex=True, "bar", "foo", -1] reorder=True
expect_column_names ["foo", "bar", "foo 1", "foo 2", "abcd123"] <| table.select_columns [-1, "bar", "foo.*".to_regex]
expect_column_names ["foo", "foo 1", "foo 2", "bar", "abcd123"] <| table.select_columns ["foo.*".to_regex, "bar", "foo", -1] reorder=True
if test_selection.supports_case_sensitive_columns then
Test.specify "should correctly handle exact matches matching multiple names due to case insensitivity" <|
@ -60,11 +59,11 @@ spec setup =
col2 = ["bar", [4,5,6]]
col3 = ["Bar", [7,8,9]]
table_builder [col1, col2, col3]
expect_column_names ["bar", "Bar"] <| table.select_columns [By_Name "bar"]
expect_column_names ["bar", "Bar"] <| table.select_columns ["bar"] Case_Sensitivity.Insensitive
Test.specify "should correctly handle regexes matching multiple names" <|
expect_column_names ["foo", "bar", "foo 1", "foo 2"] <| table.select_columns [By_Name "b.*" Case_Sensitivity.Sensitive use_regex=True, By_Name "f.+" Case_Sensitivity.Sensitive use_regex=True]
expect_column_names ["bar", "foo", "foo 1", "foo 2"] <| table.select_columns [By_Name "b.*" Case_Sensitivity.Sensitive use_regex=True, By_Name "f.+" Case_Sensitivity.Sensitive use_regex=True] reorder=True
expect_column_names ["foo", "bar", "foo 1", "foo 2"] <| table.select_columns ["b.*".to_regex, "f.+".to_regex]
expect_column_names ["bar", "foo", "foo 1", "foo 2"] <| table.select_columns ["b.*".to_regex, "f.+".to_regex] reorder=True
Test.specify "should correctly handle problems: out of bounds indices" <|
selector = [1, 0, 100, -200, 300]
@ -104,12 +103,12 @@ spec setup =
table.select_columns ["bar", "foo", "foo", "bar"] reorder=False
Test.specify "should correctly handle edge-cases: duplicate matches due to case insensitivity" <|
selector = [By_Name "FOO", By_Name "foo"]
t = table.select_columns selector on_problems=Problem_Behavior.Report_Error
selector = ["FOO", "foo"]
t = table.select_columns selector Case_Sensitivity.Insensitive on_problems=Problem_Behavior.Report_Error
expect_column_names ["foo"] t
expect_column_names ["bar", "foo"] <|
table.select_columns [By_Name "BAR", By_Name "foo", By_Name "bar"] reorder=True
table.select_columns ["BAR", "foo", "bar"] Case_Sensitivity.Insensitive reorder=True
Test.specify "should correctly handle problems: unmatched names" <|
weird_name = '.*?-!@#!"'
@ -139,17 +138,17 @@ spec setup =
Test.group prefix+"Table.remove_columns" <|
Test.specify "should work as shown in the doc examples" <|
expect_column_names ["Baz", "foo 1", "foo 2", "ab.+123", "abcd123"] <| table.remove_columns ["bar", "foo"]
expect_column_names ["foo", "ab.+123", "abcd123"] <| table.remove_columns [By_Name "foo.+" Case_Sensitivity.Insensitive use_regex=True, By_Name "b.*" Case_Sensitivity.Insensitive use_regex=True]
expect_column_names ["foo", "ab.+123", "abcd123"] <| table.remove_columns ["foo.+".to_regex, "b.*".to_regex] Case_Sensitivity.Insensitive
expect_column_names ["Baz", "foo 1", "foo 2", "ab.+123"] <| table.remove_columns [-1, 0, 1]
Test.specify "should correctly handle regex matching" <|
last_ones = table.columns.drop 1 . map .name
expect_column_names last_ones <| table.remove_columns [By_Name "foo" Case_Sensitivity.Sensitive use_regex=True]
expect_column_names last_ones <| table.remove_columns ["foo".to_regex]
first_ones = ["foo", "bar", "Baz", "foo 1", "foo 2"]
expect_column_names first_ones <| table.remove_columns [By_Name "a.*" Case_Sensitivity.Sensitive use_regex=True]
expect_column_names first_ones <| table.remove_columns [By_Name "ab.+123" Case_Sensitivity.Sensitive use_regex=True]
expect_column_names first_ones+["abcd123"] <| table.remove_columns [By_Name "ab.+123"]
expect_column_names first_ones+["ab.+123"] <| table.remove_columns [By_Name "abcd123" Case_Sensitivity.Sensitive use_regex=True]
expect_column_names first_ones <| table.remove_columns ["a.*".to_regex]
expect_column_names first_ones <| table.remove_columns ["ab.+123".to_regex]
expect_column_names first_ones+["abcd123"] <| table.remove_columns ["ab.+123"] Case_Sensitivity.Insensitive
expect_column_names first_ones+["ab.+123"] <| table.remove_columns ["abcd123".to_regex]
Test.specify "should allow negative indices" <|
expect_column_names ["Baz", "foo 1", "ab.+123"] <| table.remove_columns [-1, -3, 0, 1]
@ -161,10 +160,10 @@ spec setup =
col2 = ["bar", [4,5,6]]
col3 = ["Bar", [7,8,9]]
table_builder [col1, col2, col3]
expect_column_names ["foo"] <| table.remove_columns (By_Name "bar")
expect_column_names ["foo"] <| table.remove_columns "bar" Case_Sensitivity.Insensitive
Test.specify "should correctly handle regexes matching multiple names" <|
expect_column_names ["Baz", "ab.+123", "abcd123"] <| table.remove_columns [By_Name "f.+" Case_Sensitivity.Sensitive use_regex=True, By_Name "b.*" Case_Sensitivity.Sensitive use_regex=True]
expect_column_names ["Baz", "ab.+123", "abcd123"] <| table.remove_columns ["f.+".to_regex, "b.*".to_regex]
Test.specify "should correctly handle problems: out of bounds indices" <|
selector = [1, 0, 100, -200, 300]
@ -192,8 +191,8 @@ spec setup =
expect_column_names ["bar", "Baz", "foo 1", "foo 2", "ab.+123", "abcd123"] t
Test.specify "should correctly handle edge-cases: duplicate matches due to case insensitivity" <|
selector = [By_Name "FOO", By_Name "foo"]
t = table.remove_columns selector on_problems=Problem_Behavior.Report_Error
selector = ["FOO", "foo"]
t = table.remove_columns selector Case_Sensitivity.Insensitive on_problems=Problem_Behavior.Report_Error
expect_column_names ["bar", "Baz", "foo 1", "foo 2", "ab.+123", "abcd123"] t
Test.specify "should correctly handle problems: unmatched names" <|
@ -209,11 +208,11 @@ spec setup =
Test.specify "should correctly handle problems: no columns in the output" <|
[Problem_Behavior.Ignore, Problem_Behavior.Report_Warning, Problem_Behavior.Report_Error].each pb->
selector = [By_Name ".*" Case_Sensitivity.Sensitive use_regex=True]
selector = [".*".to_regex]
t = table.remove_columns selector on_problems=pb
t.should_fail_with No_Output_Columns
selector_2 = [By_Name ".*" Case_Sensitivity.Sensitive use_regex=True, By_Name "hmmm" Case_Sensitivity.Sensitive use_regex=True]
selector_2 = [".*".to_regex, "hmmm".to_regex]
t1 = table.remove_columns selector_2
t1.should_fail_with No_Output_Columns
@ -221,17 +220,17 @@ spec setup =
Test.specify "should work as shown in the doc examples" <|
expect_column_names ["bar", "Baz", "foo 1", "foo 2", "ab.+123", "abcd123", "foo"] <| table.reorder_columns "foo" Position.After_Other_Columns
expect_column_names ["Baz", "foo 1", "foo 2", "ab.+123", "abcd123", "foo", "bar"] <| table.reorder_columns ["foo", "bar"] Position.After_Other_Columns
expect_column_names ["foo 1", "foo 2", "bar", "Baz", "foo", "ab.+123", "abcd123"] <| table.reorder_columns [By_Name "foo.+" Case_Sensitivity.Insensitive use_regex=True, By_Name "b.*" Case_Sensitivity.Insensitive use_regex=True]
expect_column_names ["foo 1", "foo 2", "bar", "Baz", "foo", "ab.+123", "abcd123"] <| table.reorder_columns ["foo.+".to_regex, "b.*".to_regex] case_sensitivity=Case_Sensitivity.Insensitive
expect_column_names ["bar", "foo", "Baz", "foo 1", "foo 2", "ab.+123", "abcd123"] <| table.reorder_columns [1, 0] Position.Before_Other_Columns
expect_column_names ["bar", "Baz", "foo 1", "foo 2", "ab.+123", "abcd123", "foo"] <| table.reorder_columns [0] Position.After_Other_Columns
Test.specify "should correctly handle regex matching" <|
expect_column_names ["bar", "Baz", "foo 1", "foo 2", "ab.+123", "abcd123", "foo"] <| table.reorder_columns [By_Name "foo" Case_Sensitivity.Sensitive use_regex=True] Position.After_Other_Columns
expect_column_names ["bar", "Baz", "foo 1", "foo 2", "ab.+123", "abcd123", "foo"] <| table.reorder_columns ["foo".to_regex] Position.After_Other_Columns
rest = ["foo", "bar", "Baz", "foo 1", "foo 2"]
expect_column_names ["ab.+123", "abcd123"]+rest <| table.reorder_columns [By_Name "a.*" Case_Sensitivity.Sensitive use_regex=True]
expect_column_names ["ab.+123", "abcd123"]+rest <| table.reorder_columns [By_Name "ab.+123" Case_Sensitivity.Sensitive use_regex=True]
expect_column_names ["ab.+123", "abcd123"]+rest <| table.reorder_columns ["a.*".to_regex]
expect_column_names ["ab.+123", "abcd123"]+rest <| table.reorder_columns ["ab.+123".to_regex]
expect_column_names ["ab.+123"]+rest+["abcd123"] <| table.reorder_columns ["ab.+123"]
expect_column_names ["abcd123"]+rest+["ab.+123"] <| table.reorder_columns [By_Name "abcd123" Case_Sensitivity.Sensitive use_regex=True]
expect_column_names ["abcd123"]+rest+["ab.+123"] <| table.reorder_columns ["abcd123".to_regex]
Test.specify "should allow negative indices" <|
expect_column_names ["abcd123", "foo 2", "foo", "bar", "Baz", "foo 1", "ab.+123"] <| table.reorder_columns [-1, -3, 0, 1]
@ -243,10 +242,10 @@ spec setup =
col2 = ["bar", [4,5,6]]
col3 = ["Bar", [7,8,9]]
table_builder [col1, col2, col3]
expect_column_names ["bar", "Bar", "foo"] <| table.reorder_columns [By_Name "bar"]
expect_column_names ["bar", "Bar", "foo"] <| table.reorder_columns ["bar"] case_sensitivity=Case_Sensitivity.Insensitive
Test.specify "should correctly handle regexes matching multiple names" <|
expect_column_names ["bar", "foo", "foo 1", "foo 2", "Baz", "ab.+123", "abcd123"] <| table.reorder_columns [By_Name "b.*" Case_Sensitivity.Sensitive use_regex=True, By_Name "f.+" Case_Sensitivity.Sensitive use_regex=True]
expect_column_names ["bar", "foo", "foo 1", "foo 2", "Baz", "ab.+123", "abcd123"] <| table.reorder_columns ["b.*".to_regex, "f.+".to_regex]
Test.specify "should correctly handle problems: out of bounds indices" <|
selector = [1, 0, 100, -200, 300]
@ -329,7 +328,7 @@ spec setup =
t1 = table_builder [["alpha", [1]], ["name=123", [2]], ["name= foo bar", [3]]]
expect_column_names ["alpha", "key:123", "key: foo bar"] <|
t1.rename_columns (Map.from_vector [[By_Name "name=(.*)" Case_Sensitivity.Sensitive use_regex=True, "key:$1"]])
t1.rename_columns (Map.from_vector [["name=(.*)".to_regex, "key:$1"]])
Test.specify "should work by index" <|
map = Map.from_vector [[0, "FirstColumn"], [-2, "Another"]]
@ -357,17 +356,17 @@ spec setup =
table.rename_columns map
Test.specify "should work by name case-insensitively" <|
map = Map.from_vector [[By_Name "ALPHA", "FirstColumn"], [By_Name "DELTA", "Another"]]
map = Map.from_vector [["ALPHA", "FirstColumn"], ["DELTA", "Another"]]
expect_column_names ["FirstColumn", "beta", "gamma", "Another"] <|
table.rename_columns map
table.rename_columns map Case_Sensitivity.Insensitive
Test.specify "should work by name using regex" <|
map = Map.from_vector [[By_Name "a.*" Case_Sensitivity.Sensitive use_regex=True, "FirstColumn"]]
map = Map.from_vector [["a.*".to_regex, "FirstColumn"]]
expect_column_names ["FirstColumn", "beta", "gamma", "delta"] <|
table.rename_columns map
Test.specify "should work by name using regex substitution" <|
map = Map.from_vector [[By_Name "a(.*)" Case_Sensitivity.Sensitive use_regex=True, "$1"]]
map = Map.from_vector [["a(.*)".to_regex, "$1"]]
expect_column_names ["lpha", "beta", "gamma", "delta"] <|
table.rename_columns map
@ -411,12 +410,12 @@ spec setup =
Test.specify "should correctly handle edge-cases: aliased selectors" <|
t = table_builder [["alpha", [1,2,3]], ["bet", [4,5,6]]]
map1 = Map.from_vector [[By_Name "a.*" Case_Sensitivity.Sensitive use_regex=True, "AA"], [By_Name ".*a" Case_Sensitivity.Sensitive use_regex=True, "AA"]]
map1 = Map.from_vector [["a.*".to_regex, "AA"], [".*a".to_regex, "AA"]]
t1 = t.rename_columns map1 on_problems=Problem_Behavior.Report_Error
Problems.assume_no_problems t1
expect_column_names ["AA", "bet"] t1
map2 = Map.from_vector [[By_Name "a.*" Case_Sensitivity.Sensitive use_regex=True, "StartsWithA"], [By_Name ".*a" Case_Sensitivity.Sensitive use_regex=True, "EndsWithA"]]
map2 = Map.from_vector [["a.*".to_regex, "StartsWithA"], [".*a".to_regex, "EndsWithA"]]
t2 = t.rename_columns map2 on_problems=Problem_Behavior.Report_Error
t2.should_fail_with Ambiguous_Column_Rename
err = t2.catch
@ -429,7 +428,7 @@ spec setup =
This is to show that even if distinct rename patterns match the
same column, if the resulting rename is unambiguous, no error is
raised.
map3 = Map.from_vector [[By_Name "a(.*)" Case_Sensitivity.Sensitive use_regex=True, "$1A"], [By_Name "(.*)aa" Case_Sensitivity.Sensitive use_regex=True, "$1aA"]]
map3 = Map.from_vector [["a(.*)".to_regex, "$1A"], ["(.*)aa".to_regex, "$1aA"]]
t4 = t3.rename_columns map3 on_problems=Problem_Behavior.Report_Error
Problems.assume_no_problems t4
expect_column_names ["aaA", "bbb"] t4

View File

@ -1,7 +1,7 @@
from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
from Standard.Table import Table, Data_Formatter, Column, Column_Selector
from Standard.Table import Table, Data_Formatter, Column
from Standard.Table.Data.Type.Value_Type import Value_Type, Auto
from Standard.Table.Errors import all
@ -309,7 +309,7 @@ spec =
Test.specify "should allow selecting columns by regex" <|
t1 = Table.new [["An", ["1", "2", "3"]], ["Am", ["4", "5", "6"]], ["C", ["7", "8", "9"]], ["D", ["10", "11", "12"]]]
r1 = t1.parse columns=[Column_Selector.By_Name "A.*" use_regex=True]
r1 = t1.parse columns="A.*".to_regex
r1.at "An" . to_vector . should_equal [1, 2, 3]
r1.at "Am" . to_vector . should_equal [4, 5, 6]
r1.at "C" . to_vector . should_equal ["7", "8", "9"]
@ -321,9 +321,10 @@ spec =
r1.should_fail_with Missing_Input_Columns
r1.catch.criteria . should_equal ["B", "C", "E"]
r2 = t1.parse columns=[Column_Selector.By_Name "A.+" use_regex=True]
regex = "A.+".to_regex case_insensitive=True
r2 = t1.parse columns=regex
r2.should_fail_with Missing_Input_Columns
r2.catch.criteria . should_equal ["A.+"]
r2.catch.criteria . should_equal [regex]
action = t1.parse columns=["A", "B", "C", "E"] error_on_missing_columns=False on_problems=_
tester table =
@ -345,7 +346,7 @@ spec =
Test.specify "should allow mixed column selectors" <|
t1 = Table.new [["Am", ["1", "2", "3"]], ["B", ["4", "5", "6"]], ["C", ["7", "8", "9"]], ["D", ["10", "11", "12"]]]
r1 = t1.parse columns=[(Column_Selector.By_Name "A.*" use_regex=True), -2, "D"]
r1 = t1.parse columns=["A.*".to_regex, -2, "D"]
r1.at "Am" . to_vector . should_equal [1, 2, 3]
r1.at "B" . to_vector . should_equal ["4", "5", "6"]
r1.at "C" . to_vector . should_equal [7, 8, 9]
@ -353,7 +354,7 @@ spec =
Test.specify "should handle edge-cases: overlapping selectors" <|
t1 = Table.new [["Am", ["1", "2", "3"]], ["B", ["4", "5", "6"]], ["C", ["7", "8", "9"]], ["D", ["10", "11", "12"]]]
r1 = t1.parse columns=[(Column_Selector.By_Name "A.*" use_regex=True), 0, "D", -1, -1, 0, 3]
r1 = t1.parse columns=["A.*".to_regex, 0, "D", -1, -1, 0, 3]
r1.at "Am" . to_vector . should_equal [1, 2, 3]
r1.at "B" . to_vector . should_equal ["4", "5", "6"]
r1.at "C" . to_vector . should_equal ["7", "8", "9"]

View File

@ -4,7 +4,7 @@ import Standard.Base.Errors.Common.Index_Out_Of_Bounds
import Standard.Base.Errors.Common.Type_Error
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
from Standard.Table import Table, Column, Sort_Column, Column_Selector, Aggregate_Column
from Standard.Table import Table, Column, Sort_Column, Aggregate_Column
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all hiding First, Last
import Standard.Table.Data.Type.Value_Type.Value_Type
from Standard.Table.Errors import Invalid_Output_Column_Names, Duplicate_Output_Column_Names, No_Input_Columns_Selected, Missing_Input_Columns, No_Such_Column, Floating_Point_Equality, Invalid_Value_Type, Row_Count_Mismatch
@ -315,15 +315,9 @@ spec =
t2.at "Y" . to_vector . should_equal ['A', 0]
t3 = Table.new [["X", [1, 2, 3]], ["Y", ["", Nothing, Number.nan]]]
t4 = t3.remove_columns (Column_Selector.Blank_Columns treat_nans_as_blank=True)
t4 = t3.remove_blank_columns treat_nans_as_blank=True
t4.columns . map .name . should_equal ["X"]
Test.specify "Blank_Columns selector should work for all kinds of methods accepting Column_Selector" <|
t = Table.new [["X", [1, 2, 3, 4, 5]], ["Y", ["", Nothing, Nothing, Number.nan, ""]]]
r1 = t.distinct (Column_Selector.Blank_Columns treat_nans_as_blank=True)
r1.at "Y" . to_vector . to_text . should_equal "[, Nothing, NaN]"
r1.at "X" . to_vector . should_equal [1, 2, 4]
Test.group "Info" <|
Test.specify "should return Table information" <|
a = ["strs", ["a", "b", Nothing, "a"]]