mirror of
https://github.com/enso-org/enso.git
synced 2024-11-26 17:06:48 +03:00
Add replace_text
method to In-Memory Table
(#3793)
Implements https://www.pivotaltracker.com/n/projects/2539304/stories/183415329
This commit is contained in:
parent
5873af88c5
commit
ce6267f098
@ -209,6 +209,7 @@
|
||||
- [Added `Filter_Condition` to `Vector`, `Range` and `List`.][3770]
|
||||
- [Extended `Filter_Condition` with `Is_Empty`, `Not_Empty`, `Like` and
|
||||
`Not_Like`.][3775]
|
||||
- [Implemented `Table.replace_text` for in-memory table.][3793]
|
||||
|
||||
[debug-shortcuts]:
|
||||
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
||||
@ -335,6 +336,7 @@
|
||||
[3750]: https://github.com/enso-org/enso/pull/3750
|
||||
[3770]: https://github.com/enso-org/enso/pull/3770
|
||||
[3775]: https://github.com/enso-org/enso/pull/3775
|
||||
[3793]: https://github.com/enso-org/enso/pull/3793
|
||||
|
||||
#### Enso Compiler
|
||||
|
||||
|
@ -385,9 +385,9 @@ Text.split self delimiter="," matcher=Text_Matcher.Case_Sensitive = if delimiter
|
||||
- new_text: The new text to replace occurrences of `term` with.
|
||||
If `matcher` is a `Regex_Matcher`, `new_text` can include replacement
|
||||
patterns (such as `$<n>`) for a marked group.
|
||||
- mode: Specifies which instances of term the engine tries to find. When the
|
||||
mode is `First` or `Last`, this method replaces the first or last instance
|
||||
of term in the input. If set to `All`, it replaces all instances of term in
|
||||
- mode: Specifies which occurrences of term the engine tries to find. When the
|
||||
mode is `First` or `Last`, this method replaces the first or last occurrence
|
||||
of term in the input. If set to `All`, it replaces all occurrences of term in
|
||||
the input.
|
||||
- matcher: If a `Text_Matcher`, the text is compared using case-sensitivity
|
||||
rules specified in the matcher. If a `Regex_Matcher`, the term is used as a
|
||||
|
@ -12,6 +12,8 @@ import project.Data.Column_Selector.Column_Selector
|
||||
import project.Data.Data_Formatter.Data_Formatter
|
||||
import project.Data.Match_Columns.Match_Columns
|
||||
import project.Data.Position.Position
|
||||
import project.Data.Storage.Storage
|
||||
import project.Data.Value_Type.Value_Type
|
||||
import project.Data.Sort_Column_Selector.Sort_Column_Selector
|
||||
import project.Data.Sort_Column.Sort_Column
|
||||
import project.Data.Aggregate_Column.Aggregate_Column
|
||||
@ -21,10 +23,11 @@ import project.Internal.Parse_Values_Helper
|
||||
import project.Internal.Problem_Builder.Problem_Builder
|
||||
import project.IO.Auto_Detect.Auto_Detect
|
||||
|
||||
from project.Data.Column import get_item_string
|
||||
from project.Data.Column_Type_Selection import Column_Type_Selection, Auto
|
||||
from project.Delimited.Delimited_Format import Delimited
|
||||
from project.Internal.Filter_Condition_Helpers import make_filter_column
|
||||
from project.Errors import Missing_Input_Columns, Column_Indexes_Out_Of_Range, Duplicate_Type_Selector, No_Index_Set_Error, No_Such_Column_Error, No_Such_Column_Error_Data, No_Input_Columns_Selected, No_Output_Columns
|
||||
from project.Errors import Missing_Input_Columns, Column_Indexes_Out_Of_Range, Duplicate_Type_Selector, No_Index_Set_Error, No_Such_Column_Error, No_Such_Column_Error_Data, No_Input_Columns_Selected, No_Output_Columns, Invalid_Value_Type
|
||||
|
||||
import Standard.Visualization
|
||||
|
||||
@ -112,7 +115,7 @@ type Table
|
||||
display_rows = Math.min num_rows show_rows
|
||||
rows = Vector.new display_rows row_num->
|
||||
cols = col_vals.map col->
|
||||
if col.isNa row_num then "Nothing" else Column.get_item_string col row_num
|
||||
if col.isNa row_num then "Nothing" else get_item_string col row_num
|
||||
[index.ilocString row_num] + cols
|
||||
table = print_table col_names rows 1 format_terminal
|
||||
if num_rows - display_rows <= 0 then table else
|
||||
@ -726,6 +729,87 @@ type Table
|
||||
result = Table.new new_columns
|
||||
on_problems.attach_problems_after result problem_builder.to_vector
|
||||
|
||||
## Replaces the first, last, or all occurrences of `term` with
|
||||
`new_text` in each text row of selected columns.
|
||||
If `term` is empty, the function returns the table unchanged.
|
||||
|
||||
This method follows the exact replacement semantics of the
|
||||
`Text.replace` method.
|
||||
|
||||
Arguments:
|
||||
- columns: Column selection criteria or a column name or index.
|
||||
- term: The term to find.
|
||||
- new_text: The new text to replace occurrences of `term` with.
|
||||
If `matcher` is a `Regex_Matcher`, `new_text` can include replacement
|
||||
patterns (such as `$<n>`) for a marked group.
|
||||
- mode: Specifies which occurrences of term the engine tries to find. When the
|
||||
mode is `First` or `Last`, this method replaces the first or last occurrence
|
||||
of term in each individual table cell. If set to `All`, it replaces all
|
||||
occurrences of term.
|
||||
- matcher: If a `Text_Matcher`, the text is compared using case-sensitivity
|
||||
rules specified in the matcher. If a `Regex_Matcher`, the term is used as a
|
||||
regular expression and matched using the associated options.
|
||||
- on_problems: Specifies how to handle if a problem occurs, raising as a
|
||||
warning by default.
|
||||
|
||||
The following problems can occur:
|
||||
- If a column in columns is not in the input table, a `Missing_Input_Columns`.
|
||||
- If duplicate columns, names or indices are provided, a
|
||||
`Duplicate_Column_Selectors`.
|
||||
- If a column index is out of range, a `Column_Indexes_Out_Of_Range`.
|
||||
- If two distinct indices refer to the same column, an
|
||||
`Input_Indices_Already_Matched`, with the column included the first
|
||||
time it is matched.
|
||||
- If a column in columns does not have a storage type of `Text`, or `Any`,
|
||||
thus it is guaranteed that it can't contain any text values, a
|
||||
`Invalid_Value_Type`.
|
||||
|
||||
> Example
|
||||
Replace dashes with underscores in a column named "variable_names".
|
||||
|
||||
table.replace_text "variable_names" "-" "_"
|
||||
|
||||
> Example
|
||||
Remove leading and trailing spaces from cells in multiple columns.
|
||||
|
||||
table.replace_text (Column_Selector.By_Name ["foo", "bar"]) "^\s*(.*?)\s*$" "$1" matcher=Regex_Matcher.Regex_Matcher_Data
|
||||
|
||||
> Example
|
||||
Replace texts in quotes with parentheses in column at index 1.
|
||||
|
||||
table.replace_text 1 '"(.*?)"' '($1)' matcher=Regex_Matcher.Regex_Matcher_Data
|
||||
replace_text : (Text | Integer | Column_Selector) -> Text -> Text -> Matching_Mode | Regex_Mode -> (Text_Matcher | Regex_Matcher) -> Problem_Behavior -> Table
|
||||
replace_text self columns=(Column_Selector.By_Index [0]) term="" new_text="" mode=Regex_Mode.All matcher=Text_Matcher.Case_Sensitive on_problems=Problem_Behavior.Report_Warning = if term.is_empty then self else
|
||||
# Reached only for a non-empty `term`; the guard on the definition line
# returns `self` before any column selection or problem reporting happens.
problem_builder = Problem_Builder.new
|
||||
|
||||
# Normalise the `columns` argument: a bare name or index is wrapped into
# the corresponding single-element `Column_Selector`.
selector = case columns of
|
||||
_ : Column_Selector -> columns
|
||||
name : Text -> Column_Selector.By_Name [name]
|
||||
index : Integer -> Column_Selector.By_Index [index]
|
||||
# `problem_builder` is handed to the helper; presumably selection problems
# (missing columns, bad indices, ...) are recorded in it - verify in Table_Helpers.
selection = Table_Helpers.select_columns_helper self.columns selector reorder=False problem_builder
|
||||
# Map used as a set of the selected column names (every value is `True`).
selected_names = Map.from_vector (selection.map column-> [column.name, True])
|
||||
|
||||
# Maps `f` over the column and renames the result back to the original
# column name.
map_preserve_name column f = column.map f . rename column.name
|
||||
# Per-cell replacement; forwards the arguments to the cell's `replace`.
do_replace = _.replace term new_text mode matcher
|
||||
# Variant for mixed columns: only `Text` cells are replaced, any other
# value is passed through unchanged.
do_replace_only_text = case _ of
|
||||
item : Text -> do_replace item
|
||||
item -> item
|
||||
|
||||
# Dispatch on the column's storage type: `Text` columns are replaced
# cell-by-cell, `Any` columns only in their text cells, and any other
# storage type is reported as an `Invalid_Value_Type` problem with the
# column returned untouched.
transform column = case column.storage_type of
|
||||
Storage.Text -> map_preserve_name column do_replace
|
||||
Storage.Any -> map_preserve_name column do_replace_only_text
|
||||
_ ->
|
||||
problem = Invalid_Value_Type.Invalid_Value_Type_Data Value_Type.Char column.value_type
|
||||
problem_builder.report_other_warning problem
|
||||
column
|
||||
|
||||
# Walk all columns of the table in their existing order; only the columns
# whose name is in `selected_names` are transformed.
new_columns = self.columns.map column->
|
||||
is_selected = selected_names.get_or_else column.name False
|
||||
if is_selected then transform column else column
|
||||
|
||||
result = Table.new new_columns
|
||||
# Attach the collected problems to the result according to `on_problems`.
problem_builder.attach_problems_after on_problems result
|
||||
|
||||
## ALIAS Filter Rows
|
||||
|
||||
Selects only the rows of this table that correspond to `True` values of
|
||||
|
@ -363,7 +363,7 @@ type Column_Transform_Element
|
||||
Value column associated_selector
|
||||
|
||||
## PRIVATE
|
||||
prepare_order_by : Vector -> Problem_Builder -> Vector Column_Transform_Element
|
||||
prepare_order_by : Vector -> Vector Text | Sort_Column_Selector -> Problem_Builder -> Vector Column_Transform_Element
|
||||
prepare_order_by internal_columns column_selectors problem_builder =
|
||||
selected_elements = case column_selectors of
|
||||
_ : Vector.Vector ->
|
||||
|
@ -252,7 +252,7 @@ case class DocParserDef() extends Parser[Doc] {
|
||||
logger.trace {
|
||||
var listOfFormattedAST: List[Elem] = Nil
|
||||
while (
|
||||
result.stack.head != Elem.Formatter(typ) && result.stack.nonEmpty
|
||||
result.stack.nonEmpty && result.stack.head != Elem.Formatter(typ)
|
||||
) {
|
||||
result.pop()
|
||||
result.current match {
|
||||
|
@ -4,14 +4,14 @@ from Standard.Base.Error.Problem_Behavior import Report_Error
|
||||
from Standard.Table import Table, Column, Sort_Column, Column_Selector, Sort_Column_Selector, Aggregate_Column
|
||||
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all hiding First, Last
|
||||
from Standard.Table.Data.Table import Empty_Error
|
||||
from Standard.Table.Errors import Invalid_Output_Column_Names_Data, Duplicate_Output_Column_Names_Data, No_Input_Columns_Selected, Missing_Input_Columns_Data, No_Such_Column_Error_Data
|
||||
from Standard.Table.Data.Storage import Storage
|
||||
from Standard.Table.Errors import Floating_Point_Grouping_Data
|
||||
from Standard.Table.Errors import Invalid_Output_Column_Names_Data, Duplicate_Output_Column_Names_Data, No_Input_Columns_Selected, Missing_Input_Columns_Data, No_Such_Column_Error_Data, Floating_Point_Grouping_Data, Invalid_Value_Type
|
||||
|
||||
import Standard.Visualization
|
||||
|
||||
import Standard.Test
|
||||
import Standard.Test.Problems
|
||||
import Standard.Table.Data.Value_Type.Value_Type
|
||||
|
||||
import project.Common_Table_Spec
|
||||
from project.Util import all
|
||||
@ -715,6 +715,58 @@ spec =
|
||||
problems = [Duplicate_Output_Column_Names_Data ["A", "A", "A"]]
|
||||
Problems.test_problem_handling action problems tester
|
||||
|
||||
Test.group "Table.replace_text" <|
|
||||
Test.specify "should replace text in full-text table columns" <|
|
||||
bools = ["bools", [False, False, True, True, False]]
|
||||
texts = ["texts", ["foo", "foo", "bar", "baz", "spam"]]
|
||||
table = Table.new [bools, texts]
|
||||
actual = table.replace_text "texts" "a" "o"
|
||||
actual.at "bools" . to_vector . should_equal [False, False, True, True, False]
|
||||
actual.at "texts" . to_vector . should_equal ["foo", "foo", "bor", "boz", "spom"]
|
||||
Problems.assume_no_problems actual
|
||||
|
||||
Test.specify "should replace text in mixed columns" <|
|
||||
bools = ["bools", [False, False, True, True, False]]
|
||||
mixed = ["mixed", ["foo", 5, "bar", False, "spam"]]
|
||||
table = Table.new [bools, mixed]
|
||||
actual = table.replace_text "mixed" "a" "o"
|
||||
actual.at "bools" . to_vector . should_equal [False, False, True, True, False]
|
||||
actual.at "mixed" . to_vector . should_equal ["foo", 5, "bor", False, "spom"]
|
||||
Problems.assume_no_problems actual
|
||||
|
||||
Test.specify "should support operating on multiple columns at once" <|
|
||||
bools = ["bools", [False, False, True]]
|
||||
texts1 = ["texts1", ["foo", "bar", "baz"]]
|
||||
texts2 = ["texts2", ["baz", "quux", "spam"]]
|
||||
table = Table.new [bools, texts1, texts2]
|
||||
actual = table.replace_text (Column_Selector.By_Name ["texts1", "texts2"]) "a" "o"
|
||||
actual.at "bools" . to_vector . should_equal [False, False, True]
|
||||
actual.at "texts1" . to_vector . should_equal ["foo", "bor", "boz"]
|
||||
actual.at "texts2" . to_vector . should_equal ["boz", "quux", "spom"]
|
||||
Problems.assume_no_problems actual
|
||||
|
||||
Test.specify "should support regex replacement" <|
|
||||
bools = ["bools", [False, False, True, True, False]]
|
||||
texts = ["texts", ["foo", "bar", "baz", "spam"]]
|
||||
table = Table.new [bools, texts]
|
||||
actual = table.replace_text "texts" "(a|o)" "$1e" matcher=Regex_Matcher.Regex_Matcher_Data
|
||||
actual.at "texts" . to_vector . should_equal ["foeoe", "baer", "baez", "spaem"]
|
||||
Problems.assume_no_problems actual
|
||||
|
||||
Test.specify 'should return warnings and errors when passed a non-existent column' <|
|
||||
table = Table.new [["bools", [False, True]], ["texts", ["foo", "bar"]]]
|
||||
action = table.replace_text "invalid_name" "a" "b" on_problems=_
|
||||
tester = _.should_equal table
|
||||
problems = [Missing_Input_Columns_Data ['invalid_name']]
|
||||
Problems.test_problem_handling action problems tester
|
||||
|
||||
Test.specify "should return warnings and errors when selected non-text column" <|
|
||||
table = Table.new [["bools", [False, True]], ["texts", ["foo", "bar"]]]
|
||||
action = table.replace_text "bools" "a" "b" on_problems=_
|
||||
tester = _.should_equal table
|
||||
problems = [Invalid_Value_Type.Invalid_Value_Type_Data Value_Type.Char Value_Type.Boolean]
|
||||
Problems.test_problem_handling action problems tester
|
||||
|
||||
Test.group "[In-Memory] Table.aggregate" <|
|
||||
Test.specify "should return columns with correct types" <|
|
||||
dates = ["dates", [Date.new 1999, Date.new 2000, Date.new 2000, Date.new 2000]]
|
||||
|
Loading…
Reference in New Issue
Block a user