mirror of
https://github.com/enso-org/enso.git
synced 2024-12-31 17:55:35 +03:00
Add newlines option to text cleanse/replace (#10761)
* Auto-commit work in progress before clean build on 2024-08-06 11:32:46 * Fixed Regex and additional test * changelog * . * Make non-capturing
This commit is contained in:
parent
f0de43a970
commit
0f688d0a25
@ -6,9 +6,11 @@
|
||||
comparisons.][10614]
|
||||
- [Relative paths are now resolved relative to the project location, also in the
|
||||
Cloud.][10660]
|
||||
- [Added Newline option to Text_Cleanse/Text_Replace.][10761]
|
||||
|
||||
[10614]: https://github.com/enso-org/enso/pull/10614
|
||||
[10660]: https://github.com/enso-org/enso/pull/10660
|
||||
[10761]: https://github.com/enso-org/enso/pull/10761
|
||||
|
||||
# Enso 2023.3
|
||||
|
||||
|
@ -553,6 +553,7 @@ Text.replace self term:(Text | Regex) replacement:Text (case_sensitivity:Case_Se
|
||||
- ..Trailing_Whitespace: Removes all whitespace from the end of the string.
|
||||
- ..Duplicate_Whitespace: Removes all duplicate whitespace from the string replacing it with the first whitespace character of the duplicated block.
|
||||
- ..All_Whitespace: Removes all whitespace from the string.
|
||||
- ..Newlines: Removes all newline characters from the string. Line Feed and Carriage Return characters are considered newlines.
|
||||
- ..Leading_Numbers: Removes all numbers from the start of the string.
|
||||
- ..Trailing_Numbers: Removes all numbers from the end of the string.
|
||||
- ..Non_ASCII: Removes all non-ascii characters from the string.
|
||||
|
@ -14,6 +14,9 @@ type Named_Pattern
|
||||
## Matches one or more whitespace characters anywhere in a string.
|
||||
All_Whitespace
|
||||
|
||||
## Matches one or more newline characters.
|
||||
Newlines
|
||||
|
||||
## Matches one or more digits at the beginning of a string.
|
||||
Leading_Numbers
|
||||
|
||||
@ -44,6 +47,7 @@ type Named_Pattern
|
||||
Named_Pattern.Trailing_Whitespace -> "\s+$"
|
||||
Named_Pattern.Duplicate_Whitespace -> "(?<=\s)\s+"
|
||||
Named_Pattern.All_Whitespace -> "\s+"
|
||||
Named_Pattern.Newlines -> "(?:\r\n?|\n)"
|
||||
Named_Pattern.Leading_Numbers -> "^\d+"
|
||||
Named_Pattern.Trailing_Numbers -> "\d+$"
|
||||
Named_Pattern.Non_ASCII -> "[^\x00-\x7F]"
|
||||
|
@ -127,7 +127,7 @@ make_format_chooser include_number:Boolean=True include_date:Boolean=True includ
|
||||
Creates a Multiple_Choice Widget for selecting data cleanse operations.
|
||||
make_data_cleanse_vector_selector : Display -> Widget
|
||||
make_data_cleanse_vector_selector display:Display=Display.Always =
|
||||
patterns = ['Leading_Whitespace', 'Trailing_Whitespace', 'Duplicate_Whitespace', 'All_Whitespace', 'Leading_Numbers', 'Trailing_Numbers', 'Non_ASCII', 'Tabs', 'Letters', 'Numbers', 'Punctuation', 'Symbols']
|
||||
patterns = ['Leading_Whitespace', 'Trailing_Whitespace', 'Duplicate_Whitespace', 'All_Whitespace', "Newlines", 'Leading_Numbers', 'Trailing_Numbers', 'Non_ASCII', 'Tabs', 'Letters', 'Numbers', 'Punctuation', 'Symbols']
|
||||
options = patterns.map f-> Option f (".." + f)
|
||||
Widget.Multiple_Choice values=options display=display
|
||||
|
||||
@ -139,7 +139,7 @@ make_any_selector display:Display=..Always add_text:Boolean=False add_regex:Bool
|
||||
if add_text then builder.append (Option "<Text Value>" '""')
|
||||
if add_regex then builder.append (Option "<Regular Expression>" '(regex "^$")')
|
||||
if add_named_pattern then
|
||||
patterns = ["Leading_Whitespace", "Trailing_Whitespace", "All_Whitespace", "Leading_Numbers", "Trailing_Numbers", "Non_ASCII", "Tabs", "Letters", "Numbers", "Punctuation", "Symbols"]
|
||||
patterns = ["Leading_Whitespace", "Trailing_Whitespace", "All_Whitespace", "Newlines", "Leading_Numbers", "Trailing_Numbers", "Non_ASCII", "Tabs", "Letters", "Numbers", "Punctuation", "Symbols"]
|
||||
patterns.each p-> builder.append (Option "<"+p+">" "Named_Pattern."+p)
|
||||
if add_number then builder.append (Option "<Number Value>" "0")
|
||||
if add_boolean then builder.append (Option "<True/False>" "True")
|
||||
|
@ -1484,6 +1484,7 @@ type DB_Column
|
||||
- ..Trailing_Whitespace: Removes all whitespace from the end of the string.
|
||||
- ..Duplicate_Whitespace: Removes all duplicate whitespace from the string replacing it with the first whitespace character of the duplicated block.
|
||||
- ..All_Whitespace: Removes all whitespace from the string.
|
||||
- ..Newlines: Removes all newline characters from the string. Line Feed and Carriage Return characters are considered newlines.
|
||||
- ..Leading_Numbers: Removes all numbers from the start of the string.
|
||||
- ..Trailing_Numbers: Removes all numbers from the end of the string.
|
||||
- ..Non_ASCII: Removes all non-ascii characters from the string.
|
||||
|
@ -2951,6 +2951,7 @@ type DB_Table
|
||||
- ..Trailing_Whitespace: Removes all whitespace from the end of the string.
|
||||
- ..Duplicate_Whitespace: Removes all duplicate whitespace from the string replacing it with the first whitespace character of the duplicated block.
|
||||
- ..All_Whitespace: Removes all whitespace from the string.
|
||||
- ..Newlines: Removes all newline characters from the string. Line Feed and Carriage Return characters are considered newlines.
|
||||
- ..Leading_Numbers: Removes all numbers from the start of the string.
|
||||
- ..Trailing_Numbers: Removes all numbers from the end of the string.
|
||||
- ..Non_ASCII: Removes all non-ascii characters from the string.
|
||||
|
@ -1483,6 +1483,7 @@ type Column
|
||||
- ..Trailing_Whitespace: Removes all whitespace from the end of the string.
|
||||
- ..Duplicate_Whitespace: Removes all duplicate whitespace from the string replacing it with the first whitespace character of the duplicated block.
|
||||
- ..All_Whitespace: Removes all whitespace from the string.
|
||||
- ..Newlines: Removes all newline characters from the string. Line Feed and Carriage Return characters are considered newlines.
|
||||
- ..Leading_Numbers: Removes all numbers from the start of the string.
|
||||
- ..Trailing_Numbers: Removes all numbers from the end of the string.
|
||||
- ..Non_ASCII: Removes all non-ascii characters from the string.
|
||||
|
@ -3002,6 +3002,7 @@ type Table
|
||||
- ..Trailing_Whitespace: Removes all whitespace from the end of the string.
|
||||
- ..Duplicate_Whitespace: Removes all duplicate whitespace from the string replacing it with the first whitespace character of the duplicated block.
|
||||
- ..All_Whitespace: Removes all whitespace from the string.
|
||||
- ..Newlines: Removes all newline characters from the string. Line Feed and Carriage Return characters are considered newlines.
|
||||
- ..Leading_Numbers: Removes all numbers from the start of the string.
|
||||
- ..Trailing_Numbers: Removes all numbers from the end of the string.
|
||||
- ..Non_ASCII: Removes all non-ascii characters from the string.
|
||||
|
@ -1658,6 +1658,11 @@ add_specs suite_builder =
|
||||
expected = "XItXwasXtheXbestXofXtimesXitXwasXtheXworstXofXtimesX"
|
||||
res = input.replace Named_Pattern.All_Whitespace "X"
|
||||
res.should_equal expected
|
||||
input_with_newlines = 'It was\r the best of times\n it was the worst\r\n of times'
|
||||
group_builder.specify "should replace newlines" <|
|
||||
expected = 'It wasX the best of timesX it was the worstX of times'
|
||||
res = input_with_newlines.replace Named_Pattern.Newlines "X"
|
||||
res.should_equal expected
|
||||
input2 = "1922 It was the best of times it was the worst of times 1804"
|
||||
group_builder.specify "should replace leading numbers" <|
|
||||
expected = "X It was the best of times it was the worst of times 1804"
|
||||
@ -1727,6 +1732,11 @@ add_specs suite_builder =
|
||||
expected = "Itwasthebestoftimesitwastheworstoftimes"
|
||||
res = input.cleanse [Named_Pattern.All_Whitespace]
|
||||
res.should_equal expected
|
||||
input_with_newlines = 'It was\r the best of times\n it was the worst\r\n of times'
|
||||
group_builder.specify "should remove newlines" <|
|
||||
expected = 'It was the best of times it was the worst of times'
|
||||
res = input_with_newlines.cleanse [Named_Pattern.Newlines]
|
||||
res.should_equal expected
|
||||
input2 = "1922 It was the best of times it was the worst of times 1804"
|
||||
group_builder.specify "should remove leading numbers" <|
|
||||
expected = " It was the best of times it was the worst of times 1804"
|
||||
|
@ -90,6 +90,11 @@ add_specs suite_builder setup =
|
||||
expected_col = Column.from_vector "Test" ["Itwas", "thebest", "oftimes", "itwastheworstoftimes"]
|
||||
res = test_col.text_cleanse [..All_Whitespace]
|
||||
res.should_equal expected_col
|
||||
test_col_with_newlines = Column.from_vector "Test" ['\nIt was', 'the best\r', '\rof times\n', '\r\nit was \r\n the worst of times\n\r']
|
||||
group_builder.specify "should remove all whitespace" <|
|
||||
expected_col = Column.from_vector "Test" ["It was", "the best", "of times", "it was the worst of times"]
|
||||
res = test_col_with_newlines.text_cleanse [..Newlines]
|
||||
res.should_equal expected_col
|
||||
test_col_with_nums = Column.from_vector "Test" ["1It was", "the best2", "3of times4", " 1984 it was the worst of times 72"]
|
||||
group_builder.specify "should remove leading numbers" <|
|
||||
expected_col = Column.from_vector "Test" ["It was", "the best2", "of times4", " 1984 it was the worst of times 72"]
|
||||
|
Loading…
Reference in New Issue
Block a user