mirror of
https://github.com/enso-org/enso.git
synced 2025-01-03 16:23:25 +03:00
Add newlines option to text cleanse/replace (#10761)
* Auto-commit work in progress before clean build on 2024-08-06 11:32:46 * Fixed Regex and additional test * changelog * . * Make non-capturing
This commit is contained in:
parent
f0de43a970
commit
0f688d0a25
@ -6,9 +6,11 @@
|
|||||||
comparisons.][10614]
|
comparisons.][10614]
|
||||||
- [Relative paths are now resolved relative to the project location, also in the
|
- [Relative paths are now resolved relative to the project location, also in the
|
||||||
Cloud.][10660]
|
Cloud.][10660]
|
||||||
|
- [Added Newline option to Text_Cleanse/Text_Replace.][10761]
|
||||||
|
|
||||||
[10614]: https://github.com/enso-org/enso/pull/10614
|
[10614]: https://github.com/enso-org/enso/pull/10614
|
||||||
[10660]: https://github.com/enso-org/enso/pull/10660
|
[10660]: https://github.com/enso-org/enso/pull/10660
|
||||||
|
[10761]: https://github.com/enso-org/enso/pull/10761
|
||||||
|
|
||||||
# Enso 2023.3
|
# Enso 2023.3
|
||||||
|
|
||||||
|
@ -553,6 +553,7 @@ Text.replace self term:(Text | Regex) replacement:Text (case_sensitivity:Case_Se
|
|||||||
- ..Trailing_Whitespace: Removes all whitespace from the end of the string.
|
- ..Trailing_Whitespace: Removes all whitespace from the end of the string.
|
||||||
- ..Duplicate_Whitespace: Removes all duplicate whitespace from the string replacing it with the first whitespace character of the duplicated block.
|
- ..Duplicate_Whitespace: Removes all duplicate whitespace from the string replacing it with the first whitespace character of the duplicated block.
|
||||||
- ..All_Whitespace: Removes all whitespace from the string.
|
- ..All_Whitespace: Removes all whitespace from the string.
|
||||||
|
- ..Newlines: Removes all newline characters from the string. Line Feed and Carriage Return characters are considered newlines.
|
||||||
- ..Leading_Numbers: Removes all numbers from the start of the string.
|
- ..Leading_Numbers: Removes all numbers from the start of the string.
|
||||||
- ..Trailing_Numbers: Removes all numbers from the end of the string.
|
- ..Trailing_Numbers: Removes all numbers from the end of the string.
|
||||||
- ..Non_ASCII: Removes all non-ascii characters from the string.
|
- ..Non_ASCII: Removes all non-ascii characters from the string.
|
||||||
|
@ -14,6 +14,9 @@ type Named_Pattern
|
|||||||
## Matches one or more whitespace characters anywhere in a string.
|
## Matches one or more whitespace characters anywhere in a string.
|
||||||
All_Whitespace
|
All_Whitespace
|
||||||
|
|
||||||
|
## Matches a single newline sequence: CRLF, a lone CR, or a lone LF.
|
||||||
|
Newlines
|
||||||
|
|
||||||
## Matches one or more digits at the beginning of a string.
|
## Matches one or more digits at the beginning of a string.
|
||||||
Leading_Numbers
|
Leading_Numbers
|
||||||
|
|
||||||
@ -44,6 +47,7 @@ type Named_Pattern
|
|||||||
Named_Pattern.Trailing_Whitespace -> "\s+$"
|
Named_Pattern.Trailing_Whitespace -> "\s+$"
|
||||||
Named_Pattern.Duplicate_Whitespace -> "(?<=\s)\s+"
|
Named_Pattern.Duplicate_Whitespace -> "(?<=\s)\s+"
|
||||||
Named_Pattern.All_Whitespace -> "\s+"
|
Named_Pattern.All_Whitespace -> "\s+"
|
||||||
|
Named_Pattern.Newlines -> "(?:\r\n?|\n)"
|
||||||
Named_Pattern.Leading_Numbers -> "^\d+"
|
Named_Pattern.Leading_Numbers -> "^\d+"
|
||||||
Named_Pattern.Trailing_Numbers -> "\d+$"
|
Named_Pattern.Trailing_Numbers -> "\d+$"
|
||||||
Named_Pattern.Non_ASCII -> "[^\x00-\x7F]"
|
Named_Pattern.Non_ASCII -> "[^\x00-\x7F]"
|
||||||
|
@ -127,7 +127,7 @@ make_format_chooser include_number:Boolean=True include_date:Boolean=True includ
|
|||||||
Creates a Multiple_Choice Widget for selecting data cleanse operations.
|
Creates a Multiple_Choice Widget for selecting data cleanse operations.
|
||||||
make_data_cleanse_vector_selector : Display -> Widget
|
make_data_cleanse_vector_selector : Display -> Widget
|
||||||
make_data_cleanse_vector_selector display:Display=Display.Always =
|
make_data_cleanse_vector_selector display:Display=Display.Always =
|
||||||
patterns = ['Leading_Whitespace', 'Trailing_Whitespace', 'Duplicate_Whitespace', 'All_Whitespace', 'Leading_Numbers', 'Trailing_Numbers', 'Non_ASCII', 'Tabs', 'Letters', 'Numbers', 'Punctuation', 'Symbols']
|
patterns = ['Leading_Whitespace', 'Trailing_Whitespace', 'Duplicate_Whitespace', 'All_Whitespace', "Newlines", 'Leading_Numbers', 'Trailing_Numbers', 'Non_ASCII', 'Tabs', 'Letters', 'Numbers', 'Punctuation', 'Symbols']
|
||||||
options = patterns.map f-> Option f (".." + f)
|
options = patterns.map f-> Option f (".." + f)
|
||||||
Widget.Multiple_Choice values=options display=display
|
Widget.Multiple_Choice values=options display=display
|
||||||
|
|
||||||
@ -139,7 +139,7 @@ make_any_selector display:Display=..Always add_text:Boolean=False add_regex:Bool
|
|||||||
if add_text then builder.append (Option "<Text Value>" '""')
|
if add_text then builder.append (Option "<Text Value>" '""')
|
||||||
if add_regex then builder.append (Option "<Regular Expression>" '(regex "^$")')
|
if add_regex then builder.append (Option "<Regular Expression>" '(regex "^$")')
|
||||||
if add_named_pattern then
|
if add_named_pattern then
|
||||||
patterns = ["Leading_Whitespace", "Trailing_Whitespace", "All_Whitespace", "Leading_Numbers", "Trailing_Numbers", "Non_ASCII", "Tabs", "Letters", "Numbers", "Punctuation", "Symbols"]
|
patterns = ["Leading_Whitespace", "Trailing_Whitespace", "All_Whitespace", "Newlines", "Leading_Numbers", "Trailing_Numbers", "Non_ASCII", "Tabs", "Letters", "Numbers", "Punctuation", "Symbols"]
|
||||||
patterns.each p-> builder.append (Option "<"+p+">" "Named_Pattern."+p)
|
patterns.each p-> builder.append (Option "<"+p+">" "Named_Pattern."+p)
|
||||||
if add_number then builder.append (Option "<Number Value>" "0")
|
if add_number then builder.append (Option "<Number Value>" "0")
|
||||||
if add_boolean then builder.append (Option "<True/False>" "True")
|
if add_boolean then builder.append (Option "<True/False>" "True")
|
||||||
|
@ -1484,6 +1484,7 @@ type DB_Column
|
|||||||
- ..Trailing_Whitespace: Removes all whitespace from the end of the string.
|
- ..Trailing_Whitespace: Removes all whitespace from the end of the string.
|
||||||
- ..Duplicate_Whitespace: Removes all duplicate whitespace from the string replacing it with the first whitespace character of the duplicated block.
|
- ..Duplicate_Whitespace: Removes all duplicate whitespace from the string replacing it with the first whitespace character of the duplicated block.
|
||||||
- ..All_Whitespace: Removes all whitespace from the string.
|
- ..All_Whitespace: Removes all whitespace from the string.
|
||||||
|
- ..Newlines: Removes all newline characters from the string. Line Feed and Carriage Return characters are considered newlines.
|
||||||
- ..Leading_Numbers: Removes all numbers from the start of the string.
|
- ..Leading_Numbers: Removes all numbers from the start of the string.
|
||||||
- ..Trailing_Numbers: Removes all numbers from the end of the string.
|
- ..Trailing_Numbers: Removes all numbers from the end of the string.
|
||||||
- ..Non_ASCII: Removes all non-ascii characters from the string.
|
- ..Non_ASCII: Removes all non-ascii characters from the string.
|
||||||
|
@ -2951,6 +2951,7 @@ type DB_Table
|
|||||||
- ..Trailing_Whitespace: Removes all whitespace from the end of the string.
|
- ..Trailing_Whitespace: Removes all whitespace from the end of the string.
|
||||||
- ..Duplicate_Whitespace: Removes all duplicate whitespace from the string replacing it with the first whitespace character of the duplicated block.
|
- ..Duplicate_Whitespace: Removes all duplicate whitespace from the string replacing it with the first whitespace character of the duplicated block.
|
||||||
- ..All_Whitespace: Removes all whitespace from the string.
|
- ..All_Whitespace: Removes all whitespace from the string.
|
||||||
|
- ..Newlines: Removes all newline characters from the string. Line Feed and Carriage Return characters are considered newlines.
|
||||||
- ..Leading_Numbers: Removes all numbers from the start of the string.
|
- ..Leading_Numbers: Removes all numbers from the start of the string.
|
||||||
- ..Trailing_Numbers: Removes all numbers from the end of the string.
|
- ..Trailing_Numbers: Removes all numbers from the end of the string.
|
||||||
- ..Non_ASCII: Removes all non-ascii characters from the string.
|
- ..Non_ASCII: Removes all non-ascii characters from the string.
|
||||||
|
@ -1483,6 +1483,7 @@ type Column
|
|||||||
- ..Trailing_Whitespace: Removes all whitespace from the end of the string.
|
- ..Trailing_Whitespace: Removes all whitespace from the end of the string.
|
||||||
- ..Duplicate_Whitespace: Removes all duplicate whitespace from the string replacing it with the first whitespace character of the duplicated block.
|
- ..Duplicate_Whitespace: Removes all duplicate whitespace from the string replacing it with the first whitespace character of the duplicated block.
|
||||||
- ..All_Whitespace: Removes all whitespace from the string.
|
- ..All_Whitespace: Removes all whitespace from the string.
|
||||||
|
- ..Newlines: Removes all newline characters from the string. Line Feed and Carriage Return characters are considered newlines.
|
||||||
- ..Leading_Numbers: Removes all numbers from the start of the string.
|
- ..Leading_Numbers: Removes all numbers from the start of the string.
|
||||||
- ..Trailing_Numbers: Removes all numbers from the end of the string.
|
- ..Trailing_Numbers: Removes all numbers from the end of the string.
|
||||||
- ..Non_ASCII: Removes all non-ascii characters from the string.
|
- ..Non_ASCII: Removes all non-ascii characters from the string.
|
||||||
|
@ -3002,6 +3002,7 @@ type Table
|
|||||||
- ..Trailing_Whitespace: Removes all whitespace from the end of the string.
|
- ..Trailing_Whitespace: Removes all whitespace from the end of the string.
|
||||||
- ..Duplicate_Whitespace: Removes all duplicate whitespace from the string replacing it with the first whitespace character of the duplicated block.
|
- ..Duplicate_Whitespace: Removes all duplicate whitespace from the string replacing it with the first whitespace character of the duplicated block.
|
||||||
- ..All_Whitespace: Removes all whitespace from the string.
|
- ..All_Whitespace: Removes all whitespace from the string.
|
||||||
|
- ..Newlines: Removes all newline characters from the string. Line Feed and Carriage Return characters are considered newlines.
|
||||||
- ..Leading_Numbers: Removes all numbers from the start of the string.
|
- ..Leading_Numbers: Removes all numbers from the start of the string.
|
||||||
- ..Trailing_Numbers: Removes all numbers from the end of the string.
|
- ..Trailing_Numbers: Removes all numbers from the end of the string.
|
||||||
- ..Non_ASCII: Removes all non-ascii characters from the string.
|
- ..Non_ASCII: Removes all non-ascii characters from the string.
|
||||||
|
@ -1658,6 +1658,11 @@ add_specs suite_builder =
|
|||||||
expected = "XItXwasXtheXbestXofXtimesXitXwasXtheXworstXofXtimesX"
|
expected = "XItXwasXtheXbestXofXtimesXitXwasXtheXworstXofXtimesX"
|
||||||
res = input.replace Named_Pattern.All_Whitespace "X"
|
res = input.replace Named_Pattern.All_Whitespace "X"
|
||||||
res.should_equal expected
|
res.should_equal expected
|
||||||
|
input_with_newlines = 'It was\r the best of times\n it was the worst\r\n of times'
|
||||||
|
group_builder.specify "should replace newlines" <|
|
||||||
|
expected = 'It wasX the best of timesX it was the worstX of times'
|
||||||
|
res = input_with_newlines.replace Named_Pattern.Newlines "X"
|
||||||
|
res.should_equal expected
|
||||||
input2 = "1922 It was the best of times it was the worst of times 1804"
|
input2 = "1922 It was the best of times it was the worst of times 1804"
|
||||||
group_builder.specify "should replace leading numbers" <|
|
group_builder.specify "should replace leading numbers" <|
|
||||||
expected = "X It was the best of times it was the worst of times 1804"
|
expected = "X It was the best of times it was the worst of times 1804"
|
||||||
@ -1727,6 +1732,11 @@ add_specs suite_builder =
|
|||||||
expected = "Itwasthebestoftimesitwastheworstoftimes"
|
expected = "Itwasthebestoftimesitwastheworstoftimes"
|
||||||
res = input.cleanse [Named_Pattern.All_Whitespace]
|
res = input.cleanse [Named_Pattern.All_Whitespace]
|
||||||
res.should_equal expected
|
res.should_equal expected
|
||||||
|
input_with_newlines = 'It was\r the best of times\n it was the worst\r\n of times'
|
||||||
|
group_builder.specify "should remove newlines" <|
|
||||||
|
expected = 'It was the best of times it was the worst of times'
|
||||||
|
res = input_with_newlines.cleanse [Named_Pattern.Newlines]
|
||||||
|
res.should_equal expected
|
||||||
input2 = "1922 It was the best of times it was the worst of times 1804"
|
input2 = "1922 It was the best of times it was the worst of times 1804"
|
||||||
group_builder.specify "should remove leading numbers" <|
|
group_builder.specify "should remove leading numbers" <|
|
||||||
expected = " It was the best of times it was the worst of times 1804"
|
expected = " It was the best of times it was the worst of times 1804"
|
||||||
|
@ -90,6 +90,11 @@ add_specs suite_builder setup =
|
|||||||
expected_col = Column.from_vector "Test" ["Itwas", "thebest", "oftimes", "itwastheworstoftimes"]
|
expected_col = Column.from_vector "Test" ["Itwas", "thebest", "oftimes", "itwastheworstoftimes"]
|
||||||
res = test_col.text_cleanse [..All_Whitespace]
|
res = test_col.text_cleanse [..All_Whitespace]
|
||||||
res.should_equal expected_col
|
res.should_equal expected_col
|
||||||
|
test_col_with_newlines = Column.from_vector "Test" ['\nIt was', 'the best\r', '\rof times\n', '\r\nit was \r\n the worst of times\n\r']
|
||||||
|
group_builder.specify "should remove newlines" <|
|
||||||
|
expected_col = Column.from_vector "Test" ["It was", "the best", "of times", "it was the worst of times"]
|
||||||
|
res = test_col_with_newlines.text_cleanse [..Newlines]
|
||||||
|
res.should_equal expected_col
|
||||||
test_col_with_nums = Column.from_vector "Test" ["1It was", "the best2", "3of times4", " 1984 it was the worst of times 72"]
|
test_col_with_nums = Column.from_vector "Test" ["1It was", "the best2", "3of times4", " 1984 it was the worst of times 72"]
|
||||||
group_builder.specify "should remove leading numbers" <|
|
group_builder.specify "should remove leading numbers" <|
|
||||||
expected_col = Column.from_vector "Test" ["It was", "the best2", "of times4", " 1984 it was the worst of times 72"]
|
expected_col = Column.from_vector "Test" ["It was", "the best2", "of times4", " 1984 it was the worst of times 72"]
|
||||||
|
Loading…
Reference in New Issue
Block a user