Add newlines option to text cleanse/replace (#10761)

* Auto-commit work in progress before clean build on 2024-08-06 11:32:46 * Fixed Regex and additional test * changelog * . * Make non-capturing
2024-12-22 13:41:39 +03:00 · 2024-08-06 16:59:54 +01:00 · 2024-08-06 16:59:54 +01:00 · 0f688d0a25
commit 0f688d0a25
parent f0de43a970
10 changed files with 28 additions and 2 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -6,9 +6,11 @@
  comparisons.][10614]
 - [Relative paths are now resolved relative to the project location, also in the
  Cloud.][10660]
+- [Added Newlines option to Text_Cleanse/Text_Replace.][10761]

 [10614]: https://github.com/enso-org/enso/pull/10614
 [10660]: https://github.com/enso-org/enso/pull/10660
+[10761]: https://github.com/enso-org/enso/pull/10761

 # Enso 2023.3

--- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso
+++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso
@ -553,6 +553,7 @@ Text.replace self term:(Text | Regex) replacement:Text (case_sensitivity:Case_Se
      - ..Trailing_Whitespace: Removes all whitespace from the end of the string.
      - ..Duplicate_Whitespace: Removes all duplicate whitespace from the string replacing it with the first whitespace character of the duplicated block.
      - ..All_Whitespace: Removes all whitespace from the string.
+      - ..Newlines: Removes all newline characters from the string. Line Feed and Carriage Return characters are considered newlines.
      - ..Leading_Numbers: Removes all numbers from the start of the string.
      - ..Trailing_Numbers: Removes all numbers from the end of the string.
      - ..Non_ASCII: Removes all non-ascii characters from the string.
--- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex/Named_Pattern.enso
+++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex/Named_Pattern.enso
@ -14,6 +14,9 @@ type Named_Pattern
    ## Matches one or more whitespace characters anywhere in a string.
    All_Whitespace

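+    ## Matches a single newline sequence anywhere in a string: a Carriage Return, a Line Feed, or a Carriage Return immediately followed by a Line Feed.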
+    Newlines
+
    ## Matches one or more digits at the beginning of a string.
    Leading_Numbers

@ -44,6 +47,7 @@ type Named_Pattern
        Named_Pattern.Trailing_Whitespace -> "\s+$"
        Named_Pattern.Duplicate_Whitespace -> "(?<=\s)\s+"
        Named_Pattern.All_Whitespace -> "\s+"
+        Named_Pattern.Newlines -> "(?:\r\n?|\n)"
        Named_Pattern.Leading_Numbers -> "^\d+"
        Named_Pattern.Trailing_Numbers -> "\d+$"
        Named_Pattern.Non_ASCII -> "[^\x00-\x7F]"
--- a/distribution/lib/Standard/Base/0.0.0-dev/src/Widget_Helpers.enso
+++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Widget_Helpers.enso
@ -127,7 +127,7 @@ make_format_chooser include_number:Boolean=True include_date:Boolean=True includ
   Creates a Multiple_Choice Widget for selecting data cleanse operations.
 make_data_cleanse_vector_selector : Display -> Widget
 make_data_cleanse_vector_selector display:Display=Display.Always =
-    patterns = ['Leading_Whitespace', 'Trailing_Whitespace', 'Duplicate_Whitespace', 'All_Whitespace', 'Leading_Numbers', 'Trailing_Numbers', 'Non_ASCII', 'Tabs', 'Letters', 'Numbers', 'Punctuation', 'Symbols']
+    patterns = ['Leading_Whitespace', 'Trailing_Whitespace', 'Duplicate_Whitespace', 'All_Whitespace', 'Newlines', 'Leading_Numbers', 'Trailing_Numbers', 'Non_ASCII', 'Tabs', 'Letters', 'Numbers', 'Punctuation', 'Symbols']
    options = patterns.map f-> Option f (".." + f)
    Widget.Multiple_Choice values=options display=display

@ -139,7 +139,7 @@ make_any_selector display:Display=..Always add_text:Boolean=False add_regex:Bool
        if add_text then builder.append (Option "<Text Value>" '""')
        if add_regex then builder.append (Option "<Regular Expression>" '(regex "^$")')
        if add_named_pattern then
-            patterns = ["Leading_Whitespace", "Trailing_Whitespace", "All_Whitespace", "Leading_Numbers", "Trailing_Numbers", "Non_ASCII", "Tabs", "Letters", "Numbers", "Punctuation", "Symbols"]
+            patterns = ["Leading_Whitespace", "Trailing_Whitespace", "All_Whitespace", "Newlines", "Leading_Numbers", "Trailing_Numbers", "Non_ASCII", "Tabs", "Letters", "Numbers", "Punctuation", "Symbols"]
            patterns.each p-> builder.append (Option "<"+p+">" "Named_Pattern."+p)
        if add_number then builder.append (Option "<Number Value>" "0")
        if add_boolean then builder.append (Option "<True/False>" "True")
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Column.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Column.enso
@ -1484,6 +1484,7 @@ type DB_Column
            - ..Trailing_Whitespace: Removes all whitespace from the end of the string.
            - ..Duplicate_Whitespace: Removes all duplicate whitespace from the string replacing it with the first whitespace character of the duplicated block.
            - ..All_Whitespace: Removes all whitespace from the string.
+            - ..Newlines: Removes all newline characters from the string. Line Feed and Carriage Return characters are considered newlines.
            - ..Leading_Numbers: Removes all numbers from the start of the string.
            - ..Trailing_Numbers: Removes all numbers from the end of the string.
            - ..Non_ASCII: Removes all non-ascii characters from the string.
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Table.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Table.enso
@ -2951,6 +2951,7 @@ type DB_Table
             - ..Trailing_Whitespace: Removes all whitespace from the end of the string.
             - ..Duplicate_Whitespace: Removes all duplicate whitespace from the string replacing it with the first whitespace character of the duplicated block.
             - ..All_Whitespace: Removes all whitespace from the string.
+             - ..Newlines: Removes all newline characters from the string. Line Feed and Carriage Return characters are considered newlines.
             - ..Leading_Numbers: Removes all numbers from the start of the string.
             - ..Trailing_Numbers: Removes all numbers from the end of the string.
             - ..Non_ASCII: Removes all non-ascii characters from the string.
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Column.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Column.enso
@ -1483,6 +1483,7 @@ type Column
            - ..Trailing_Whitespace: Removes all whitespace from the end of the string.
            - ..Duplicate_Whitespace: Removes all duplicate whitespace from the string replacing it with the first whitespace character of the duplicated block.
            - ..All_Whitespace: Removes all whitespace from the string.
+            - ..Newlines: Removes all newline characters from the string. Line Feed and Carriage Return characters are considered newlines.
            - ..Leading_Numbers: Removes all numbers from the start of the string.
            - ..Trailing_Numbers: Removes all numbers from the end of the string.
            - ..Non_ASCII: Removes all non-ascii characters from the string.
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Table.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Table.enso
@ -3002,6 +3002,7 @@ type Table
             - ..Trailing_Whitespace: Removes all whitespace from the end of the string.
             - ..Duplicate_Whitespace: Removes all duplicate whitespace from the string replacing it with the first whitespace character of the duplicated block.
             - ..All_Whitespace: Removes all whitespace from the string.
+             - ..Newlines: Removes all newline characters from the string. Line Feed and Carriage Return characters are considered newlines.
             - ..Leading_Numbers: Removes all numbers from the start of the string.
             - ..Trailing_Numbers: Removes all numbers from the end of the string.
             - ..Non_ASCII: Removes all non-ascii characters from the string.
--- a/test/Base_Tests/src/Data/Text_Spec.enso
+++ b/test/Base_Tests/src/Data/Text_Spec.enso
@ -1658,6 +1658,11 @@ add_specs suite_builder =
            expected = "XItXwasXtheXbestXofXtimesXitXwasXtheXworstXofXtimesX"
            res = input.replace Named_Pattern.All_Whitespace "X"
            res.should_equal expected
+        input_with_newlines = 'It was\r the best  of  times\n  it was  the  worst\r\n  of  times'
+        group_builder.specify "should replace newlines" <|
+            expected = 'It wasX the best  of  timesX  it was  the  worstX  of  times'
+            res = input_with_newlines.replace Named_Pattern.Newlines "X"
+            res.should_equal expected
        input2 = "1922  It was the best  of  times  it was  the  worst  of  times  1804"
        group_builder.specify "should replace leading numbers" <|
            expected = "X  It was the best  of  times  it was  the  worst  of  times  1804"
@ -1727,6 +1732,11 @@ add_specs suite_builder =
            expected = "Itwasthebestoftimesitwastheworstoftimes"
            res = input.cleanse [Named_Pattern.All_Whitespace]
            res.should_equal expected
+        input_with_newlines = 'It was\r the best  of  times\n  it was  the  worst\r\n  of  times'
+        group_builder.specify "should remove newlines" <|
+            expected = 'It was the best  of  times  it was  the  worst  of  times'
+            res = input_with_newlines.cleanse [Named_Pattern.Newlines]
+            res.should_equal expected
        input2 = "1922  It was the best  of  times  it was  the  worst  of  times  1804"
        group_builder.specify "should remove leading numbers" <|
            expected = "  It was the best  of  times  it was  the  worst  of  times  1804"
--- a/test/Table_Tests/src/Common_Table_Operations/Text_Cleanse_Spec.enso
+++ b/test/Table_Tests/src/Common_Table_Operations/Text_Cleanse_Spec.enso
@ -90,6 +90,11 @@ add_specs suite_builder setup =
            expected_col = Column.from_vector "Test" ["Itwas", "thebest", "oftimes", "itwastheworstoftimes"]
            res = test_col.text_cleanse [..All_Whitespace]
            res.should_equal expected_col
+        # Covers LF, CR, CRLF and the reversed LF+CR pair in leading, interior and trailing positions.
+        test_col_with_newlines = Column.from_vector "Test" ['\nIt was', 'the best\r', '\rof  times\n', '\r\nit was \r\n the  worst  of  times\n\r']
+        group_builder.specify "should remove newlines" <|
+            expected_col = Column.from_vector "Test" ["It was", "the best", "of  times", "it was  the  worst  of  times"]
+            res = test_col_with_newlines.text_cleanse [..Newlines]
+            res.should_equal expected_col
        test_col_with_nums = Column.from_vector "Test" ["1It was", "the best2", "3of  times4", " 1984 it was  the  worst  of  times  72"]
        group_builder.specify "should remove leading numbers" <|
            expected_col = Column.from_vector "Test" ["It was", "the best2", "of  times4", " 1984 it was  the  worst  of  times  72"]