Add duplicates component (#10323)

* Update existing behaviour to match new * Add signatures * Red test * First test green * sbt javafmtAll * In-Memory working * Not implemented for In-Db * Docs * Disable tests for in-db * Changelog * Code review changes * Fix * Fix * Fix tests
2024-08-17 00:40:34 +03:00 · 2024-06-24 11:29:03 +01:00 · 2024-06-24 11:29:03 +01:00 · c324c78e23
commit c324c78e23
parent 791dba6729
12 changed files with 238 additions and 40 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -42,6 +42,7 @@
 - [Implemented `.cast` to and from `Decimal` columns for the in-memory
  database.][10206]
 - [Implemented fallback to Windows-1252 encoding for `Encoding.Default`.][10190]
+- [Added `Table.duplicates` component.][10323]

 [debug-shortcuts]:

@ -50,6 +51,7 @@
 [10130]: https://github.com/enso-org/enso/pull/10130
 [10206]: https://github.com/enso-org/enso/pull/10206
 [10190]: https://github.com/enso-org/enso/pull/10190
+[10323]: https://github.com/enso-org/enso/pull/10323

 <br/>![Release Notes](/docs/assets/tags/release_notes.svg)

--- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Array.enso
+++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Array.enso
@ -357,9 +357,9 @@ type Array
       first duplicate appeared in the input.

       > Example
-         Removing repeating entries.
+         Removing unique entries.

-             [1, 3, 1, 2, 2, 1].to_array . duplicates == [1, 2].to_array
+             [1, 3, 1, 2, 2, 1].to_array . duplicates == [1, 1, 2, 2, 1].to_array
    duplicates : (Any -> Any) -> Vector Any
    duplicates self (on = x->x) =
        Array_Like_Helpers.duplicates self on
--- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Vector.enso
+++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Vector.enso
@ -1227,9 +1227,9 @@ type Vector a
       first duplicate appeared in the input.

       > Example
-         Removing repeating entries.
+         Removing unique entries.

-             [1, 3, 1, 2, 2, 1] . duplicates == [1, 2]
+             [1, 3, 1, 2, 2, 1] . duplicates == [1, 1, 2, 2, 1]
    duplicates : (Any -> Any) -> Vector Any
    duplicates self (on = x->x) =
        Array_Like_Helpers.duplicates self on
--- a/distribution/lib/Standard/Base/0.0.0-dev/src/Internal/Array_Like_Helpers.enso
+++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Internal/Array_Like_Helpers.enso
@ -165,11 +165,14 @@ distinct vector on =
                    existing.insert key True

 duplicates vector on = Vector.build builder->
-    vector.fold Map.empty current-> item->
+    counts = vector.fold Map.empty current-> item->
        key = on item
        count = current.get key 0
-        if count == 1 then builder.append item
        current.insert key count+1
+    vector.map item->
+        key = on item
+        count = counts.get key 0
+        if count != 1 then builder.append item

 take vector range = case range of
    ## We are using a specialized implementation for `take Sample`, because
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Table.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Table.enso
@ -1323,9 +1323,7 @@ type DB_Table
           raised as an error regardless of the problem behavior, because it is
           not possible to create a table without any columns.
         - If a column in `columns` is not in the input table, a
-           `Missing_Input_Columns` is raised as an error, unless
-           `error_on_missing_columns` is set to `False`, in which case the
-           problem is reported according to the `on_problems` setting.
+           `Missing_Input_Columns` is raised as an error.
         - If no valid columns are selected, a `No_Input_Columns_Selected`, is
           reported as a dataflow error regardless of setting.
         - If floating points values are present in the distinct columns, a
@ -1333,13 +1331,46 @@ type DB_Table
           setting.
    @columns Widget_Helpers.make_column_name_multi_selector
    distinct : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Case_Sensitivity -> Boolean -> Problem_Behavior -> DB_Table ! No_Output_Columns | Missing_Input_Columns | No_Input_Columns_Selected | Floating_Point_Equality
-    distinct self columns=self.column_names case_sensitivity:Case_Sensitivity=..Default error_on_missing_columns:Boolean=True on_problems:Problem_Behavior=Report_Warning =
-        key_columns = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=True error_on_missing_columns=error_on_missing_columns on_problems=on_problems . catch No_Output_Columns _->
+    distinct self columns=self.column_names case_sensitivity:Case_Sensitivity=..Default on_problems:Problem_Behavior=Report_Warning =
+        key_columns = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=True error_on_missing_columns=True on_problems=on_problems . catch No_Output_Columns _->
            Error.throw No_Input_Columns_Selected
        problem_builder = Problem_Builder.new
        new_table = self.connection.dialect.prepare_distinct self key_columns case_sensitivity problem_builder
        problem_builder.attach_problems_before on_problems new_table

+    ## GROUP Standard.Base.Selections
+       ICON preparation
+       Returns the set of rows which are duplicated within the specified columns from the
+       input table.
+
+       When multiple rows have the same values within the specified columns all of those rows are
+       returned. Rows which are unique within the specified columns are removed.
+
+       Arguments:
+       - columns: The columns of the table to use for distinguishing the rows.
+       - case_sensitivity: Specifies if the text values should be compared case
+         sensitively.
+       - on_problems: Specifies how to handle if a problem occurs, raising as a
+         warning by default.
+
+       ! Error Conditions
+
+         - This operation is not implemented yet for the Database backends:
+           every call currently fails with an `Unsupported_Database_Operation`
+           error, before any of the conditions below are checked.
+         - If there are no columns in the output table, a `No_Output_Columns` is
+           raised as an error regardless of the problem behavior, because it is
+           not possible to create a table without any columns.
+         - If a column in `columns` is not in the input table, a
+           `Missing_Input_Columns` is raised as an error.
+         - If no valid columns are selected, a `No_Input_Columns_Selected`, is
+           reported as a dataflow error regardless of setting.
+         - If floating point values are present in the key columns, a
+           `Floating_Point_Equality` is reported according to the `on_problems`
+           setting.
+    @columns Widget_Helpers.make_column_name_multi_selector
+    duplicates : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Case_Sensitivity -> Problem_Behavior -> DB_Table ! Unsupported_Database_Operation | No_Output_Columns | Missing_Input_Columns | No_Input_Columns_Selected | Floating_Point_Equality
+    duplicates self columns=self.column_names case_sensitivity:Case_Sensitivity=..Default on_problems:Problem_Behavior=..Report_Warning =
+        # The arguments are deliberately unused for now; they are bound here only
+        # so that they are referenced somewhere in the body.
+        _ = [columns, case_sensitivity, on_problems]
+        Error.throw (Unsupported_Database_Operation.Error "DB_Table.duplicates is not implemented yet for the Database backends.")
+
    ## GROUP Standard.Base.Calculations
       ICON join
       Joins two tables according to the specified join conditions.
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso
@ -336,7 +336,7 @@ rename_columns (naming_helper : Column_Naming_Helper) (internal_columns:Vector)
                ## Attempt to treat as Map
                map = Map.from_vector mapping error_on_duplicates=False
                if map.length == mapping.length then rename_columns naming_helper internal_columns map case_sensitivity error_on_missing_columns on_problems else
-                    duplicates = mapping.duplicates on=_.first . map p->p.first.to_text
+                    duplicates = mapping.duplicates on=_.first . map p->p.first.to_text . distinct
                    duplicate_text = if duplicates.length < 5 then duplicates.to_vector . join ", " else
                        duplicates.take 3 . to_vector . join ", " + (", ... " + (duplicates.length - 3).to_text + " others")
                    Error.throw (Illegal_Argument.Error "duplicate old name mappings ("+duplicate_text+").")
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Table.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Table.enso
@ -939,9 +939,7 @@ type Table
           raised as an error regardless of the problem behavior, because it is
           not possible to create a table without any columns.
         - If a column in `columns` is not in the input table, a
-           `Missing_Input_Columns` is raised as an error, unless
-           `error_on_missing_columns` is set to `False`, in which case the
-           problem is reported according to the `on_problems` setting.
+           `Missing_Input_Columns` is raised as an error.
         - If no valid columns are selected, a `No_Input_Columns_Selected`, is
           reported as a dataflow error regardless of setting.
         - If floating points values are present in the distinct columns, a
@ -949,8 +947,8 @@ type Table
           setting.
    @columns Widget_Helpers.make_column_name_multi_selector
    distinct : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns | No_Input_Columns_Selected | Floating_Point_Equality
-    distinct self (columns = self.column_names) case_sensitivity:Case_Sensitivity=Case_Sensitivity.Default error_on_missing_columns:Boolean=True on_problems:Problem_Behavior=..Report_Warning =
-        key_columns = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=True error_on_missing_columns=error_on_missing_columns on_problems=on_problems . catch No_Output_Columns _->
+    distinct self (columns = self.column_names) case_sensitivity:Case_Sensitivity=Case_Sensitivity.Default on_problems:Problem_Behavior=..Report_Warning =
+        key_columns = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=True error_on_missing_columns=True on_problems=on_problems . catch No_Output_Columns _->
            Error.throw No_Input_Columns_Selected
        java_columns = key_columns.map c->c.java_column
        text_folding_strategy = Case_Sensitivity.folding_strategy case_sensitivity
@ -959,6 +957,45 @@ type Table
                self.java_table.distinct java_columns text_folding_strategy java_aggregator
        Table.Value java_table

+    ## GROUP Standard.Base.Selections
+       ICON preparation
+       Returns the set of rows which are duplicated within the specified columns from the
+       input table.
+
+       When multiple rows have the same values within the specified columns all of those rows are
+       returned. Rows which are unique within the specified columns are removed.
+
+       Arguments:
+       - columns: The columns of the table to use for distinguishing the rows.
+       - case_sensitivity: Specifies if the text values should be compared case
+         sensitively.
+       - on_problems: Specifies how to handle if a problem occurs, raising as a
+         warning by default.
+
+       ! Error Conditions
+
+         - If a column in `columns` is not in the input table, a
+           `Missing_Input_Columns` is raised as an error.
+         - If no valid columns are selected, a `No_Input_Columns_Selected`, is
+           reported as a dataflow error regardless of setting. A
+           `No_Output_Columns` error coming from the column selection is
+           replaced by this error.
+         - If floating point values are present in the key columns, a
+           `Floating_Point_Equality` is reported according to the `on_problems`
+           setting.
+    @columns Widget_Helpers.make_column_name_multi_selector
+    duplicates : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Case_Sensitivity -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns | No_Input_Columns_Selected | Floating_Point_Equality
+    duplicates self (columns = self.column_names) case_sensitivity:Case_Sensitivity=..Default on_problems:Problem_Behavior=..Report_Warning =
+        # Column selection always uses the default case sensitivity; the
+        # `case_sensitivity` argument only affects how key values are compared.
+        key_columns = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=True error_on_missing_columns=True on_problems=on_problems . catch No_Output_Columns _->
+            Error.throw No_Input_Columns_Selected
+        java_columns = key_columns.map c->c.java_column
+        text_folding_strategy = Case_Sensitivity.folding_strategy case_sensitivity
+        # The row filtering itself is delegated to the Java table; problems it
+        # reports are attached according to `on_problems`.
+        java_table = Illegal_Argument.handle_java_exception <|
+            Java_Problems.with_problem_aggregator on_problems java_aggregator->
+                self.java_table.duplicates java_columns text_folding_strategy java_aggregator
+        Table.Value java_table
+
    ## GROUP Standard.Base.Conversions
       ICON convert
       Parses columns within a `Table` to a specific value type.
--- a/std-bits/table/src/main/java/org/enso/table/data/table/Table.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/Table.java
@ -239,6 +239,30 @@ public class Table {
    return new Table(newColumns);
  }

+  /**
+   * Creates a new table keeping only the rows whose key is shared with at least one other row.
+   *
+   * <p>The rows to keep are selected by {@code Distinct.buildDuplicatesRowsMask}; every column of
+   * this table is then filtered with the resulting mask.
+   *
+   * @param keyColumns set of columns whose values form the key of each row
+   * @param textFoldingStrategy a strategy for folding text columns
+   * @param problemAggregator an aggregator for problems
+   * @return a table containing only the rows that belong to a group of duplicates; rows whose key
+   *     is unique are removed
+   */
+  public Table duplicates(
+      Column[] keyColumns,
+      TextFoldingStrategy textFoldingStrategy,
+      ProblemAggregator problemAggregator) {
+    var rowsToKeep =
+        Distinct.buildDuplicatesRowsMask(
+            rowCount(), keyColumns, textFoldingStrategy, problemAggregator);
+    // Number of rows selected by the mask, i.e. the row count of the resulting columns.
+    int cardinality = rowsToKeep.cardinality();
+    Column[] newColumns = new Column[this.columns.length];
+    for (int i = 0; i < this.columns.length; i++) {
+      newColumns[i] = this.columns[i].applyFilter(rowsToKeep, cardinality);
+    }
+
+    return new Table(newColumns);
+  }
+
  /**
   * Selects a subset of columns of this table, by names.
   *
--- a/std-bits/table/src/main/java/org/enso/table/operations/Distinct.java
+++ b/std-bits/table/src/main/java/org/enso/table/operations/Distinct.java
@ -2,8 +2,10 @@ package org.enso.table.operations;

 import java.util.Arrays;
 import java.util.BitSet;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import org.enso.base.text.TextFoldingStrategy;
 import org.enso.table.data.column.storage.Storage;
 import org.enso.table.data.index.MultiValueKeyBase;
@ -15,6 +17,7 @@ import org.enso.table.util.ConstantList;
 import org.graalvm.polyglot.Context;

 public class Distinct {
+
  /** Creates a row mask containing only the first row from sets of rows grouped by key columns. */
  public static BitSet buildDistinctRowsMask(
      int tableSize,
@ -50,4 +53,42 @@ public class Distinct {

    return mask;
  }
+
+  /**
+   * Creates a row mask containing every row whose key (built from the key columns) is shared with
+   * at least one other row, including the first row of each such group.
+   *
+   * <p>If no key columns are given, all rows in the range {@code [0, tableSize)} are selected.
+   *
+   * @param tableSize number of rows in the table; only used when {@code keyColumns} is empty,
+   *     otherwise the row count is taken from the first key column
+   * @param keyColumns columns whose values form the key of each row
+   * @param textFoldingStrategy text folding strategy passed for each of the key columns
+   * @param problemAggregator aggregator receiving the problems reported while checking the keys
+   * @return a bit set in which the bit of every row belonging to a group of duplicates is set
+   */
+  public static BitSet buildDuplicatesRowsMask(
+      int tableSize,
+      Column[] keyColumns,
+      TextFoldingStrategy textFoldingStrategy,
+      ProblemAggregator problemAggregator) {
+    ColumnAggregatedProblemAggregator groupingProblemAggregator =
+        new ColumnAggregatedProblemAggregator(problemAggregator);
+    Context context = Context.getCurrent();
+    var mask = new BitSet();
+    if (keyColumns.length != 0) {
+      // Maps each key seen so far to the index of the first row that had it.
+      Map<MultiValueKeyBase, Integer> visitedRows = new HashMap<>();
+      int size = keyColumns[0].getSize();
+      Storage<?>[] storage =
+          Arrays.stream(keyColumns).map(Column::getStorage).toArray(Storage[]::new);
+      List<TextFoldingStrategy> strategies = ConstantList.make(textFoldingStrategy, storage.length);
+      for (int i = 0; i < size; i++) {
+        UnorderedMultiValueKey key = new UnorderedMultiValueKey(storage, i, strategies);
+        key.checkAndReportFloatingEquality(
+            groupingProblemAggregator, columnIx -> keyColumns[columnIx].getName());
+
+        var keyIndex = visitedRows.get(key);
+        if (keyIndex == null) {
+          // First row with this key: remember it, but do not select it yet.
+          visitedRows.put(key, i);
+        } else {
+          // Key seen before: select this row and the first row that had the same key.
+          mask.set(i);
+          mask.set(keyIndex);
+        }
+
+        // Poll the polyglot context for pending safepoint events between rows.
+        context.safepoint();
+      }
+    } else {
+      // If there are no key columns, every row of the table is selected.
+      mask.set(0, tableSize);
+    }
+
+    return mask;
+  }
 }
--- a/test/Base_Tests/src/Data/Vector_Spec.enso
+++ b/test/Base_Tests/src/Data/Vector_Spec.enso
@ -858,11 +858,11 @@ type_spec suite_builder name alter = suite_builder.group name group_builder->
        alter [1, 1.0, 2, 2.0] . distinct . should_equal [1, 2]
        alter [] . distinct . should_equal []

-    group_builder.specify "should return a vector containing only duplicate elements" <|
-        alter [1, 3, 1, 2, 2, 1] . duplicates . should_equal [1, 2]
-        alter ["a", "a", "a"] . duplicates . should_equal ["a"]
-        alter ['ś', 's', 's\u0301'] . duplicates . should_equal ['s\u0301']
-        alter [1, 1.0, 2, 2.0] . duplicates . should_equal [1.0, 2.0]
+    group_builder.specify "should return a vector containing duplicate elements" <|
+        alter [1, 3, 1, 2, 2, 1] . duplicates . should_equal [1, 1, 2, 2, 1]
+        alter ["a", "a", "a"] . duplicates . should_equal ["a", "a", "a"]
+        alter ['ś', 's', 's\u0301'] . duplicates . should_equal ['ś', 's\u0301']
+        alter [1, 1.0, 2, 2.0] . duplicates . should_equal [1, 1.0, 2, 2.0]
        alter [] . duplicates . should_equal []

    group_builder.specify "should be able to handle distinct on different primitive values" <|
--- a/test/Table_Tests/src/Common_Table_Operations/Distinct_Spec.enso
+++ b/test/Table_Tests/src/Common_Table_Operations/Distinct_Spec.enso
@ -128,21 +128,81 @@ add_specs suite_builder setup =
                t5 = t1.distinct [] on_problems=pb
                t5.should_fail_with No_Input_Columns_Selected

-                t6 = t1.distinct [] error_on_missing_columns=False on_problems=pb
-                t6.should_fail_with No_Input_Columns_Selected
+    db_todo = if setup.is_database.not then Nothing else "Table.duplicates is not implemented yet in Database."
+    suite_builder.group setup.prefix+"Table.duplicates" pending=db_todo group_builder->
+        data = Data.setup create_connection_fn

-            # When not erroring about missing columns, No_Input_Columns_Selected will still yield an error.
-            t7 = t1.distinct ["Y", "Z"] error_on_missing_columns=False on_problems=Problem_Behavior.Report_Warning
-            t7.should_fail_with No_Input_Columns_Selected
+        group_builder.teardown <|
+            data.teardown

-            action2 = t1.distinct ["X", "Y"] error_on_missing_columns=False on_problems=_
-            tester2 table =
-                table.at "X" . to_vector . should_equal [1, 2, 3]
-            problems2 = [Missing_Input_Columns.Error ["Y"]]
-            Problems.test_problem_handling action2 problems2 tester2
+        table_builder cols =
+            setup.table_builder cols connection=data.connection

-            action3 = t1.distinct [0, 42] error_on_missing_columns=False on_problems=_
-            tester3 table =
-                table.at "X" . to_vector . should_equal [1, 2, 3]
-            problems3 = [Missing_Input_Columns.Error [42]]
-            Problems.test_problem_handling action3 problems3 tester3
+        group_builder.specify "should group by all columns by default" <|
+            a = ["A", ["a", "a", "b", "b", "c"]]
+            b = ["B", [1, 1, 1, 2, 1]]
+            t = table_builder [a, b]
+            r = t.duplicates on_problems=Report_Error |> materialize |> _.order_by ["A", "B"]
+            r.at "A" . to_vector . should_equal ["a", "a"]
+            r.at "B" . to_vector . should_equal [1, 1]
+
+        # With the key ["A"] every row shares the same key, so all rows are
+        # returned; with the key ["A", "B"] only the row holding the unique pair
+        # ("a", 3) is dropped.
+        group_builder.specify "should allow to select duplicate rows based on a subset of columns, returning all rows from each duplicated group" <|
+            a = ["A", ["a", "a", "a", "a", "a", "a"]]
+            b = ["B", [1, 1, 2, 2, 1, 3]]
+            c = ["C", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]]
+            t = table_builder [a, b, c]
+
+            r1 = t.duplicates ["A"] on_problems=Report_Error |> materialize
+            r1.at "A" . to_vector . should_equal ["a", "a", "a", "a", "a", "a"]
+            r1.at "B" . to_vector . should_equal [1, 1, 2, 2, 1, 3]
+            r1.at "C" . to_vector . should_equal [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
+
+            r2 = t.duplicates ["A", "B"] on_problems=Report_Error |> materialize
+            r2.at "A" . to_vector . should_equal ["a", "a", "a", "a", "a"]
+            r2.at "B" . to_vector . should_equal [1, 1, 2, 2, 1]
+            r2.at "C" . to_vector . should_equal [0.1, 0.2, 0.3, 0.4, 0.5]
+
+        group_builder.specify "should allow to control case-sensitivity of keys" <|
+            x = ["X", ['A', 'a', 'enso', 'Enso', 'A']]
+            t1 = table_builder [x]
+            d1 = t1.duplicates ["X"] on_problems=Report_Error |> materialize |> _.order_by ["X"]
+            d1.at "X" . to_vector . should_equal ['A', 'A']
+
+            d2 = t1.duplicates ["X"] case_sensitivity=Case_Sensitivity.Insensitive on_problems=Report_Error |> materialize |> _.order_by ["X"]
+            d2.at "X" . to_vector . should_equal ['A', 'A', 'Enso', 'a', 'enso']
+
+        group_builder.specify "should report a warning if the key contains floating point values" <|
+            t1 = table_builder [["X", [3.0, 1.0, 2.0, 2.0, 1.0]]]
+            action1 = t1.duplicates on_problems=_
+            tester1 table =
+                v = table.at "X" . to_vector
+                v.length . should_equal 4
+                v.fold 0 (+) . should_equal 6.0
+            problems1 = [Floating_Point_Equality.Error "X"]
+            Problems.test_problem_handling action1 problems1 tester1
+
+        group_builder.specify "should handle nulls correctly" <|
+            a = ["A", ["a", Nothing, "b", Nothing]]
+            b = ["B", [1, 2, 3, 4]]
+            t = table_builder [a, b]
+            r = t.duplicates ["A"] on_problems=Report_Error |> materialize |> _.order_by "B"
+            r.at "A" . to_vector . should_equal [Nothing, Nothing]
+            r.at "B" . to_vector . should_equal [2, 4]
+
+        group_builder.specify "should report missing input columns" <|
+            t1 = table_builder [["X", [1, 2, 3, 2, 2]]]
+            [Problem_Behavior.Ignore, Problem_Behavior.Report_Warning, Problem_Behavior.Report_Error].each pb->
+                t2 = t1.duplicates ["Y", "Z"] on_problems=pb
+                t2.should_fail_with Missing_Input_Columns
+                t2.catch . should_equal (Missing_Input_Columns.Error ["Y", "Z"])
+
+                t3 = t1.duplicates ["X", "Y"] on_problems=pb
+                t3.should_fail_with Missing_Input_Columns
+                t3.catch . should_equal (Missing_Input_Columns.Error ["Y"])
+
+                t4 = t1.duplicates [0, 42] on_problems=pb
+                t4.should_fail_with Missing_Input_Columns
+                t4.catch . should_equal (Missing_Input_Columns.Error [42])
+
+                t5 = t1.duplicates [] on_problems=pb
+                t5.should_fail_with No_Input_Columns_Selected
--- a/test/Table_Tests/src/Common_Table_Operations/Select_Columns_Spec.enso
+++ b/test/Table_Tests/src/Common_Table_Operations/Select_Columns_Spec.enso
@ -584,10 +584,10 @@ add_specs suite_builder setup =
                result.catch Any . message . should_equal message

            test_duplicate_names [["Alpha", "1"], ["Alpha", "2"]] "duplicate old name mappings (Alpha)."
-            test_duplicate_names [["Alpha", "1"], ["Beta", "2"], ["Gamma", "3"], ["Beta", "4"], ["Alpha", "5"]] "duplicate old name mappings (Beta, Alpha)."
+            test_duplicate_names [["Alpha", "1"], ["Beta", "2"], ["Gamma", "3"], ["Beta", "4"], ["Alpha", "5"]] "duplicate old name mappings (Alpha, Beta)."
            test_duplicate_names [["Alpha", "1"], ["Alpha", "2"], ["Alpha", "3"]] "duplicate old name mappings (Alpha)."
-            test_duplicate_names [["Alpha", "1"], ["Beta", "2"], ["Gamma", "3"], ["Beta", "4"], ["Alpha", "5"], ["Gamma","6"], ["Delta","7"], ["Delta","8"]] "duplicate old name mappings (Beta, Alpha, Gamma, Delta)."
-            test_duplicate_names [["Alpha", "1"], ["Beta", "2"], ["Gamma", "3"], ["Beta", "4"], ["Alpha", "5"], ["Gamma","6"], ["Delta","7"], ["Delta","8"], ["Echo","9"], ["Echo","10"]] "duplicate old name mappings (Beta, Alpha, Gamma, ... 2 others)."
+            test_duplicate_names [["Alpha", "1"], ["Beta", "2"], ["Gamma", "3"], ["Beta", "4"], ["Alpha", "5"], ["Gamma","6"], ["Delta","7"], ["Delta","8"]] "duplicate old name mappings (Alpha, Beta, Gamma, Delta)."
+            test_duplicate_names [["Alpha", "1"], ["Beta", "2"], ["Gamma", "3"], ["Beta", "4"], ["Alpha", "5"], ["Gamma","6"], ["Delta","7"], ["Delta","8"], ["Echo","9"], ["Echo","10"]] "duplicate old name mappings (Alpha, Beta, Gamma, ... 2 others)."

        group_builder.specify "should correctly handle problems: unmatched names" <|
            weird_name = '.*?-!@#!"'