Constant columns (in expressions and Column_Operations) should have clearer names (#8188)

Previously, constant columns were given generated names with UUIDs in them, which are long and provide no information. Instead, we now use the constant value itself to form the name.

Since these new generated names are less unique, we must explicitly make them unique, in cases where the caller did not explicilty set a name.
This commit is contained in:
GregoryTravis 2023-11-01 10:41:03 -04:00 committed by GitHub
parent 8bc17bd370
commit d467683ed1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 76 additions and 9 deletions

View File

@ -835,6 +835,10 @@ type Table
@new_name Widget_Helpers.make_column_name_selector
set : Column | Text | Array | Vector | Range | Date_Range | Constant_Column | Column_Operation -> Text -> Set_Mode -> Problem_Behavior -> Table ! Existing_Column | Missing_Column | No_Such_Column | Expression_Error
set self column new_name="" set_mode=Set_Mode.Add_Or_Update on_problems=Report_Warning =
problem_builder = Problem_Builder.new
unique = self.column_naming_helper.create_unique_name_strategy
unique.mark_used self.column_names
resolved = case column of
_ : Text -> self.evaluate_expression column on_problems
_ : Column ->
@ -848,19 +852,31 @@ type Table
_ : Date_Range -> Error.throw (Unsupported_Database_Operation.Error "Cannot use `Date_Range` for `set` in the database.")
_ -> Error.throw (Illegal_Argument.Error "Unsupported type for `Table.set`.")
renamed = if new_name == "" then resolved else resolved.rename new_name
renamed.if_not_error <| self.column_naming_helper.check_ambiguity self.column_names new_name <|
name_is_generated column = case column of
_ : Column -> False
_ -> True
## If `new_name` was specified, use that. Otherwise, if `column` is a
`Column`, use its name. In these two cases, do not make it unique.
Otherwise, make it unique.
new_column_name = if new_name != "" then new_name else
if name_is_generated column then unique.make_unique resolved.name else resolved.name
renamed = resolved.rename new_column_name
renamed.if_not_error <| self.column_naming_helper.check_ambiguity self.column_names renamed.name <|
index = self.internal_columns.index_of (c -> c.name == renamed.name)
check_add = case set_mode of
Set_Mode.Add_Or_Update -> True
Set_Mode.Add -> if index.is_nothing then True else Error.throw (Existing_Column.Error renamed.name)
Set_Mode.Update -> if index.is_nothing then Error.throw (Missing_Column.Error renamed.name) else True
check_add.if_not_error <|
new_table = check_add.if_not_error <|
new_col = renamed.as_internal
new_cols = if index.is_nothing then self.internal_columns + [new_col] else
Vector.new self.column_count i-> if i == index then new_col else self.internal_columns.at i
self.updated_columns new_cols
problem_builder.report_unique_name_strategy unique
problem_builder.attach_problems_after on_problems new_table
## Given an expression, create a derived column where each value is the
result of evaluating the expression for the row.
@ -905,7 +921,7 @@ type Table
_ -> type_mapping.value_type_to_sql argument_value_type Problem_Behavior.Ignore
expr = SQL_Expression.Constant value
new_type_ref = SQL_Type_Reference.from_constant sql_type
Column.Value ("Constant_" + UUID.randomUUID.to_text) self.connection new_type_ref expr self.context
Column.Value value.pretty self.connection new_type_ref expr self.context
## PRIVATE
Create a unique temporary column name.

View File

@ -1475,6 +1475,10 @@ type Table
@column Column_Operation.default_widget
set : Text | Column -> Text -> Set_Mode -> Problem_Behavior -> Table ! Existing_Column | Missing_Column | No_Such_Column | Expression_Error
set self column:(Text | Column | Constant_Column | Column_Operation) new_name="" set_mode=Set_Mode.Add_Or_Update on_problems=Report_Warning =
problem_builder = Problem_Builder.new
unique = self.column_naming_helper.create_unique_name_strategy
unique.mark_used self.column_names
resolved = case column of
_ : Text -> self.evaluate_expression column on_problems
_ : Column -> column
@ -1482,8 +1486,17 @@ type Table
_ : Column_Operation -> column.evaluate self (set_mode==Set_Mode.Update && new_name=="") on_problems
_ -> Error.throw (Illegal_Argument.Error "Unsupported type for `Table.set`.")
renamed = if new_name == "" then resolved else resolved.rename new_name
renamed.if_not_error <| self.column_naming_helper.check_ambiguity self.column_names new_name <|
name_is_generated column = case column of
_ : Column -> False
_ -> True
## If `new_name` was specified, use that. Otherwise, if `column` is a
`Column`, use its name. In these two cases, do not make it unique.
Otherwise, make it unique.
new_column_name = if new_name != "" then new_name else
if name_is_generated column then unique.make_unique resolved.name else resolved.name
renamed = resolved.rename new_column_name
renamed.if_not_error <| self.column_naming_helper.check_ambiguity self.column_names renamed.name <|
check_add_mode = case set_mode of
Set_Mode.Add_Or_Update -> True
Set_Mode.Add -> if self.java_table.getColumnByName renamed.name . is_nothing then True else
@ -1491,10 +1504,13 @@ type Table
Set_Mode.Update -> if self.java_table.getColumnByName renamed.name . is_nothing . not then True else
Error.throw (Missing_Column.Error renamed.name)
check_add_mode.if_not_error <|
new_table = check_add_mode.if_not_error <|
if resolved.length != self.row_count then Error.throw (Row_Count_Mismatch.Error self.row_count resolved.length) else
Table.Value (self.java_table.addOrReplaceColumn renamed.java_column)
problem_builder.report_unique_name_strategy unique
problem_builder.attach_problems_after on_problems new_table
## Given an expression, create a derived column where each value is the
result of evaluating the expression for the row.
@ -1532,7 +1548,7 @@ type Table
make_constant_column : Any -> Column
make_constant_column self value =
if Table_Helpers.is_column value then Error.throw (Illegal_Argument.Error "A constant value may only be created from a scalar, not a Column") else
Column.from_vector_repeated ("Constant_" + UUID.randomUUID.to_text) [value] self.row_count
Column.from_vector_repeated value.pretty [value] self.row_count
## PRIVATE
Create a unique temporary column name.

View File

@ -1237,7 +1237,7 @@ spec setup =
Test.specify "Should create the correct column name" <|
t = table_builder [["x", ["1", "2", "3"]]]
t.at "x" . const 12 . name . take 9 . should_equal "Constant_"
t.at "x" . const 12 . name . should_equal "12"
Test.specify "Should not allow the creation of a constant column of columns" <|
t = table_builder [["x", ["1", "2", "3"]]]

View File

@ -197,3 +197,38 @@ spec setup =
t.set (Column_Operation.Add (Column_Ref.Name "X") (Column_Ref.Name "Z")) . should_fail_with No_Such_Column
t.set (Column_Operation.Not (Column_Ref.Name "zzz")) . should_fail_with No_Such_Column
t.set (Column_Operation.Not (Column_Ref.Index 42)) . should_fail_with Index_Out_Of_Bounds
Test.group "Unique derived column names" <|
Test.specify "Should disambiguate two derived columns that would otherwise have had the same name" <|
t = table_builder [["X", [1, 2, 3]]]
column_op = Column_Operation.Power 2 (Column_Ref.Name "X")
t2 = t.set column_op . set column_op
t2.column_names . should_equal ["X", "[2] ^ [X]", "[2] ^ [X] 1"]
t2.at "X" . to_vector . should_equal [1, 2, 3]
t2.at "[2] ^ [X]" . to_vector . should_equal [2, 4, 8]
t2.at "[2] ^ [X] 1" . to_vector . should_equal [2, 4, 8]
Test.specify "Should disambiguate two derived columns that would otherwise have had the same name, within the same expression" <|
t = table_builder [["X", [1, 2, 3]]]
expression = "2 + (2 * 2) + (2 ^ [X])"
t2 = t.set expression
t2.column_names . should_equal ["X", expression]
t2.at "X" . to_vector . should_equal [1, 2, 3]
t2.at expression . to_vector . should_equal [8, 10, 14]
Test.specify "Should use .pretty to distinguish string constants from regular column names" <|
t = table_builder [["X", ["a", "b", "c"]]]
expression = '"foo" + [X] + "bar"'
t2 = t.set expression
t2.column_names . should_equal ["X", expression]
t2.at "X" . to_vector . should_equal ["a", "b", "c"]
t2.at expression . to_vector . should_equal ["fooabar", "foobbar", "foocbar"]
Test.specify "Should disambiguate between a column reference and a literal string" <|
t = table_builder [["X", ["a", "b", "c"]]]
t2 = t.set (Column_Operation.Add "prefix" (Column_Ref.Name "X"))
t3 = t2.set (Column_Operation.Add "prefix" "X")
t3.column_names . should_equal ["X", "['prefix'] + [X]", "['prefix'] + 'X'"]
t3.at "['prefix'] + [X]" . to_vector . should_equal ["prefixa", "prefixb", "prefixc"]
t3.at "['prefix'] + 'X'" . to_vector . should_equal ["prefixX", "prefixX", "prefixX"]