Allow the creation of a constant column on an in-memory table with no rows. (#8218)

This commit is contained in:
GregoryTravis 2023-11-09 09:40:51 -05:00 committed by GitHub
parent ce1ef7df03
commit ea3d778456
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 93 additions and 47 deletions

View File

@ -855,15 +855,12 @@ type Table
_ : Date_Range -> Error.throw (Unsupported_Database_Operation.Error "Cannot use `Date_Range` for `set` in the database.")
_ -> Error.throw (Illegal_Argument.Error "Unsupported type for `Table.set`.")
name_is_generated column = case column of
_ : Column -> False
_ -> True
## If `new_name` was specified, use that. Otherwise, if `column` is a
`Column`, use its name. In these two cases, do not make it unique.
Otherwise, make it unique.
Otherwise, make it unique. If set_mode is Update or Add_Or_Update,
however, do not make it unique.
new_column_name = if new_name != "" then new_name else
if name_is_generated column then unique.make_unique resolved.name else resolved.name
if column.is_a Column || set_mode==Set_Mode.Update || set_mode==Set_Mode.Add_Or_Update then resolved.name else unique.make_unique resolved.name
renamed = resolved.rename new_column_name
renamed.if_not_error <| self.column_naming_helper.check_ambiguity self.column_names renamed.name <|
index = self.internal_columns.index_of (c -> c.name == renamed.name)

View File

@ -103,17 +103,17 @@ type Column
## PRIVATE
ADVANCED
Creates a new column given a name and a vector of elements repeated over and over.
Creates a new column given a name and an element repeated over and over.
Arguments:
- name: The name of the column to create.
- items: The elements to contain in the column.
- repeats: The number of times to repeat the vector.
from_vector_repeated : Text -> Vector -> Integer -> Column
from_vector_repeated name items repeats =
- item: The element to repeat in the column.
- repeats: The number of times to repeat the element.
from_repeated_item : Text -> Any -> Integer -> Column
from_repeated_item name item repeats =
Invalid_Column_Names.handle_java_exception <| Illegal_Argument.handle_java_exception <|
java_column = Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator->
Java_Column.fromRepeatedItems name items repeats java_problem_aggregator
Java_Column.fromRepeatedItem name item repeats java_problem_aggregator
Column.Value java_column
## PRIVATE

View File

@ -1582,15 +1582,12 @@ type Table
_ : Column_Operation -> column.evaluate self (set_mode==Set_Mode.Update && new_name=="") on_problems
_ -> Error.throw (Illegal_Argument.Error "Unsupported type for `Table.set`.")
name_is_generated column = case column of
_ : Column -> False
_ -> True
## If `new_name` was specified, use that. Otherwise, if `column` is a
`Column`, use its name. In these two cases, do not make it unique.
Otherwise, make it unique.
Otherwise, make it unique. If set_mode is Update or Add_Or_Update,
however, do not make it unique.
new_column_name = if new_name != "" then new_name else
if name_is_generated column then unique.make_unique resolved.name else resolved.name
if column.is_a Column || set_mode==Set_Mode.Update || set_mode==Set_Mode.Add_Or_Update then resolved.name else unique.make_unique resolved.name
renamed = resolved.rename new_column_name
renamed.if_not_error <| self.column_naming_helper.check_ambiguity self.column_names renamed.name <|
check_add_mode = case set_mode of
@ -1644,7 +1641,7 @@ type Table
make_constant_column : Any -> Column
make_constant_column self value =
if Table_Helpers.is_column value then Error.throw (Illegal_Argument.Error "A constant value may only be created from a scalar, not a Column") else
Column.from_vector_repeated value.pretty [value] self.row_count
Column.from_repeated_item value.pretty value self.row_count
## PRIVATE
Create a unique temporary column name.

View File

@ -3,6 +3,7 @@ package org.enso.table.data.table;
import org.enso.base.polyglot.Polyglot_Utils;
import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.builder.InferredBuilder;
import org.enso.table.data.column.builder.MixedBuilder;
import org.enso.table.data.column.storage.BoolStorage;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.type.StorageType;
@ -161,37 +162,31 @@ public class Column {
}
/**
* Creates a new column with given name and elements.
* Creates a new column with given name and an element to repeat.
*
* @param name the name to use
* @param items the items contained in the column
* @param item the item repeated in the column
* @param repeat the number of times to repeat the item; must be non-negative
* @return a column with the given name containing the repeated item
*/
public static Column fromRepeatedItems(String name, List<Value> items, int repeat, ProblemAggregator problemAggregator) {
if (repeat < 1) {
throw new IllegalArgumentException("Repeat count must be positive.");
public static Column fromRepeatedItem(String name, Value item, int repeat, ProblemAggregator problemAggregator) {
if (repeat < 0) {
throw new IllegalArgumentException("Repeat count must be non-negative.");
}
if (repeat == 1) {
return fromItems(name, items, null, problemAggregator);
Object converted = Polyglot_Utils.convertPolyglotValue(item);
Builder builder;
if (converted == null) {
builder = new MixedBuilder(repeat);
} else {
StorageType storageType = StorageType.forBoxedItem(converted);
builder = Builder.getForType(storageType, repeat, problemAggregator);
}
Context context = Context.getCurrent();
var totalSize = items.size() * repeat;
var values = new ArrayList<Object>(items.size());
// ToDo: This a workaround for an issue with polyglot layer. #5590 is related.
// to revert replace with: for (Value item : items) {
for (Object item : items) {
Object converted = item instanceof Value v ? Polyglot_Utils.convertPolyglotValue(v) : item;
values.add(converted);
context.safepoint();
}
var builder = new InferredBuilder(totalSize, problemAggregator);
for (int i = 0; i < totalSize; i++) {
var item = values.get(i % items.size());
builder.appendNoGrow(item);
for (int i = 0; i < repeat; i++) {
builder.appendNoGrow(converted);
context.safepoint();
}

View File

@ -1264,6 +1264,39 @@ spec setup =
t = table_builder [["x", ["1", "2", "3"]]]
t.at "x" . const (t.at "x") . should_fail_with Illegal_Argument
Test.group prefix+"Table.make_constant_column" <|
Test.specify "Should allow the creation of constant columns" <|
t = table_builder [["x", ["1", "2", "3"]]]
t.make_constant_column True . to_vector . should_equal [True, True, True]
t.make_constant_column 12 . to_vector . should_equal [12, 12, 12]
t.make_constant_column 12.3 . to_vector . should_equal [12.3, 12.3, 12.3]
t.make_constant_column "asdf" . to_vector . should_equal ["asdf", "asdf", "asdf"]
Test.specify "Should allow the creation of constant columns on a table with no rows" <|
t = table_builder [["x", ["1", "2", "3"]]]
empty = t.take 0
constant = empty.make_constant_column 42
empty2 = empty.set constant
empty2.column_names.should_equal ['x', '42']
Test.specify "Should create a column of the correct type on a table with no rows" <|
t = table_builder [["x", ["1", "2", "3"]]]
empty = t.take 0
[[False, .is_boolean], [42, .is_integer], ["42", .is_text], ["foo", .is_text], [1.1, .is_floating_point]].map pair->
value = pair.at 0
pred = pair.at 1
c = empty.make_constant_column value
pred c.value_type . should_be_true
pred ((empty.set c).at c.name . value_type) . should_be_true
nulls_db_pending = if setup.is_database then "Empty NULL columns are unsupported in the database backends"
Test.specify "Should create a column of the correct type on a table with no rows" pending=nulls_db_pending <|
t = table_builder [["x", ["1", "2", "3"]]]
empty = t.take 0
c = empty.make_constant_column Nothing
c.value_type . should_equal Value_Type.Mixed
(empty.set c).at c.name . value_type . should_equal Value_Type.Mixed
# A dummy value used to force the in-memory backend to infer a mixed type for the given column.
type Mixed_Type_Object
Value

View File

@ -198,11 +198,19 @@ spec setup =
t.set (Column_Operation.Not (Column_Ref.Name "zzz")) . should_fail_with No_Such_Column
t.set (Column_Operation.Not (Column_Ref.Index 42)) . should_fail_with Index_Out_Of_Bounds
Test.group "Unique derived column names" <|
Test.specify "Should disambiguate two derived columns that would otherwise have had the same name" <|
Test.group prefix+"Unique derived column names" <|
Test.specify "Should not disambiguate two derived columns that would otherwise have had the same name, with Set_Mode.Add_Or_Update" <|
t = table_builder [["X", [1, 2, 3]]]
column_op = Column_Operation.Power 2 (Column_Ref.Name "X")
t2 = t.set column_op . set column_op
t2.column_names . should_equal ["X", "[2] ^ [X]"]
t2.at "X" . to_vector . should_equal [1, 2, 3]
t2.at "[2] ^ [X]" . to_vector . should_equal [2, 4, 8]
Test.specify "Should disambiguate two derived columns that would otherwise have had the same name, with Set_Mode.Add" <|
t = table_builder [["X", [1, 2, 3]]]
column_op = Column_Operation.Power 2 (Column_Ref.Name "X")
t2 = t.set column_op set_mode=Set_Mode.Add . set column_op set_mode=Set_Mode.Add
t2.column_names . should_equal ["X", "[2] ^ [X]", "[2] ^ [X] 1"]
t2.at "X" . to_vector . should_equal [1, 2, 3]
t2.at "[2] ^ [X]" . to_vector . should_equal [2, 4, 8]
@ -229,6 +237,26 @@ spec setup =
t2 = t.set (Column_Operation.Add "prefix" (Column_Ref.Name "X"))
t3 = t2.set (Column_Operation.Add "prefix" "X")
t3.column_names . should_equal ["X", "['prefix'] + [X]", "['prefix'] + 'X'"]
t3.column_names . should_equal ['X', "['prefix'] + [X]", "['prefix'] + 'X'"]
t3.at "['prefix'] + [X]" . to_vector . should_equal ["prefixa", "prefixb", "prefixc"]
t3.at "['prefix'] + 'X'" . to_vector . should_equal ["prefixX", "prefixX", "prefixX"]
Test.specify "Should not disambiguate if set_mode is Update" <|
t = table_builder [["X", [1, 2, 3]]]
t2 = t.set (Column_Operation.Add (Column_Ref.Name "X") 1) set_mode=Set_Mode.Update
t2.column_names . should_equal ["X"]
t2.at "X" . to_vector . should_equal [2, 3, 4]
Test.specify "Should not disambiguate if set_mode is Add_Or_Update" <|
t = table_builder [["X", [1, 2, 3]], ["[X] + 1", [10, 20, 30]]]
# set_mode=Set_Mode.Add_Or_Update is the default
t2 = t.set (Column_Operation.Add (Column_Ref.Name "X") 1)
t2.column_names . should_equal ["X", "[X] + 1"]
t2.at "X" . to_vector . should_equal [1, 2, 3]
t2.at "[X] + 1" . to_vector . should_equal [2, 3, 4]
Test.specify "Should not disambiguate if the new name is explicitly set" <|
t = table_builder [["X", [1, 2, 3]]]
t2 = t.set (Column_Operation.Add (Column_Ref.Name "X") 1) new_name="X"
t2.column_names . should_equal ["X"]
t2.at "X" . to_vector . should_equal [2, 3, 4]

View File

@ -78,10 +78,6 @@ spec =
Problems.assume_no_problems c6
c6.to_vector.map .to_text . should_equal ([x+1, 1.0, x+2, "a", x+3].map .to_text)
# It should also work for from_vector_repeated:
c7 = Column.from_vector_repeated "X" [1.5, x+2, 100] 10
Problems.expect_only_warning Loss_Of_Integer_Precision c7
Test.specify "should not be reported when numeric operation mixes Float and Integer" <|
t = table_builder [["A", [y]], ["B", [1.0]]]
a = t.at "A"