Fix cross_tab column naming edge cases, add fill_empty (#5863)

Closes #5151 and adds some additional tests for `cross_tab` that verify duplicated and invalid names.

I decided that for empty or `Nothing` names, instead of replacing them with `Column` and implicitly losing the connection to the value that was in the column, we should just raise an error for such values.

To make handling of these easier, `fill_empty` was added, which makes it easy to replace empty values with something else.

Also, `{is,fill}_missing` were renamed to `{is,fill}_nothing` to align with `Filter_Condition.Is_Nothing`.
This commit is contained in:
Radosław Waśko 2023-03-11 12:58:54 +01:00 committed by GitHub
parent 263c3ad651
commit 952beba8d1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
25 changed files with 209 additions and 100 deletions

View File

@ -336,6 +336,8 @@
- [Remove many regex compile flags; separated `match` into `match` and
`match_all`.][5785]
- [Aligned names of columns created by column operations.][5850]
- [Improved `cross_tab`. Renamed `fill_missing` and `is_missing` to
`fill_nothing` and `is_nothing`. Added `fill_empty`.][5863]
[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -517,6 +519,7 @@
[5785]: https://github.com/enso-org/enso/pull/5785
[5802]: https://github.com/enso-org/enso/pull/5802
[5850]: https://github.com/enso-org/enso/pull/5850
[5863]: https://github.com/enso-org/enso/pull/5863
#### Enso Compiler

View File

@ -633,8 +633,8 @@ type Column
Returns a column of booleans, with `True` items at the positions where
this column contains a `Nothing`.
is_missing : Column
is_missing self =
is_nothing : Column
is_nothing self =
new_name = self.naming_helpers.to_expression_text self + " is null"
self.make_unary_op "IS_NULL" new_name new_type=SQL_Type.boolean
@ -666,7 +666,7 @@ type Column
is_present : Column
is_present self =
new_name = self.naming_helpers.function_name "is_present" [self]
self.is_missing.not . rename new_name
self.is_nothing.not . rename new_name
## PRIVATE
Returns a column of booleans with `True` at the positions where this
@ -683,7 +683,7 @@ type Column
new_name = self.naming_helpers.function_name "is_blank" [self]
is_blank = case self.sql_type.is_definitely_text of
True -> self.is_empty
False -> self.is_missing
False -> self.is_nothing
result = case treat_nans_as_blank && self.sql_type.is_definitely_double of
True -> is_blank || self.is_nan
False -> is_blank
@ -693,11 +693,26 @@ type Column
Returns a new column where missing values have been replaced with the
provided default.
fill_missing : Any -> Column
fill_missing self default =
new_name = self.naming_helpers.function_name "fill_missing" [self, default]
fill_nothing : Any -> Column
fill_nothing self default =
new_name = self.naming_helpers.function_name "fill_nothing" [self, default]
self.make_binary_op "FILL_NULL" default new_name
## ALIAS Fill Empty
Returns a new column where empty Text values have been replaced with the
provided default.
Arguments:
- default: The value to replace empty values with. If this argument
is a column, the value from `default` at the corresponding position
will be used.
fill_empty : Column | Any -> Column
fill_empty self default =
new_name = self.naming_helpers.function_name "fill_empty" [self, default]
result = self.is_empty.iif default self
result.rename new_name
## Returns a new column, containing the same elements as `self`, but with
the given name.
@ -875,7 +890,7 @@ type Column
expected.
is_in_not_null = self.make_op "IS_IN" operands=non_nulls new_name=new_name new_type=SQL_Type.boolean
result = case nulls.not_empty of
True -> is_in_not_null || self.is_missing
True -> is_in_not_null || self.is_nothing
False -> is_in_not_null
result.rename new_name
_ : Array -> self.is_in (Vector.from_polyglot_array vector)
@ -889,7 +904,7 @@ type Column
our columns too. That is because, we want the containment check
for `NULL` to work the same way as for any other value.
in_subquery = Query.Select [Pair.new column.name column.expression] column.context
has_nulls_expression = SQL_Expression.Operation "BOOL_OR" [column.is_missing.expression]
has_nulls_expression = SQL_Expression.Operation "BOOL_OR" [column.is_nothing.expression]
has_nulls_subquery = Query.Select [Pair.new "has_nulls" has_nulls_expression] column.context
new_expr = SQL_Expression.Operation "IS_IN_COLUMN" [self.expression, in_subquery, has_nulls_subquery]
Column.Value new_name self.connection SQL_Type.boolean new_expr self.context

View File

@ -36,4 +36,4 @@
import Standard.Examples
example_fill_missing = Examples.decimal_column.fill_missing 20.5
example_fill_missing = Examples.decimal_column.fill_nothing 20.5

View File

@ -693,11 +693,11 @@ type Column
import Standard.Examples
example_is_missing = Examples.decimal_column.is_missing
is_missing : Column
is_missing self =
example_is_missing = Examples.decimal_column.is_nothing
is_nothing : Column
is_nothing self =
new_name = Naming_Helpers.to_expression_text self + " is null"
run_vectorized_unary_op self "is_missing" (== Nothing) new_name on_missing=True
run_vectorized_unary_op self "is_nothing" (== Nothing) new_name on_missing=True
## UNSTABLE
Returns a column of booleans, with `True` items at the positions where
@ -730,7 +730,7 @@ type Column
is_present : Column
is_present self =
new_name = Naming_Helpers.function_name "is_present" [self]
self.is_missing.not.rename new_name
self.is_nothing.not.rename new_name
## PRIVATE
Returns a column of booleans with `True` at the positions where this
@ -747,9 +747,9 @@ type Column
new_name = Naming_Helpers.function_name "is_blank" [self]
result = case self.storage_type of
Storage.Text -> self.is_empty
Storage.Decimal -> if treat_nans_as_blank then self.is_missing || self.is_nan else self.is_missing
Storage.Decimal -> if treat_nans_as_blank then self.is_nothing || self.is_nan else self.is_nothing
Storage.Any -> if treat_nans_as_blank then self.is_empty || self.is_nan else self.is_empty
_ -> self.is_missing
_ -> self.is_nothing
result.rename new_name
## ALIAS Fill Missing
@ -767,10 +767,10 @@ type Column
import Standard.Examples
example_fill_missing = Examples.decimal_column.fill_missing 20.5
fill_missing : Column | Any -> Column
fill_missing self default =
new_name = Naming_Helpers.function_name "fill_missing" [self, default]
example_fill_missing = Examples.decimal_column.fill_nothing 20.5
fill_nothing : Column | Any -> Column
fill_nothing self default =
new_name = Naming_Helpers.function_name "fill_nothing" [self, default]
storage = self.java_column.getStorage
new_st = case default of
Column.Value java_col ->
@ -781,6 +781,21 @@ type Column
col = Java_Column.new new_name new_st
Column.Value col
## ALIAS Fill Empty
Returns a new column where empty Text values have been replaced with the
provided default.
Arguments:
- default: The value to replace empty values with. If this argument
is a column, the value from `default` at the corresponding position
will be used.
fill_empty : Column | Any -> Column
fill_empty self default =
new_name = Naming_Helpers.function_name "fill_empty" [self, default]
result = self.is_empty.iif default self
result.rename new_name
## Checks for each element of the column if it starts with `other`.
Arguments:

View File

@ -27,7 +27,9 @@ type Row
get : (Integer | Text) -> Any -> Any
get self column ~if_missing=Nothing =
table_column = self.table.get column
if table_column.is_nothing then if_missing else table_column.at self.index
case table_column of
Nothing -> if_missing
_ -> table_column.at self.index
## Gets the row as a Vector.
to_vector : Vector

View File

@ -424,7 +424,7 @@ type Row_Count_Mismatch
"The number of rows in the left table ("+self.left_rows.to_text+") does not match the number of rows in the right table ("+self.right_rows.to_text+")."
type Invalid_Aggregate_Column
## Indicates that a provided name is not found within available column not
## Indicates that a provided name is not found within available columns nor
represents a valid expression.
Error (name : Text) (expression_error : Expression_Error | No_Such_Column | Nothing)

View File

@ -26,8 +26,8 @@ make_filter_column source_column filter_condition on_problems = case filter_cond
on_problems.escalate_warnings <|
source_column != value
# Nothing
Is_Nothing -> source_column.is_missing
Not_Nothing -> source_column.is_missing.not
Is_Nothing -> source_column.is_nothing
Not_Nothing -> source_column.is_nothing.not
# Boolean
Is_True ->
Value_Type.expect_boolean source_column.value_type <| source_column

View File

@ -36,6 +36,9 @@ type Join_Condition_Resolver
resolve_left = resolve_selector self.left_at
resolve_right = resolve_selector self.right_at
is_nothing column = case column of
Nothing -> True
_ -> False
conditions_vector = case conditions of
_ : Vector -> conditions
single_condition : Join_Condition -> [single_condition]
@ -43,7 +46,7 @@ type Join_Condition_Resolver
handle_equals left_selector right_selector =
left = resolve_left left_selector
right = resolve_right right_selector
if left.is_nothing || right.is_nothing then Nothing else
if is_nothing left || is_nothing right then Nothing else
if left.name == right.name then
redundant_names.append right.name
self.make_equals problem_builder left right
@ -54,7 +57,7 @@ type Join_Condition_Resolver
Join_Condition.Equals_Ignore_Case left_selector right_selector locale ->
left = resolve_left left_selector
right = resolve_right right_selector
if left.is_nothing || right.is_nothing then Nothing else
if is_nothing left || is_nothing right then Nothing else
Value_Type.expect_text left.value_type <|
Value_Type.expect_text right.value_type <|
self.make_equals_ignore_case problem_builder left right locale
@ -62,7 +65,7 @@ type Join_Condition_Resolver
left = resolve_left left_selector
right_lower = resolve_right right_lower_selector
right_upper = resolve_right right_upper_selector
if left.is_nothing || right_lower.is_nothing || right_upper.is_nothing then Nothing else
if is_nothing left || is_nothing right_lower || is_nothing right_upper then Nothing else
self.make_between problem_builder left right_lower right_upper
problem_builder.attach_problems_before on_problems <|
if converted.contains Nothing then Panic.throw (Illegal_State.Error "Impossible: unresolved columns remaining in the join resolution. This should have raised a dataflow error. This is a bug in the Table library.") else

View File

@ -393,12 +393,17 @@ unify_result_type_for_union column_set all_tables allow_type_widening problem_bu
problem_builder.report_other_warning (No_Common_Type.Error column_set.name)
common_type
False ->
first_column = columns.find (c-> c.is_nothing.not)
is_not_nothing c = case c of
Nothing -> False
_ -> True
first_column = columns.find is_not_nothing
first_type = first_column.value_type
if first_type == Value_Type.Mixed then Value_Type.Mixed else
first_wrong_column = columns.find if_missing=Nothing col->
col.is_nothing.not && col.value_type != first_type
if first_wrong_column.is_nothing then first_type else
is_not_nothing col && col.value_type != first_type
case first_wrong_column of
Nothing -> first_type
_ ->
got_type = first_wrong_column.value_type
problem_builder.report_other_warning (Column_Type_Mismatch.Error column_set.name first_type got_type)
Nothing

View File

@ -103,7 +103,10 @@ type No_Fallback_Column
Table.point_data : Table -> Vector
Table.point_data self =
get_point_data field = field.lookup_in self . rename field.name . catch Any (_->Nothing)
columns = Point_Data.all_fields.map get_point_data . filter (x -> x.is_nothing.not)
is_not_nothing x = case x of
Nothing -> False
_ -> True
columns = Point_Data.all_fields.map get_point_data . filter is_not_nothing
(0.up_to self.row_count).to_vector.map <| row_n->
pairs = columns.map column->
value = column.at row_n . catch_ Nothing

View File

@ -336,7 +336,7 @@ public final class BoolStorage extends Storage<Boolean> {
}
})
.add(
new UnaryMapOperation<>(Maps.IS_MISSING) {
new UnaryMapOperation<>(Maps.IS_NOTHING) {
@Override
public BoolStorage run(BoolStorage storage) {
return new BoolStorage(storage.isMissing, new BitSet(), storage.size, false);

View File

@ -301,7 +301,7 @@ public final class DoubleStorage extends NumericStorage<Double> {
}
})
.add(
new UnaryMapOperation<>(Maps.IS_MISSING) {
new UnaryMapOperation<>(Maps.IS_NOTHING) {
@Override
public BoolStorage run(DoubleStorage storage) {
return new BoolStorage(storage.isMissing, new BitSet(), storage.size, false);

View File

@ -398,7 +398,7 @@ public final class LongStorage extends NumericStorage<Long> {
}
})
.add(
new UnaryMapOperation<>(Maps.IS_MISSING) {
new UnaryMapOperation<>(Maps.IS_NOTHING) {
@Override
public BoolStorage run(LongStorage storage) {
return new BoolStorage(storage.isMissing, new BitSet(), storage.size, false);

View File

@ -42,7 +42,7 @@ public final class ObjectStorage extends SpecializedStorage<Object> {
static <T, S extends SpecializedStorage<T>> MapOpStorage<T, S> buildObjectOps() {
MapOpStorage<T, S> ops = new MapOpStorage<>();
ops.add(
new UnaryMapOperation<>(Maps.IS_MISSING) {
new UnaryMapOperation<>(Maps.IS_NOTHING) {
@Override
protected BoolStorage run(S storage) {
BitSet r = new BitSet();

View File

@ -75,7 +75,7 @@ public abstract class Storage<T> {
public static final String NOT = "not";
public static final String AND = "&&";
public static final String OR = "||";
public static final String IS_MISSING = "is_missing";
public static final String IS_NOTHING = "is_nothing";
public static final String IS_NAN = "is_nan";
public static final String IS_EMPTY = "is_empty";
public static final String STARTS_WITH = "starts_with";

View File

@ -9,6 +9,7 @@ import org.enso.table.data.table.Table;
import org.enso.table.data.table.problems.FloatingPointGrouping;
import org.enso.table.problems.AggregatedProblems;
import org.enso.table.util.ConstantList;
import org.enso.table.util.NameDeduplicator;
import java.util.*;
import java.util.function.IntFunction;
@ -122,6 +123,8 @@ public class MultiValueIndex<KeyType extends MultiValueKeyBase> {
Column nameColumn,
Aggregator[] aggregates,
String[] aggregateNames) {
NameDeduplicator outputTableNameDeduplicator = new NameDeduplicator();
final int size = locs.size();
var nameIndex =
@ -133,17 +136,16 @@ public class MultiValueIndex<KeyType extends MultiValueKeyBase> {
// Create the storage
Builder[] storage = new Builder[columnCount];
IntStream.range(0, groupingColumns.length)
.forEach(
i -> storage[i] = Builder.getForType(groupingColumns[i].getStorage().getType(), size));
IntStream.range(0, nameIndex.locs.size())
.forEach(
i -> {
for (int i = 0; i < groupingColumns.length; i++) {
storage[i] = Builder.getForType(groupingColumns[i].getStorage().getType(), size);
}
for (int i = 0; i < nameIndex.locs.size(); i++) {
int offset = groupingColumns.length + i * aggregates.length;
IntStream.range(0, aggregates.length)
.forEach(
j -> storage[offset + j] = Builder.getForType(aggregates[j].getType(), size));
});
for (int j = 0; j < aggregates.length; j++) {
storage[offset + j] = Builder.getForType(aggregates[j].getType(), size);
}
}
// Fill the storage
for (List<Integer> group_locs : this.locs.values()) {
@ -170,23 +172,25 @@ public class MultiValueIndex<KeyType extends MultiValueKeyBase> {
}
}
// Merge Problems
AggregatedProblems[] problems = new AggregatedProblems[aggregates.length + 1];
problems[0] = this.problems;
IntStream.range(0, aggregates.length)
.forEach(i -> problems[i + 1] = aggregates[i].getProblems());
AggregatedProblems merged = AggregatedProblems.merge(problems);
// Create Columns
Column[] output = new Column[columnCount];
IntStream.range(0, groupingColumns.length)
.forEach(i -> output[i] = new Column(groupingColumns[i].getName(), storage[i].seal()));
for (int i = 0; i < groupingColumns.length; i++) {
outputTableNameDeduplicator.markUsed(groupingColumns[i].getName());
output[i] = new Column(groupingColumns[i].getName(), storage[i].seal());
}
int offset = groupingColumns.length;
for (List<Integer> name_locs : nameIndex.locs.values()) {
// ToDo: Use the NameDeduplicator here.
Object boxed = nameColumn.getStorage().getItemBoxed(name_locs.get(0));
String name = boxed == null ? "" : boxed.toString();
String name;
if (boxed == null) {
throw Column.raiseNothingName();
} else {
name = boxed.toString();
// We want to fail hard on invalid column names stemming from invalid input values and make
// the user fix the data before cross_tab, to avoid data corruption.
Column.ensureNameIsValid(name);
}
for (int i = 0; i < aggregates.length; i++) {
String effectiveName;
@ -198,12 +202,27 @@ public class MultiValueIndex<KeyType extends MultiValueKeyBase> {
effectiveName = name + " " + aggregateNames[i];
}
// Check again to ensure that the appended aggregate name does not invalidate the name.
// We do not check aggregateName itself before, because it _is_ allowed for it to be empty -
// meaning just key names will be used and that is fine.
Column.ensureNameIsValid(effectiveName);
effectiveName = outputTableNameDeduplicator.makeUnique(effectiveName);
output[offset + i] = new Column(effectiveName, storage[offset + i].seal());
}
offset += aggregates.length;
}
// Merge Problems
AggregatedProblems[] problems = new AggregatedProblems[aggregates.length + 2];
problems[0] = this.problems;
problems[1] = AggregatedProblems.of(outputTableNameDeduplicator.getProblems());
for (int i = 0; i < aggregates.length; i++) {
problems[i + 2] = aggregates[i].getProblems();
}
AggregatedProblems merged = AggregatedProblems.merge(problems);
return new Table(output, merged);
}

View File

@ -33,9 +33,13 @@ public class Column {
this.storage = storage;
}
public static IllegalArgumentException raiseNothingName() throws IllegalArgumentException {
throw new IllegalArgumentException("Column name cannot be Nothing.");
}
public static void ensureNameIsValid(String name) {
if (name == null) {
throw new IllegalArgumentException("Column name cannot be Nothing.");
raiseNothingName();
}
if (name.isEmpty()) {
throw new IllegalArgumentException("Column name cannot be empty.");

View File

@ -190,7 +190,7 @@ public class ExpressionVisitorImpl extends ExpressionBaseVisitor<Value> {
@Override
public Value visitIsNull(ExpressionParser.IsNullContext ctx) {
var op = ctx.IS_NULL() != null || ctx.IS_NOT_NULL() != null ? "is_missing" : "is_empty";
var op = ctx.IS_NULL() != null || ctx.IS_NOT_NULL() != null ? "is_nothing" : "is_empty";
var condition = executeMethod(op, visit(ctx.expr()));
return ctx.IS_NOT_NULL() != null || ctx.IS_NOT_EMPTY() != null
? executeMethod("not", condition)

View File

@ -33,8 +33,8 @@ spec setup =
(x == Nothing).to_vector . should_equal [Nothing, Nothing, Nothing, Nothing]
Test.specify "should allow to check which values are null"
x.is_missing.to_vector . should_equal [False, False, False, True]
(x + Nothing).is_missing.to_vector . should_equal [True, True, True, True]
x.is_nothing.to_vector . should_equal [False, False, False, True]
(x + Nothing).is_nothing.to_vector . should_equal [True, True, True, True]
Test.specify "Column equality should handle nulls correctly" pending="TODO" <|
a = [2, 3, Nothing, Nothing]
@ -69,6 +69,15 @@ spec setup =
((t.at "A") == (t.at "B")) . to_vector . should_equal r_sensitive
((t.at "A").equals_ignore_case (t.at "B")) . to_vector . should_equal r_insensitive
Test.specify "should allow to fill empty/nothing values" <|
t = table_builder [["X", ["a", "", " ", Nothing, "b"]]]
c1 = t.at "X" . fill_nothing "NA"
c1.to_vector . should_equal ["a", "", " ", "NA", "b"]
c2 = t.at "X" . fill_empty "<empty>"
c2.to_vector . should_equal ["a", "<empty>", " ", "<empty>", "b"]
Test.specify "should report a warning if checking equality on floating point columns" <|
t = table_builder [["X", [1.0, 2.1, 3.2]], ["Y", [1.0, 2.0, 3.2]]]
@ -245,13 +254,14 @@ spec setup =
t.at "b" . like "%abc%" . name . should_equal "[b] like '%abc%'"
t.at "b" . ends_with "abc" . name . should_equal "ends_with([b], 'abc')"
t.at "b" . is_empty . name . should_equal "[b] is empty"
t.at "b" . fill_empty "<empty>" . name . should_equal "fill_empty([b], '<empty>')"
Test.specify "nulls" <|
t.at "a" . coalesce [Nothing, 42] . name . should_equal "coalesce([a], Nothing, 42)"
t.at "a" . is_missing . name . should_equal "[a] is null"
t.at "a" . is_nothing . name . should_equal "[a] is null"
t.at "a" . is_present . name . should_equal "is_present([a])"
t.at "a" . is_blank . name . should_equal "is_blank([a])"
t.at "a" . fill_missing 100 . name . should_equal "fill_missing([a], 100)"
t.at "a" . fill_nothing 100 . name . should_equal "fill_nothing([a], 100)"
Test.specify "misc"
t.at "a" . min [1, 2] . name . should_equal "min([a], 1, 2)"

View File

@ -90,6 +90,9 @@ spec setup =
t1.at "y" . to_vector . should_equal [2, 1]
t1.at "z" . to_vector . should_equal [1, 1]
t2 = table2.cross_tab ["Group", "Group"] "Key"
t2.column_names . should_equal ["Group", "x", "y", "z"]
Test.specify "should allow multiple values aggregates" <|
t1 = table.cross_tab values=[Count, Sum "Value"]
t1.column_names . should_equal ["x Count", "x Sum Value", "y Count", "y Sum Value", "z Count", "z Sum Value"]
@ -128,28 +131,35 @@ spec setup =
err2.should_fail_with Column_Indexes_Out_Of_Range
err2.catch.indexes . should_equal [42]
Test.specify "should fail if aggregate values contain invalid expressions" pending="TODO?" <|
Test.specify "should fail if aggregate values contain invalid expressions" <|
err1 = table.cross_tab values=[Sum "[MISSING]*10"]
err1.should_fail_with Invalid_Aggregate_Column
err1.catch.name . should_equal "[MISSING]*10"
err1.catch.expression_error . should_equal (No_Such_Column.Error "MISSING")
err2 = table.cross_tab values=[Sum "[[[["]
err2 = table.cross_tab values=[Sum "[[["]
err2.should_fail_with Invalid_Aggregate_Column
err2.catch.name . should_equal "[[[["
err1.catch.expression_error . should_be_a Expression_Error.Syntax_Error
err2.catch.name . should_equal "[[["
err2.catch.expression_error . should_be_a Expression_Error.Syntax_Error
Test.specify "should not allow Group_By for values" <|
err1 = table.cross_tab [] "Key" values=[Count, Group_By "Value"] on_problems=Problem_Behavior.Ignore
err1.should_fail_with Illegal_Argument
Test.specify "should gracefully handle duplicate aggregate names" pending="TODO: this should be fixed as part of https://github.com/enso-org/enso/issues/5151" <|
action = table.cross_tab [] "Key" values=[Count new_name="Agg1", Sum "Value" new_name="Agg1"]
Test.specify "should gracefully handle duplicate aggregate names" <|
action = table.cross_tab [] "Key" values=[Count new_name="Agg1", Sum "Value" new_name="Agg1"] on_problems=_
tester table =
table.column_names . should_equal ["x Agg1", "x Agg1_1", "y Agg1", "y Agg1_1", "z Agg1", "z Agg1_1"]
problems = [Duplicate_Output_Column_Names.Error ["Agg1"]]
problems = [Duplicate_Output_Column_Names.Error ["x Agg1", "y Agg1", "z Agg1"]]
Problems.test_problem_handling action problems tester
table3 = table2.rename_columns (Map.from_vector [["Group", "x"]])
action3 = table3.cross_tab ["x"] "Key" on_problems=_
tester3 table =
table.column_names . should_equal ["x", "x_1", "y", "z"]
problems3 = [Duplicate_Output_Column_Names.Error ["x"]]
Problems.test_problem_handling action3 problems3 tester3
Test.specify "should fail on invalid aggregations" <|
table = table_builder [["Key", ["x", "x", "x", "x", "y", "y", "y", "z", "z"]], ["TextValue", ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9]]]
[Problem_Behavior.Report_Error, Problem_Behavior.Report_Warning, Problem_Behavior.Ignore].each pb-> Test.with_clue "Problem_Behavior="+pb.to_text+" " <|
@ -170,25 +180,45 @@ spec setup =
err.should_fail_with Invalid_Aggregation
err.catch . should_equal (Invalid_Aggregation.Error "Sum TextValue" [0, 4, 7] "Cannot convert to a number.")
Test.specify "should correctly handle uncommon blank fields" pending="TODO: this should be fixed as part of https://github.com/enso-org/enso/issues/5151" <|
table = table_builder [["Key", [" ", "x", "x", "x", "", "", "", Nothing, Nothing]], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9]]]
Test.specify "should allow non-Text columns to be used as name" <|
table = table_builder [["Key", [1, 1, 1, 2, 2, 1, 3, 3, 1]], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9]]]
t1 = table.cross_tab
# TODO
t1.column_names . should_equal ["x", "Column_1", "Column_2"]
t1.column_names . should_equal ["1", "2", "3"]
t1.row_count . should_equal 1
t1.at "1" . to_vector . should_equal [5]
t1.at "2" . to_vector . should_equal [2]
t1.at "3" . to_vector . should_equal [2]
Test.specify "should correctly handle uncommon characters in fields becoming column names" <|
table = table_builder [["Key", ["💡🎉🌻", "ąęź", "ąęź", '\n\n', "😊", "😊", "🌻", "😊", "🌻"]], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9]]]
t1 = table.cross_tab
t1.column_names . should_equal ["💡🎉🌻", "🌻", "😊", "ąęź", '\n\n']
table = table_builder [["Key", ["💡🎉🌻", "ąęź", "ąęź", '\n\n', "😊", "😊", "🌻", "😊", "🌻", " "]], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]]
t1 = table.cross_tab . sort_columns
t1.column_names . should_equal ['\n\n', ' ', 'ąęź', '🌻', '💡🎉🌻', '😊']
t1.row_count . should_equal 1
t1.at "💡🎉🌻" . to_vector . should_equal [1]
t1.at "🌻" . to_vector . should_equal [2]
t1.at "😊" . to_vector . should_equal [3]
t1.at "ąęź" . to_vector . should_equal [2]
t1.at '\n\n' . to_vector . should_equal [1]
t1.at " " . to_vector . should_equal [1]
Test.specify "should fail gracefully if an effective column name would contain invalid characters" <|
table = table_builder [["Key", ['x', 'x', 'y\0', '\0', 'y\0', 'z', 'z', 'z', 'z']], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9]]]
r1 = table.cross_tab
r1.should_fail_with Illegal_Argument
r1.catch.to_display_text . should_contain "must not contain the NUL character"
r2 = table2.cross_tab [] "Key" values=[Average "Value" new_name='x\0']
r2.print
r2.should_fail_with Illegal_Argument
r2.catch.to_display_text . should_contain "must not contain the NUL character"
Test.specify "should fail gracefully if an effective column name would be empty or null" <|
table = table_builder [["Key", [" ", "x", "x", "x", "", "", "", "y", "y"]], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9]]]
r1 = table.cross_tab
r1.should_fail_with Illegal_Argument
r1.catch.to_display_text . should_contain "cannot be empty"
table2 = table_builder [["Key", [" ", "x", "x", "x", Nothing, Nothing, Nothing, "y", "y"]], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9]]]
r2 = table2.cross_tab
r2 . should_fail_with Illegal_Argument
r2.catch.to_display_text . should_contain "cannot be Nothing"

View File

@ -643,7 +643,7 @@ spec setup =
r2.at 3 . should_equal [3, 30, 7, 7, 200]
t4_3 = table_builder [["X", [Nothing, 2, 3]], ["Y", [10, 20, 30]]]
t4_4 = t4_3.set (t4_3.at "X" . fill_missing 7) new_name="C"
t4_4 = t4_3.set (t4_3.at "X" . fill_nothing 7) new_name="C"
t7 = t4_4.join t5 on=(Join_Condition.Equals "C" "X") join_kind=Join_Kind.Full
within_table t7 <|
expect_column_names ["X", "Y", "C", "Right_X", "Z"] t7

View File

@ -111,9 +111,9 @@ spec =
t5.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE ((FALSE) OR ("T1"."A" IS NULL))', []]
Test.group "[Codegen] Handling Missing Values" <|
Test.specify "fill_missing should allow to replace missing values in a column with a constant" <|
c = t1.at "A" . fill_missing "not-applicable"
c.to_sql.prepare . should_equal ['SELECT COALESCE("T1"."A", ?) AS "fill_missing([A], \'not-applicable\')" FROM "T1" AS "T1"', [["not-applicable", int]]]
Test.specify "fill_nothing should allow to replace missing values in a column with a constant" <|
c = t1.at "A" . fill_nothing "not-applicable"
c.to_sql.prepare . should_equal ['SELECT COALESCE("T1"."A", ?) AS "fill_nothing([A], \'not-applicable\')" FROM "T1" AS "T1"', [["not-applicable", int]]]
Test.specify "filter_blank_rows should drop rows that contain at least one missing column in a Table" <|
t2 = t1.filter_blank_rows when_any=True

View File

@ -112,10 +112,10 @@ spec prefix connection =
Test.group prefix+"Missing Values" <|
t4 = upload "T4" <|
Table.new [["a", [0, 1, Nothing, 42, Nothing]], ["b", [True, Nothing, True, False, Nothing]], ["c", ["", "foo", "bar", Nothing, Nothing]]]
Test.specify "fill_missing should replace nulls" <|
t4.at 'a' . fill_missing 10 . to_vector . should_equal [0, 1, 10, 42, 10]
t4.at 'b' . fill_missing False . to_vector . should_equal [True, False, True, False, False]
t4.at 'c' . fill_missing "NA" . to_vector . should_equal ["", "foo", "bar", "NA", "NA"]
Test.specify "fill_nothing should replace nulls" <|
t4.at 'a' . fill_nothing 10 . to_vector . should_equal [0, 1, 10, 42, 10]
t4.at 'b' . fill_nothing False . to_vector . should_equal [True, False, True, False, False]
t4.at 'c' . fill_nothing "NA" . to_vector . should_equal ["", "foo", "bar", "NA", "NA"]
Test.specify "should correctly be counted" <|
t4.row_count . should_equal 5

View File

@ -75,7 +75,7 @@ spec = Test.group "Columns" <|
Test.specify "should allow to fill missing values from another column" <|
nulled = Column.from_vector "col" [0, Nothing, 4, 5, Nothing, Nothing]
defaults = Column.from_vector "def" [1, 2, 10, 20, Nothing, 30]
r = nulled.fill_missing defaults
r = nulled.fill_nothing defaults
r.to_vector . should_equal [0, 2, 4, 5, Nothing, 30]
Test.specify "should allow to count duplicate value occurences" <|

View File

@ -291,49 +291,49 @@ spec =
Test.group "Filling Missing Values" <|
Test.specify "should coerce non-coercible types to Object" <|
strs = Column.from_vector 'x' ["a", Nothing, "b", Nothing]
strs_filled = strs.fill_missing False
strs_filled = strs.fill_nothing False
strs_filled.to_vector . should_equal ["a", False, "b", False]
strs_filled.storage_type . should_equal Storage.Any
ints = Column.from_vector 'x' [1, Nothing, 2, Nothing]
ints_filled = ints.fill_missing "X"
ints_filled = ints.fill_nothing "X"
ints_filled.to_vector . should_equal [1, "X", 2, "X"]
ints_filled.storage_type . should_equal Storage.Any
bools = Column.from_vector 'x' [True, False, Nothing]
bools_filled = bools.fill_missing "X"
bools_filled = bools.fill_nothing "X"
bools_filled.to_vector . should_equal [True, False, "X"]
bools_filled.storage_type . should_equal Storage.Any
Test.specify "should coerce long and double types to double" <|
ints = Column.from_vector 'x' [1, Nothing, 2, Nothing]
ints_filled = ints.fill_missing 0.5
ints_filled = ints.fill_nothing 0.5
ints_filled.to_vector . should_equal [1.0, 0.5, 2.0, 0.5]
ints_filled.storage_type . should_equal Storage.Decimal
decimals = Column.from_vector 'x' [0.5, Nothing, Nothing, 0.25]
decimals_filled = decimals.fill_missing 42
decimals_filled = decimals.fill_nothing 42
decimals_filled.to_vector . should_equal [0.5, 42.0, 42.0, 0.25]
decimals_filled.storage_type . should_equal Storage.Decimal
Test.specify "should keep String, Boolean, Long and Double type" <|
strs = Column.from_vector 'x' ["a", Nothing, "b", Nothing]
strs_filled = strs.fill_missing "X"
strs_filled = strs.fill_nothing "X"
strs_filled.to_vector . should_equal ["a", "X", "b", "X"]
strs_filled.storage_type . should_equal Storage.Text
bools = Column.from_vector 'x' [True, False, Nothing]
bools_filled = bools.fill_missing False
bools_filled = bools.fill_nothing False
bools_filled.to_vector . should_equal [True, False, False]
bools_filled.storage_type . should_equal Storage.Boolean
ints = Column.from_vector 'x' [1, Nothing, 2, Nothing]
ints_filled = ints.fill_missing 42
ints_filled = ints.fill_nothing 42
ints_filled.to_vector . should_equal [1, 42, 2, 42]
ints_filled.storage_type . should_equal Storage.Integer
decimals = Column.from_vector 'x' [0.5, Nothing, Nothing, 0.25]
decimals_filled = decimals.fill_missing 1.0
decimals_filled = decimals.fill_nothing 1.0
decimals_filled.to_vector . should_equal [0.5, 1.0, 1.0, 0.25]
decimals_filled.storage_type . should_equal Storage.Decimal