mirror of
https://github.com/enso-org/enso.git
synced 2024-12-28 10:22:43 +03:00
Fix cross_tab
column naming edge cases, add fill_empty
(#5863)
Closes #5151 and adds some additional tests for `cross_tab` that verify duplicated and invalid names. I decided that for empty or `Nothing` names, instead of replacing them with `Column` and implicitly losing the connection with the value that was in the column, we should just raise an error on such values. To make handling these cases easier, `fill_empty` was added, making it easy to replace empty values with something else. Also, `{is,fill}_missing` was renamed to `{is,fill}_nothing` to align with `Filter_Condition.Is_Nothing`.
This commit is contained in:
parent
263c3ad651
commit
952beba8d1
@ -336,6 +336,8 @@
|
||||
- [Remove many regex compile flags; separated `match` into `match` and
|
||||
`match_all`.][5785]
|
||||
- [Aligned names of columns created by column operations.][5850]
|
||||
- [Improved `cross_tab`. Renamed `fill_missing` and `is_missing` to
|
||||
`fill_nothing` and `is_nothing`. Added `fill_empty`.][5863]
|
||||
|
||||
[debug-shortcuts]:
|
||||
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
||||
@ -517,6 +519,7 @@
|
||||
[5785]: https://github.com/enso-org/enso/pull/5785
|
||||
[5802]: https://github.com/enso-org/enso/pull/5802
|
||||
[5850]: https://github.com/enso-org/enso/pull/5850
|
||||
[5863]: https://github.com/enso-org/enso/pull/5863
|
||||
|
||||
#### Enso Compiler
|
||||
|
||||
|
@ -633,8 +633,8 @@ type Column
|
||||
|
||||
Returns a column of booleans, with `True` items at the positions where
|
||||
this column contains a `Nothing`.
|
||||
is_missing : Column
|
||||
is_missing self =
|
||||
is_nothing : Column
|
||||
is_nothing self =
|
||||
new_name = self.naming_helpers.to_expression_text self + " is null"
|
||||
self.make_unary_op "IS_NULL" new_name new_type=SQL_Type.boolean
|
||||
|
||||
@ -666,7 +666,7 @@ type Column
|
||||
is_present : Column
|
||||
is_present self =
|
||||
new_name = self.naming_helpers.function_name "is_present" [self]
|
||||
self.is_missing.not . rename new_name
|
||||
self.is_nothing.not . rename new_name
|
||||
|
||||
## PRIVATE
|
||||
Returns a column of booleans with `True` at the positions where this
|
||||
@ -683,7 +683,7 @@ type Column
|
||||
new_name = self.naming_helpers.function_name "is_blank" [self]
|
||||
is_blank = case self.sql_type.is_definitely_text of
|
||||
True -> self.is_empty
|
||||
False -> self.is_missing
|
||||
False -> self.is_nothing
|
||||
result = case treat_nans_as_blank && self.sql_type.is_definitely_double of
|
||||
True -> is_blank || self.is_nan
|
||||
False -> is_blank
|
||||
@ -693,11 +693,26 @@ type Column
|
||||
|
||||
Returns a new column where missing values have been replaced with the
|
||||
provided default.
|
||||
fill_missing : Any -> Column
|
||||
fill_missing self default =
|
||||
new_name = self.naming_helpers.function_name "fill_missing" [self, default]
|
||||
fill_nothing : Any -> Column
|
||||
fill_nothing self default =
|
||||
new_name = self.naming_helpers.function_name "fill_nothing" [self, default]
|
||||
self.make_binary_op "FILL_NULL" default new_name
|
||||
|
||||
## ALIAS Fill Empty
|
||||
|
||||
Returns a new column where empty Text values have been replaced with the
|
||||
provided default.
|
||||
|
||||
Arguments:
|
||||
- default: The value to replace empty values with. If this argument
|
||||
is a column, the value from `default` at the corresponding position
|
||||
will be used.
|
||||
fill_empty : Column | Any -> Column
|
||||
fill_empty self default =
|
||||
new_name = self.naming_helpers.function_name "fill_empty" [self, default]
|
||||
result = self.is_empty.iif default self
|
||||
result.rename new_name
|
||||
|
||||
## Returns a new column, containing the same elements as `self`, but with
|
||||
the given name.
|
||||
|
||||
@ -875,7 +890,7 @@ type Column
|
||||
expected.
|
||||
is_in_not_null = self.make_op "IS_IN" operands=non_nulls new_name=new_name new_type=SQL_Type.boolean
|
||||
result = case nulls.not_empty of
|
||||
True -> is_in_not_null || self.is_missing
|
||||
True -> is_in_not_null || self.is_nothing
|
||||
False -> is_in_not_null
|
||||
result.rename new_name
|
||||
_ : Array -> self.is_in (Vector.from_polyglot_array vector)
|
||||
@ -889,7 +904,7 @@ type Column
|
||||
our columns too. That is because we want the containment check
|
||||
for `NULL` to work the same way as for any other value.
|
||||
in_subquery = Query.Select [Pair.new column.name column.expression] column.context
|
||||
has_nulls_expression = SQL_Expression.Operation "BOOL_OR" [column.is_missing.expression]
|
||||
has_nulls_expression = SQL_Expression.Operation "BOOL_OR" [column.is_nothing.expression]
|
||||
has_nulls_subquery = Query.Select [Pair.new "has_nulls" has_nulls_expression] column.context
|
||||
new_expr = SQL_Expression.Operation "IS_IN_COLUMN" [self.expression, in_subquery, has_nulls_subquery]
|
||||
Column.Value new_name self.connection SQL_Type.boolean new_expr self.context
|
||||
|
@ -36,4 +36,4 @@
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_fill_missing = Examples.decimal_column.fill_missing 20.5
|
||||
example_fill_missing = Examples.decimal_column.fill_nothing 20.5
|
||||
|
@ -693,11 +693,11 @@ type Column
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_is_missing = Examples.decimal_column.is_missing
|
||||
is_missing : Column
|
||||
is_missing self =
|
||||
example_is_missing = Examples.decimal_column.is_nothing
|
||||
is_nothing : Column
|
||||
is_nothing self =
|
||||
new_name = Naming_Helpers.to_expression_text self + " is null"
|
||||
run_vectorized_unary_op self "is_missing" (== Nothing) new_name on_missing=True
|
||||
run_vectorized_unary_op self "is_nothing" (== Nothing) new_name on_missing=True
|
||||
|
||||
## UNSTABLE
|
||||
Returns a column of booleans, with `True` items at the positions where
|
||||
@ -730,7 +730,7 @@ type Column
|
||||
is_present : Column
|
||||
is_present self =
|
||||
new_name = Naming_Helpers.function_name "is_present" [self]
|
||||
self.is_missing.not.rename new_name
|
||||
self.is_nothing.not.rename new_name
|
||||
|
||||
## PRIVATE
|
||||
Returns a column of booleans with `True` at the positions where this
|
||||
@ -747,9 +747,9 @@ type Column
|
||||
new_name = Naming_Helpers.function_name "is_blank" [self]
|
||||
result = case self.storage_type of
|
||||
Storage.Text -> self.is_empty
|
||||
Storage.Decimal -> if treat_nans_as_blank then self.is_missing || self.is_nan else self.is_missing
|
||||
Storage.Decimal -> if treat_nans_as_blank then self.is_nothing || self.is_nan else self.is_nothing
|
||||
Storage.Any -> if treat_nans_as_blank then self.is_empty || self.is_nan else self.is_empty
|
||||
_ -> self.is_missing
|
||||
_ -> self.is_nothing
|
||||
result.rename new_name
|
||||
|
||||
## ALIAS Fill Missing
|
||||
@ -767,10 +767,10 @@ type Column
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_fill_missing = Examples.decimal_column.fill_missing 20.5
|
||||
fill_missing : Column | Any -> Column
|
||||
fill_missing self default =
|
||||
new_name = Naming_Helpers.function_name "fill_missing" [self, default]
|
||||
example_fill_missing = Examples.decimal_column.fill_nothing 20.5
|
||||
fill_nothing : Column | Any -> Column
|
||||
fill_nothing self default =
|
||||
new_name = Naming_Helpers.function_name "fill_nothing" [self, default]
|
||||
storage = self.java_column.getStorage
|
||||
new_st = case default of
|
||||
Column.Value java_col ->
|
||||
@ -781,6 +781,21 @@ type Column
|
||||
col = Java_Column.new new_name new_st
|
||||
Column.Value col
|
||||
|
||||
## ALIAS Fill Empty
|
||||
|
||||
Returns a new column where empty Text values have been replaced with the
|
||||
provided default.
|
||||
|
||||
Arguments:
|
||||
- default: The value to replace empty values with. If this argument
|
||||
is a column, the value from `default` at the corresponding position
|
||||
will be used.
|
||||
fill_empty : Column | Any -> Column
|
||||
fill_empty self default =
|
||||
new_name = Naming_Helpers.function_name "fill_empty" [self, default]
|
||||
result = self.is_empty.iif default self
|
||||
result.rename new_name
|
||||
|
||||
## Checks for each element of the column if it starts with `other`.
|
||||
|
||||
Arguments:
|
||||
|
@ -27,7 +27,9 @@ type Row
|
||||
get : (Integer | Text) -> Any -> Any
|
||||
get self column ~if_missing=Nothing =
|
||||
table_column = self.table.get column
|
||||
if table_column.is_nothing then if_missing else table_column.at self.index
|
||||
case table_column of
|
||||
Nothing -> if_missing
|
||||
_ -> table_column.at self.index
|
||||
|
||||
## Gets the row as a Vector.
|
||||
to_vector : Vector
|
||||
|
@ -424,7 +424,7 @@ type Row_Count_Mismatch
|
||||
"The number of rows in the left table ("+self.left_rows.to_text+") does not match the number of rows in the right table ("+self.right_rows.to_text+")."
|
||||
|
||||
type Invalid_Aggregate_Column
|
||||
## Indicates that a provided name is not found within available column not
|
||||
## Indicates that a provided name is not found within available columns nor
|
||||
represents a valid expression.
|
||||
Error (name : Text) (expression_error : Expression_Error | No_Such_Column | Nothing)
|
||||
|
||||
|
@ -26,8 +26,8 @@ make_filter_column source_column filter_condition on_problems = case filter_cond
|
||||
on_problems.escalate_warnings <|
|
||||
source_column != value
|
||||
# Nothing
|
||||
Is_Nothing -> source_column.is_missing
|
||||
Not_Nothing -> source_column.is_missing.not
|
||||
Is_Nothing -> source_column.is_nothing
|
||||
Not_Nothing -> source_column.is_nothing.not
|
||||
# Boolean
|
||||
Is_True ->
|
||||
Value_Type.expect_boolean source_column.value_type <| source_column
|
||||
|
@ -36,6 +36,9 @@ type Join_Condition_Resolver
|
||||
resolve_left = resolve_selector self.left_at
|
||||
resolve_right = resolve_selector self.right_at
|
||||
|
||||
is_nothing column = case column of
|
||||
Nothing -> True
|
||||
_ -> False
|
||||
conditions_vector = case conditions of
|
||||
_ : Vector -> conditions
|
||||
single_condition : Join_Condition -> [single_condition]
|
||||
@ -43,7 +46,7 @@ type Join_Condition_Resolver
|
||||
handle_equals left_selector right_selector =
|
||||
left = resolve_left left_selector
|
||||
right = resolve_right right_selector
|
||||
if left.is_nothing || right.is_nothing then Nothing else
|
||||
if is_nothing left || is_nothing right then Nothing else
|
||||
if left.name == right.name then
|
||||
redundant_names.append right.name
|
||||
self.make_equals problem_builder left right
|
||||
@ -54,7 +57,7 @@ type Join_Condition_Resolver
|
||||
Join_Condition.Equals_Ignore_Case left_selector right_selector locale ->
|
||||
left = resolve_left left_selector
|
||||
right = resolve_right right_selector
|
||||
if left.is_nothing || right.is_nothing then Nothing else
|
||||
if is_nothing left || is_nothing right then Nothing else
|
||||
Value_Type.expect_text left.value_type <|
|
||||
Value_Type.expect_text right.value_type <|
|
||||
self.make_equals_ignore_case problem_builder left right locale
|
||||
@ -62,7 +65,7 @@ type Join_Condition_Resolver
|
||||
left = resolve_left left_selector
|
||||
right_lower = resolve_right right_lower_selector
|
||||
right_upper = resolve_right right_upper_selector
|
||||
if left.is_nothing || right_lower.is_nothing || right_upper.is_nothing then Nothing else
|
||||
if is_nothing left || is_nothing right_lower || is_nothing right_upper then Nothing else
|
||||
self.make_between problem_builder left right_lower right_upper
|
||||
problem_builder.attach_problems_before on_problems <|
|
||||
if converted.contains Nothing then Panic.throw (Illegal_State.Error "Impossible: unresolved columns remaining in the join resolution. This should have raised a dataflow error. This is a bug in the Table library.") else
|
||||
|
@ -393,12 +393,17 @@ unify_result_type_for_union column_set all_tables allow_type_widening problem_bu
|
||||
problem_builder.report_other_warning (No_Common_Type.Error column_set.name)
|
||||
common_type
|
||||
False ->
|
||||
first_column = columns.find (c-> c.is_nothing.not)
|
||||
is_not_nothing c = case c of
|
||||
Nothing -> False
|
||||
_ -> True
|
||||
first_column = columns.find is_not_nothing
|
||||
first_type = first_column.value_type
|
||||
if first_type == Value_Type.Mixed then Value_Type.Mixed else
|
||||
first_wrong_column = columns.find if_missing=Nothing col->
|
||||
col.is_nothing.not && col.value_type != first_type
|
||||
if first_wrong_column.is_nothing then first_type else
|
||||
is_not_nothing col && col.value_type != first_type
|
||||
case first_wrong_column of
|
||||
Nothing -> first_type
|
||||
_ ->
|
||||
got_type = first_wrong_column.value_type
|
||||
problem_builder.report_other_warning (Column_Type_Mismatch.Error column_set.name first_type got_type)
|
||||
Nothing
|
||||
|
@ -103,7 +103,10 @@ type No_Fallback_Column
|
||||
Table.point_data : Table -> Vector
|
||||
Table.point_data self =
|
||||
get_point_data field = field.lookup_in self . rename field.name . catch Any (_->Nothing)
|
||||
columns = Point_Data.all_fields.map get_point_data . filter (x -> x.is_nothing.not)
|
||||
is_not_nothing x = case x of
|
||||
Nothing -> False
|
||||
_ -> True
|
||||
columns = Point_Data.all_fields.map get_point_data . filter is_not_nothing
|
||||
(0.up_to self.row_count).to_vector.map <| row_n->
|
||||
pairs = columns.map column->
|
||||
value = column.at row_n . catch_ Nothing
|
||||
|
@ -336,7 +336,7 @@ public final class BoolStorage extends Storage<Boolean> {
|
||||
}
|
||||
})
|
||||
.add(
|
||||
new UnaryMapOperation<>(Maps.IS_MISSING) {
|
||||
new UnaryMapOperation<>(Maps.IS_NOTHING) {
|
||||
@Override
|
||||
public BoolStorage run(BoolStorage storage) {
|
||||
return new BoolStorage(storage.isMissing, new BitSet(), storage.size, false);
|
||||
|
@ -301,7 +301,7 @@ public final class DoubleStorage extends NumericStorage<Double> {
|
||||
}
|
||||
})
|
||||
.add(
|
||||
new UnaryMapOperation<>(Maps.IS_MISSING) {
|
||||
new UnaryMapOperation<>(Maps.IS_NOTHING) {
|
||||
@Override
|
||||
public BoolStorage run(DoubleStorage storage) {
|
||||
return new BoolStorage(storage.isMissing, new BitSet(), storage.size, false);
|
||||
|
@ -398,7 +398,7 @@ public final class LongStorage extends NumericStorage<Long> {
|
||||
}
|
||||
})
|
||||
.add(
|
||||
new UnaryMapOperation<>(Maps.IS_MISSING) {
|
||||
new UnaryMapOperation<>(Maps.IS_NOTHING) {
|
||||
@Override
|
||||
public BoolStorage run(LongStorage storage) {
|
||||
return new BoolStorage(storage.isMissing, new BitSet(), storage.size, false);
|
||||
|
@ -42,7 +42,7 @@ public final class ObjectStorage extends SpecializedStorage<Object> {
|
||||
static <T, S extends SpecializedStorage<T>> MapOpStorage<T, S> buildObjectOps() {
|
||||
MapOpStorage<T, S> ops = new MapOpStorage<>();
|
||||
ops.add(
|
||||
new UnaryMapOperation<>(Maps.IS_MISSING) {
|
||||
new UnaryMapOperation<>(Maps.IS_NOTHING) {
|
||||
@Override
|
||||
protected BoolStorage run(S storage) {
|
||||
BitSet r = new BitSet();
|
||||
|
@ -75,7 +75,7 @@ public abstract class Storage<T> {
|
||||
public static final String NOT = "not";
|
||||
public static final String AND = "&&";
|
||||
public static final String OR = "||";
|
||||
public static final String IS_MISSING = "is_missing";
|
||||
public static final String IS_NOTHING = "is_nothing";
|
||||
public static final String IS_NAN = "is_nan";
|
||||
public static final String IS_EMPTY = "is_empty";
|
||||
public static final String STARTS_WITH = "starts_with";
|
||||
|
@ -9,6 +9,7 @@ import org.enso.table.data.table.Table;
|
||||
import org.enso.table.data.table.problems.FloatingPointGrouping;
|
||||
import org.enso.table.problems.AggregatedProblems;
|
||||
import org.enso.table.util.ConstantList;
|
||||
import org.enso.table.util.NameDeduplicator;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.function.IntFunction;
|
||||
@ -122,6 +123,8 @@ public class MultiValueIndex<KeyType extends MultiValueKeyBase> {
|
||||
Column nameColumn,
|
||||
Aggregator[] aggregates,
|
||||
String[] aggregateNames) {
|
||||
NameDeduplicator outputTableNameDeduplicator = new NameDeduplicator();
|
||||
|
||||
final int size = locs.size();
|
||||
|
||||
var nameIndex =
|
||||
@ -133,17 +136,16 @@ public class MultiValueIndex<KeyType extends MultiValueKeyBase> {
|
||||
|
||||
// Create the storage
|
||||
Builder[] storage = new Builder[columnCount];
|
||||
IntStream.range(0, groupingColumns.length)
|
||||
.forEach(
|
||||
i -> storage[i] = Builder.getForType(groupingColumns[i].getStorage().getType(), size));
|
||||
IntStream.range(0, nameIndex.locs.size())
|
||||
.forEach(
|
||||
i -> {
|
||||
for (int i = 0; i < groupingColumns.length; i++) {
|
||||
storage[i] = Builder.getForType(groupingColumns[i].getStorage().getType(), size);
|
||||
}
|
||||
|
||||
for (int i = 0; i < nameIndex.locs.size(); i++) {
|
||||
int offset = groupingColumns.length + i * aggregates.length;
|
||||
IntStream.range(0, aggregates.length)
|
||||
.forEach(
|
||||
j -> storage[offset + j] = Builder.getForType(aggregates[j].getType(), size));
|
||||
});
|
||||
for (int j = 0; j < aggregates.length; j++) {
|
||||
storage[offset + j] = Builder.getForType(aggregates[j].getType(), size);
|
||||
}
|
||||
}
|
||||
|
||||
// Fill the storage
|
||||
for (List<Integer> group_locs : this.locs.values()) {
|
||||
@ -170,23 +172,25 @@ public class MultiValueIndex<KeyType extends MultiValueKeyBase> {
|
||||
}
|
||||
}
|
||||
|
||||
// Merge Problems
|
||||
AggregatedProblems[] problems = new AggregatedProblems[aggregates.length + 1];
|
||||
problems[0] = this.problems;
|
||||
IntStream.range(0, aggregates.length)
|
||||
.forEach(i -> problems[i + 1] = aggregates[i].getProblems());
|
||||
AggregatedProblems merged = AggregatedProblems.merge(problems);
|
||||
|
||||
// Create Columns
|
||||
Column[] output = new Column[columnCount];
|
||||
IntStream.range(0, groupingColumns.length)
|
||||
.forEach(i -> output[i] = new Column(groupingColumns[i].getName(), storage[i].seal()));
|
||||
for (int i = 0; i < groupingColumns.length; i++) {
|
||||
outputTableNameDeduplicator.markUsed(groupingColumns[i].getName());
|
||||
output[i] = new Column(groupingColumns[i].getName(), storage[i].seal());
|
||||
}
|
||||
|
||||
int offset = groupingColumns.length;
|
||||
for (List<Integer> name_locs : nameIndex.locs.values()) {
|
||||
// ToDo: Use the NameDeduplicator here.
|
||||
Object boxed = nameColumn.getStorage().getItemBoxed(name_locs.get(0));
|
||||
String name = boxed == null ? "" : boxed.toString();
|
||||
String name;
|
||||
if (boxed == null) {
|
||||
throw Column.raiseNothingName();
|
||||
} else {
|
||||
name = boxed.toString();
|
||||
// We want to fail hard on invalid column names stemming from invalid input values and make
|
||||
// the user fix the data before cross_tab, to avoid data corruption.
|
||||
Column.ensureNameIsValid(name);
|
||||
}
|
||||
|
||||
for (int i = 0; i < aggregates.length; i++) {
|
||||
String effectiveName;
|
||||
@ -198,12 +202,27 @@ public class MultiValueIndex<KeyType extends MultiValueKeyBase> {
|
||||
effectiveName = name + " " + aggregateNames[i];
|
||||
}
|
||||
|
||||
// Check again to ensure that the appended aggregate name does not invalidate the name.
|
||||
// We do not check aggregateName itself before, because it _is_ allowed for it to be empty -
|
||||
// meaning just key names will be used and that is fine.
|
||||
Column.ensureNameIsValid(effectiveName);
|
||||
effectiveName = outputTableNameDeduplicator.makeUnique(effectiveName);
|
||||
|
||||
output[offset + i] = new Column(effectiveName, storage[offset + i].seal());
|
||||
}
|
||||
|
||||
offset += aggregates.length;
|
||||
}
|
||||
|
||||
// Merge Problems
|
||||
AggregatedProblems[] problems = new AggregatedProblems[aggregates.length + 2];
|
||||
problems[0] = this.problems;
|
||||
problems[1] = AggregatedProblems.of(outputTableNameDeduplicator.getProblems());
|
||||
for (int i = 0; i < aggregates.length; i++) {
|
||||
problems[i + 2] = aggregates[i].getProblems();
|
||||
}
|
||||
AggregatedProblems merged = AggregatedProblems.merge(problems);
|
||||
|
||||
return new Table(output, merged);
|
||||
}
|
||||
|
||||
|
@ -33,9 +33,13 @@ public class Column {
|
||||
this.storage = storage;
|
||||
}
|
||||
|
||||
public static IllegalArgumentException raiseNothingName() throws IllegalArgumentException {
|
||||
throw new IllegalArgumentException("Column name cannot be Nothing.");
|
||||
}
|
||||
|
||||
public static void ensureNameIsValid(String name) {
|
||||
if (name == null) {
|
||||
throw new IllegalArgumentException("Column name cannot be Nothing.");
|
||||
raiseNothingName();
|
||||
}
|
||||
if (name.isEmpty()) {
|
||||
throw new IllegalArgumentException("Column name cannot be empty.");
|
||||
|
@ -190,7 +190,7 @@ public class ExpressionVisitorImpl extends ExpressionBaseVisitor<Value> {
|
||||
|
||||
@Override
|
||||
public Value visitIsNull(ExpressionParser.IsNullContext ctx) {
|
||||
var op = ctx.IS_NULL() != null || ctx.IS_NOT_NULL() != null ? "is_missing" : "is_empty";
|
||||
var op = ctx.IS_NULL() != null || ctx.IS_NOT_NULL() != null ? "is_nothing" : "is_empty";
|
||||
var condition = executeMethod(op, visit(ctx.expr()));
|
||||
return ctx.IS_NOT_NULL() != null || ctx.IS_NOT_EMPTY() != null
|
||||
? executeMethod("not", condition)
|
||||
|
@ -33,8 +33,8 @@ spec setup =
|
||||
(x == Nothing).to_vector . should_equal [Nothing, Nothing, Nothing, Nothing]
|
||||
|
||||
Test.specify "should allow to check which values are null"
|
||||
x.is_missing.to_vector . should_equal [False, False, False, True]
|
||||
(x + Nothing).is_missing.to_vector . should_equal [True, True, True, True]
|
||||
x.is_nothing.to_vector . should_equal [False, False, False, True]
|
||||
(x + Nothing).is_nothing.to_vector . should_equal [True, True, True, True]
|
||||
|
||||
Test.specify "Column equality should handle nulls correctly" pending="TODO" <|
|
||||
a = [2, 3, Nothing, Nothing]
|
||||
@ -69,6 +69,15 @@ spec setup =
|
||||
((t.at "A") == (t.at "B")) . to_vector . should_equal r_sensitive
|
||||
((t.at "A").equals_ignore_case (t.at "B")) . to_vector . should_equal r_insensitive
|
||||
|
||||
Test.specify "should allow to fill empty/nothing values" <|
|
||||
t = table_builder [["X", ["a", "", " ", Nothing, "b"]]]
|
||||
|
||||
c1 = t.at "X" . fill_nothing "NA"
|
||||
c1.to_vector . should_equal ["a", "", " ", "NA", "b"]
|
||||
|
||||
c2 = t.at "X" . fill_empty "<empty>"
|
||||
c2.to_vector . should_equal ["a", "<empty>", " ", "<empty>", "b"]
|
||||
|
||||
Test.specify "should report a warning if checking equality on floating point columns" <|
|
||||
t = table_builder [["X", [1.0, 2.1, 3.2]], ["Y", [1.0, 2.0, 3.2]]]
|
||||
|
||||
@ -245,13 +254,14 @@ spec setup =
|
||||
t.at "b" . like "%abc%" . name . should_equal "[b] like '%abc%'"
|
||||
t.at "b" . ends_with "abc" . name . should_equal "ends_with([b], 'abc')"
|
||||
t.at "b" . is_empty . name . should_equal "[b] is empty"
|
||||
t.at "b" . fill_empty "<empty>" . name . should_equal "fill_empty([b], '<empty>')"
|
||||
|
||||
Test.specify "nulls" <|
|
||||
t.at "a" . coalesce [Nothing, 42] . name . should_equal "coalesce([a], Nothing, 42)"
|
||||
t.at "a" . is_missing . name . should_equal "[a] is null"
|
||||
t.at "a" . is_nothing . name . should_equal "[a] is null"
|
||||
t.at "a" . is_present . name . should_equal "is_present([a])"
|
||||
t.at "a" . is_blank . name . should_equal "is_blank([a])"
|
||||
t.at "a" . fill_missing 100 . name . should_equal "fill_missing([a], 100)"
|
||||
t.at "a" . fill_nothing 100 . name . should_equal "fill_nothing([a], 100)"
|
||||
|
||||
Test.specify "misc"
|
||||
t.at "a" . min [1, 2] . name . should_equal "min([a], 1, 2)"
|
||||
|
@ -90,6 +90,9 @@ spec setup =
|
||||
t1.at "y" . to_vector . should_equal [2, 1]
|
||||
t1.at "z" . to_vector . should_equal [1, 1]
|
||||
|
||||
t2 = table2.cross_tab ["Group", "Group"] "Key"
|
||||
t2.column_names . should_equal ["Group", "x", "y", "z"]
|
||||
|
||||
Test.specify "should allow multiple values aggregates" <|
|
||||
t1 = table.cross_tab values=[Count, Sum "Value"]
|
||||
t1.column_names . should_equal ["x Count", "x Sum Value", "y Count", "y Sum Value", "z Count", "z Sum Value"]
|
||||
@ -128,28 +131,35 @@ spec setup =
|
||||
err2.should_fail_with Column_Indexes_Out_Of_Range
|
||||
err2.catch.indexes . should_equal [42]
|
||||
|
||||
Test.specify "should fail if aggregate values contain invalid expressions" pending="TODO?" <|
|
||||
Test.specify "should fail if aggregate values contain invalid expressions" <|
|
||||
err1 = table.cross_tab values=[Sum "[MISSING]*10"]
|
||||
err1.should_fail_with Invalid_Aggregate_Column
|
||||
err1.catch.name . should_equal "[MISSING]*10"
|
||||
err1.catch.expression_error . should_equal (No_Such_Column.Error "MISSING")
|
||||
|
||||
err2 = table.cross_tab values=[Sum "[[[["]
|
||||
err2 = table.cross_tab values=[Sum "[[["]
|
||||
err2.should_fail_with Invalid_Aggregate_Column
|
||||
err2.catch.name . should_equal "[[[["
|
||||
err1.catch.expression_error . should_be_a Expression_Error.Syntax_Error
|
||||
err2.catch.name . should_equal "[[["
|
||||
err2.catch.expression_error . should_be_a Expression_Error.Syntax_Error
|
||||
|
||||
Test.specify "should not allow Group_By for values" <|
|
||||
err1 = table.cross_tab [] "Key" values=[Count, Group_By "Value"] on_problems=Problem_Behavior.Ignore
|
||||
err1.should_fail_with Illegal_Argument
|
||||
|
||||
Test.specify "should gracefully handle duplicate aggregate names" pending="TODO: this should be fixed as part of https://github.com/enso-org/enso/issues/5151" <|
|
||||
action = table.cross_tab [] "Key" values=[Count new_name="Agg1", Sum "Value" new_name="Agg1"]
|
||||
Test.specify "should gracefully handle duplicate aggregate names" <|
|
||||
action = table.cross_tab [] "Key" values=[Count new_name="Agg1", Sum "Value" new_name="Agg1"] on_problems=_
|
||||
tester table =
|
||||
table.column_names . should_equal ["x Agg1", "x Agg1_1", "y Agg1", "y Agg1_1", "z Agg1", "z Agg1_1"]
|
||||
problems = [Duplicate_Output_Column_Names.Error ["Agg1"]]
|
||||
problems = [Duplicate_Output_Column_Names.Error ["x Agg1", "y Agg1", "z Agg1"]]
|
||||
Problems.test_problem_handling action problems tester
|
||||
|
||||
table3 = table2.rename_columns (Map.from_vector [["Group", "x"]])
|
||||
action3 = table3.cross_tab ["x"] "Key" on_problems=_
|
||||
tester3 table =
|
||||
table.column_names . should_equal ["x", "x_1", "y", "z"]
|
||||
problems3 = [Duplicate_Output_Column_Names.Error ["x"]]
|
||||
Problems.test_problem_handling action3 problems3 tester3
|
||||
|
||||
Test.specify "should fail on invalid aggregations" <|
|
||||
table = table_builder [["Key", ["x", "x", "x", "x", "y", "y", "y", "z", "z"]], ["TextValue", ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9]]]
|
||||
[Problem_Behavior.Report_Error, Problem_Behavior.Report_Warning, Problem_Behavior.Ignore].each pb-> Test.with_clue "Problem_Behavior="+pb.to_text+" " <|
|
||||
@ -170,25 +180,45 @@ spec setup =
|
||||
err.should_fail_with Invalid_Aggregation
|
||||
err.catch . should_equal (Invalid_Aggregation.Error "Sum TextValue" [0, 4, 7] "Cannot convert to a number.")
|
||||
|
||||
Test.specify "should correctly handle uncommon blank fields" pending="TODO: this should be fixed as part of https://github.com/enso-org/enso/issues/5151" <|
|
||||
table = table_builder [["Key", [" ", "x", "x", "x", "", "", "", Nothing, Nothing]], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9]]]
|
||||
Test.specify "should allow non-Text columns to be used as name" <|
|
||||
table = table_builder [["Key", [1, 1, 1, 2, 2, 1, 3, 3, 1]], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9]]]
|
||||
t1 = table.cross_tab
|
||||
# TODO
|
||||
t1.column_names . should_equal ["x", "Column_1", "Column_2"]
|
||||
t1.column_names . should_equal ["1", "2", "3"]
|
||||
t1.row_count . should_equal 1
|
||||
t1.at "1" . to_vector . should_equal [5]
|
||||
t1.at "2" . to_vector . should_equal [2]
|
||||
t1.at "3" . to_vector . should_equal [2]
|
||||
|
||||
Test.specify "should correctly handle uncommon characters in fields becoming column names" <|
|
||||
table = table_builder [["Key", ["💡🎉🌻", "ąęź", "ąęź", '\n\n', "😊", "😊", "🌻", "😊", "🌻"]], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9]]]
|
||||
t1 = table.cross_tab
|
||||
t1.column_names . should_equal ["💡🎉🌻", "🌻", "😊", "ąęź", '\n\n']
|
||||
table = table_builder [["Key", ["💡🎉🌻", "ąęź", "ąęź", '\n\n', "😊", "😊", "🌻", "😊", "🌻", " "]], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]]
|
||||
t1 = table.cross_tab . sort_columns
|
||||
t1.column_names . should_equal ['\n\n', ' ', 'ąęź', '🌻', '💡🎉🌻', '😊']
|
||||
t1.row_count . should_equal 1
|
||||
t1.at "💡🎉🌻" . to_vector . should_equal [1]
|
||||
t1.at "🌻" . to_vector . should_equal [2]
|
||||
t1.at "😊" . to_vector . should_equal [3]
|
||||
t1.at "ąęź" . to_vector . should_equal [2]
|
||||
t1.at '\n\n' . to_vector . should_equal [1]
|
||||
t1.at " " . to_vector . should_equal [1]
|
||||
|
||||
Test.specify "should fail gracefully if an effective column name would contain invalid characters" <|
|
||||
table = table_builder [["Key", ['x', 'x', 'y\0', '\0', 'y\0', 'z', 'z', 'z', 'z']], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9]]]
|
||||
r1 = table.cross_tab
|
||||
r1.should_fail_with Illegal_Argument
|
||||
r1.catch.to_display_text . should_contain "must not contain the NUL character"
|
||||
|
||||
r2 = table2.cross_tab [] "Key" values=[Average "Value" new_name='x\0']
|
||||
r2.print
|
||||
r2.should_fail_with Illegal_Argument
|
||||
r2.catch.to_display_text . should_contain "must not contain the NUL character"
|
||||
|
||||
Test.specify "should fail gracefully if an effective column name would be empty or null" <|
|
||||
table = table_builder [["Key", [" ", "x", "x", "x", "", "", "", "y", "y"]], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9]]]
|
||||
r1 = table.cross_tab
|
||||
r1.should_fail_with Illegal_Argument
|
||||
r1.catch.to_display_text . should_contain "cannot be empty"
|
||||
|
||||
table2 = table_builder [["Key", [" ", "x", "x", "x", Nothing, Nothing, Nothing, "y", "y"]], ["Value", [1, 2, 3, 4, 5, 6, 7, 8, 9]]]
|
||||
r2 = table2.cross_tab
|
||||
r2 . should_fail_with Illegal_Argument
|
||||
r2.catch.to_display_text . should_contain "cannot be Nothing"
|
||||
|
@ -643,7 +643,7 @@ spec setup =
|
||||
r2.at 3 . should_equal [3, 30, 7, 7, 200]
|
||||
|
||||
t4_3 = table_builder [["X", [Nothing, 2, 3]], ["Y", [10, 20, 30]]]
|
||||
t4_4 = t4_3.set (t4_3.at "X" . fill_missing 7) new_name="C"
|
||||
t4_4 = t4_3.set (t4_3.at "X" . fill_nothing 7) new_name="C"
|
||||
t7 = t4_4.join t5 on=(Join_Condition.Equals "C" "X") join_kind=Join_Kind.Full
|
||||
within_table t7 <|
|
||||
expect_column_names ["X", "Y", "C", "Right_X", "Z"] t7
|
||||
|
@ -111,9 +111,9 @@ spec =
|
||||
t5.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE ((FALSE) OR ("T1"."A" IS NULL))', []]
|
||||
|
||||
Test.group "[Codegen] Handling Missing Values" <|
|
||||
Test.specify "fill_missing should allow to replace missing values in a column with a constant" <|
|
||||
c = t1.at "A" . fill_missing "not-applicable"
|
||||
c.to_sql.prepare . should_equal ['SELECT COALESCE("T1"."A", ?) AS "fill_missing([A], \'not-applicable\')" FROM "T1" AS "T1"', [["not-applicable", int]]]
|
||||
Test.specify "fill_nothing should allow to replace missing values in a column with a constant" <|
|
||||
c = t1.at "A" . fill_nothing "not-applicable"
|
||||
c.to_sql.prepare . should_equal ['SELECT COALESCE("T1"."A", ?) AS "fill_nothing([A], \'not-applicable\')" FROM "T1" AS "T1"', [["not-applicable", int]]]
|
||||
|
||||
Test.specify "filter_blank_rows should drop rows that contain at least one missing column in a Table" <|
|
||||
t2 = t1.filter_blank_rows when_any=True
|
||||
|
@ -112,10 +112,10 @@ spec prefix connection =
|
||||
Test.group prefix+"Missing Values" <|
|
||||
t4 = upload "T4" <|
|
||||
Table.new [["a", [0, 1, Nothing, 42, Nothing]], ["b", [True, Nothing, True, False, Nothing]], ["c", ["", "foo", "bar", Nothing, Nothing]]]
|
||||
Test.specify "fill_missing should replace nulls" <|
|
||||
t4.at 'a' . fill_missing 10 . to_vector . should_equal [0, 1, 10, 42, 10]
|
||||
t4.at 'b' . fill_missing False . to_vector . should_equal [True, False, True, False, False]
|
||||
t4.at 'c' . fill_missing "NA" . to_vector . should_equal ["", "foo", "bar", "NA", "NA"]
|
||||
Test.specify "fill_nothing should replace nulls" <|
|
||||
t4.at 'a' . fill_nothing 10 . to_vector . should_equal [0, 1, 10, 42, 10]
|
||||
t4.at 'b' . fill_nothing False . to_vector . should_equal [True, False, True, False, False]
|
||||
t4.at 'c' . fill_nothing "NA" . to_vector . should_equal ["", "foo", "bar", "NA", "NA"]
|
||||
|
||||
Test.specify "should correctly be counted" <|
|
||||
t4.row_count . should_equal 5
|
||||
|
@ -75,7 +75,7 @@ spec = Test.group "Columns" <|
|
||||
Test.specify "should allow to fill missing values from another column" <|
|
||||
nulled = Column.from_vector "col" [0, Nothing, 4, 5, Nothing, Nothing]
|
||||
defaults = Column.from_vector "def" [1, 2, 10, 20, Nothing, 30]
|
||||
r = nulled.fill_missing defaults
|
||||
r = nulled.fill_nothing defaults
|
||||
r.to_vector . should_equal [0, 2, 4, 5, Nothing, 30]
|
||||
|
||||
Test.specify "should allow to count duplicate value occurences" <|
|
||||
|
@ -291,49 +291,49 @@ spec =
|
||||
Test.group "Filling Missing Values" <|
|
||||
Test.specify "should coerce non-coercible types to Object" <|
|
||||
strs = Column.from_vector 'x' ["a", Nothing, "b", Nothing]
|
||||
strs_filled = strs.fill_missing False
|
||||
strs_filled = strs.fill_nothing False
|
||||
strs_filled.to_vector . should_equal ["a", False, "b", False]
|
||||
strs_filled.storage_type . should_equal Storage.Any
|
||||
|
||||
ints = Column.from_vector 'x' [1, Nothing, 2, Nothing]
|
||||
ints_filled = ints.fill_missing "X"
|
||||
ints_filled = ints.fill_nothing "X"
|
||||
ints_filled.to_vector . should_equal [1, "X", 2, "X"]
|
||||
ints_filled.storage_type . should_equal Storage.Any
|
||||
|
||||
bools = Column.from_vector 'x' [True, False, Nothing]
|
||||
bools_filled = bools.fill_missing "X"
|
||||
bools_filled = bools.fill_nothing "X"
|
||||
bools_filled.to_vector . should_equal [True, False, "X"]
|
||||
bools_filled.storage_type . should_equal Storage.Any
|
||||
|
||||
Test.specify "should coerce long and double types to double" <|
|
||||
ints = Column.from_vector 'x' [1, Nothing, 2, Nothing]
|
||||
ints_filled = ints.fill_missing 0.5
|
||||
ints_filled = ints.fill_nothing 0.5
|
||||
ints_filled.to_vector . should_equal [1.0, 0.5, 2.0, 0.5]
|
||||
ints_filled.storage_type . should_equal Storage.Decimal
|
||||
|
||||
decimals = Column.from_vector 'x' [0.5, Nothing, Nothing, 0.25]
|
||||
decimals_filled = decimals.fill_missing 42
|
||||
decimals_filled = decimals.fill_nothing 42
|
||||
decimals_filled.to_vector . should_equal [0.5, 42.0, 42.0, 0.25]
|
||||
decimals_filled.storage_type . should_equal Storage.Decimal
|
||||
|
||||
Test.specify "should keep String, Boolean, Long and Double type" <|
|
||||
strs = Column.from_vector 'x' ["a", Nothing, "b", Nothing]
|
||||
strs_filled = strs.fill_missing "X"
|
||||
strs_filled = strs.fill_nothing "X"
|
||||
strs_filled.to_vector . should_equal ["a", "X", "b", "X"]
|
||||
strs_filled.storage_type . should_equal Storage.Text
|
||||
|
||||
bools = Column.from_vector 'x' [True, False, Nothing]
|
||||
bools_filled = bools.fill_missing False
|
||||
bools_filled = bools.fill_nothing False
|
||||
bools_filled.to_vector . should_equal [True, False, False]
|
||||
bools_filled.storage_type . should_equal Storage.Boolean
|
||||
|
||||
ints = Column.from_vector 'x' [1, Nothing, 2, Nothing]
|
||||
ints_filled = ints.fill_missing 42
|
||||
ints_filled = ints.fill_nothing 42
|
||||
ints_filled.to_vector . should_equal [1, 42, 2, 42]
|
||||
ints_filled.storage_type . should_equal Storage.Integer
|
||||
|
||||
decimals = Column.from_vector 'x' [0.5, Nothing, Nothing, 0.25]
|
||||
decimals_filled = decimals.fill_missing 1.0
|
||||
decimals_filled = decimals.fill_nothing 1.0
|
||||
decimals_filled.to_vector . should_equal [0.5, 1.0, 1.0, 0.25]
|
||||
decimals_filled.storage_type . should_equal Storage.Decimal
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user