Implement in-memory and database mixed decimal column comparisons (#10614)

This commit is contained in:
GregoryTravis 2024-07-25 17:27:19 -04:00 committed by GitHub
parent 8c4a40c9ff
commit f31c084f43
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 223 additions and 30 deletions

View File

@ -46,6 +46,8 @@
- [Compare two objects with `Ordering.compare` and define comparator with
`Comparable.new`][10468]
- [Added `dec` construction function for creating `Decimal`s.][10517]
- [Implemented in-memory and database mixed `Decimal` column
comparisons.][10614]
[10434]: https://github.com/enso-org/enso/pull/10434
[10445]: https://github.com/enso-org/enso/pull/10445
@ -53,6 +55,7 @@
[10467]: https://github.com/enso-org/enso/pull/10467
[10474]: https://github.com/enso-org/enso/pull/10474
[10517]: https://github.com/enso-org/enso/pull/10517
[10614]: https://github.com/enso-org/enso/pull/10614
# Enso 2024.2

View File

@ -2684,7 +2684,7 @@ run_vectorized_binary_op_with_fallback_problem_handling column name operand fall
_ ->
s1 = column.java_column.getStorage
rs = Polyglot_Helpers.handle_polyglot_dataflow_errors <|
s1.vectorizedOrFallbackBinaryMap name problem_builder applied_fn operand skip_nulls storage_type
s1.vectorizedOrFallbackBinaryMap name problem_builder applied_fn (enso_to_java operand) skip_nulls storage_type
Column.Value (Java_Column.new new_name rs)
## PRIVATE

View File

@ -1,5 +1,6 @@
package org.enso.table.data.column.operation.map.numeric.comparisons;
import java.math.BigDecimal;
import java.math.BigInteger;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.operation.map.numeric.helpers.DoubleArrayAdapter;
@ -43,6 +44,11 @@ public class EqualsComparison<T extends Number, I extends Storage<? super T>>
return a.equals(b);
}
@Override
protected boolean doBigDecimal(BigDecimal a, BigDecimal b) {
return a.compareTo(b) == 0;
}
@Override
protected boolean onOtherType(Object a, Object b) {
return false;

View File

@ -1,5 +1,6 @@
package org.enso.table.data.column.operation.map.numeric.comparisons;
import java.math.BigDecimal;
import java.math.BigInteger;
import org.enso.table.data.column.storage.Storage;
@ -23,4 +24,9 @@ public class GreaterComparison<T extends Number, I extends Storage<? super T>>
protected boolean doBigInteger(BigInteger a, BigInteger b) {
return a.compareTo(b) > 0;
}
@Override
protected boolean doBigDecimal(BigDecimal a, BigDecimal b) {
return a.compareTo(b) > 0;
}
}

View File

@ -1,5 +1,6 @@
package org.enso.table.data.column.operation.map.numeric.comparisons;
import java.math.BigDecimal;
import java.math.BigInteger;
import org.enso.table.data.column.storage.Storage;
@ -23,4 +24,9 @@ public class GreaterOrEqualComparison<T extends Number, I extends Storage<? supe
protected boolean doBigInteger(BigInteger a, BigInteger b) {
return a.compareTo(b) >= 0;
}
@Override
protected boolean doBigDecimal(BigDecimal a, BigDecimal b) {
return a.compareTo(b) >= 0;
}
}

View File

@ -1,5 +1,6 @@
package org.enso.table.data.column.operation.map.numeric.comparisons;
import java.math.BigDecimal;
import java.math.BigInteger;
import org.enso.table.data.column.storage.Storage;
@ -23,4 +24,9 @@ public class LessComparison<T extends Number, I extends Storage<? super T>>
protected boolean doBigInteger(BigInteger a, BigInteger b) {
return a.compareTo(b) < 0;
}
@Override
protected boolean doBigDecimal(BigDecimal a, BigDecimal b) {
return a.compareTo(b) < 0;
}
}

View File

@ -1,5 +1,6 @@
package org.enso.table.data.column.operation.map.numeric.comparisons;
import java.math.BigDecimal;
import java.math.BigInteger;
import org.enso.table.data.column.storage.Storage;
@ -23,4 +24,9 @@ public class LessOrEqualComparison<T extends Number, I extends Storage<? super T
protected boolean doBigInteger(BigInteger a, BigInteger b) {
return a.compareTo(b) <= 0;
}
@Override
protected boolean doBigDecimal(BigDecimal a, BigDecimal b) {
return a.compareTo(b) <= 0;
}
}

View File

@ -2,17 +2,20 @@ package org.enso.table.data.column.operation.map.numeric.comparisons;
import static org.enso.table.data.column.operation.map.numeric.helpers.DoubleArrayAdapter.fromAnyStorage;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.BitSet;
import org.enso.base.CompareException;
import org.enso.base.polyglot.NumericConverter;
import org.enso.table.data.column.operation.map.BinaryMapOperation;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.operation.map.numeric.helpers.BigDecimalArrayAdapter;
import org.enso.table.data.column.operation.map.numeric.helpers.BigIntegerArrayAdapter;
import org.enso.table.data.column.operation.map.numeric.helpers.DoubleArrayAdapter;
import org.enso.table.data.column.storage.BoolStorage;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.numeric.AbstractLongStorage;
import org.enso.table.data.column.storage.numeric.BigDecimalStorage;
import org.enso.table.data.column.storage.numeric.BigIntegerStorage;
import org.enso.table.data.column.storage.numeric.DoubleStorage;
import org.enso.table.data.column.storage.type.AnyObjectType;
@ -28,6 +31,8 @@ public abstract class NumericComparison<T extends Number, I extends Storage<? su
protected abstract boolean doBigInteger(BigInteger a, BigInteger b);
protected abstract boolean doBigDecimal(BigDecimal a, BigDecimal b);
protected boolean onOtherType(Object a, Object b) {
throw new CompareException(a, b);
}
@ -47,24 +52,47 @@ public abstract class NumericComparison<T extends Number, I extends Storage<? su
BigIntegerArrayAdapter.fromStorage(s), bigInteger, problemAggregator);
case BigIntegerStorage s -> runBigIntegerMap(
BigIntegerArrayAdapter.fromStorage(s), bigInteger, problemAggregator);
case BigDecimalStorage s -> runBigDecimalMap(
BigDecimalArrayAdapter.fromStorage(s), new BigDecimal(bigInteger), problemAggregator);
case DoubleStorage s -> runDoubleMap(s, bigInteger.doubleValue(), problemAggregator);
default -> throw new IllegalStateException(
"Unsupported lhs storage: " + storage.getClass().getCanonicalName());
};
} else if (arg instanceof BigDecimal bigDecimal) {
return switch (storage) {
case AbstractLongStorage s -> runBigDecimalMap(
BigDecimalArrayAdapter.fromStorage(s), bigDecimal, problemAggregator);
case BigIntegerStorage s -> runBigDecimalMap(
BigDecimalArrayAdapter.fromStorage(s), bigDecimal, problemAggregator);
case BigDecimalStorage s -> runBigDecimalMap(
BigDecimalArrayAdapter.fromStorage(s), bigDecimal, problemAggregator);
case DoubleStorage s -> runBigDecimalMap(
BigDecimalArrayAdapter.fromStorage(s), bigDecimal, problemAggregator);
default -> throw new IllegalStateException(
"Unsupported lhs storage: " + storage.getClass().getCanonicalName());
};
} else if (NumericConverter.isCoercibleToLong(arg)) {
long rhs = NumericConverter.coerceToLong(arg);
return switch (storage) {
case AbstractLongStorage s -> runLongMap(s, rhs, problemAggregator);
case BigIntegerStorage s -> runBigIntegerMap(
BigIntegerArrayAdapter.fromStorage(s), BigInteger.valueOf(rhs), problemAggregator);
case BigDecimalStorage s -> runBigDecimalMap(
BigDecimalArrayAdapter.fromStorage(s), BigDecimal.valueOf(rhs), problemAggregator);
case DoubleStorage s -> runDoubleMap(s, (double) rhs, problemAggregator);
default -> throw new IllegalStateException(
"Unsupported lhs storage: " + storage.getClass().getCanonicalName());
};
} else if (NumericConverter.isCoercibleToDouble(arg)) {
DoubleArrayAdapter lhs = DoubleArrayAdapter.fromAnyStorage(storage);
double rhs = NumericConverter.coerceToDouble(arg);
return runDoubleMap(lhs, rhs, problemAggregator);
return switch (storage) {
case BigDecimalStorage s -> runBigDecimalMap(
BigDecimalArrayAdapter.fromStorage(s), BigDecimal.valueOf(rhs), problemAggregator);
default -> {
DoubleArrayAdapter lhs = DoubleArrayAdapter.fromAnyStorage(storage);
yield runDoubleMap(lhs, rhs, problemAggregator);
}
};
} else {
int n = storage.size();
BitSet isNothing = new BitSet();
@ -155,16 +183,47 @@ public abstract class NumericComparison<T extends Number, I extends Storage<? su
return new BoolStorage(comparisonResults, isNothing, n, false);
}
protected BoolStorage runBigDecimalMap(
BigDecimalArrayAdapter lhs, BigDecimal rhs, MapOperationProblemAggregator problemAggregator) {
int n = lhs.size();
BitSet comparisonResults = new BitSet();
BitSet isNothing = new BitSet();
Context context = Context.getCurrent();
for (int i = 0; i < n; ++i) {
BigDecimal item = lhs.getItem(i);
if (item == null) {
isNothing.set(i);
} else {
boolean r = doBigDecimal(item, rhs);
if (r) {
comparisonResults.set(i);
}
}
context.safepoint();
}
return new BoolStorage(comparisonResults, isNothing, n, false);
}
@Override
public BoolStorage runZip(
I storage, Storage<?> arg, MapOperationProblemAggregator problemAggregator) {
return switch (storage) {
case DoubleStorage lhs -> {
if (arg.getType() instanceof AnyObjectType) {
yield runMixedZip(lhs, arg, problemAggregator);
} else {
yield runDoubleZip(lhs, fromAnyStorage(arg), problemAggregator);
}
yield switch (arg) {
case BigDecimalStorage rhs -> runBigDecimalZip(
BigDecimalArrayAdapter.fromStorage(lhs),
BigDecimalArrayAdapter.fromStorage(rhs),
problemAggregator);
default -> {
if (arg.getType() instanceof AnyObjectType) {
yield runMixedZip(lhs, arg, problemAggregator);
} else {
yield runDoubleZip(lhs, fromAnyStorage(arg), problemAggregator);
}
}
};
}
case AbstractLongStorage lhs -> switch (arg) {
@ -174,28 +233,50 @@ public abstract class NumericComparison<T extends Number, I extends Storage<? su
BigIntegerArrayAdapter right = BigIntegerArrayAdapter.fromStorage(rhs);
yield runBigIntegerZip(left, right, problemAggregator);
}
case BigDecimalStorage rhs -> runBigDecimalZip(
BigDecimalArrayAdapter.fromStorage(lhs),
BigDecimalArrayAdapter.fromStorage(rhs),
problemAggregator);
case DoubleStorage rhs -> runDoubleZip(
DoubleArrayAdapter.fromStorage(lhs), rhs, problemAggregator);
default -> runMixedZip(lhs, arg, problemAggregator);
};
case BigIntegerStorage lhs -> {
BigIntegerArrayAdapter left = BigIntegerArrayAdapter.fromStorage(lhs);
yield switch (arg) {
case AbstractLongStorage rhs -> {
BigIntegerArrayAdapter left = BigIntegerArrayAdapter.fromStorage(lhs);
BigIntegerArrayAdapter right = BigIntegerArrayAdapter.fromStorage(rhs);
yield runBigIntegerZip(left, right, problemAggregator);
}
case BigIntegerStorage rhs -> {
BigIntegerArrayAdapter left = BigIntegerArrayAdapter.fromStorage(lhs);
BigIntegerArrayAdapter right = BigIntegerArrayAdapter.fromStorage(rhs);
yield runBigIntegerZip(left, right, problemAggregator);
}
case BigDecimalStorage rhs -> runBigDecimalZip(
BigDecimalArrayAdapter.fromStorage(lhs),
BigDecimalArrayAdapter.fromStorage(rhs),
problemAggregator);
case DoubleStorage rhs -> runDoubleZip(
DoubleArrayAdapter.fromStorage(lhs), rhs, problemAggregator);
default -> runMixedZip(lhs, arg, problemAggregator);
};
}
case BigDecimalStorage lhs -> {
if (arg instanceof AbstractLongStorage
|| arg instanceof BigIntegerStorage
|| arg instanceof BigDecimalStorage
|| arg instanceof DoubleStorage) {
BigDecimalArrayAdapter left = BigDecimalArrayAdapter.fromAnyStorage(lhs);
BigDecimalArrayAdapter right = BigDecimalArrayAdapter.fromAnyStorage(arg);
yield runBigDecimalZip(left, right, problemAggregator);
} else {
yield runMixedZip(lhs, arg, problemAggregator);
}
}
default -> throw new IllegalStateException(
"Unsupported lhs storage: " + storage.getClass().getCanonicalName());
};
@ -294,6 +375,37 @@ public abstract class NumericComparison<T extends Number, I extends Storage<? su
return new BoolStorage(comparisonResults, isNothing, n, false);
}
protected BoolStorage runBigDecimalZip(
BigDecimalArrayAdapter lhs,
BigDecimalArrayAdapter rhs,
MapOperationProblemAggregator problemAggregator) {
int n = lhs.size();
int m = Math.min(lhs.size(), rhs.size());
BitSet comparisonResults = new BitSet();
BitSet isNothing = new BitSet();
Context context = Context.getCurrent();
for (int i = 0; i < m; ++i) {
BigDecimal x = lhs.getItem(i);
BigDecimal y = rhs.getItem(i);
if (x == null || y == null) {
isNothing.set(i);
} else {
boolean r = doBigDecimal(x, y);
if (r) {
comparisonResults.set(i);
}
}
context.safepoint();
}
if (m < n) {
isNothing.set(m, n);
}
return new BoolStorage(comparisonResults, isNothing, n, false);
}
protected BoolStorage runMixedZip(
Storage<?> lhs, Storage<?> rhs, MapOperationProblemAggregator problemAggregator) {
int n = lhs.size();

View File

@ -3,6 +3,7 @@ package org.enso.table.data.column.storage;
import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.storage.type.AnyObjectType;
import org.enso.table.data.column.storage.type.BigDecimalType;
import org.enso.table.data.column.storage.type.BigIntegerType;
import org.enso.table.data.column.storage.type.FloatType;
import org.enso.table.data.column.storage.type.IntegerType;
@ -52,13 +53,16 @@ public final class MixedStorage extends ObjectStorage implements ColumnStorageWi
private boolean isNumeric(StorageType type) {
return type instanceof IntegerType
|| type instanceof FloatType
|| type instanceof BigIntegerType;
|| type instanceof BigIntegerType
|| type instanceof BigDecimalType;
}
private StorageType commonNumericType(StorageType a, StorageType b) {
assert isNumeric(a);
assert isNumeric(b);
if (a instanceof FloatType || b instanceof FloatType) {
if (a instanceof BigDecimalType || b instanceof BigDecimalType) {
return BigDecimalType.INSTANCE;
} else if (a instanceof FloatType || b instanceof FloatType) {
return FloatType.FLOAT_64;
} else if (a instanceof BigIntegerType || b instanceof BigIntegerType) {
return BigIntegerType.INSTANCE;

View File

@ -1,5 +1,6 @@
package org.enso.table.data.column.storage.type;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.time.LocalDate;
import java.time.LocalDateTime;
@ -42,6 +43,7 @@ public sealed interface StorageType
return switch (item) {
case String s -> TextType.VARIABLE_LENGTH;
case BigDecimal i -> BigDecimalType.INSTANCE;
case BigInteger i -> BigIntegerType.INSTANCE;
case Boolean b -> BooleanType.INSTANCE;
case LocalDate d -> DateType.INSTANCE;

View File

@ -658,26 +658,67 @@ add_specs suite_builder setup =
suite_builder.group prefix+"(Column_Operations_Spec) Column Comparisons" group_builder->
table_builder = build_sorted_table table_builder=setup.light_table_builder
group_builder.specify "should allow to compare numbers" <|
with_mixed_columns_if_supported [["x", [1, 4, 5, Nothing]], ["y", [2.0, 3.25, 5.0, Nothing]]] t2->
x = t2.at "x"
y = t2.at "y"
setup.is_integer_type x.inferred_precise_value_type . should_be_true
y.inferred_precise_value_type . should_be_a (Value_Type.Float ...)
(x < y).to_vector . should_equal [True, False, False, Nothing]
(x <= y).to_vector . should_equal [True, False, True, Nothing]
(x > y).to_vector . should_equal (x <= y).not.to_vector
(x >= y).to_vector . should_equal (x < y).not.to_vector
group_builder.specify "(Column_Operations_Spec) should infer the correct precise value type for mixed columns" <|
with_mixed_columns_if_supported [["i", [1, 4, 5, Nothing]], ["f", [2.0, 3.25, 5.0, Nothing]], ["d", [dec "2.0", dec "3.25", dec "5.0", Nothing]]] t->
setup.is_integer_type (t.at "i").inferred_precise_value_type . should_be_true
(t.at "f").inferred_precise_value_type . should_be_a (Value_Type.Float ...)
(x < 1000).to_vector . should_equal [True, True, True, Nothing]
case setup.test_selection.supports_decimal_type of
True ->
(t.at "d").inferred_precise_value_type . should_be_a (Value_Type.Decimal ...)
False ->
(t.at "d").inferred_precise_value_type . should_be_a (Value_Type.Integer ...)
[(<), (<=), (>), (>=)].each op->
op x y . value_type . should_equal Value_Type.Boolean
op x y . to_vector . should_succeed
op x 23 . to_vector . should_succeed
op y 23 . to_vector . should_succeed
op x 1.5 . to_vector . should_succeed
if setup.test_selection.run_advanced_edge_case_tests then
group_builder.specify "(Column_Operations_Spec) should allow to compare numbers (including mixed types)" <|
x_values = [1.25, 4.5, 5.0]
y_values = [2.0, 3.0, 5.0]
converters = [.truncate, x->x, dec]
converters.map x_converter-> converters.map y_converter->
x_values_converted = x_values.map x_converter
y_values_converted = y_values.map y_converter
with_mixed_columns_if_supported [['x', x_values_converted + [Nothing]], ['y', y_values_converted + [Nothing]]] t->
x = t.at "x"
y = t.at "y"
Test.with_clue " comparison: "+x.value_type.to_text+', '+y.value_type.to_text+" " <|
(x < y).to_vector . should_equal [True, False, False, Nothing]
(x <= y).to_vector . should_equal [True, False, True, Nothing]
(x > y).to_vector . should_equal (x <= y).not.to_vector
(x >= y).to_vector . should_equal (x < y).not.to_vector
converters.map converter->
const_float = 3.0
constant = converter const_float
Test.with_clue " constant: "+(Meta.get_simple_type_name constant) <|
(x == constant).to_vector . should_equal [False, False, False, Nothing]
(x != constant).to_vector . should_equal [True, True, True, Nothing]
(y == constant).to_vector . should_equal [False, True, False, Nothing]
(y != constant).to_vector . should_equal [True, False, True, Nothing]
[6.5, 1000.5].map const_float->
converters.map converter->
constant = converter const_float
Test.with_clue " constant: "+(Meta.get_simple_type_name constant) <|
(x < -constant).to_vector . should_equal [False, False, False, Nothing]
(x < constant).to_vector . should_equal [True, True, True, Nothing]
(x <= -constant).to_vector . should_equal [False, False, False, Nothing]
(x <= constant).to_vector . should_equal [True, True, True, Nothing]
(y > -constant).to_vector . should_equal [True, True, True, Nothing]
(y > constant).to_vector . should_equal [False, False, False, Nothing]
(y >= -constant).to_vector . should_equal [True, True, True, Nothing]
(y >= constant).to_vector . should_equal [False, False, False, Nothing]
[(<), (<=), (>), (>=)].each op->
op x y . value_type . should_equal Value_Type.Boolean
op x y . to_vector . should_succeed
op x 23 . to_vector . should_succeed
op y 23 . to_vector . should_succeed
op x 1.5 . to_vector . should_succeed
op x (dec 1.5) . to_vector . should_succeed
group_builder.specify "should allow to compare texts" <|
t0 = table_builder [["X", ["a", "b", "c"]], ["Y", ["a", "b", "d"]]]
@ -1062,7 +1103,8 @@ add_specs suite_builder setup =
table.at "x" . round 16 . should_fail_with Illegal_Argument
if setup.test_selection.supports_decimal_type then
group_builder.specify "should return decimals when rounding decimals" <|
pending = if setup.is_database.not then "https://github.com/enso-org/enso/issues/10644"
group_builder.specify "should return decimals when rounding decimals" pending=pending <|
i1 = 9223372036854775807 - 1
c = table_builder [["X", [i1]]] . at "X"
decimal_col = c.cast Value_Type.Decimal

View File

@ -16,7 +16,7 @@ type Dummy_Connection
Nothing
in_memory_setup =
selection = Common_Table_Operations.Main.Test_Selection.Config supports_case_sensitive_columns=True natural_ordering=True case_insensitive_ordering=True order_by_unicode_normalization_by_default=True supports_unicode_normalization=True supports_time_duration=True supports_nanoseconds_in_time=True supports_mixed_columns=True text_length_limited_columns=True fixed_length_text_columns=True supports_8bit_integer=True
selection = Common_Table_Operations.Main.Test_Selection.Config supports_case_sensitive_columns=True natural_ordering=True case_insensitive_ordering=True order_by_unicode_normalization_by_default=True supports_unicode_normalization=True supports_decimal_type=True supports_time_duration=True supports_nanoseconds_in_time=True supports_mixed_columns=True text_length_limited_columns=True fixed_length_text_columns=True supports_8bit_integer=True
aggregate_selection = Common_Table_Operations.Aggregate_Spec.Test_Selection.Config
agg_table_fn _ = (enso_project.data / "data.csv") . read