Extend the range of int/float arguments to outside the range of Java long, in ceil, floor, and truncate (#11135)

This commit is contained in:
Gregory Michael Travis 2024-10-07 11:36:17 -04:00 committed by GitHub
parent d0f0770ef4
commit cce50fab3a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 333 additions and 101 deletions

View File

@ -68,10 +68,12 @@
- [Support for creating Atoms in expressions.][10820]
- [IO.print without new line][10858]
- [Add `Text.to_decimal`.][10874]
- [Added .floor, .ceil, .trunc to the in-memory `Decimal` column.][10887]
- [Added `floor`, `ceil`, `trunc` to the in-memory `Decimal` column.][10887]
- [Added vectorized .round to the in-memory `Decimal` column.][10912]
- [`select_into_database_table` no longer defaults the primary key to the first
column.][11120]
- [Extend the range of `floor`, `ceil`, `trunc` to values outside the `Long`
range.][11135]
- [Added `format` parameter to `Decimal.parse`.][11205]
[10614]: https://github.com/enso-org/enso/pull/10614
@ -85,6 +87,7 @@
[10887]: https://github.com/enso-org/enso/pull/10887
[10912]: https://github.com/enso-org/enso/pull/10912
[11120]: https://github.com/enso-org/enso/pull/11120
[11135]: https://github.com/enso-org/enso/pull/11135
[11205]: https://github.com/enso-org/enso/pull/11205
#### Enso Language & Runtime

View File

@ -46,7 +46,6 @@ polyglot java import org.enso.table.data.column.operation.unary.IsNothingOperati
polyglot java import org.enso.table.data.column.operation.unary.NotOperation
polyglot java import org.enso.table.data.column.operation.unary.TextLengthOperation
polyglot java import org.enso.table.data.column.operation.unary.TruncatedTimePartOperation
polyglot java import org.enso.table.data.column.operation.unary.UnaryDecimalRoundOperation
polyglot java import org.enso.table.data.column.operation.unary.UnaryRoundOperation
polyglot java import org.enso.table.data.column.operation.UnaryOperation
polyglot java import org.enso.table.data.column.storage.Storage as Java_Storage
@ -930,11 +929,8 @@ type Column
case precise_value_type.is_integer of
True ->
self.rename new_name
False -> case precise_value_type.is_decimal of
True ->
apply_unary_operation self UnaryDecimalRoundOperation.TRUNCATE_INSTANCE
False ->
apply_unary_operation self UnaryRoundOperation.TRUNCATE_INSTANCE
False ->
apply_unary_operation self UnaryRoundOperation.TRUNCATE_INSTANCE
False -> case precise_value_type == Value_Type.Date_Time of
True ->
fun = _.date
@ -959,11 +955,7 @@ type Column
new_name = naming_helper.function_name "ceil" [self]
self.rename new_name
False ->
case self.inferred_precise_value_type.is_decimal of
True ->
apply_unary_operation self UnaryDecimalRoundOperation.CEIL_INSTANCE
False ->
apply_unary_operation self UnaryRoundOperation.CEIL_INSTANCE
apply_unary_operation self UnaryRoundOperation.CEIL_INSTANCE
## GROUP Standard.Base.Rounding
ICON math
@ -983,11 +975,7 @@ type Column
new_name = naming_helper.function_name "floor" [self]
self.rename new_name
False ->
case self.inferred_precise_value_type.is_decimal of
True ->
apply_unary_operation self UnaryDecimalRoundOperation.FLOOR_INSTANCE
False ->
apply_unary_operation self UnaryRoundOperation.FLOOR_INSTANCE
apply_unary_operation self UnaryRoundOperation.FLOOR_INSTANCE
## GROUP Standard.Base.Logical
ICON operators

View File

@ -1,5 +1,6 @@
from Standard.Base import all
polyglot java import java.math.BigInteger
polyglot java import java.util.Random as Java_Random
polyglot java import org.enso.base.Text_Utils
@ -112,6 +113,38 @@ type Faker
integer self minimum=0 maximum=100 =
minimum + (self.generator.nextInt (maximum - minimum))
## GROUP Standard.Base.Random
ICON random
Create a random large Integer value (represented internally as a Java
`BigInteger`).
The values provided by this method are selected from a sparse set within
the specified range. For example, with `bit_length=4`, the possible range
is -16 to 16, but the actual values only include 9, 11, 13 and 15.
Arguments
- bit_length: specifies the range of values to select from. The values
will be between -2^bit_length and 2^bit_length.
large_integer : Integer -> Integer -> Integer
large_integer self bit_length =
BigInteger.new bit_length 0 self.generator
## GROUP Standard.Base.Random
ICON random
Create a random Decimal value (represented internally as a Java
`BigDecimal`).
This generator uses `large_integer` to generate an `Integer`, and then
adds a random `Float`. See `large_integer` for a description of the range
of values that this can return.
Arguments
- bit_length: specifies the range of values to select from. The values
will be between -2^bit_length and 2^bit_length.
decimal : Integer -> Integer -> Integer
decimal self bit_length =
(self.large_integer bit_length) + self.float
## GROUP Standard.Base.Random
ICON random
Create a random Float value

View File

@ -0,0 +1,132 @@
package org.enso.table.data.column.builder;
import java.math.BigInteger;
import org.enso.base.polyglot.NumericConverter;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.type.BigIntegerType;
import org.enso.table.data.column.storage.type.IntegerType;
import org.enso.table.data.column.storage.type.StorageType;
import org.enso.table.problems.ProblemAggregator;
/**
 * A builder for storing enso Integers, which might be Longs or BigIntegers.
 *
 * <p>This builder starts off delegating to LongBuilder, but if it receives a BigInteger, it retypes
 * the LongBuilder to a BigIntegerBuilder. From that point on every value (including values that
 * would fit in a long) is forwarded to the BigInteger builder.
 *
 * <p>Exactly one of {@code longBuilder} / {@code bigIntegerBuilder} is non-null at any time.
 */
public class InferredIntegerBuilder extends Builder {
  private LongBuilder longBuilder = null;
  private TypedBuilder bigIntegerBuilder = null;
  // Number of rows (values and nulls) appended so far.
  private int currentSize = 0;
  private final int initialSize;
  private final ProblemAggregator problemAggregator;

  /** Creates a new instance of this builder, with the given known result length. */
  public InferredIntegerBuilder(int initialSize, ProblemAggregator problemAggregator) {
    this.initialSize = initialSize;
    this.problemAggregator = problemAggregator;

    longBuilder =
        NumericBuilder.createLongBuilder(this.initialSize, IntegerType.INT_64, problemAggregator);
  }

  /**
   * Appends a single value using the delegate's {@code appendNoGrow}.
   *
   * @param o {@code null}, a {@link BigInteger}, or any value convertible to a long
   * @throws IllegalStateException if {@code o} is neither a BigInteger nor convertible to a long
   */
  @Override
  public void appendNoGrow(Object o) {
    if (o == null) {
      // appendNulls already accounts for this row in currentSize; returning here avoids
      // counting the null twice.
      appendNulls(1);
      return;
    }

    if (o instanceof BigInteger bi) {
      retypeToBigIntegerMaybe();
      bigIntegerBuilder.appendNoGrow(bi);
    } else {
      Long lng = convertToLongOrThrow(o);
      if (bigIntegerBuilder != null) {
        bigIntegerBuilder.appendNoGrow(BigInteger.valueOf(lng));
      } else {
        longBuilder.appendNoGrow(lng);
      }
    }
    currentSize++;
  }

  /**
   * Appends a single value using the delegate's {@code append}.
   *
   * @param o {@code null}, a {@link BigInteger}, or any value convertible to a long
   * @throws IllegalStateException if {@code o} is neither a BigInteger nor convertible to a long
   */
  @Override
  public void append(Object o) {
    if (o == null) {
      // appendNulls already accounts for this row in currentSize; returning here avoids
      // counting the null twice.
      appendNulls(1);
      return;
    }

    if (o instanceof BigInteger bi) {
      retypeToBigIntegerMaybe();
      bigIntegerBuilder.append(bi);
    } else {
      Long lng = convertToLongOrThrow(o);
      if (bigIntegerBuilder != null) {
        bigIntegerBuilder.append(BigInteger.valueOf(lng));
      } else {
        longBuilder.append(lng);
      }
    }
    currentSize++;
  }

  /** Appends {@code count} nulls to whichever delegate is active and counts them as rows. */
  @Override
  public void appendNulls(int count) {
    if (bigIntegerBuilder != null) {
      bigIntegerBuilder.appendNulls(count);
    } else {
      longBuilder.appendNulls(count);
    }
    currentSize += count;
  }

  /** Appends every item of {@code storage}, one boxed value at a time, via {@link #append}. */
  @Override
  public void appendBulkStorage(Storage<?> storage) {
    for (int i = 0; i < storage.size(); i++) {
      append(storage.getItemBoxed(i));
    }
  }

  /** Returns the number of rows (values and nulls) appended so far. */
  @Override
  public int getCurrentSize() {
    return currentSize;
  }

  /** Seals whichever delegate builder is currently active and returns its storage. */
  @Override
  public Storage<?> seal() {
    if (bigIntegerBuilder != null) {
      return bigIntegerBuilder.seal();
    } else {
      return longBuilder.seal();
    }
  }

  /** Returns the BigInteger type once retyped, and the 64-bit integer type before that. */
  @Override
  public StorageType getType() {
    if (bigIntegerBuilder != null) {
      return BigIntegerType.INSTANCE;
    } else {
      return IntegerType.INT_64;
    }
  }

  // Converts a non-null, non-BigInteger value to a Long, or fails loudly: any other kind of
  // value reaching this builder indicates a bug in the caller.
  private static Long convertToLongOrThrow(Object o) {
    Long lng = NumericConverter.tryConvertingToLong(o);
    if (lng == null) {
      throw new IllegalStateException(
          "Unexpected value added to InferredIntegerBuilder "
              + o.getClass()
              + ". This is a bug in the Table library.");
    }
    return lng;
  }

  // Retype the LongBuilder to a BigIntegerBuilder, if we haven't already
  // done so.
  private void retypeToBigIntegerMaybe() {
    if (bigIntegerBuilder != null) {
      return;
    }
    bigIntegerBuilder = longBuilder.retypeTo(BigIntegerType.INSTANCE);
    longBuilder = null;
  }
}

View File

@ -1,53 +0,0 @@
package org.enso.table.data.column.operation.unary;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.function.Function;
import org.enso.base.numeric.Decimal_Utils;
import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.builder.InferredBuilder;
import org.enso.table.data.column.operation.UnaryOperation;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.storage.ColumnStorage;
import org.enso.table.data.column.storage.type.BigDecimalType;
/**
 * Unary column operation that applies a {@code BigDecimal -> BigInteger} rounding function
 * (ceil, floor or truncate, as provided by {@code Decimal_Utils}) to each value.
 *
 * <p>Only applicable to storages whose type is {@code BigDecimalType}.
 */
public class UnaryDecimalRoundOperation extends AbstractUnaryOperation {
  public static final String CEIL = "ceil";
  public static final UnaryOperation CEIL_INSTANCE =
      new UnaryDecimalRoundOperation(CEIL, Decimal_Utils::ceil);

  public static final String FLOOR = "floor";
  public static final UnaryOperation FLOOR_INSTANCE =
      new UnaryDecimalRoundOperation(FLOOR, Decimal_Utils::floor);

  // Declared final like CEIL and FLOOR: an operation name is a constant and must not be
  // reassignable by other code.
  public static final String TRUNCATE = "truncate";
  public static final UnaryOperation TRUNCATE_INSTANCE =
      new UnaryDecimalRoundOperation(TRUNCATE, Decimal_Utils::truncate);

  // The rounding function applied to every BigDecimal value.
  private final Function<BigDecimal, BigInteger> function;

  private UnaryDecimalRoundOperation(String name, Function<BigDecimal, BigInteger> function) {
    // NOTE(review): the meaning of the `true` flag is defined by AbstractUnaryOperation, which
    // is not visible here.
    super(name, true);
    this.function = function;
  }

  /** Returns true only for storages whose type is {@code BigDecimalType}. */
  @Override
  public boolean canApply(ColumnStorage storage) {
    return storage.getType() instanceof BigDecimalType;
  }

  /**
   * Bridges the generic builder callback to the {@code InferredBuilder} overload.
   *
   * @throws ClassCastException if {@code builder} is not an {@code InferredBuilder}
   */
  @Override
  protected final void applyObjectRow(
      Object value, Builder builder, MapOperationProblemAggregator problemAggregator) {
    applyObjectRow(value, (InferredBuilder) builder, problemAggregator);
  }

  /**
   * Rounds a single value and appends the resulting {@code BigInteger} to {@code builder}.
   *
   * @throws IllegalArgumentException if {@code value} is not a {@code BigDecimal}
   */
  protected void applyObjectRow(
      Object value, InferredBuilder builder, MapOperationProblemAggregator problemAggregator) {
    switch (value) {
      case BigDecimal d -> builder.append(function.apply(d));
      default -> throw new IllegalArgumentException(
          "Unsupported type: " + value.getClass() + " (expected decimal).");
    }
  }
}

View File

@ -1,31 +1,58 @@
package org.enso.table.data.column.operation.unary;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.function.DoubleToLongFunction;
import org.enso.table.data.column.builder.LongBuilder;
import java.util.function.Function;
import org.enso.base.numeric.Decimal_Utils;
import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.builder.InferredIntegerBuilder;
import org.enso.table.data.column.operation.UnaryOperation;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.storage.ColumnLongStorage;
import org.enso.table.data.column.storage.ColumnStorage;
import org.enso.table.data.column.storage.type.IntegerType;
import org.enso.table.data.column.storage.numeric.BigIntegerStorage;
public class UnaryRoundOperation extends AbstractUnaryOperation {
// Thresholds used to determine whether we should use Double or BigDecimal
// operations. Values outside this range are promoted to BigDecimal operation,
// because representing their rounded value as a Long might overflow the Long
// dynamic range.
// NOTE(review): these decimal literals are not exactly representable as
// doubles; they presumably parse to the nearest double, +/-(2^63 - 1024),
// i.e. the largest-magnitude double strictly inside the long range - confirm.
public static final double USE_DOUBLE_LIMIT_POSITIVE = 9223372036854775000.0;
public static final double USE_DOUBLE_LIMIT_NEGATIVE = -9223372036854775000.0;
public class UnaryRoundOperation extends AbstractUnaryLongOperation {
public static final String CEIL = "ceil";
public static final UnaryOperation CEIL_INSTANCE =
new UnaryRoundOperation(CEIL, d -> (long) Math.ceil(d));
new UnaryRoundOperation(CEIL, d -> (long) Math.ceil(d), Decimal_Utils::ceil);
public static final String FLOOR = "floor";
public static final UnaryOperation FLOOR_INSTANCE =
new UnaryRoundOperation(FLOOR, d -> (long) Math.floor(d));
new UnaryRoundOperation(FLOOR, d -> (long) Math.floor(d), Decimal_Utils::floor);
public static String TRUNCATE = "truncate";
public static final UnaryOperation TRUNCATE_INSTANCE =
new UnaryRoundOperation(TRUNCATE, d -> (long) d);
new UnaryRoundOperation(TRUNCATE, d -> (long) d, Decimal_Utils::truncate);
private final DoubleToLongFunction function;
private final DoubleToLongFunction doubleFunction;
private final Function<BigDecimal, BigInteger> bigDecimalFunction;
private UnaryRoundOperation(String name, DoubleToLongFunction function) {
super(name, true, IntegerType.INT_64);
this.function = function;
private UnaryRoundOperation(
String name,
DoubleToLongFunction doubleFunction,
Function<BigDecimal, BigInteger> bigDecimalFunction) {
super(name, true);
this.doubleFunction = doubleFunction;
this.bigDecimalFunction = bigDecimalFunction;
}
protected Builder createBuilder(
ColumnStorage storage, MapOperationProblemAggregator problemAggregator) {
if (storage.getSize() > Integer.MAX_VALUE) {
throw new IllegalArgumentException(
"Cannot currently operate on columns larger than " + Integer.MAX_VALUE + ".");
}
return new InferredIntegerBuilder((int) storage.getSize(), problemAggregator);
}
@Override
@ -36,9 +63,9 @@ public class UnaryRoundOperation extends AbstractUnaryLongOperation {
@Override
public ColumnStorage apply(
ColumnStorage storage, MapOperationProblemAggregator problemAggregator) {
if (storage instanceof ColumnLongStorage longStorage) {
// For a long storage, the operation is an identity operation.
return longStorage;
if (storage instanceof ColumnLongStorage || storage instanceof BigIntegerStorage) {
// For an integral type storage, the operation is an identity operation.
return storage;
}
return super.apply(storage, problemAggregator);
@ -46,7 +73,7 @@ public class UnaryRoundOperation extends AbstractUnaryLongOperation {
@Override
protected void applyObjectRow(
Object value, LongBuilder builder, MapOperationProblemAggregator problemAggregator) {
Object value, Builder builder, MapOperationProblemAggregator problemAggregator) {
// Null handled by base class
switch (value) {
case Double d -> {
@ -54,11 +81,16 @@ public class UnaryRoundOperation extends AbstractUnaryLongOperation {
String msg = "Value is " + d;
problemAggregator.reportArithmeticError(msg, builder.getCurrentSize());
builder.appendNulls(1);
} else if (d > USE_DOUBLE_LIMIT_POSITIVE || d < USE_DOUBLE_LIMIT_NEGATIVE) {
builder.append(bigDecimalFunction.apply(BigDecimal.valueOf(d)));
} else {
builder.appendLong(function.applyAsLong(d));
builder.append(doubleFunction.applyAsLong(d));
}
}
case Float f -> applyObjectRow((double) f, builder, problemAggregator);
case BigDecimal bd -> {
builder.append(bigDecimalFunction.apply(bd));
}
case Number n -> applyObjectRow(n.doubleValue(), builder, problemAggregator);
default -> throw new IllegalArgumentException(
"Unsupported type: " + value.getClass() + " (expected numeric type).");

View File

@ -8,10 +8,10 @@ options = Bench.options
type Data
Value ~ints ~floats
Value ~ints ~floats ~large_ints ~large_floats ~decimals
create vector_size faker =
Data.Value (create_ints vector_size faker) (create_floats vector_size faker)
Data.Value (create_ints vector_size faker) (create_floats vector_size faker) (create_large_ints vector_size faker) (create_large_floats vector_size faker) (create_decimals vector_size faker)
create_ints vector_size faker =
@ -24,6 +24,21 @@ create_floats vector_size faker =
Column.from_vector "floats" floats_vec
create_large_ints vector_size faker =
ints_vec = Vector.new vector_size _->(faker.large_integer 60)
Column.from_vector "large_ints" ints_vec
create_large_floats vector_size faker =
floats_vec = Vector.new vector_size _->(faker.float -1000000000000000000000.0 1000000000000000000000.0)
Column.from_vector "large_floats" floats_vec
create_decimals vector_size faker =
decimals_vec = Vector.new vector_size _->(faker.decimal 60)
Column.from_vector "decimals" decimals_vec
collect_benches = Bench.build builder->
vector_size = 5 * 1000 * 1000
## No specific significance to this constant, just fixed to make generated set deterministic
@ -33,18 +48,6 @@ collect_benches = Bench.build builder->
data = Data.create vector_size faker
builder.group "Column_Numeric" options group_builder->
group_builder.specify "round_floats" <|
data.floats.round
group_builder.specify "truncate_floats" <|
data.floats.truncate
group_builder.specify "ceil_floats" <|
data.floats.ceil
group_builder.specify "floor_floats" <|
data.floats.floor
group_builder.specify "round_ints" <|
data.ints.round
@ -57,16 +60,76 @@ collect_benches = Bench.build builder->
group_builder.specify "floor_ints" <|
data.ints.floor
group_builder.specify "round_floats" <|
data.floats.round
group_builder.specify "truncate_floats" <|
data.floats.truncate
group_builder.specify "ceil_floats" <|
data.floats.ceil
group_builder.specify "floor_floats" <|
data.floats.floor
group_builder.specify "round_large_ints" <|
data.large_ints.round
group_builder.specify "truncate_large_ints" <|
data.large_ints.truncate
group_builder.specify "ceil_large_ints" <|
data.large_ints.ceil
group_builder.specify "floor_large_ints" <|
data.large_ints.floor
## Re-enable when https://github.com/enso-org/enso/issues/11132 is done.
group_builder.specify "round_large_floats" <|
data.large_floats.round
group_builder.specify "truncate_large_floats" <|
data.large_floats.truncate
group_builder.specify "ceil_large_floats" <|
data.large_floats.ceil
group_builder.specify "floor_large_floats" <|
data.large_floats.floor
## Re-enable when https://github.com/enso-org/enso/issues/11132 is done.
group_builder.specify "round_decimals" <|
data.decimals.round
group_builder.specify "truncate_decimals" <|
data.decimals.truncate
group_builder.specify "ceil_decimals" <|
data.decimals.ceil
group_builder.specify "floor_decimals" <|
data.decimals.floor
[True, False].each use_bankers->
[0, -2, 2].map decimal_places->
name = create_name "round_decimal_places_" decimal_places use_bankers
fun x = x.round decimal_places use_bankers
group_builder.specify ("ints_" + name) <|
fun data.ints
group_builder.specify ("floats_" + name) <|
fun data.floats
group_builder.specify ("ints_" + name) <|
fun data.ints
## Re-enable when https://github.com/enso-org/enso/issues/11132 is done.
group_builder.specify ("large_ints_" + name) <|
fun data.large_ints
group_builder.specify ("large_floats_" + name) <|
fun data.large_floats
group_builder.specify ("decimals_" + name) <|
fun data.decimals
## Creates a valid name for the benchmark

View File

@ -1153,6 +1153,27 @@ add_column_operation_specs suite_builder setup =
table = table_builder [["x", [0, 3, -3, 1, -2]]]
table.at "x" . round 16 . should_fail_with Illegal_Argument
if setup.test_selection.supports_decimal_type then
group_builder.specify "ceil, floor and truncate should work correctly on Integers outside the java Long range" <|
positive_values = [9223372036854775806, 9223372036854775807, 9223372036854775808, 9223372036854775809, 9223372036854775807000000]
values = positive_values + positive_values.map .negate
values.map x->
c = table_builder [["x", [x, -x]]] . at "x"
c.ceil . to_vector . should_equal [x, -x]
c.floor . to_vector . should_equal [x, -x]
c.truncate . to_vector . should_equal [x, -x]
if setup.is_database.not then
group_builder.specify "ceil, floor and truncate should work correctly on Floats outside the java Long range" <|
positive_values = [9223372036854775000.0, 9223372036854776000.0, 9223372036854775807000000.0]
values = positive_values + positive_values.map .negate
values.map x->
x_int = x.truncate
c = table_builder [["x", [x, -x]]] . at "x"
c.ceil . to_vector . should_equal [x_int, -x_int]
c.floor . to_vector . should_equal [x_int, -x_int]
c.truncate . to_vector . should_equal [x_int, -x_int]
if setup.test_selection.supports_decimal_type then
group_builder.specify "ceil, floor and truncate should work correctly on Decimals" <|
c = table_builder [["X", [Decimal.new "123492233720368547758075678.25", Decimal.new "179243023788662739454197523.625", Decimal.new "-123492233720368547758075678.25", Decimal.new "-179243023788662739454197523.625"]]] . at "X"

View File

@ -370,6 +370,19 @@ add_specs suite_builder =
warnings . should_contain <| Arithmetic_Error.Error 'Value is Infinity (at rows [3]).'
warnings . should_contain <| Arithmetic_Error.Error 'Value is NaN (at rows [2]).'
suite_builder.group "InferredIntegerBuilder" group_builder->
group_builder.specify "Should be able to handle Nothings" <|
c = Column.from_vector "x" [Nothing, 1, Nothing, Nothing, 2, 3, Nothing, Nothing, Nothing, Nothing, 4, 5, Nothing, Nothing, 6, Nothing, 7, Nothing]
c.truncate.to_vector . should_equal c.to_vector
c.truncate.length . should_equal c.length
group_builder.specify "Should be able to handle mixed integer / biginteger" <|
c0 = Column.from_vector "x" [1, 2, 3, 4, 5, 9223372036854775807001, 9223372036854775807002, 9223372036854775807003, 6, 7, 8]
c1 = Column.from_vector "x" [9223372036854775807001, 9223372036854775807002, 1, 2, 3, 4, 5, 9223372036854775807003, 6, 7, 8]
[c0, c1].map c->
c.truncate.to_vector . should_equal c.to_vector
c.truncate.length . should_equal c.length
suite_builder.group "Date_Time truncate" group_builder->
group_builder.specify "should be able to truncate a column of Date_Times" <|
c = Column.from_vector "foo" [Date_Time.new 2020 10 24 1 2 3, Date_Time.new 2020 10 24 1 2 3]