mirror of
https://github.com/enso-org/enso.git
synced 2024-12-23 03:51:43 +03:00
Fix inconsistency when building a Mixed column, fixes to Union (#7919)
- Fixes #7352 by remembering original value types in type inference mode to be able to reconstruct them for Mixed. - Added more benchmarks for comparing performance of constructing columns. - Fixes missing implementations that caused `Table.union` crashing on some type pairs. - Ensures that `Loss_Of_Integer_Precision` warning is not swallowed when numeric columns are unioned to create a `Float` column. - Adds test for all of the above cases. - Allow to output benchmark results to a CSV by setting an environment variable - useful for quickly comparing benchmarks, e.g. in Enso.
This commit is contained in:
parent
08cd449a99
commit
0cd446432f
@ -2390,7 +2390,9 @@ concat_columns column_set all_tables result_type result_row_count on_problems =
|
||||
_ : Integer ->
|
||||
storage = parent_table.at i . java_column . getStorage
|
||||
storage_builder.appendBulkStorage storage
|
||||
Column.from_storage column_set.name storage_builder.seal
|
||||
sealed_storage = Java_Problems.unpack_value_with_aggregated_problems on_problems <|
|
||||
storage_builder.sealWithProblems
|
||||
Column.from_storage column_set.name sealed_storage
|
||||
|
||||
## PRIVATE
|
||||
Conversion method to a Table from a Column.
|
||||
|
@ -157,7 +157,13 @@ type Bench
|
||||
## Run the specified set of benchmarks.
|
||||
run_main self =
|
||||
count = self.total_specs
|
||||
IO.println <| "Found " + count.to_text + " cases to execute"
|
||||
IO.println <| "Found " + count.to_text + " cases to execute (ETA " + self.estimated_runtime.to_display_text + ")"
|
||||
|
||||
case Environment.get "ENSO_BENCHMARK_REPORT_PATH" of
|
||||
Nothing -> Nothing
|
||||
path ->
|
||||
line = 'Label,Phase,"Invocations count","Average time (ms)"'
|
||||
line.write path on_existing_file=Existing_File_Behavior.Backup
|
||||
|
||||
self.fold Nothing _-> g-> s->
|
||||
c = g.configuration
|
||||
@ -168,10 +174,10 @@ type Bench
|
||||
## Measure the amount of time it takes to execute a given computation.
|
||||
|
||||
Arguments:
|
||||
- act: The action to perform.
|
||||
- label: A name for the benchmark.
|
||||
- warmup_conf: Warmup phase configuration.
|
||||
- measure_conf: Measurement phase configuration.
|
||||
- act: The action to perform.
|
||||
|
||||
> Example
|
||||
Measure a computation called "foo" with an iteration size of 2 and a number
|
||||
@ -193,8 +199,8 @@ type Bench
|
||||
IO.println <| "[DRY-RUN] Benchmark '" + label + "' finished in " + fmt_duration_ms + " ms"
|
||||
False ->
|
||||
measure_start = System.nano_time
|
||||
Bench.run_phase "Warmup" warmup_conf act
|
||||
Bench.run_phase "Measurement" measure_conf act
|
||||
Bench.run_phase label "Warmup" warmup_conf act
|
||||
Bench.run_phase label "Measurement" measure_conf act
|
||||
measure_end = System.nano_time
|
||||
measure_duration_ms = (measure_end - measure_start) / 1000000
|
||||
fmt_duration_ms = measure_duration_ms.format "#.###"
|
||||
@ -215,11 +221,12 @@ type Bench
|
||||
so that it is the same as in JMH.
|
||||
|
||||
Arguments:
|
||||
- label: A name for the benchmark.
|
||||
- phase_name: The name of the phase.
|
||||
- conf: The phase configuration.
|
||||
- act: Method that should be measured - benchmark body.
|
||||
run_phase : Text -> Phase_Conf -> (Any -> Any) -> Nothing
|
||||
run_phase (phase_name:Text) (conf:Phase_Conf) ~act =
|
||||
run_phase : Text -> Text -> Phase_Conf -> (Any -> Any) -> Nothing
|
||||
run_phase (label:Text) (phase_name:Text) (conf:Phase_Conf) ~act =
|
||||
duration_ns = conf.iterations * conf.seconds * 1000000000
|
||||
phase_start = System.nano_time
|
||||
stop_ns = phase_start + duration_ns
|
||||
@ -230,14 +237,29 @@ type Bench
|
||||
durations = go [] phase_start
|
||||
sum = durations.reduce (_ + _)
|
||||
run_iters = durations.length
|
||||
avg = sum / run_iters
|
||||
fmt = (avg / 1000000).format "#.###"
|
||||
avg = (sum / run_iters) / 1000000
|
||||
phase_end = System.nano_time
|
||||
phase_duration_ms = (phase_end - phase_start) / 1000000
|
||||
IO.println <| phase_name + " duration: " + phase_duration_ms.to_text + " ms"
|
||||
IO.println <| phase_name + " invocations: " + run_iters.to_text
|
||||
phase_duration = Duration.new nanoseconds=(phase_end - phase_start)
|
||||
Bench.summarize_phase label phase_name run_iters avg phase_duration
|
||||
|
||||
## PRIVATE
|
||||
This is a very simple implementation of summarizing the benchmark
|
||||
results.
|
||||
|
||||
We may want to improve it later, but it gets the job done to give us
|
||||
simple summary that can be analysed more easily than logs.
|
||||
summarize_phase (label:Text) (phase_name:Text) (invocations:Integer) (average_time:Float) (phase_duration:Duration) =
|
||||
fmt = average_time.format "#.###"
|
||||
IO.println <| phase_name + " duration: " + (phase_duration.total_milliseconds.format "#.##") + " ms"
|
||||
IO.println <| phase_name + " invocations: " + invocations.to_text
|
||||
IO.println <| phase_name + " avg time: " + fmt + " ms"
|
||||
|
||||
case Environment.get "ENSO_BENCHMARK_REPORT_PATH" of
|
||||
Nothing -> Nothing
|
||||
path ->
|
||||
line = '\n"'+label+'","'+phase_name+'",'+invocations.to_text+','+fmt
|
||||
line.write path on_existing_file=Existing_File_Behavior.Append
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Checks whether the given name is a valid benchmark name - either group name
|
||||
|
@ -4,10 +4,12 @@ import java.math.BigInteger;
|
||||
import java.util.Arrays;
|
||||
import org.enso.base.polyglot.NumericConverter;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
import org.enso.table.data.column.storage.numeric.AbstractLongStorage;
|
||||
import org.enso.table.data.column.storage.numeric.BigIntegerStorage;
|
||||
import org.enso.table.data.column.storage.type.AnyObjectType;
|
||||
import org.enso.table.data.column.storage.type.BigIntegerType;
|
||||
import org.enso.table.data.column.storage.type.FloatType;
|
||||
import org.enso.table.data.column.storage.type.IntegerType;
|
||||
import org.enso.table.data.column.storage.type.StorageType;
|
||||
import org.enso.table.error.ValueTypeMismatchException;
|
||||
import org.graalvm.polyglot.Context;
|
||||
@ -24,8 +26,8 @@ public class BigIntegerBuilder extends TypedBuilderImpl<BigInteger> {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(Object[] items) {
|
||||
super.writeTo(items);
|
||||
public void retypeToMixed(Object[] items) {
|
||||
super.retypeToMixed(items);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -36,7 +38,7 @@ public class BigIntegerBuilder extends TypedBuilderImpl<BigInteger> {
|
||||
@Override
|
||||
public TypedBuilder retypeTo(StorageType type) {
|
||||
if (type instanceof FloatType) {
|
||||
DoubleBuilder res = NumericBuilder.createDoubleBuilder(currentSize);
|
||||
DoubleBuilder res = NumericBuilder.createInferringDoubleBuilder(currentSize);
|
||||
for (int i = 0; i < currentSize; i++) {
|
||||
if (data[i] == null) {
|
||||
res.appendNulls(1);
|
||||
@ -98,4 +100,28 @@ public class BigIntegerBuilder extends TypedBuilderImpl<BigInteger> {
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void appendBulkStorage(Storage<?> storage) {
|
||||
if (storage.getType() instanceof IntegerType) {
|
||||
if (storage instanceof AbstractLongStorage longStorage) {
|
||||
int n = longStorage.size();
|
||||
for (int i = 0; i < n; i++) {
|
||||
if (storage.isNa(i)) {
|
||||
data[currentSize++] = null;
|
||||
} else {
|
||||
long item = longStorage.getItem(i);
|
||||
data[currentSize++] = BigInteger.valueOf(item);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
throw new IllegalStateException(
|
||||
"Unexpected storage implementation for type INTEGER: "
|
||||
+ storage
|
||||
+ ". This is a bug in the Table library.");
|
||||
}
|
||||
} else {
|
||||
super.appendBulkStorage(storage);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -95,7 +95,7 @@ public class BoolBuilder extends TypedBuilder {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(Object[] items) {
|
||||
public void retypeToMixed(Object[] items) {
|
||||
for (int i = 0; i < size; i++) {
|
||||
if (isNa.get(i)) {
|
||||
items[i] = null;
|
||||
|
@ -5,7 +5,9 @@ import org.enso.table.data.column.operation.cast.ToFloatStorageConverter;
|
||||
import org.enso.table.data.column.storage.BoolStorage;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
import org.enso.table.data.column.storage.numeric.AbstractLongStorage;
|
||||
import org.enso.table.data.column.storage.numeric.BigIntegerStorage;
|
||||
import org.enso.table.data.column.storage.numeric.DoubleStorage;
|
||||
import org.enso.table.data.column.storage.type.BigIntegerType;
|
||||
import org.enso.table.data.column.storage.type.BooleanType;
|
||||
import org.enso.table.data.column.storage.type.FloatType;
|
||||
import org.enso.table.data.column.storage.type.IntegerType;
|
||||
@ -28,14 +30,10 @@ public class DoubleBuilder extends NumericBuilder {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(Object[] items) {
|
||||
for (int i = 0; i < currentSize; i++) {
|
||||
if (isMissing.get(i)) {
|
||||
items[i] = null;
|
||||
} else {
|
||||
items[i] = Double.longBitsToDouble(data[i]);
|
||||
}
|
||||
}
|
||||
public void retypeToMixed(Object[] items) {
|
||||
throw new IllegalStateException("The DoubleBuilder cannot be retyped to the Mixed type, because it would lose " +
|
||||
"type information about integers that were converted to doubles. If recasting is needed, " +
|
||||
"InferringDoubleBuilder should be used instead. This error leaking is a bug in the Table library.");
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -53,38 +51,11 @@ public class DoubleBuilder extends NumericBuilder {
|
||||
return FloatType.FLOAT_64;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts the provided LongBuilder to a DoubleBuilder.
|
||||
*
|
||||
* <p>The original LongBuilder becomes invalidated after this operation and should no longer be
|
||||
* used.
|
||||
*/
|
||||
static DoubleBuilder retypeFromLongBuilder(LongBuilder longBuilder) {
|
||||
int currentSize = longBuilder.currentSize;
|
||||
DoubleBuilder newBuilder = new DoubleBuilder(longBuilder.isMissing, longBuilder.data, currentSize);
|
||||
|
||||
// Invalidate the old builder.
|
||||
longBuilder.data = null;
|
||||
longBuilder.isMissing = null;
|
||||
longBuilder.currentSize = -1;
|
||||
|
||||
// Translate the data in-place to avoid unnecessary allocations.
|
||||
for (int i = 0; i < currentSize; i++) {
|
||||
if (!newBuilder.isMissing.get(i)) {
|
||||
long currentIntegerValue = newBuilder.data[i];
|
||||
double convertedFloatValue = newBuilder.convertIntegerToDouble(currentIntegerValue);
|
||||
newBuilder.data[i] = Double.doubleToRawLongBits(convertedFloatValue);
|
||||
}
|
||||
}
|
||||
|
||||
return newBuilder;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void appendNoGrow(Object o) {
|
||||
if (o == null) {
|
||||
isMissing.set(currentSize++);
|
||||
} else if (NumericConverter.isFloatLike(o)){
|
||||
} else if (NumericConverter.isFloatLike(o)) {
|
||||
double value = NumericConverter.coerceToDouble(o);
|
||||
data[currentSize++] = Double.doubleToRawLongBits(value);
|
||||
} else if (NumericConverter.isCoercibleToLong(o)) {
|
||||
@ -134,7 +105,25 @@ public class DoubleBuilder extends NumericBuilder {
|
||||
+ storage
|
||||
+ ". This is a bug in the Table library.");
|
||||
}
|
||||
} else if (Objects.equals(storage.getType(), BooleanType.INSTANCE)) {
|
||||
} else if (storage.getType() instanceof BigIntegerType) {
|
||||
if (storage instanceof BigIntegerStorage bigIntegerStorage) {
|
||||
int n = bigIntegerStorage.size();
|
||||
for (int i = 0; i < n; i++) {
|
||||
BigInteger item = bigIntegerStorage.getItem(i);
|
||||
if (item == null) {
|
||||
isMissing.set(currentSize++);
|
||||
} else {
|
||||
double converted = convertBigIntegerToDouble(item);
|
||||
data[currentSize++] = Double.doubleToRawLongBits(converted);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
throw new IllegalStateException(
|
||||
"Unexpected storage implementation for type BIG INTEGER: "
|
||||
+ storage
|
||||
+ ". This is a bug in the Table library.");
|
||||
}
|
||||
} else if (storage.getType() instanceof BooleanType) {
|
||||
if (storage instanceof BoolStorage boolStorage) {
|
||||
int n = boolStorage.size();
|
||||
for (int i = 0; i < n; i++) {
|
||||
@ -201,34 +190,34 @@ public class DoubleBuilder extends NumericBuilder {
|
||||
* <p>
|
||||
* It verifies if the integer can be exactly represented in a double, and if not, it reports a warning.
|
||||
*/
|
||||
private double convertIntegerToDouble(long integer) {
|
||||
protected double convertIntegerToDouble(long integer) {
|
||||
double floatingPointValue = (double) integer;
|
||||
boolean isLosingPrecision = (long) floatingPointValue != integer;
|
||||
if (isLosingPrecision) {
|
||||
if (precisionLoss == null) {
|
||||
precisionLoss = new LossOfIntegerPrecision(integer, floatingPointValue);
|
||||
} else {
|
||||
precisionLoss.incrementAffectedRows();
|
||||
}
|
||||
reportPrecisionLoss(integer, floatingPointValue);
|
||||
}
|
||||
return floatingPointValue;
|
||||
}
|
||||
|
||||
private double convertBigIntegerToDouble(BigInteger bigInteger) {
|
||||
protected double convertBigIntegerToDouble(BigInteger bigInteger) {
|
||||
double floatingPointValue = bigInteger.doubleValue();
|
||||
BigInteger reconstructed = BigDecimal.valueOf(floatingPointValue).toBigInteger();
|
||||
boolean isLosingPrecision = !bigInteger.equals(reconstructed);
|
||||
if (isLosingPrecision) {
|
||||
if (precisionLoss == null) {
|
||||
precisionLoss = new LossOfIntegerPrecision(bigInteger, floatingPointValue);
|
||||
} else {
|
||||
precisionLoss.incrementAffectedRows();
|
||||
}
|
||||
reportPrecisionLoss(bigInteger, floatingPointValue);
|
||||
}
|
||||
return floatingPointValue;
|
||||
}
|
||||
|
||||
private LossOfIntegerPrecision precisionLoss = null;
|
||||
protected final void reportPrecisionLoss(Number number, double approximation) {
|
||||
if (precisionLoss == null) {
|
||||
precisionLoss = new LossOfIntegerPrecision(number, approximation);
|
||||
} else {
|
||||
precisionLoss.incrementAffectedRows();
|
||||
}
|
||||
}
|
||||
|
||||
protected LossOfIntegerPrecision precisionLoss = null;
|
||||
|
||||
@Override
|
||||
public AggregatedProblems getProblems() {
|
||||
|
@ -102,7 +102,7 @@ public class InferredBuilder extends Builder {
|
||||
// In inferred builder, we always default to 64-bits.
|
||||
currentBuilder = NumericBuilder.createLongBuilder(initialCapacity, IntegerType.INT_64);
|
||||
} else if (NumericConverter.isFloatLike(o)) {
|
||||
currentBuilder = NumericBuilder.createDoubleBuilder(initialCapacity);
|
||||
currentBuilder = NumericBuilder.createInferringDoubleBuilder(initialCapacity);
|
||||
} else if (o instanceof String) {
|
||||
currentBuilder = new StringBuilder(initialCapacity, TextType.VARIABLE_LENGTH);
|
||||
} else if (o instanceof BigInteger) {
|
||||
@ -156,7 +156,7 @@ public class InferredBuilder extends Builder {
|
||||
|
||||
private void retypeToMixed() {
|
||||
ObjectBuilder objectBuilder = new MixedBuilder(initialSize);
|
||||
currentBuilder.writeTo(objectBuilder.getData());
|
||||
currentBuilder.retypeToMixed(objectBuilder.getData());
|
||||
objectBuilder.setCurrentSize(currentBuilder.getCurrentSize());
|
||||
objectBuilder.setPreExistingProblems(currentBuilder.getProblems());
|
||||
currentBuilder = objectBuilder;
|
||||
|
@ -0,0 +1,192 @@
|
||||
package org.enso.table.data.column.builder;
|
||||
|
||||
import org.enso.base.polyglot.NumericConverter;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
import org.enso.table.error.ValueTypeMismatchException;
|
||||
|
||||
import java.math.BigInteger;
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* A double builder variant that preserves types and can be retyped to Mixed.
|
||||
*/
|
||||
public class InferringDoubleBuilder extends DoubleBuilder {
|
||||
/**
|
||||
* Converts the provided LongBuilder to a DoubleBuilder.
|
||||
*
|
||||
* <p>The original LongBuilder becomes invalidated after this operation and should no longer be
|
||||
* used.
|
||||
*/
|
||||
static InferringDoubleBuilder retypeFromLongBuilder(LongBuilder longBuilder) {
|
||||
int currentSize = longBuilder.currentSize;
|
||||
InferringDoubleBuilder newBuilder =
|
||||
new InferringDoubleBuilder(longBuilder.isMissing, longBuilder.data, currentSize);
|
||||
|
||||
// Invalidate the old builder.
|
||||
longBuilder.data = null;
|
||||
longBuilder.isMissing = null;
|
||||
longBuilder.currentSize = -1;
|
||||
|
||||
// Assume all longs will be compacted.
|
||||
newBuilder.isLongCompactedAsDouble.set(0, currentSize, true);
|
||||
|
||||
// Translate the data in-place to avoid unnecessary allocations.
|
||||
for (int i = 0; i < currentSize; i++) {
|
||||
if (!newBuilder.isMissing.get(i)) {
|
||||
long currentIntegerValue = newBuilder.data[i];
|
||||
double convertedFloatValue = (double) currentIntegerValue;
|
||||
boolean isLossy = currentIntegerValue != (long) convertedFloatValue;
|
||||
if (isLossy) {
|
||||
// Save it raw for recovery.
|
||||
newBuilder.setRaw(i, currentIntegerValue);
|
||||
newBuilder.reportPrecisionLoss(currentIntegerValue, convertedFloatValue);
|
||||
// Unmark the long that did not fit:
|
||||
newBuilder.isLongCompactedAsDouble.set(i, false);
|
||||
}
|
||||
|
||||
newBuilder.data[i] = Double.doubleToRawLongBits(convertedFloatValue);
|
||||
}
|
||||
}
|
||||
|
||||
return newBuilder;
|
||||
}
|
||||
|
||||
InferringDoubleBuilder(BitSet isMissing, long[] doubleData, int currentSize) {
|
||||
super(isMissing, doubleData, currentSize);
|
||||
rawData = null;
|
||||
isLongCompactedAsDouble = new BitSet();
|
||||
}
|
||||
|
||||
/**
|
||||
* Stores the raw data as passed to append, in order to be able to reconstruct the original values when retyping to
|
||||
* mixed.
|
||||
*/
|
||||
private Number[] rawData;
|
||||
|
||||
/**
|
||||
* Specifies at which indices we encountered integers that can be reconstructed from double without loss of
|
||||
* precision.
|
||||
* <p>
|
||||
* This is used for reconstructing the original values when retyping to mixed. Integers that are small enough can be
|
||||
* reconstructed from their double values, so we do not need to store them as `rawData`, instead we only mark that
|
||||
* they need to be converted back into integers when retyping. This allows us to completely avoid allocating the
|
||||
* `rawData` array for most practical scenarios when the integers are not too large. The cost of allocating this
|
||||
* BitSet should be significantly lower than allocating the `rawData` array.
|
||||
*/
|
||||
private final BitSet isLongCompactedAsDouble;
|
||||
|
||||
@Override
|
||||
public void retypeToMixed(Object[] items) {
|
||||
int rawN = rawData == null ? 0 : rawData.length;
|
||||
for (int i = 0; i < currentSize; i++) {
|
||||
if (isMissing.get(i)) {
|
||||
items[i] = null;
|
||||
} else {
|
||||
if (isLongCompactedAsDouble.get(i)) {
|
||||
double value = Double.longBitsToDouble(data[i]);
|
||||
long reconstructed = (long) value;
|
||||
items[i] = reconstructed;
|
||||
} else if (i < rawN && rawData[i] != null) {
|
||||
items[i] = rawData[i];
|
||||
} else {
|
||||
double value = Double.longBitsToDouble(data[i]);
|
||||
items[i] = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Since we are retyping to Mixed, the precision loss warnings should not be inherited - thus we reset them.
|
||||
precisionLoss = null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void appendBulkStorage(Storage<?> storage) {
|
||||
throw new UnsupportedOperationException("appendBulkStorage is not supported on InferringDoubleBuilder. " +
|
||||
"A DoubleBuilder or MixedBuilder should be used instead. This is a bug in the Table library.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void appendDouble(double x) {
|
||||
if (currentSize >= this.data.length) {
|
||||
grow();
|
||||
}
|
||||
|
||||
data[currentSize] = Double.doubleToRawLongBits(x);
|
||||
currentSize++;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void appendLong(long integer) {
|
||||
if (currentSize >= this.data.length) {
|
||||
grow();
|
||||
}
|
||||
|
||||
appendLongNoGrow(integer);
|
||||
}
|
||||
|
||||
private void appendLongNoGrow(long integer) {
|
||||
double convertedFloatValue = (double) integer;
|
||||
boolean isLossy = integer != (long) convertedFloatValue;
|
||||
if (isLossy) {
|
||||
setRaw(currentSize, integer);
|
||||
reportPrecisionLoss(integer, convertedFloatValue);
|
||||
} else {
|
||||
isLongCompactedAsDouble.set(currentSize, true);
|
||||
}
|
||||
|
||||
data[currentSize] = Double.doubleToRawLongBits(convertedFloatValue);
|
||||
currentSize++;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void appendBigInteger(BigInteger integer) {
|
||||
if (currentSize >= this.data.length) {
|
||||
grow();
|
||||
}
|
||||
|
||||
setRaw(currentSize, integer);
|
||||
double convertedFloatValue = convertBigIntegerToDouble(integer);
|
||||
data[currentSize] = Double.doubleToRawLongBits(convertedFloatValue);
|
||||
currentSize++;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void appendNoGrow(Object o) {
|
||||
if (o == null) {
|
||||
isMissing.set(currentSize++);
|
||||
} else if (NumericConverter.isFloatLike(o)) {
|
||||
double value = NumericConverter.coerceToDouble(o);
|
||||
data[currentSize++] = Double.doubleToRawLongBits(value);
|
||||
} else if (NumericConverter.isCoercibleToLong(o)) {
|
||||
long value = NumericConverter.coerceToLong(o);
|
||||
appendLongNoGrow(value);
|
||||
} else if (o instanceof BigInteger bigInteger) {
|
||||
setRaw(currentSize, bigInteger);
|
||||
double converted = convertBigIntegerToDouble(bigInteger);
|
||||
data[currentSize++] = Double.doubleToRawLongBits(converted);
|
||||
} else {
|
||||
throw new ValueTypeMismatchException(getType(), o);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void appendRawNoGrow(long rawData) {
|
||||
throw new UnsupportedOperationException("appendRawNoGrow is not supported on InferringDoubleBuilder. " +
|
||||
"A DoubleBuilder should be used instead. This is a bug in the Table library.");
|
||||
}
|
||||
|
||||
private void setRaw(int ix, Number o) {
|
||||
if (rawData == null) {
|
||||
rawData = new Number[ix + 1];
|
||||
}
|
||||
|
||||
if (rawData.length <= ix) {
|
||||
int newLength = Math.max(rawData.length * 3 / 2 + 1, ix + 1);
|
||||
Number[] newRawData = new Number[newLength];
|
||||
System.arraycopy(rawData, 0, newRawData, 0, rawData.length);
|
||||
rawData = newRawData;
|
||||
}
|
||||
|
||||
rawData[ix] = o;
|
||||
}
|
||||
}
|
@ -35,7 +35,7 @@ public abstract class LongBuilder extends NumericBuilder {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(Object[] items) {
|
||||
public void retypeToMixed(Object[] items) {
|
||||
for (int i = 0; i < currentSize; i++) {
|
||||
if (isMissing.get(i)) {
|
||||
items[i] = null;
|
||||
@ -55,7 +55,7 @@ public abstract class LongBuilder extends NumericBuilder {
|
||||
if (Objects.equals(type, BigIntegerType.INSTANCE)) {
|
||||
return BigIntegerBuilder.retypeFromLongBuilder(this);
|
||||
} else if (Objects.equals(type, FloatType.FLOAT_64)) {
|
||||
return DoubleBuilder.retypeFromLongBuilder(this);
|
||||
return InferringDoubleBuilder.retypeFromLongBuilder(this);
|
||||
} else {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
@ -16,10 +16,19 @@ public abstract class NumericBuilder extends TypedBuilder {
|
||||
this.currentSize = currentSize;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a {@link DoubleBuilder} that should be used to create columns of boolean type and are
|
||||
* not expected to be retyped.
|
||||
*/
|
||||
public static DoubleBuilder createDoubleBuilder(int size) {
|
||||
return new DoubleBuilder(new BitSet(), new long[size], 0);
|
||||
}
|
||||
|
||||
/** Creates a {@link DoubleBuilder} that may be retyped to Mixed type. */
|
||||
public static DoubleBuilder createInferringDoubleBuilder(int size) {
|
||||
return new InferringDoubleBuilder(new BitSet(), new long[size], 0);
|
||||
}
|
||||
|
||||
public static LongBuilder createLongBuilder(int size, IntegerType type) {
|
||||
return LongBuilder.make(size, type);
|
||||
}
|
||||
|
@ -23,7 +23,7 @@ public class ObjectBuilder extends TypedBuilder {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(Object[] items) {
|
||||
public void retypeToMixed(Object[] items) {
|
||||
throw new IllegalStateException("Broken invariant: rewriting the most general type.");
|
||||
}
|
||||
|
||||
|
@ -9,7 +9,7 @@ public abstract class TypedBuilder extends Builder {
|
||||
*
|
||||
* @param items the buffer to dump elements into
|
||||
*/
|
||||
public abstract void writeTo(Object[] items);
|
||||
public abstract void retypeToMixed(Object[] items);
|
||||
|
||||
/**
|
||||
* Checks if the builder can be efficiently retyped to the given storage type.
|
||||
|
@ -18,7 +18,7 @@ public abstract class TypedBuilderImpl<T> extends TypedBuilder {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(Object[] items) {
|
||||
public void retypeToMixed(Object[] items) {
|
||||
if (currentSize >= 0) System.arraycopy(data, 0, items, 0, currentSize);
|
||||
}
|
||||
|
||||
|
@ -8,15 +8,41 @@ from Standard.Test import Bench
|
||||
options = Bench.options . set_warmup (Bench.phase_conf 2 3) . set_measure (Bench.phase_conf 2 3)
|
||||
|
||||
type Data
|
||||
Value ~ints
|
||||
Value ~ints ~floats ~floats_and_small_ints ~floats_and_large_ints
|
||||
|
||||
create num_rows =
|
||||
# 0-argument block to make it lazy
|
||||
ints =
|
||||
Vector.new num_rows i->
|
||||
i % 1000
|
||||
Data.Value ints
|
||||
floats =
|
||||
Vector.new num_rows i->
|
||||
(i % 1000) + 0.5
|
||||
|
||||
floats_and_small_ints =
|
||||
Vector.new num_rows i->
|
||||
case i % 2 of
|
||||
0 -> (i % 1000) + 0.5
|
||||
1 -> i % 1000
|
||||
|
||||
floats_and_large_ints =
|
||||
Vector.new num_rows i->
|
||||
case i % 3 of
|
||||
0 -> (i % 1000) + 0.5
|
||||
1 -> 2^60 + (i % 100)
|
||||
2 -> 2^100 + (i % 100)
|
||||
|
||||
Data.Value ints floats floats_and_small_ints floats_and_large_ints
|
||||
|
||||
## A flag allowing to run some additional benchmarks.
|
||||
They were used when analyzing performance of an optimization of Mixed
|
||||
handler. They are disabled on the CI to avoid increasing the time of the
|
||||
build, but they can be manually re-enabled in case this needs testing in the
|
||||
future.
|
||||
|
||||
When set to `False`, only the core measurements checking the most common
|
||||
cases are run.
|
||||
extended_tests = False
|
||||
|
||||
collect_benches = Bench.build builder->
|
||||
num_rows = 1000000
|
||||
@ -32,4 +58,18 @@ collect_benches = Bench.build builder->
|
||||
group_builder.specify "Integers_type_Auto" <|
|
||||
Column.from_vector "Auto" data.ints value_type=Auto
|
||||
|
||||
group_builder.specify "Floats_type_Auto" <|
|
||||
Column.from_vector "Auto" data.floats value_type=Auto
|
||||
group_builder.specify "Floats_type_Float" <|
|
||||
Column.from_vector "Floats" data.floats value_type=Value_Type.Float
|
||||
|
||||
if extended_tests then group_builder.specify "Numeric_Mix_Small_type_Auto" <|
|
||||
Column.from_vector "Small_Mix" data.floats_and_small_ints value_type=Auto
|
||||
if extended_tests then group_builder.specify "Numeric_Mix_Small_type_Float" <|
|
||||
Column.from_vector "Small_Mix" data.floats_and_small_ints value_type=Value_Type.Float
|
||||
if extended_tests then group_builder.specify "Numeric_Mix_Large_type_Auto" <|
|
||||
Column.from_vector "Large_Mix" data.floats_and_large_ints value_type=Auto
|
||||
if extended_tests then group_builder.specify "Numeric_Mix_Large_type_Float" <|
|
||||
Column.from_vector "Large_Mix" data.floats_and_large_ints value_type=Value_Type.Float
|
||||
|
||||
main = collect_benches . run_main
|
||||
|
@ -426,15 +426,16 @@ spec setup =
|
||||
c3 = t2.cast "super-mix" Value_Type.Char . at "super-mix"
|
||||
c3.value_type . should_equal Value_Type.Char
|
||||
date_time_str = Date_Time.new 2023 12 15 19 25 . to_text
|
||||
c3.to_vector . should_equal ["1.0", "2.25", "3", "-45.25", "2.0", "X", "2020-01-01", date_time_str, "12:30:00", "{{{MY Type [x=42] }}}", "True", Nothing, "True"]
|
||||
c3.to_vector . should_equal ["1", "2.25", "3", "-45.25", "2.0", "X", "2020-01-01", date_time_str, "12:30:00", "{{{MY Type [x=42] }}}", "True", Nothing, "True"]
|
||||
Problems.assume_no_problems c3
|
||||
|
||||
if setup.test_selection.fixed_length_text_columns then
|
||||
c3_2 = t2.cast "super-mix" (Value_Type.Char size=2) . at "super-mix"
|
||||
c3_2.value_type . should_equal (Value_Type.Char size=2)
|
||||
c3_2.to_vector . should_equal ["1.", "2.", "3", "-4", "2.", "X", "20", "20", "12", "{{", "Tr", Nothing, "Tr"]
|
||||
c3_2.to_vector . should_equal ["1", "2.", "3", "-4", "2.", "X", "20", "20", "12", "{{", "Tr", Nothing, "Tr"]
|
||||
w3_2 = Problems.expect_warning Conversion_Failure c3_2
|
||||
w3_2.affected_rows_count . should_equal 10
|
||||
w3_2.affected_rows_count . should_equal 9
|
||||
w3_2.to_display_text . should_contain "have a text representation that does not fit the target type"
|
||||
|
||||
c4 = t2.cast "super-mix" Value_Type.Boolean . at "super-mix"
|
||||
c4.value_type . should_equal Value_Type.Boolean
|
||||
@ -630,13 +631,13 @@ spec setup =
|
||||
t3.at "Y" . value_type . should_equal Value_Type.Char
|
||||
|
||||
Test.specify "will choose Decimal type if all values are integers but cannot fit long" <|
|
||||
c0 = table_builder [["X", [My_Type.Value 42, 1, 2, 2^100]]] . at "X"
|
||||
c0 = table_builder [["X", [My_Type.Value 42, 1, 2, (2^100)+1]]] . at "X"
|
||||
c1 = c0.drop 1
|
||||
|
||||
c1.value_type . should_equal Value_Type.Mixed
|
||||
c2 = c1.auto_value_type
|
||||
c2.value_type . should_be_a (Value_Type.Decimal ...)
|
||||
c2.to_vector . should_equal [1, 2, 2^100]
|
||||
c2.to_vector . should_equal [1, 2, (2^100)+1]
|
||||
|
||||
Test.specify "will try to find the smallest integer type to fit the value (if shrink_types=True)" <|
|
||||
[False, True].each is_mixed->
|
||||
|
@ -208,16 +208,31 @@ spec setup =
|
||||
t3.at "A" . value_type . variable_length . should_be_true
|
||||
|
||||
Test.specify "should find a common type that will fit the merged columns" <|
|
||||
t1 = table_builder [["int+float", [0, 1, 2]]]
|
||||
t2 = table_builder [["int+float", [1.0, 2.0, 2.5]]]
|
||||
t1 = table_builder [["A", [0, 1, 2]]]
|
||||
t2 = table_builder [["A", [1.0, 2.0, 2.5]]]
|
||||
|
||||
t1.at "int+float" . value_type . is_integer . should_be_true
|
||||
t2.at "int+float" . value_type . is_floating_point . should_be_true
|
||||
t1.at "A" . value_type . is_integer . should_be_true
|
||||
t2.at "A" . value_type . is_floating_point . should_be_true
|
||||
|
||||
t3 = t1.union t2
|
||||
expect_column_names ["int+float"] t3
|
||||
t3.at "int+float" . value_type . is_floating_point . should_be_true
|
||||
t3.at "int+float" . to_vector . should_equal [0, 1, 2, 1.0, 2.0, 2.5]
|
||||
expect_column_names ["A"] t3
|
||||
t3.at "A" . value_type . is_floating_point . should_be_true
|
||||
t3.at "A" . to_vector . should_equal [0, 1, 2, 1.0, 2.0, 2.5]
|
||||
|
||||
# Specific type tests that apply to in-memory. Database behaviour is up to implementation.
|
||||
if setup.is_database.not then
|
||||
t4 = table_builder [["A", [2^100, 2^10, 2]]]
|
||||
t4.at "A" . value_type . should_be_a (Value_Type.Decimal ...)
|
||||
|
||||
t5 = t2.union t4
|
||||
expect_column_names ["A"] t5
|
||||
t5.at "A" . value_type . is_floating_point . should_be_true
|
||||
t5.at "A" . to_vector . should_equal [1.0, 2.0, 2.5, 2^100, 2^10, 2]
|
||||
|
||||
t6 = t1.union t4
|
||||
expect_column_names ["A"] t6
|
||||
t6.at "A" . value_type . should_be_a (Value_Type.Decimal ...)
|
||||
t6.at "A" . to_vector . should_equal [0, 1, 2, 2^100, 2^10, 2]
|
||||
|
||||
# Database backends are not required to support Mixed types.
|
||||
if setup.is_database.not then
|
||||
@ -329,6 +344,24 @@ spec setup =
|
||||
t6.at "X" . value_type . should_equal Value_Type.Mixed
|
||||
t6.at "X" . to_vector . should_equal ["a", 1, Nothing, 1, 1.2, 2.3, 3.4, "a", "b", True, False]
|
||||
|
||||
Test.specify "when finding a common type for numeric columns to be Float, any precision loss should be reported" <|
|
||||
t1 = table_builder [["X", [1, (2^62)-1, 3]]]
|
||||
t2 = table_builder [["X", [1.5, 2.5, 3.5]]]
|
||||
t3 = table_builder [["X", [(2^100)+1, 2^10, 2]]]
|
||||
|
||||
t1.at "X" . value_type . should_equal Value_Type.Integer
|
||||
t2.at "X" . value_type . should_equal Value_Type.Float
|
||||
t3.at "X" . value_type . should_be_a (Value_Type.Decimal ...)
|
||||
|
||||
t4 = t2.union [t1, t3] allow_type_widening=True
|
||||
t4.at "X" . value_type . should_equal Value_Type.Float
|
||||
# Inexact float equality will make this pass:
|
||||
t4.at "X" . to_vector . should_equal [1.5, 2.5, 3.5, 1, (2^62)-1, 3, (2^100)+1, 2^10, 2]
|
||||
|
||||
w = Problems.expect_only_warning Loss_Of_Integer_Precision t4
|
||||
# Losing precision on (2^62)-1 and 2^100+1.
|
||||
w.affected_rows_count . should_equal 2
|
||||
|
||||
Test.specify "if type mismatches cause all columns to be dropped, fail with No_Output_Columns" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]]]
|
||||
t2 = table_builder [["A", ['x']]]
|
||||
|
@ -65,7 +65,7 @@ spec =
|
||||
test_column.reverse.to_vector . should_equal expected_1.to_vector
|
||||
empty_column.reverse.to_vector . should_equal empty_column.to_vector
|
||||
|
||||
Test.specify "should allow to count duplicate value occurences" <|
|
||||
Test.specify "should allow to count duplicate value occurrences" <|
|
||||
c_1 = Column.from_vector "c 1" [0, 1, 2, 2, 1, 0, 2]
|
||||
c_1.duplicate_count.to_vector.should_equal [0, 0, 0, 1, 1, 1, 2]
|
||||
|
||||
@ -106,19 +106,48 @@ spec =
|
||||
c1.at 1 . should_be_a Float
|
||||
c1.at 0 . is_a Integer . should_be_false
|
||||
|
||||
Test.specify "will preserve the types if the column is Mixed" <|
|
||||
c1 = Column.from_vector "X" ["a", 1, 2.0]
|
||||
c1.value_type . should_equal Value_Type.Mixed
|
||||
c1.at 0 . should_be_a Text
|
||||
c1.at 1 . should_be_a Integer
|
||||
c1.at 2 . should_be_a Float
|
||||
Test.specify "will preserve the types if the column is Mixed, regardless of ordering" <|
|
||||
run_test vector =
|
||||
Test.with_clue vector.pretty+": " <|
|
||||
c = Column.from_vector "X" vector
|
||||
c.value_type . should_equal Value_Type.Mixed
|
||||
c.to_vector . should_equal vector
|
||||
Problems.assume_no_problems c
|
||||
|
||||
Test.specify "will preserve the types if the column is Mixed (2)" pending="ToDo: disabled due to issue #7352" <|
|
||||
c2 = Column.from_vector "X" [1, 2.0, "a"]
|
||||
c2.value_type . should_equal Value_Type.Mixed
|
||||
c2.at 0 . should_be_a Integer
|
||||
c2.at 1 . should_be_a Float
|
||||
c2.at 2 . should_be_a Text
|
||||
# Verify that types of the values are preserved.
|
||||
c.to_vector.zip vector got-> expected->
|
||||
Test.with_clue "(type of "+got.pretty+") " <|
|
||||
(Meta.type_of got) . should_equal (Meta.type_of expected)
|
||||
got.to_text . should_equal expected.to_text
|
||||
|
||||
## Testing various permutations to ensure that the behaviour is
|
||||
invariant to the order of the values.
|
||||
(It used to differ depending on the order.)
|
||||
big = (2^100)+1
|
||||
medium = (2^62)-1
|
||||
run_test ["a", 1, 2.0]
|
||||
run_test [1, 2.0, "a"]
|
||||
run_test [2.0, 1, "a"]
|
||||
run_test [2.5, 1, "a"]
|
||||
run_test [10, 1.5, "a"]
|
||||
run_test ["a", 1.5, 10]
|
||||
run_test [big, 1.5, "a"]
|
||||
run_test [1.5, big, "a"]
|
||||
run_test [2, big, 1.5, "a"]
|
||||
run_test [2, big, 1.5, "a"]
|
||||
run_test [1.5, big, 2, "a"]
|
||||
run_test [Nothing, 1.5, medium, 2, "a"]
|
||||
run_test [medium, 1.5, "a"]
|
||||
run_test [Nothing, medium, 1.5, "a"]
|
||||
|
||||
big_test_vector x y =
|
||||
a = Vector.fill 40 [x, 1.5, 2, y, x+123, y+123, y+5, 2.5]
|
||||
b = Vector.fill 100 [Nothing]
|
||||
c = Vector.fill 500 [2, 1.5, x]
|
||||
d = Vector.fill 5 [2, "a", 2, 2.5]
|
||||
(a+b+c+d).flatten
|
||||
run_test (big_test_vector medium big)
|
||||
run_test (big_test_vector 123 456)
|
||||
|
||||
Test.specify "should allow to set a specific type at construction" <|
|
||||
c1 = Column.from_vector "X" [1, 2] Value_Type.Float
|
||||
@ -203,6 +232,10 @@ spec =
|
||||
r8.should_fail_with Invalid_Value_Type
|
||||
r8.catch.to_display_text.should_contain "Expected type Byte, but got a value 1000000000 of type Integer (32 bits)"
|
||||
|
||||
r9 = Column.from_vector "X" [100, 1.5] Value_Type.Integer
|
||||
r9.should_fail_with Invalid_Value_Type
|
||||
r9.catch.to_display_text.should_contain "Expected type Integer (64 bits), but got a value 1.5 of type Float"
|
||||
|
||||
Test.specify "will not allow to construct a column with Char size=0" <|
|
||||
r1 = Column.from_vector "X" [] (Value_Type.Char size=0 variable_length=False)
|
||||
r1.should_fail_with Illegal_Argument
|
||||
|
@ -70,17 +70,13 @@ spec =
|
||||
w4 = Problems.expect_only_warning Loss_Of_Integer_Precision c4
|
||||
w4.affected_rows_count . should_equal 2
|
||||
|
||||
# Until #7352 is fixed, these tests are also useful:
|
||||
# If we start with mixed before encountering large values, no issues occur:
|
||||
# No precision loss should happen if the column is mixed:
|
||||
c5 = Column.from_vector "X" ["a", 1.0, x+1]
|
||||
Problems.assume_no_problems c5
|
||||
c5.to_vector.map .to_text . should_equal (["a", 1.0, x+1].map .to_text)
|
||||
# But if we first have floats and ints and only later enter mixed, we again get precision loss:
|
||||
c6 = Column.from_vector "X" [x+1, 1.0, x+2, "a", x+3]
|
||||
w6 = Problems.expect_only_warning Loss_Of_Integer_Precision c6
|
||||
# The 2 integers that got into DoubleBuilder are affected, the one after upcasting to Mixed is preserved OK.
|
||||
w6.affected_rows_count . should_equal 2
|
||||
c6.to_vector.map .to_text . should_equal ([x+0.0, 1.0, x+0.0, "a", x+3].map .to_text)
|
||||
Problems.assume_no_problems c6
|
||||
c6.to_vector.map .to_text . should_equal ([x+1, 1.0, x+2, "a", x+3].map .to_text)
|
||||
|
||||
# It should also work for from_vector_repeated:
|
||||
c7 = Column.from_vector_repeated "X" [1.5, x+2, 100] 10
|
||||
|
@ -687,9 +687,15 @@ spec =
|
||||
Test.specify "Should handle parent-less file paths" <|
|
||||
transient = enso_project.data / "transient"
|
||||
f1 = transient / "./test1.txt"
|
||||
f2 = File.new ("parentlesstest" + Date_Time.now.to_unix_epoch_milliseconds.to_text + ".txt")
|
||||
f2_filename = "parentlesstest" + Date_Time.now.to_unix_epoch_milliseconds.to_text + ".txt"
|
||||
f2 = File.new f2_filename
|
||||
|
||||
Panic.with_finalizer f2.delete <|
|
||||
do_cleanup =
|
||||
f2.delete_if_exists
|
||||
f2_bak = File.new (f2_filename + ".bak")
|
||||
f2_bak.delete_if_exists
|
||||
|
||||
Panic.with_finalizer do_cleanup <|
|
||||
txt = "MY CONTENT"
|
||||
txt.write f1
|
||||
txt.write f2
|
||||
|
Loading…
Reference in New Issue
Block a user