mirror of
https://github.com/enso-org/enso.git
synced 2025-01-09 03:57:54 +03:00
Fix inconsistency when building a Mixed column, fixes to Union (#7919)
- Fixes #7352 by remembering original value types in type inference mode to be able to reconstruct them for Mixed. - Added more benchmarks for comparing performance of constructing columns. - Fixes missing implementations that caused `Table.union` crashing on some type pairs. - Ensures that `Loss_Of_Integer_Precision` warning is not swallowed when numeric columns are unioned to create a `Float` column. - Adds test for all of the above cases. - Allow to output benchmark results to a CSV by setting an environment variable - useful for quickly comparing benchmarks, e.g. in Enso.
This commit is contained in:
parent
08cd449a99
commit
0cd446432f
@ -2390,7 +2390,9 @@ concat_columns column_set all_tables result_type result_row_count on_problems =
|
|||||||
_ : Integer ->
|
_ : Integer ->
|
||||||
storage = parent_table.at i . java_column . getStorage
|
storage = parent_table.at i . java_column . getStorage
|
||||||
storage_builder.appendBulkStorage storage
|
storage_builder.appendBulkStorage storage
|
||||||
Column.from_storage column_set.name storage_builder.seal
|
sealed_storage = Java_Problems.unpack_value_with_aggregated_problems on_problems <|
|
||||||
|
storage_builder.sealWithProblems
|
||||||
|
Column.from_storage column_set.name sealed_storage
|
||||||
|
|
||||||
## PRIVATE
|
## PRIVATE
|
||||||
Conversion method to a Table from a Column.
|
Conversion method to a Table from a Column.
|
||||||
|
@ -157,7 +157,13 @@ type Bench
|
|||||||
## Run the specified set of benchmarks.
|
## Run the specified set of benchmarks.
|
||||||
run_main self =
|
run_main self =
|
||||||
count = self.total_specs
|
count = self.total_specs
|
||||||
IO.println <| "Found " + count.to_text + " cases to execute"
|
IO.println <| "Found " + count.to_text + " cases to execute (ETA " + self.estimated_runtime.to_display_text + ")"
|
||||||
|
|
||||||
|
case Environment.get "ENSO_BENCHMARK_REPORT_PATH" of
|
||||||
|
Nothing -> Nothing
|
||||||
|
path ->
|
||||||
|
line = 'Label,Phase,"Invocations count","Average time (ms)"'
|
||||||
|
line.write path on_existing_file=Existing_File_Behavior.Backup
|
||||||
|
|
||||||
self.fold Nothing _-> g-> s->
|
self.fold Nothing _-> g-> s->
|
||||||
c = g.configuration
|
c = g.configuration
|
||||||
@ -168,10 +174,10 @@ type Bench
|
|||||||
## Measure the amount of time it takes to execute a given computation.
|
## Measure the amount of time it takes to execute a given computation.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
- act: The action to perform.
|
|
||||||
- label: A name for the benchmark.
|
- label: A name for the benchmark.
|
||||||
- warmup_conf: Warmup phase configuration.
|
- warmup_conf: Warmup phase configuration.
|
||||||
- measure_conf: Measurement phase configuration.
|
- measure_conf: Measurement phase configuration.
|
||||||
|
- act: The action to perform.
|
||||||
|
|
||||||
> Example
|
> Example
|
||||||
Measure a computation called "foo" with an iteration size of 2 and a number
|
Measure a computation called "foo" with an iteration size of 2 and a number
|
||||||
@ -193,8 +199,8 @@ type Bench
|
|||||||
IO.println <| "[DRY-RUN] Benchmark '" + label + "' finished in " + fmt_duration_ms + " ms"
|
IO.println <| "[DRY-RUN] Benchmark '" + label + "' finished in " + fmt_duration_ms + " ms"
|
||||||
False ->
|
False ->
|
||||||
measure_start = System.nano_time
|
measure_start = System.nano_time
|
||||||
Bench.run_phase "Warmup" warmup_conf act
|
Bench.run_phase label "Warmup" warmup_conf act
|
||||||
Bench.run_phase "Measurement" measure_conf act
|
Bench.run_phase label "Measurement" measure_conf act
|
||||||
measure_end = System.nano_time
|
measure_end = System.nano_time
|
||||||
measure_duration_ms = (measure_end - measure_start) / 1000000
|
measure_duration_ms = (measure_end - measure_start) / 1000000
|
||||||
fmt_duration_ms = measure_duration_ms.format "#.###"
|
fmt_duration_ms = measure_duration_ms.format "#.###"
|
||||||
@ -215,11 +221,12 @@ type Bench
|
|||||||
so that it is the same as in JMH.
|
so that it is the same as in JMH.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
|
- label: A name for the benchmark.
|
||||||
- phase_name: The name of the phase.
|
- phase_name: The name of the phase.
|
||||||
- conf: The phase configuration.
|
- conf: The phase configuration.
|
||||||
- act: Method that should be measured - benchmark body.
|
- act: Method that should be measured - benchmark body.
|
||||||
run_phase : Text -> Phase_Conf -> (Any -> Any) -> Nothing
|
run_phase : Text -> Text -> Phase_Conf -> (Any -> Any) -> Nothing
|
||||||
run_phase (phase_name:Text) (conf:Phase_Conf) ~act =
|
run_phase (label:Text) (phase_name:Text) (conf:Phase_Conf) ~act =
|
||||||
duration_ns = conf.iterations * conf.seconds * 1000000000
|
duration_ns = conf.iterations * conf.seconds * 1000000000
|
||||||
phase_start = System.nano_time
|
phase_start = System.nano_time
|
||||||
stop_ns = phase_start + duration_ns
|
stop_ns = phase_start + duration_ns
|
||||||
@ -230,14 +237,29 @@ type Bench
|
|||||||
durations = go [] phase_start
|
durations = go [] phase_start
|
||||||
sum = durations.reduce (_ + _)
|
sum = durations.reduce (_ + _)
|
||||||
run_iters = durations.length
|
run_iters = durations.length
|
||||||
avg = sum / run_iters
|
avg = (sum / run_iters) / 1000000
|
||||||
fmt = (avg / 1000000).format "#.###"
|
|
||||||
phase_end = System.nano_time
|
phase_end = System.nano_time
|
||||||
phase_duration_ms = (phase_end - phase_start) / 1000000
|
phase_duration = Duration.new nanoseconds=(phase_end - phase_start)
|
||||||
IO.println <| phase_name + " duration: " + phase_duration_ms.to_text + " ms"
|
Bench.summarize_phase label phase_name run_iters avg phase_duration
|
||||||
IO.println <| phase_name + " invocations: " + run_iters.to_text
|
|
||||||
|
## PRIVATE
|
||||||
|
This is a very simple implementation of summarizing the benchmark
|
||||||
|
results.
|
||||||
|
|
||||||
|
We may want to improve it later, but it gets the job done to give us
|
||||||
|
simple summary that can be analysed more easily than logs.
|
||||||
|
summarize_phase (label:Text) (phase_name:Text) (invocations:Integer) (average_time:Float) (phase_duration:Duration) =
|
||||||
|
fmt = average_time.format "#.###"
|
||||||
|
IO.println <| phase_name + " duration: " + (phase_duration.total_milliseconds.format "#.##") + " ms"
|
||||||
|
IO.println <| phase_name + " invocations: " + invocations.to_text
|
||||||
IO.println <| phase_name + " avg time: " + fmt + " ms"
|
IO.println <| phase_name + " avg time: " + fmt + " ms"
|
||||||
|
|
||||||
|
case Environment.get "ENSO_BENCHMARK_REPORT_PATH" of
|
||||||
|
Nothing -> Nothing
|
||||||
|
path ->
|
||||||
|
line = '\n"'+label+'","'+phase_name+'",'+invocations.to_text+','+fmt
|
||||||
|
line.write path on_existing_file=Existing_File_Behavior.Append
|
||||||
|
|
||||||
## PRIVATE
|
## PRIVATE
|
||||||
|
|
||||||
Checks whether the given name is a valid benchmark name - either group name
|
Checks whether the given name is a valid benchmark name - either group name
|
||||||
|
@ -4,10 +4,12 @@ import java.math.BigInteger;
|
|||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import org.enso.base.polyglot.NumericConverter;
|
import org.enso.base.polyglot.NumericConverter;
|
||||||
import org.enso.table.data.column.storage.Storage;
|
import org.enso.table.data.column.storage.Storage;
|
||||||
|
import org.enso.table.data.column.storage.numeric.AbstractLongStorage;
|
||||||
import org.enso.table.data.column.storage.numeric.BigIntegerStorage;
|
import org.enso.table.data.column.storage.numeric.BigIntegerStorage;
|
||||||
import org.enso.table.data.column.storage.type.AnyObjectType;
|
import org.enso.table.data.column.storage.type.AnyObjectType;
|
||||||
import org.enso.table.data.column.storage.type.BigIntegerType;
|
import org.enso.table.data.column.storage.type.BigIntegerType;
|
||||||
import org.enso.table.data.column.storage.type.FloatType;
|
import org.enso.table.data.column.storage.type.FloatType;
|
||||||
|
import org.enso.table.data.column.storage.type.IntegerType;
|
||||||
import org.enso.table.data.column.storage.type.StorageType;
|
import org.enso.table.data.column.storage.type.StorageType;
|
||||||
import org.enso.table.error.ValueTypeMismatchException;
|
import org.enso.table.error.ValueTypeMismatchException;
|
||||||
import org.graalvm.polyglot.Context;
|
import org.graalvm.polyglot.Context;
|
||||||
@ -24,8 +26,8 @@ public class BigIntegerBuilder extends TypedBuilderImpl<BigInteger> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void writeTo(Object[] items) {
|
public void retypeToMixed(Object[] items) {
|
||||||
super.writeTo(items);
|
super.retypeToMixed(items);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -36,7 +38,7 @@ public class BigIntegerBuilder extends TypedBuilderImpl<BigInteger> {
|
|||||||
@Override
|
@Override
|
||||||
public TypedBuilder retypeTo(StorageType type) {
|
public TypedBuilder retypeTo(StorageType type) {
|
||||||
if (type instanceof FloatType) {
|
if (type instanceof FloatType) {
|
||||||
DoubleBuilder res = NumericBuilder.createDoubleBuilder(currentSize);
|
DoubleBuilder res = NumericBuilder.createInferringDoubleBuilder(currentSize);
|
||||||
for (int i = 0; i < currentSize; i++) {
|
for (int i = 0; i < currentSize; i++) {
|
||||||
if (data[i] == null) {
|
if (data[i] == null) {
|
||||||
res.appendNulls(1);
|
res.appendNulls(1);
|
||||||
@ -98,4 +100,28 @@ public class BigIntegerBuilder extends TypedBuilderImpl<BigInteger> {
|
|||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void appendBulkStorage(Storage<?> storage) {
|
||||||
|
if (storage.getType() instanceof IntegerType) {
|
||||||
|
if (storage instanceof AbstractLongStorage longStorage) {
|
||||||
|
int n = longStorage.size();
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
if (storage.isNa(i)) {
|
||||||
|
data[currentSize++] = null;
|
||||||
|
} else {
|
||||||
|
long item = longStorage.getItem(i);
|
||||||
|
data[currentSize++] = BigInteger.valueOf(item);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw new IllegalStateException(
|
||||||
|
"Unexpected storage implementation for type INTEGER: "
|
||||||
|
+ storage
|
||||||
|
+ ". This is a bug in the Table library.");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
super.appendBulkStorage(storage);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -95,7 +95,7 @@ public class BoolBuilder extends TypedBuilder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void writeTo(Object[] items) {
|
public void retypeToMixed(Object[] items) {
|
||||||
for (int i = 0; i < size; i++) {
|
for (int i = 0; i < size; i++) {
|
||||||
if (isNa.get(i)) {
|
if (isNa.get(i)) {
|
||||||
items[i] = null;
|
items[i] = null;
|
||||||
|
@ -5,7 +5,9 @@ import org.enso.table.data.column.operation.cast.ToFloatStorageConverter;
|
|||||||
import org.enso.table.data.column.storage.BoolStorage;
|
import org.enso.table.data.column.storage.BoolStorage;
|
||||||
import org.enso.table.data.column.storage.Storage;
|
import org.enso.table.data.column.storage.Storage;
|
||||||
import org.enso.table.data.column.storage.numeric.AbstractLongStorage;
|
import org.enso.table.data.column.storage.numeric.AbstractLongStorage;
|
||||||
|
import org.enso.table.data.column.storage.numeric.BigIntegerStorage;
|
||||||
import org.enso.table.data.column.storage.numeric.DoubleStorage;
|
import org.enso.table.data.column.storage.numeric.DoubleStorage;
|
||||||
|
import org.enso.table.data.column.storage.type.BigIntegerType;
|
||||||
import org.enso.table.data.column.storage.type.BooleanType;
|
import org.enso.table.data.column.storage.type.BooleanType;
|
||||||
import org.enso.table.data.column.storage.type.FloatType;
|
import org.enso.table.data.column.storage.type.FloatType;
|
||||||
import org.enso.table.data.column.storage.type.IntegerType;
|
import org.enso.table.data.column.storage.type.IntegerType;
|
||||||
@ -28,14 +30,10 @@ public class DoubleBuilder extends NumericBuilder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void writeTo(Object[] items) {
|
public void retypeToMixed(Object[] items) {
|
||||||
for (int i = 0; i < currentSize; i++) {
|
throw new IllegalStateException("The DoubleBuilder cannot be retyped to the Mixed type, because it would lose " +
|
||||||
if (isMissing.get(i)) {
|
"type information about integers that were converted to doubles. If recasting is needed, " +
|
||||||
items[i] = null;
|
"InferringDoubleBuilder should be used instead. This error leaking is a bug in the Table library.");
|
||||||
} else {
|
|
||||||
items[i] = Double.longBitsToDouble(data[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -53,38 +51,11 @@ public class DoubleBuilder extends NumericBuilder {
|
|||||||
return FloatType.FLOAT_64;
|
return FloatType.FLOAT_64;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Converts the provided LongBuilder to a DoubleBuilder.
|
|
||||||
*
|
|
||||||
* <p>The original LongBuilder becomes invalidated after this operation and should no longer be
|
|
||||||
* used.
|
|
||||||
*/
|
|
||||||
static DoubleBuilder retypeFromLongBuilder(LongBuilder longBuilder) {
|
|
||||||
int currentSize = longBuilder.currentSize;
|
|
||||||
DoubleBuilder newBuilder = new DoubleBuilder(longBuilder.isMissing, longBuilder.data, currentSize);
|
|
||||||
|
|
||||||
// Invalidate the old builder.
|
|
||||||
longBuilder.data = null;
|
|
||||||
longBuilder.isMissing = null;
|
|
||||||
longBuilder.currentSize = -1;
|
|
||||||
|
|
||||||
// Translate the data in-place to avoid unnecessary allocations.
|
|
||||||
for (int i = 0; i < currentSize; i++) {
|
|
||||||
if (!newBuilder.isMissing.get(i)) {
|
|
||||||
long currentIntegerValue = newBuilder.data[i];
|
|
||||||
double convertedFloatValue = newBuilder.convertIntegerToDouble(currentIntegerValue);
|
|
||||||
newBuilder.data[i] = Double.doubleToRawLongBits(convertedFloatValue);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return newBuilder;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void appendNoGrow(Object o) {
|
public void appendNoGrow(Object o) {
|
||||||
if (o == null) {
|
if (o == null) {
|
||||||
isMissing.set(currentSize++);
|
isMissing.set(currentSize++);
|
||||||
} else if (NumericConverter.isFloatLike(o)){
|
} else if (NumericConverter.isFloatLike(o)) {
|
||||||
double value = NumericConverter.coerceToDouble(o);
|
double value = NumericConverter.coerceToDouble(o);
|
||||||
data[currentSize++] = Double.doubleToRawLongBits(value);
|
data[currentSize++] = Double.doubleToRawLongBits(value);
|
||||||
} else if (NumericConverter.isCoercibleToLong(o)) {
|
} else if (NumericConverter.isCoercibleToLong(o)) {
|
||||||
@ -134,7 +105,25 @@ public class DoubleBuilder extends NumericBuilder {
|
|||||||
+ storage
|
+ storage
|
||||||
+ ". This is a bug in the Table library.");
|
+ ". This is a bug in the Table library.");
|
||||||
}
|
}
|
||||||
} else if (Objects.equals(storage.getType(), BooleanType.INSTANCE)) {
|
} else if (storage.getType() instanceof BigIntegerType) {
|
||||||
|
if (storage instanceof BigIntegerStorage bigIntegerStorage) {
|
||||||
|
int n = bigIntegerStorage.size();
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
BigInteger item = bigIntegerStorage.getItem(i);
|
||||||
|
if (item == null) {
|
||||||
|
isMissing.set(currentSize++);
|
||||||
|
} else {
|
||||||
|
double converted = convertBigIntegerToDouble(item);
|
||||||
|
data[currentSize++] = Double.doubleToRawLongBits(converted);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw new IllegalStateException(
|
||||||
|
"Unexpected storage implementation for type BIG INTEGER: "
|
||||||
|
+ storage
|
||||||
|
+ ". This is a bug in the Table library.");
|
||||||
|
}
|
||||||
|
} else if (storage.getType() instanceof BooleanType) {
|
||||||
if (storage instanceof BoolStorage boolStorage) {
|
if (storage instanceof BoolStorage boolStorage) {
|
||||||
int n = boolStorage.size();
|
int n = boolStorage.size();
|
||||||
for (int i = 0; i < n; i++) {
|
for (int i = 0; i < n; i++) {
|
||||||
@ -201,34 +190,34 @@ public class DoubleBuilder extends NumericBuilder {
|
|||||||
* <p>
|
* <p>
|
||||||
* It verifies if the integer can be exactly represented in a double, and if not, it reports a warning.
|
* It verifies if the integer can be exactly represented in a double, and if not, it reports a warning.
|
||||||
*/
|
*/
|
||||||
private double convertIntegerToDouble(long integer) {
|
protected double convertIntegerToDouble(long integer) {
|
||||||
double floatingPointValue = (double) integer;
|
double floatingPointValue = (double) integer;
|
||||||
boolean isLosingPrecision = (long) floatingPointValue != integer;
|
boolean isLosingPrecision = (long) floatingPointValue != integer;
|
||||||
if (isLosingPrecision) {
|
if (isLosingPrecision) {
|
||||||
if (precisionLoss == null) {
|
reportPrecisionLoss(integer, floatingPointValue);
|
||||||
precisionLoss = new LossOfIntegerPrecision(integer, floatingPointValue);
|
|
||||||
} else {
|
|
||||||
precisionLoss.incrementAffectedRows();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return floatingPointValue;
|
return floatingPointValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
private double convertBigIntegerToDouble(BigInteger bigInteger) {
|
protected double convertBigIntegerToDouble(BigInteger bigInteger) {
|
||||||
double floatingPointValue = bigInteger.doubleValue();
|
double floatingPointValue = bigInteger.doubleValue();
|
||||||
BigInteger reconstructed = BigDecimal.valueOf(floatingPointValue).toBigInteger();
|
BigInteger reconstructed = BigDecimal.valueOf(floatingPointValue).toBigInteger();
|
||||||
boolean isLosingPrecision = !bigInteger.equals(reconstructed);
|
boolean isLosingPrecision = !bigInteger.equals(reconstructed);
|
||||||
if (isLosingPrecision) {
|
if (isLosingPrecision) {
|
||||||
if (precisionLoss == null) {
|
reportPrecisionLoss(bigInteger, floatingPointValue);
|
||||||
precisionLoss = new LossOfIntegerPrecision(bigInteger, floatingPointValue);
|
|
||||||
} else {
|
|
||||||
precisionLoss.incrementAffectedRows();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return floatingPointValue;
|
return floatingPointValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
private LossOfIntegerPrecision precisionLoss = null;
|
protected final void reportPrecisionLoss(Number number, double approximation) {
|
||||||
|
if (precisionLoss == null) {
|
||||||
|
precisionLoss = new LossOfIntegerPrecision(number, approximation);
|
||||||
|
} else {
|
||||||
|
precisionLoss.incrementAffectedRows();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected LossOfIntegerPrecision precisionLoss = null;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public AggregatedProblems getProblems() {
|
public AggregatedProblems getProblems() {
|
||||||
|
@ -102,7 +102,7 @@ public class InferredBuilder extends Builder {
|
|||||||
// In inferred builder, we always default to 64-bits.
|
// In inferred builder, we always default to 64-bits.
|
||||||
currentBuilder = NumericBuilder.createLongBuilder(initialCapacity, IntegerType.INT_64);
|
currentBuilder = NumericBuilder.createLongBuilder(initialCapacity, IntegerType.INT_64);
|
||||||
} else if (NumericConverter.isFloatLike(o)) {
|
} else if (NumericConverter.isFloatLike(o)) {
|
||||||
currentBuilder = NumericBuilder.createDoubleBuilder(initialCapacity);
|
currentBuilder = NumericBuilder.createInferringDoubleBuilder(initialCapacity);
|
||||||
} else if (o instanceof String) {
|
} else if (o instanceof String) {
|
||||||
currentBuilder = new StringBuilder(initialCapacity, TextType.VARIABLE_LENGTH);
|
currentBuilder = new StringBuilder(initialCapacity, TextType.VARIABLE_LENGTH);
|
||||||
} else if (o instanceof BigInteger) {
|
} else if (o instanceof BigInteger) {
|
||||||
@ -156,7 +156,7 @@ public class InferredBuilder extends Builder {
|
|||||||
|
|
||||||
private void retypeToMixed() {
|
private void retypeToMixed() {
|
||||||
ObjectBuilder objectBuilder = new MixedBuilder(initialSize);
|
ObjectBuilder objectBuilder = new MixedBuilder(initialSize);
|
||||||
currentBuilder.writeTo(objectBuilder.getData());
|
currentBuilder.retypeToMixed(objectBuilder.getData());
|
||||||
objectBuilder.setCurrentSize(currentBuilder.getCurrentSize());
|
objectBuilder.setCurrentSize(currentBuilder.getCurrentSize());
|
||||||
objectBuilder.setPreExistingProblems(currentBuilder.getProblems());
|
objectBuilder.setPreExistingProblems(currentBuilder.getProblems());
|
||||||
currentBuilder = objectBuilder;
|
currentBuilder = objectBuilder;
|
||||||
|
@ -0,0 +1,192 @@
|
|||||||
|
package org.enso.table.data.column.builder;
|
||||||
|
|
||||||
|
import org.enso.base.polyglot.NumericConverter;
|
||||||
|
import org.enso.table.data.column.storage.Storage;
|
||||||
|
import org.enso.table.error.ValueTypeMismatchException;
|
||||||
|
|
||||||
|
import java.math.BigInteger;
|
||||||
|
import java.util.BitSet;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A double builder variant that preserves types and can be retyped to Mixed.
|
||||||
|
*/
|
||||||
|
public class InferringDoubleBuilder extends DoubleBuilder {
|
||||||
|
/**
|
||||||
|
* Converts the provided LongBuilder to a DoubleBuilder.
|
||||||
|
*
|
||||||
|
* <p>The original LongBuilder becomes invalidated after this operation and should no longer be
|
||||||
|
* used.
|
||||||
|
*/
|
||||||
|
static InferringDoubleBuilder retypeFromLongBuilder(LongBuilder longBuilder) {
|
||||||
|
int currentSize = longBuilder.currentSize;
|
||||||
|
InferringDoubleBuilder newBuilder =
|
||||||
|
new InferringDoubleBuilder(longBuilder.isMissing, longBuilder.data, currentSize);
|
||||||
|
|
||||||
|
// Invalidate the old builder.
|
||||||
|
longBuilder.data = null;
|
||||||
|
longBuilder.isMissing = null;
|
||||||
|
longBuilder.currentSize = -1;
|
||||||
|
|
||||||
|
// Assume all longs will be compacted.
|
||||||
|
newBuilder.isLongCompactedAsDouble.set(0, currentSize, true);
|
||||||
|
|
||||||
|
// Translate the data in-place to avoid unnecessary allocations.
|
||||||
|
for (int i = 0; i < currentSize; i++) {
|
||||||
|
if (!newBuilder.isMissing.get(i)) {
|
||||||
|
long currentIntegerValue = newBuilder.data[i];
|
||||||
|
double convertedFloatValue = (double) currentIntegerValue;
|
||||||
|
boolean isLossy = currentIntegerValue != (long) convertedFloatValue;
|
||||||
|
if (isLossy) {
|
||||||
|
// Save it raw for recovery.
|
||||||
|
newBuilder.setRaw(i, currentIntegerValue);
|
||||||
|
newBuilder.reportPrecisionLoss(currentIntegerValue, convertedFloatValue);
|
||||||
|
// Unmark the long that did not fit:
|
||||||
|
newBuilder.isLongCompactedAsDouble.set(i, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
newBuilder.data[i] = Double.doubleToRawLongBits(convertedFloatValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return newBuilder;
|
||||||
|
}
|
||||||
|
|
||||||
|
InferringDoubleBuilder(BitSet isMissing, long[] doubleData, int currentSize) {
|
||||||
|
super(isMissing, doubleData, currentSize);
|
||||||
|
rawData = null;
|
||||||
|
isLongCompactedAsDouble = new BitSet();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stores the raw data as passed to append, in order to be able to reconstruct the original values when retyping to
|
||||||
|
* mixed.
|
||||||
|
*/
|
||||||
|
private Number[] rawData;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Specifies at which indices we encountered integers that can be reconstructed from double without loss of
|
||||||
|
* precision.
|
||||||
|
* <p>
|
||||||
|
* This is used for reconstructing the original values when retyping to mixed. Integers that are small enough can be
|
||||||
|
* reconstructed from their double values, so we do not need to store them as `rawData`, instead we only mark that
|
||||||
|
* they need to be converted back into integers when retyping. This allows us to completely avoid allocating the
|
||||||
|
* `rawData` array for most practical scenarios when the integers are not too large. The cost of allocating this
|
||||||
|
* BitSet should be significantly lower than allocating the `rawData` array.
|
||||||
|
*/
|
||||||
|
private final BitSet isLongCompactedAsDouble;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void retypeToMixed(Object[] items) {
|
||||||
|
int rawN = rawData == null ? 0 : rawData.length;
|
||||||
|
for (int i = 0; i < currentSize; i++) {
|
||||||
|
if (isMissing.get(i)) {
|
||||||
|
items[i] = null;
|
||||||
|
} else {
|
||||||
|
if (isLongCompactedAsDouble.get(i)) {
|
||||||
|
double value = Double.longBitsToDouble(data[i]);
|
||||||
|
long reconstructed = (long) value;
|
||||||
|
items[i] = reconstructed;
|
||||||
|
} else if (i < rawN && rawData[i] != null) {
|
||||||
|
items[i] = rawData[i];
|
||||||
|
} else {
|
||||||
|
double value = Double.longBitsToDouble(data[i]);
|
||||||
|
items[i] = value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Since we are retyping to Mixed, the precision loss warnings should not be inherited - thus we reset them.
|
||||||
|
precisionLoss = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void appendBulkStorage(Storage<?> storage) {
|
||||||
|
throw new UnsupportedOperationException("appendBulkStorage is not supported on InferringDoubleBuilder. " +
|
||||||
|
"A DoubleBuilder or MixedBuilder should be used instead. This is a bug in the Table library.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void appendDouble(double x) {
|
||||||
|
if (currentSize >= this.data.length) {
|
||||||
|
grow();
|
||||||
|
}
|
||||||
|
|
||||||
|
data[currentSize] = Double.doubleToRawLongBits(x);
|
||||||
|
currentSize++;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void appendLong(long integer) {
|
||||||
|
if (currentSize >= this.data.length) {
|
||||||
|
grow();
|
||||||
|
}
|
||||||
|
|
||||||
|
appendLongNoGrow(integer);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void appendLongNoGrow(long integer) {
|
||||||
|
double convertedFloatValue = (double) integer;
|
||||||
|
boolean isLossy = integer != (long) convertedFloatValue;
|
||||||
|
if (isLossy) {
|
||||||
|
setRaw(currentSize, integer);
|
||||||
|
reportPrecisionLoss(integer, convertedFloatValue);
|
||||||
|
} else {
|
||||||
|
isLongCompactedAsDouble.set(currentSize, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
data[currentSize] = Double.doubleToRawLongBits(convertedFloatValue);
|
||||||
|
currentSize++;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void appendBigInteger(BigInteger integer) {
|
||||||
|
if (currentSize >= this.data.length) {
|
||||||
|
grow();
|
||||||
|
}
|
||||||
|
|
||||||
|
setRaw(currentSize, integer);
|
||||||
|
double convertedFloatValue = convertBigIntegerToDouble(integer);
|
||||||
|
data[currentSize] = Double.doubleToRawLongBits(convertedFloatValue);
|
||||||
|
currentSize++;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void appendNoGrow(Object o) {
|
||||||
|
if (o == null) {
|
||||||
|
isMissing.set(currentSize++);
|
||||||
|
} else if (NumericConverter.isFloatLike(o)) {
|
||||||
|
double value = NumericConverter.coerceToDouble(o);
|
||||||
|
data[currentSize++] = Double.doubleToRawLongBits(value);
|
||||||
|
} else if (NumericConverter.isCoercibleToLong(o)) {
|
||||||
|
long value = NumericConverter.coerceToLong(o);
|
||||||
|
appendLongNoGrow(value);
|
||||||
|
} else if (o instanceof BigInteger bigInteger) {
|
||||||
|
setRaw(currentSize, bigInteger);
|
||||||
|
double converted = convertBigIntegerToDouble(bigInteger);
|
||||||
|
data[currentSize++] = Double.doubleToRawLongBits(converted);
|
||||||
|
} else {
|
||||||
|
throw new ValueTypeMismatchException(getType(), o);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void appendRawNoGrow(long rawData) {
|
||||||
|
throw new UnsupportedOperationException("appendRawNoGrow is not supported on InferringDoubleBuilder. " +
|
||||||
|
"A DoubleBuilder should be used instead. This is a bug in the Table library.");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void setRaw(int ix, Number o) {
|
||||||
|
if (rawData == null) {
|
||||||
|
rawData = new Number[ix + 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rawData.length <= ix) {
|
||||||
|
int newLength = Math.max(rawData.length * 3 / 2 + 1, ix + 1);
|
||||||
|
Number[] newRawData = new Number[newLength];
|
||||||
|
System.arraycopy(rawData, 0, newRawData, 0, rawData.length);
|
||||||
|
rawData = newRawData;
|
||||||
|
}
|
||||||
|
|
||||||
|
rawData[ix] = o;
|
||||||
|
}
|
||||||
|
}
|
@ -35,7 +35,7 @@ public abstract class LongBuilder extends NumericBuilder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void writeTo(Object[] items) {
|
public void retypeToMixed(Object[] items) {
|
||||||
for (int i = 0; i < currentSize; i++) {
|
for (int i = 0; i < currentSize; i++) {
|
||||||
if (isMissing.get(i)) {
|
if (isMissing.get(i)) {
|
||||||
items[i] = null;
|
items[i] = null;
|
||||||
@ -55,7 +55,7 @@ public abstract class LongBuilder extends NumericBuilder {
|
|||||||
if (Objects.equals(type, BigIntegerType.INSTANCE)) {
|
if (Objects.equals(type, BigIntegerType.INSTANCE)) {
|
||||||
return BigIntegerBuilder.retypeFromLongBuilder(this);
|
return BigIntegerBuilder.retypeFromLongBuilder(this);
|
||||||
} else if (Objects.equals(type, FloatType.FLOAT_64)) {
|
} else if (Objects.equals(type, FloatType.FLOAT_64)) {
|
||||||
return DoubleBuilder.retypeFromLongBuilder(this);
|
return InferringDoubleBuilder.retypeFromLongBuilder(this);
|
||||||
} else {
|
} else {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
@ -16,10 +16,19 @@ public abstract class NumericBuilder extends TypedBuilder {
|
|||||||
this.currentSize = currentSize;
|
this.currentSize = currentSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a {@link DoubleBuilder} that should be used to create columns of boolean type and are
|
||||||
|
* not expected to be retyped.
|
||||||
|
*/
|
||||||
public static DoubleBuilder createDoubleBuilder(int size) {
|
public static DoubleBuilder createDoubleBuilder(int size) {
|
||||||
return new DoubleBuilder(new BitSet(), new long[size], 0);
|
return new DoubleBuilder(new BitSet(), new long[size], 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Creates a {@link DoubleBuilder} that may be retyped to Mixed type. */
|
||||||
|
public static DoubleBuilder createInferringDoubleBuilder(int size) {
|
||||||
|
return new InferringDoubleBuilder(new BitSet(), new long[size], 0);
|
||||||
|
}
|
||||||
|
|
||||||
public static LongBuilder createLongBuilder(int size, IntegerType type) {
|
public static LongBuilder createLongBuilder(int size, IntegerType type) {
|
||||||
return LongBuilder.make(size, type);
|
return LongBuilder.make(size, type);
|
||||||
}
|
}
|
||||||
|
@ -23,7 +23,7 @@ public class ObjectBuilder extends TypedBuilder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void writeTo(Object[] items) {
|
public void retypeToMixed(Object[] items) {
|
||||||
throw new IllegalStateException("Broken invariant: rewriting the most general type.");
|
throw new IllegalStateException("Broken invariant: rewriting the most general type.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@ public abstract class TypedBuilder extends Builder {
|
|||||||
*
|
*
|
||||||
* @param items the buffer to dump elements into
|
* @param items the buffer to dump elements into
|
||||||
*/
|
*/
|
||||||
public abstract void writeTo(Object[] items);
|
public abstract void retypeToMixed(Object[] items);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks if the builder can be efficiently retyped to the given storage type.
|
* Checks if the builder can be efficiently retyped to the given storage type.
|
||||||
|
@ -18,7 +18,7 @@ public abstract class TypedBuilderImpl<T> extends TypedBuilder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void writeTo(Object[] items) {
|
public void retypeToMixed(Object[] items) {
|
||||||
if (currentSize >= 0) System.arraycopy(data, 0, items, 0, currentSize);
|
if (currentSize >= 0) System.arraycopy(data, 0, items, 0, currentSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8,15 +8,41 @@ from Standard.Test import Bench
|
|||||||
options = Bench.options . set_warmup (Bench.phase_conf 2 3) . set_measure (Bench.phase_conf 2 3)
|
options = Bench.options . set_warmup (Bench.phase_conf 2 3) . set_measure (Bench.phase_conf 2 3)
|
||||||
|
|
||||||
type Data
|
type Data
|
||||||
Value ~ints
|
Value ~ints ~floats ~floats_and_small_ints ~floats_and_large_ints
|
||||||
|
|
||||||
create num_rows =
|
create num_rows =
|
||||||
# 0-argument block to make it lazy
|
# 0-argument block to make it lazy
|
||||||
ints =
|
ints =
|
||||||
Vector.new num_rows i->
|
Vector.new num_rows i->
|
||||||
i % 1000
|
i % 1000
|
||||||
Data.Value ints
|
floats =
|
||||||
|
Vector.new num_rows i->
|
||||||
|
(i % 1000) + 0.5
|
||||||
|
|
||||||
|
floats_and_small_ints =
|
||||||
|
Vector.new num_rows i->
|
||||||
|
case i % 2 of
|
||||||
|
0 -> (i % 1000) + 0.5
|
||||||
|
1 -> i % 1000
|
||||||
|
|
||||||
|
floats_and_large_ints =
|
||||||
|
Vector.new num_rows i->
|
||||||
|
case i % 3 of
|
||||||
|
0 -> (i % 1000) + 0.5
|
||||||
|
1 -> 2^60 + (i % 100)
|
||||||
|
2 -> 2^100 + (i % 100)
|
||||||
|
|
||||||
|
Data.Value ints floats floats_and_small_ints floats_and_large_ints
|
||||||
|
|
||||||
|
## A flag allowing to run some additional benchmarks.
|
||||||
|
They were used when analyzing performance of an optimization of Mixed
|
||||||
|
handler. They are disabled on the CI to avoid increasing the time of the
|
||||||
|
build, but they can be manually re-enabled in case this needs testing in the
|
||||||
|
future.
|
||||||
|
|
||||||
|
When set to `False`, only the core measurements checking the most common
|
||||||
|
cases are run.
|
||||||
|
extended_tests = False
|
||||||
|
|
||||||
collect_benches = Bench.build builder->
|
collect_benches = Bench.build builder->
|
||||||
num_rows = 1000000
|
num_rows = 1000000
|
||||||
@ -32,4 +58,18 @@ collect_benches = Bench.build builder->
|
|||||||
group_builder.specify "Integers_type_Auto" <|
|
group_builder.specify "Integers_type_Auto" <|
|
||||||
Column.from_vector "Auto" data.ints value_type=Auto
|
Column.from_vector "Auto" data.ints value_type=Auto
|
||||||
|
|
||||||
|
group_builder.specify "Floats_type_Auto" <|
|
||||||
|
Column.from_vector "Auto" data.floats value_type=Auto
|
||||||
|
group_builder.specify "Floats_type_Float" <|
|
||||||
|
Column.from_vector "Floats" data.floats value_type=Value_Type.Float
|
||||||
|
|
||||||
|
if extended_tests then group_builder.specify "Numeric_Mix_Small_type_Auto" <|
|
||||||
|
Column.from_vector "Small_Mix" data.floats_and_small_ints value_type=Auto
|
||||||
|
if extended_tests then group_builder.specify "Numeric_Mix_Small_type_Float" <|
|
||||||
|
Column.from_vector "Small_Mix" data.floats_and_small_ints value_type=Value_Type.Float
|
||||||
|
if extended_tests then group_builder.specify "Numeric_Mix_Large_type_Auto" <|
|
||||||
|
Column.from_vector "Large_Mix" data.floats_and_large_ints value_type=Auto
|
||||||
|
if extended_tests then group_builder.specify "Numeric_Mix_Large_type_Float" <|
|
||||||
|
Column.from_vector "Large_Mix" data.floats_and_large_ints value_type=Value_Type.Float
|
||||||
|
|
||||||
main = collect_benches . run_main
|
main = collect_benches . run_main
|
||||||
|
@ -426,15 +426,16 @@ spec setup =
|
|||||||
c3 = t2.cast "super-mix" Value_Type.Char . at "super-mix"
|
c3 = t2.cast "super-mix" Value_Type.Char . at "super-mix"
|
||||||
c3.value_type . should_equal Value_Type.Char
|
c3.value_type . should_equal Value_Type.Char
|
||||||
date_time_str = Date_Time.new 2023 12 15 19 25 . to_text
|
date_time_str = Date_Time.new 2023 12 15 19 25 . to_text
|
||||||
c3.to_vector . should_equal ["1.0", "2.25", "3", "-45.25", "2.0", "X", "2020-01-01", date_time_str, "12:30:00", "{{{MY Type [x=42] }}}", "True", Nothing, "True"]
|
c3.to_vector . should_equal ["1", "2.25", "3", "-45.25", "2.0", "X", "2020-01-01", date_time_str, "12:30:00", "{{{MY Type [x=42] }}}", "True", Nothing, "True"]
|
||||||
Problems.assume_no_problems c3
|
Problems.assume_no_problems c3
|
||||||
|
|
||||||
if setup.test_selection.fixed_length_text_columns then
|
if setup.test_selection.fixed_length_text_columns then
|
||||||
c3_2 = t2.cast "super-mix" (Value_Type.Char size=2) . at "super-mix"
|
c3_2 = t2.cast "super-mix" (Value_Type.Char size=2) . at "super-mix"
|
||||||
c3_2.value_type . should_equal (Value_Type.Char size=2)
|
c3_2.value_type . should_equal (Value_Type.Char size=2)
|
||||||
c3_2.to_vector . should_equal ["1.", "2.", "3", "-4", "2.", "X", "20", "20", "12", "{{", "Tr", Nothing, "Tr"]
|
c3_2.to_vector . should_equal ["1", "2.", "3", "-4", "2.", "X", "20", "20", "12", "{{", "Tr", Nothing, "Tr"]
|
||||||
w3_2 = Problems.expect_warning Conversion_Failure c3_2
|
w3_2 = Problems.expect_warning Conversion_Failure c3_2
|
||||||
w3_2.affected_rows_count . should_equal 10
|
w3_2.affected_rows_count . should_equal 9
|
||||||
|
w3_2.to_display_text . should_contain "have a text representation that does not fit the target type"
|
||||||
|
|
||||||
c4 = t2.cast "super-mix" Value_Type.Boolean . at "super-mix"
|
c4 = t2.cast "super-mix" Value_Type.Boolean . at "super-mix"
|
||||||
c4.value_type . should_equal Value_Type.Boolean
|
c4.value_type . should_equal Value_Type.Boolean
|
||||||
@ -630,13 +631,13 @@ spec setup =
|
|||||||
t3.at "Y" . value_type . should_equal Value_Type.Char
|
t3.at "Y" . value_type . should_equal Value_Type.Char
|
||||||
|
|
||||||
Test.specify "will choose Decimal type if all values are integers but cannot fit long" <|
|
Test.specify "will choose Decimal type if all values are integers but cannot fit long" <|
|
||||||
c0 = table_builder [["X", [My_Type.Value 42, 1, 2, 2^100]]] . at "X"
|
c0 = table_builder [["X", [My_Type.Value 42, 1, 2, (2^100)+1]]] . at "X"
|
||||||
c1 = c0.drop 1
|
c1 = c0.drop 1
|
||||||
|
|
||||||
c1.value_type . should_equal Value_Type.Mixed
|
c1.value_type . should_equal Value_Type.Mixed
|
||||||
c2 = c1.auto_value_type
|
c2 = c1.auto_value_type
|
||||||
c2.value_type . should_be_a (Value_Type.Decimal ...)
|
c2.value_type . should_be_a (Value_Type.Decimal ...)
|
||||||
c2.to_vector . should_equal [1, 2, 2^100]
|
c2.to_vector . should_equal [1, 2, (2^100)+1]
|
||||||
|
|
||||||
Test.specify "will try to find the smallest integer type to fit the value (if shrink_types=True)" <|
|
Test.specify "will try to find the smallest integer type to fit the value (if shrink_types=True)" <|
|
||||||
[False, True].each is_mixed->
|
[False, True].each is_mixed->
|
||||||
|
@ -208,16 +208,31 @@ spec setup =
|
|||||||
t3.at "A" . value_type . variable_length . should_be_true
|
t3.at "A" . value_type . variable_length . should_be_true
|
||||||
|
|
||||||
Test.specify "should find a common type that will fit the merged columns" <|
|
Test.specify "should find a common type that will fit the merged columns" <|
|
||||||
t1 = table_builder [["int+float", [0, 1, 2]]]
|
t1 = table_builder [["A", [0, 1, 2]]]
|
||||||
t2 = table_builder [["int+float", [1.0, 2.0, 2.5]]]
|
t2 = table_builder [["A", [1.0, 2.0, 2.5]]]
|
||||||
|
|
||||||
t1.at "int+float" . value_type . is_integer . should_be_true
|
t1.at "A" . value_type . is_integer . should_be_true
|
||||||
t2.at "int+float" . value_type . is_floating_point . should_be_true
|
t2.at "A" . value_type . is_floating_point . should_be_true
|
||||||
|
|
||||||
t3 = t1.union t2
|
t3 = t1.union t2
|
||||||
expect_column_names ["int+float"] t3
|
expect_column_names ["A"] t3
|
||||||
t3.at "int+float" . value_type . is_floating_point . should_be_true
|
t3.at "A" . value_type . is_floating_point . should_be_true
|
||||||
t3.at "int+float" . to_vector . should_equal [0, 1, 2, 1.0, 2.0, 2.5]
|
t3.at "A" . to_vector . should_equal [0, 1, 2, 1.0, 2.0, 2.5]
|
||||||
|
|
||||||
|
# Specific type tests that apply to in-memory. Database behaviour is up to implementation.
|
||||||
|
if setup.is_database.not then
|
||||||
|
t4 = table_builder [["A", [2^100, 2^10, 2]]]
|
||||||
|
t4.at "A" . value_type . should_be_a (Value_Type.Decimal ...)
|
||||||
|
|
||||||
|
t5 = t2.union t4
|
||||||
|
expect_column_names ["A"] t5
|
||||||
|
t5.at "A" . value_type . is_floating_point . should_be_true
|
||||||
|
t5.at "A" . to_vector . should_equal [1.0, 2.0, 2.5, 2^100, 2^10, 2]
|
||||||
|
|
||||||
|
t6 = t1.union t4
|
||||||
|
expect_column_names ["A"] t6
|
||||||
|
t6.at "A" . value_type . should_be_a (Value_Type.Decimal ...)
|
||||||
|
t6.at "A" . to_vector . should_equal [0, 1, 2, 2^100, 2^10, 2]
|
||||||
|
|
||||||
# Database backends are not required to support Mixed types.
|
# Database backends are not required to support Mixed types.
|
||||||
if setup.is_database.not then
|
if setup.is_database.not then
|
||||||
@ -329,6 +344,24 @@ spec setup =
|
|||||||
t6.at "X" . value_type . should_equal Value_Type.Mixed
|
t6.at "X" . value_type . should_equal Value_Type.Mixed
|
||||||
t6.at "X" . to_vector . should_equal ["a", 1, Nothing, 1, 1.2, 2.3, 3.4, "a", "b", True, False]
|
t6.at "X" . to_vector . should_equal ["a", 1, Nothing, 1, 1.2, 2.3, 3.4, "a", "b", True, False]
|
||||||
|
|
||||||
|
Test.specify "when finding a common type for numeric columns to be Float, any precision loss should be reported" <|
|
||||||
|
t1 = table_builder [["X", [1, (2^62)-1, 3]]]
|
||||||
|
t2 = table_builder [["X", [1.5, 2.5, 3.5]]]
|
||||||
|
t3 = table_builder [["X", [(2^100)+1, 2^10, 2]]]
|
||||||
|
|
||||||
|
t1.at "X" . value_type . should_equal Value_Type.Integer
|
||||||
|
t2.at "X" . value_type . should_equal Value_Type.Float
|
||||||
|
t3.at "X" . value_type . should_be_a (Value_Type.Decimal ...)
|
||||||
|
|
||||||
|
t4 = t2.union [t1, t3] allow_type_widening=True
|
||||||
|
t4.at "X" . value_type . should_equal Value_Type.Float
|
||||||
|
# Inexact float equality will make this pass:
|
||||||
|
t4.at "X" . to_vector . should_equal [1.5, 2.5, 3.5, 1, (2^62)-1, 3, (2^100)+1, 2^10, 2]
|
||||||
|
|
||||||
|
w = Problems.expect_only_warning Loss_Of_Integer_Precision t4
|
||||||
|
# Losing precision on (2^62)-1 and 2^100+1.
|
||||||
|
w.affected_rows_count . should_equal 2
|
||||||
|
|
||||||
Test.specify "if type mismatches cause all columns to be dropped, fail with No_Output_Columns" <|
|
Test.specify "if type mismatches cause all columns to be dropped, fail with No_Output_Columns" <|
|
||||||
t1 = table_builder [["A", [1, 2, 3]]]
|
t1 = table_builder [["A", [1, 2, 3]]]
|
||||||
t2 = table_builder [["A", ['x']]]
|
t2 = table_builder [["A", ['x']]]
|
||||||
|
@ -65,7 +65,7 @@ spec =
|
|||||||
test_column.reverse.to_vector . should_equal expected_1.to_vector
|
test_column.reverse.to_vector . should_equal expected_1.to_vector
|
||||||
empty_column.reverse.to_vector . should_equal empty_column.to_vector
|
empty_column.reverse.to_vector . should_equal empty_column.to_vector
|
||||||
|
|
||||||
Test.specify "should allow to count duplicate value occurences" <|
|
Test.specify "should allow to count duplicate value occurrences" <|
|
||||||
c_1 = Column.from_vector "c 1" [0, 1, 2, 2, 1, 0, 2]
|
c_1 = Column.from_vector "c 1" [0, 1, 2, 2, 1, 0, 2]
|
||||||
c_1.duplicate_count.to_vector.should_equal [0, 0, 0, 1, 1, 1, 2]
|
c_1.duplicate_count.to_vector.should_equal [0, 0, 0, 1, 1, 1, 2]
|
||||||
|
|
||||||
@ -106,19 +106,48 @@ spec =
|
|||||||
c1.at 1 . should_be_a Float
|
c1.at 1 . should_be_a Float
|
||||||
c1.at 0 . is_a Integer . should_be_false
|
c1.at 0 . is_a Integer . should_be_false
|
||||||
|
|
||||||
Test.specify "will preserve the types if the column is Mixed" <|
|
Test.specify "will preserve the types if the column is Mixed, regardless of ordering" <|
|
||||||
c1 = Column.from_vector "X" ["a", 1, 2.0]
|
run_test vector =
|
||||||
c1.value_type . should_equal Value_Type.Mixed
|
Test.with_clue vector.pretty+": " <|
|
||||||
c1.at 0 . should_be_a Text
|
c = Column.from_vector "X" vector
|
||||||
c1.at 1 . should_be_a Integer
|
c.value_type . should_equal Value_Type.Mixed
|
||||||
c1.at 2 . should_be_a Float
|
c.to_vector . should_equal vector
|
||||||
|
Problems.assume_no_problems c
|
||||||
|
|
||||||
Test.specify "will preserve the types if the column is Mixed (2)" pending="ToDo: disabled due to issue #7352" <|
|
# Verify that types of the values are preserved.
|
||||||
c2 = Column.from_vector "X" [1, 2.0, "a"]
|
c.to_vector.zip vector got-> expected->
|
||||||
c2.value_type . should_equal Value_Type.Mixed
|
Test.with_clue "(type of "+got.pretty+") " <|
|
||||||
c2.at 0 . should_be_a Integer
|
(Meta.type_of got) . should_equal (Meta.type_of expected)
|
||||||
c2.at 1 . should_be_a Float
|
got.to_text . should_equal expected.to_text
|
||||||
c2.at 2 . should_be_a Text
|
|
||||||
|
## Testing various permutations to ensure that the behaviour is
|
||||||
|
invariant to the order of the values.
|
||||||
|
(It used to differ depending on the order.)
|
||||||
|
big = (2^100)+1
|
||||||
|
medium = (2^62)-1
|
||||||
|
run_test ["a", 1, 2.0]
|
||||||
|
run_test [1, 2.0, "a"]
|
||||||
|
run_test [2.0, 1, "a"]
|
||||||
|
run_test [2.5, 1, "a"]
|
||||||
|
run_test [10, 1.5, "a"]
|
||||||
|
run_test ["a", 1.5, 10]
|
||||||
|
run_test [big, 1.5, "a"]
|
||||||
|
run_test [1.5, big, "a"]
|
||||||
|
run_test [2, big, 1.5, "a"]
|
||||||
|
run_test [2, big, 1.5, "a"]
|
||||||
|
run_test [1.5, big, 2, "a"]
|
||||||
|
run_test [Nothing, 1.5, medium, 2, "a"]
|
||||||
|
run_test [medium, 1.5, "a"]
|
||||||
|
run_test [Nothing, medium, 1.5, "a"]
|
||||||
|
|
||||||
|
big_test_vector x y =
|
||||||
|
a = Vector.fill 40 [x, 1.5, 2, y, x+123, y+123, y+5, 2.5]
|
||||||
|
b = Vector.fill 100 [Nothing]
|
||||||
|
c = Vector.fill 500 [2, 1.5, x]
|
||||||
|
d = Vector.fill 5 [2, "a", 2, 2.5]
|
||||||
|
(a+b+c+d).flatten
|
||||||
|
run_test (big_test_vector medium big)
|
||||||
|
run_test (big_test_vector 123 456)
|
||||||
|
|
||||||
Test.specify "should allow to set a specific type at construction" <|
|
Test.specify "should allow to set a specific type at construction" <|
|
||||||
c1 = Column.from_vector "X" [1, 2] Value_Type.Float
|
c1 = Column.from_vector "X" [1, 2] Value_Type.Float
|
||||||
@ -203,6 +232,10 @@ spec =
|
|||||||
r8.should_fail_with Invalid_Value_Type
|
r8.should_fail_with Invalid_Value_Type
|
||||||
r8.catch.to_display_text.should_contain "Expected type Byte, but got a value 1000000000 of type Integer (32 bits)"
|
r8.catch.to_display_text.should_contain "Expected type Byte, but got a value 1000000000 of type Integer (32 bits)"
|
||||||
|
|
||||||
|
r9 = Column.from_vector "X" [100, 1.5] Value_Type.Integer
|
||||||
|
r9.should_fail_with Invalid_Value_Type
|
||||||
|
r9.catch.to_display_text.should_contain "Expected type Integer (64 bits), but got a value 1.5 of type Float"
|
||||||
|
|
||||||
Test.specify "will not allow to construct a column with Char size=0" <|
|
Test.specify "will not allow to construct a column with Char size=0" <|
|
||||||
r1 = Column.from_vector "X" [] (Value_Type.Char size=0 variable_length=False)
|
r1 = Column.from_vector "X" [] (Value_Type.Char size=0 variable_length=False)
|
||||||
r1.should_fail_with Illegal_Argument
|
r1.should_fail_with Illegal_Argument
|
||||||
|
@ -70,17 +70,13 @@ spec =
|
|||||||
w4 = Problems.expect_only_warning Loss_Of_Integer_Precision c4
|
w4 = Problems.expect_only_warning Loss_Of_Integer_Precision c4
|
||||||
w4.affected_rows_count . should_equal 2
|
w4.affected_rows_count . should_equal 2
|
||||||
|
|
||||||
# Until #7352 is fixed, these tests are also useful:
|
# No precision loss should happen if the column is mixed:
|
||||||
# If we start with mixed before encountering large values, no issues occur:
|
|
||||||
c5 = Column.from_vector "X" ["a", 1.0, x+1]
|
c5 = Column.from_vector "X" ["a", 1.0, x+1]
|
||||||
Problems.assume_no_problems c5
|
Problems.assume_no_problems c5
|
||||||
c5.to_vector.map .to_text . should_equal (["a", 1.0, x+1].map .to_text)
|
c5.to_vector.map .to_text . should_equal (["a", 1.0, x+1].map .to_text)
|
||||||
# But if we first have floats and ints and only later enter mixed, we again get precision loss:
|
|
||||||
c6 = Column.from_vector "X" [x+1, 1.0, x+2, "a", x+3]
|
c6 = Column.from_vector "X" [x+1, 1.0, x+2, "a", x+3]
|
||||||
w6 = Problems.expect_only_warning Loss_Of_Integer_Precision c6
|
Problems.assume_no_problems c6
|
||||||
# The 2 integers that got into DoubleBuilder are affected, the one after upcasting to Mixed is preserved OK.
|
c6.to_vector.map .to_text . should_equal ([x+1, 1.0, x+2, "a", x+3].map .to_text)
|
||||||
w6.affected_rows_count . should_equal 2
|
|
||||||
c6.to_vector.map .to_text . should_equal ([x+0.0, 1.0, x+0.0, "a", x+3].map .to_text)
|
|
||||||
|
|
||||||
# It should also work for from_vector_repeated:
|
# It should also work for from_vector_repeated:
|
||||||
c7 = Column.from_vector_repeated "X" [1.5, x+2, 100] 10
|
c7 = Column.from_vector_repeated "X" [1.5, x+2, 100] 10
|
||||||
|
@ -687,9 +687,15 @@ spec =
|
|||||||
Test.specify "Should handle parent-less file paths" <|
|
Test.specify "Should handle parent-less file paths" <|
|
||||||
transient = enso_project.data / "transient"
|
transient = enso_project.data / "transient"
|
||||||
f1 = transient / "./test1.txt"
|
f1 = transient / "./test1.txt"
|
||||||
f2 = File.new ("parentlesstest" + Date_Time.now.to_unix_epoch_milliseconds.to_text + ".txt")
|
f2_filename = "parentlesstest" + Date_Time.now.to_unix_epoch_milliseconds.to_text + ".txt"
|
||||||
|
f2 = File.new f2_filename
|
||||||
|
|
||||||
Panic.with_finalizer f2.delete <|
|
do_cleanup =
|
||||||
|
f2.delete_if_exists
|
||||||
|
f2_bak = File.new (f2_filename + ".bak")
|
||||||
|
f2_bak.delete_if_exists
|
||||||
|
|
||||||
|
Panic.with_finalizer do_cleanup <|
|
||||||
txt = "MY CONTENT"
|
txt = "MY CONTENT"
|
||||||
txt.write f1
|
txt.write f1
|
||||||
txt.write f2
|
txt.write f2
|
||||||
|
Loading…
Reference in New Issue
Block a user