Mirror of https://github.com/enso-org/enso.git
Snowflake Dialect pt. 4 - reading a column of small integers as Integer type, other type mapping tests (#10518)
- Related to #9486
- Ensures that even though an integer column in Snowflake is represented by `Decimal` type, if the values are small enough, they are materialized as `Integer`.
- If the values are larger, they are still read in as `Decimal`.
- Adds tests for some other `Decimal` edge cases (various precisions and scales), and for `Float`.
parent 4c0fbf0e19
commit 632355f85b
@@ -88,14 +88,16 @@ long_fetcher bits =
 ## PRIVATE
 big_integer_fetcher : Column_Fetcher
 big_integer_fetcher =
-    fetch_value rs i =
-        big_decimal = rs.getBigDecimal i
-        if rs.wasNull then Nothing else
-            big_decimal.toBigIntegerExact
     make_builder initial_size java_problem_aggregator =
         java_builder = Java_Exports.make_biginteger_builder initial_size java_problem_aggregator
         make_builder_from_java_object_builder java_builder
-    Column_Fetcher.Value fetch_value make_builder
+    Column_Fetcher.Value fetch_big_integer make_builder
+
+## PRIVATE
+fetch_big_integer rs i =
+    big_decimal = rs.getBigDecimal i
+    if rs.wasNull then Nothing else
+        big_decimal.toBigIntegerExact
 
 ## PRIVATE
 big_decimal_fetcher : Column_Fetcher
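For readers unfamiliar with the JDBC calls chained inside `fetch_big_integer`, here is a minimal Java sketch of the same pattern (the class name is illustrative; `getBigDecimal`, `wasNull`, and `toBigIntegerExact` are standard `java.sql` / `java.math` APIs):

```java
import java.math.BigInteger;
import java.sql.ResultSet;
import java.sql.SQLException;

final class BigIntegerFetchSketch {
  // Mirrors the Enso fetch_big_integer helper above: read column `i` as
  // BigDecimal, map SQL NULL to Java null, and convert exactly to BigInteger.
  static BigInteger fetchBigInteger(ResultSet rs, int i) throws SQLException {
    var bigDecimal = rs.getBigDecimal(i);
    if (rs.wasNull()) {
      return null;
    }
    // toBigIntegerExact throws ArithmeticException if the value has a nonzero
    // fractional part, which cannot happen for a NUMBER(p, 0) column.
    return bigDecimal.toBigIntegerExact();
  }
}
```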
@@ -17,6 +17,7 @@ import Standard.Database.SQL_Type.SQL_Type
 from Standard.Database.Errors import Unsupported_Database_Operation
 
 polyglot java import java.sql.Types
+polyglot java import org.enso.snowflake.SnowflakeIntegerColumnMaterializer
 
 ## PRIVATE
 type Snowflake_Type_Mapping
@@ -33,8 +34,13 @@ type Snowflake_Type_Mapping
         Value_Type.Decimal precision scale -> case precision of
             # If precision is not set, scale is also lost because SQL is unable to express a scale without a precision.
             Nothing -> SQL_Type.Value Types.DECIMAL "NUMBER" Nothing Nothing
-            # Scale can be set or not, if precision is given, so no check needed.
-            _ -> SQL_Type.Value Types.DECIMAL "NUMBER" precision scale
+            # Scale can be set or not, but if it is set, it must be in range 0-37.
+            # If scale or precision is out of range, we fall back to Nothing.
+            _ -> if (precision < 1) || (precision > 38) then SQL_Type.Value Types.DECIMAL "NUMBER" Nothing Nothing else
+                if scale.is_nothing then SQL_Type.Value Types.DECIMAL "NUMBER" precision Nothing else
+                    if (scale < 0) || (scale > 37) then SQL_Type.Value Types.DECIMAL "NUMBER" Nothing Nothing else
+                        SQL_Type.Value Types.DECIMAL "NUMBER" precision scale
 
         Value_Type.Char size _ ->
             # Snowflake does not support fixed length strings, so we use VARCHAR.
             is_unbounded = case size of
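The new mapping arm encodes a simple validation rule: precision must lie in 1-38 and scale, when present, in 0-37; anything out of range falls back to an unparametrized `NUMBER`. A hedged Java restatement of that rule (the types and names here are illustrative, not the project's API):

```java
import java.util.Optional;

final class SnowflakeDecimalRule {
  // Illustrative stand-in for Enso's SQL_Type.Value; not an actual API.
  record NumberType(Optional<Integer> precision, Optional<Integer> scale) {}

  static NumberType map(Integer precision, Integer scale) {
    // No precision: SQL cannot express a scale without one, so both are dropped.
    if (precision == null) return new NumberType(Optional.empty(), Optional.empty());
    // Out-of-range precision: fall back to an unparametrized NUMBER.
    if (precision < 1 || precision > 38) return new NumberType(Optional.empty(), Optional.empty());
    // Valid precision but no scale given.
    if (scale == null) return new NumberType(Optional.of(precision), Optional.empty());
    // Out-of-range scale: fall back to the defaults as well.
    if (scale < 0 || scale > 37) return new NumberType(Optional.empty(), Optional.empty());
    return new NumberType(Optional.of(precision), Optional.of(scale));
  }
}
```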
@@ -118,6 +124,9 @@ type Snowflake_Type_Mapping
         case value_type of
             Value_Type.Time -> time_fetcher
             Value_Type.Date_Time _ -> date_time_fetcher
+            # If we encounter a Decimal column with scale 0 we will try to fetch it as Integer if the values fit.
+            Value_Type.Decimal _ 0 -> smart_integer_fetcher
+            # Other Decimal columns get the default behaviour - fetched as BigInteger or BigDecimal.
             _ -> Column_Fetcher_Module.default_fetcher_for_value_type value_type
 
 ## PRIVATE
@@ -212,6 +221,20 @@ date_time_fetcher =
         Column_Fetcher_Module.make_builder_from_java_object_builder java_builder
     Column_Fetcher.Value fetch_value make_builder
 
+## PRIVATE
+   A fetcher for Snowflake Decimal integer columns.
+   Integer columns in Snowflake are represented as `NUMBER(38, 0)`, meaning
+   there is no separate Integer type.
+
+   In Enso, using `Decimal` values incurs a significant overhead. Thus, when
+   fetching such an integer column from Snowflake, we first try to fetch it as
+   a lightweight `Integer` and only fall back to `Decimal` if needed.
+smart_integer_fetcher =
+    make_builder initial_size _ =
+        java_builder = SnowflakeIntegerColumnMaterializer.new initial_size
+        Column_Fetcher_Module.make_builder_from_java_object_builder java_builder
+    Column_Fetcher.Value Column_Fetcher_Module.fetch_big_integer make_builder
+
 ## PRIVATE
    The actual SQL type that Snowflake uses for all integer types.
 integer_type = SQL_Type.Value Types.DECIMAL "NUMERIC" 38 0
@@ -0,0 +1,158 @@
package org.enso.snowflake;

import java.math.BigInteger;
import java.util.Arrays;
import java.util.BitSet;
import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.numeric.BigIntegerStorage;
import org.enso.table.data.column.storage.numeric.LongStorage;
import org.enso.table.data.column.storage.type.BigIntegerType;
import org.enso.table.data.column.storage.type.IntegerType;
import org.enso.table.data.column.storage.type.StorageType;
import org.enso.table.error.ValueTypeMismatchException;
import org.graalvm.polyglot.Context;

public class SnowflakeIntegerColumnMaterializer extends Builder {
  private static final BigInteger LONG_MIN = BigInteger.valueOf(Long.MIN_VALUE);
  private static final BigInteger LONG_MAX = BigInteger.valueOf(Long.MAX_VALUE);

  // We start in integer mode and will switch to BigInteger mode if we encounter a value that
  // exceeds the range
  private long[] ints;
  private BitSet intsMissing;
  private BigInteger[] bigInts;
  private int currentSize;
  private Mode mode;

  public SnowflakeIntegerColumnMaterializer(int initialCapacity) {
    ints = new long[initialCapacity];
    intsMissing = new BitSet();
    bigInts = null;
    currentSize = 0;
    mode = Mode.LONG;
  }

  private void retypeToBigIntegers() {
    assert mode == Mode.LONG;
    Context context = Context.getCurrent();
    bigInts = new BigInteger[ints.length];
    for (int i = 0; i < currentSize; i++) {
      if (intsMissing.get(i)) {
        bigInts[i] = null;
      } else {
        bigInts[i] = BigInteger.valueOf(ints[i]);
      }

      context.safepoint();
    }

    ints = null;
    intsMissing = null;
    mode = Mode.BIG_INTEGER;
  }

  private boolean fitsInLong(BigInteger bigInteger) {
    return bigInteger.compareTo(LONG_MIN) >= 0 && bigInteger.compareTo(LONG_MAX) <= 0;
  }

  @Override
  public void appendNoGrow(Object o) {
    switch (o) {
      case BigInteger bigInteger -> {
        switch (mode) {
          case BIG_INTEGER -> bigInts[currentSize++] = bigInteger;

          case LONG -> {
            if (fitsInLong(bigInteger)) {
              ints[currentSize++] = bigInteger.longValue();
            } else {
              retypeToBigIntegers();
              bigInts[currentSize++] = bigInteger;
            }
          }
        }
      }

      case null -> appendNulls(1);
      default -> throw new ValueTypeMismatchException(BigIntegerType.INSTANCE, o);
    }
  }

  @Override
  public void append(Object o) {
    if (currentSize >= capacity()) {
      grow();
    }

    appendNoGrow(o);
  }

  @Override
  public void appendNulls(int count) {
    if (mode == Mode.LONG) {
      intsMissing.set(currentSize, currentSize + count);
    }

    currentSize += count;
  }

  @Override
  public void appendBulkStorage(Storage<?> storage) {
    throw new IllegalStateException(
        "SnowflakeIntegerColumnMaterializer.appendBulkStorage: Not supported.");
  }

  @Override
  public int getCurrentSize() {
    return currentSize;
  }

  @Override
  public Storage<?> seal() {
    resize(currentSize);
    return switch (mode) {
      case LONG -> new LongStorage(ints, currentSize, intsMissing, IntegerType.INT_64);
      case BIG_INTEGER -> new BigIntegerStorage(bigInts, currentSize);
    };
  }

  @Override
  public StorageType getType() {
    // The type of the builder can change over time, so we do not report any stable type here.
    // Same as in InferredBuilder.
    return null;
  }

  private int capacity() {
    return mode == Mode.LONG ? ints.length : bigInts.length;
  }

  private void grow() {
    int desiredCapacity = 3;
    if (capacity() > 1) {
      desiredCapacity = (capacity() * 3 / 2);
    }

    // It is possible for the `currentSize` to grow arbitrarily larger than
    // the capacity, because when nulls are being added the array is not
    // resized, only the counter is incremented. Thus, we need to ensure
    // that we have allocated enough space for at least one element.
    if (currentSize >= desiredCapacity) {
      desiredCapacity = currentSize + 1;
    }

    resize(desiredCapacity);
  }

  private void resize(int desiredCapacity) {
    switch (mode) {
      case LONG -> ints = Arrays.copyOf(ints, desiredCapacity);
      case BIG_INTEGER -> bigInts = Arrays.copyOf(bigInts, desiredCapacity);
    }
  }

  private enum Mode {
    LONG,
    BIG_INTEGER
  }
}
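A hedged usage sketch of the new builder: values that fit in a `long` keep it in its long mode, and the first value outside the 64-bit range promotes the whole column to BigInteger mode, which `seal` reflects in the storage it returns. Note that `retypeToBigIntegers` calls `Context.getCurrent()`, so this code only runs inside an active GraalVM polyglot context with the Enso table jars on the classpath, as it does in Enso; the `main` harness below is purely illustrative:

```java
import java.math.BigInteger;
import org.enso.snowflake.SnowflakeIntegerColumnMaterializer;

final class MaterializerDemo {
  public static void main(String[] args) {
    var builder = new SnowflakeIntegerColumnMaterializer(4);
    builder.append(BigInteger.valueOf(1)); // fits in long: stays in LONG mode
    builder.appendNulls(1);                // tracked via the missing-value bitset
    builder.append(BigInteger.TWO.pow(100)); // exceeds Long.MAX_VALUE: promotes to BIG_INTEGER
    var storage = builder.seal();
    // Expected: BigIntegerStorage, because one value did not fit in 64 bits.
    System.out.println(storage.getClass().getSimpleName());
    System.out.println(builder.getCurrentSize()); // 3
  }
}
```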
@@ -16,7 +16,6 @@ import org.enso.table.error.ValueTypeMismatchException;
 import org.enso.table.problems.ProblemAggregator;
 import org.graalvm.polyglot.Context;
 
-// For now the BigInteger builder is just a stub, reusing the ObjectBuilder and adding a warning.
 public class BigIntegerBuilder extends TypedBuilderImpl<BigInteger> {
   // The problem aggregator is only used so that when we are retyping, we can pass it on.
   private final ProblemAggregator problemAggregator;
@@ -204,13 +204,39 @@ snowflake_specific_spec suite_builder default_connection db_name setup =
             t1.at "big_ints" . value_type . should_equal (Value_Type.Decimal 38 0)
 
             in_memory = t1.read
             # But when read back to in-memory, they are inferred as Integer type to avoid the BigInteger overhead
             in_memory.at "small_ints" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
-            in_memory.at "big_ints" . value_type . should_equal (Value_Type.Decimal 38 0)
+            # Unless the values are actually big: then the Decimal type is kept, but its precision is lost, as the in-memory BigInteger does not store it.
+            in_memory.at "big_ints" . value_type . should_equal (Value_Type.Decimal Nothing 0)
 
             # Check correctness of values
             in_memory.at "small_ints" . to_vector . should_equal [1, 2, 3]
             in_memory.at "big_ints" . to_vector . should_equal [2^100, 2^110, 1]
 
+        group_builder.specify "correctly handles Decimal and Float types" <|
+            table_name = Name_Generator.random_name "DecimalFloat"
+            t1 = default_connection.get.create_table table_name [Column_Description.Value "d1" (Value_Type.Decimal 38 6), Column_Description.Value "d2" (Value_Type.Decimal 10 2), Column_Description.Value "d3" (Value_Type.Decimal 24 -3), Column_Description.Value "f" (Value_Type.Float)] primary_key=[]
+            t1.at "d1" . value_type . should_equal (Value_Type.Decimal 38 6)
+            t1.at "d2" . value_type . should_equal (Value_Type.Decimal 10 2)
+            # Negative scale is not supported, so we fall back to the defaults:
+            t1.at "d3" . value_type . should_equal (Value_Type.Decimal 38 0)
+            t1.at "f" . value_type . should_equal Value_Type.Float
+
+            t1.update_rows (Table.new [["d1", [1.2345678910]], ["d2", [12.3456]], ["d3", [1234567.8910]], ["f", [1.5]]]) update_action=Update_Action.Insert . should_succeed
+
+            m1 = t1.read
+            # Currently in-memory does not support precision and scale in Decimals, so they are all changed to Nothing.
+            m1.at "d1" . value_type . should_equal (Value_Type.Decimal Nothing Nothing)
+            m1.at "d2" . value_type . should_equal (Value_Type.Decimal Nothing Nothing)
+            # The `d3` column got coerced to `Value_Type.Decimal 38 0`, so given that the value is relatively small, it is now fetched as an integer.
+            m1.at "d3" . value_type . should_equal Value_Type.Integer
+            m1.at "f" . value_type . should_equal Value_Type.Float
+
+            m1.at "d1" . to_vector . should_equal [Decimal.new "1.234568"]
+            m1.at "d2" . to_vector . should_equal [Decimal.new "12.35"]
+            m1.at "d3" . to_vector . should_equal [1234568]
+            m1.at "f" . to_vector . should_equal [1.5]
+
     suite_builder.group "[Snowflake] Dialect-specific codegen" group_builder->
         data = Snowflake_Info_Data.setup default_connection
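The expected readback values in this test are consistent with half-up rounding at each column's declared scale; a small self-contained check of that arithmetic (assuming `java.math.BigDecimal` half-up semantics match what Snowflake stored, which the expectations above suggest):

```java
import java.math.BigDecimal;
import java.math.RoundingMode;

final class DecimalRoundingCheck {
  public static void main(String[] args) {
    // d1: NUMBER(38, 6) -> 1.2345678910 is stored as 1.234568
    System.out.println(new BigDecimal("1.2345678910").setScale(6, RoundingMode.HALF_UP));
    // d2: NUMBER(10, 2) -> 12.3456 is stored as 12.35
    System.out.println(new BigDecimal("12.3456").setScale(2, RoundingMode.HALF_UP));
    // d3: coerced to NUMBER(38, 0) -> 1234567.8910 is stored as 1234568
    System.out.println(new BigDecimal("1234567.8910").setScale(0, RoundingMode.HALF_UP));
  }
}
```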