Mirror of https://github.com/enso-org/enso.git
Snowflake Dialect pt. 4 - reading a column of small integers as Integer type, other type mapping tests (#10518)
- Related to #9486 - Ensures that even though an integer column in Snowflake is represented by `Decimal` type, if the values are small enough, they are materialized as `Integer`.
- If the values are larger, they are still read in as `Decimal`.
- Adds tests for some other `Decimal` edge cases (various precisions and scales), and for `Float`.
This commit is contained in: parent 4c0fbf0e19, commit 632355f85b
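In a nutshell: Snowflake stores every integer column as `NUMBER(38, 0)`, so the JDBC driver hands values back as `BigDecimal`; this commit materializes them as plain 64-bit integers whenever they fit, keeping exact big-integer semantics otherwise. A minimal sketch of that idea in plain JDBC (the helper name and class are hypothetical, not the commit's actual code):

    import java.math.BigDecimal;
    import java.math.BigInteger;
    import java.sql.ResultSet;
    import java.sql.SQLException;

    // Minimal sketch, not the commit's code: fetch a NUMBER(38, 0) cell and
    // keep it as a long when it fits, falling back to BigInteger otherwise.
    final class IntegerishFetchSketch {
      static Object fetchIntegerish(ResultSet rs, int column) throws SQLException {
        BigDecimal bigDecimal = rs.getBigDecimal(column);
        if (rs.wasNull()) {
          return null;
        }
        // Scale-0 NUMBER values convert exactly; toBigIntegerExact throws
        // ArithmeticException if a fractional part is present.
        BigInteger value = bigDecimal.toBigIntegerExact();
        // bitLength() <= 63 means the value fits in a signed 64-bit long.
        return value.bitLength() <= 63 ? value.longValueExact() : value;
      }
    }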
@@ -88,14 +88,16 @@ long_fetcher bits =
 ## PRIVATE
 big_integer_fetcher : Column_Fetcher
 big_integer_fetcher =
-    fetch_value rs i =
-        big_decimal = rs.getBigDecimal i
-        if rs.wasNull then Nothing else
-            big_decimal.toBigIntegerExact
     make_builder initial_size java_problem_aggregator =
         java_builder = Java_Exports.make_biginteger_builder initial_size java_problem_aggregator
         make_builder_from_java_object_builder java_builder
-    Column_Fetcher.Value fetch_value make_builder
+    Column_Fetcher.Value fetch_big_integer make_builder
 
+## PRIVATE
+fetch_big_integer rs i =
+    big_decimal = rs.getBigDecimal i
+    if rs.wasNull then Nothing else
+        big_decimal.toBigIntegerExact
+
 ## PRIVATE
 big_decimal_fetcher : Column_Fetcher
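The `toBigIntegerExact` call is what makes this fetch safe: it converts only when no fractional part is present and throws otherwise. A standalone illustration using plain `java.math` (nothing commit-specific):

    import java.math.BigDecimal;

    // Demonstrates the toBigIntegerExact semantics relied on above.
    public class ToBigIntegerExactDemo {
      public static void main(String[] args) {
        System.out.println(new BigDecimal("12300").toBigIntegerExact());   // 12300
        System.out.println(new BigDecimal("1.23E+4").toBigIntegerExact()); // 12300, exact despite negative scale
        try {
          new BigDecimal("12.5").toBigIntegerExact(); // fractional part present
        } catch (ArithmeticException e) {
          System.out.println("rejected: " + e.getMessage());
        }
      }
    }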
@@ -17,6 +17,7 @@ import Standard.Database.SQL_Type.SQL_Type
 from Standard.Database.Errors import Unsupported_Database_Operation
 
 polyglot java import java.sql.Types
+polyglot java import org.enso.snowflake.SnowflakeIntegerColumnMaterializer
 
 ## PRIVATE
 type Snowflake_Type_Mapping
@@ -33,8 +34,13 @@ type Snowflake_Type_Mapping
         Value_Type.Decimal precision scale -> case precision of
             # If precision is not set, scale is also lost because SQL is unable to express a scale without a precision.
             Nothing -> SQL_Type.Value Types.DECIMAL "NUMBER" Nothing Nothing
-            # Scale can be set or not, if precision is given, so no check needed.
-            _ -> SQL_Type.Value Types.DECIMAL "NUMBER" precision scale
+            # Scale can be set or not, but if it is set, it must be in range 0-37.
+            # If scale or precision is out of range, we fall back to Nothing.
+            _ -> if (precision < 1) || (precision > 38) then SQL_Type.Value Types.DECIMAL "NUMBER" Nothing Nothing else
+                if scale.is_nothing then SQL_Type.Value Types.DECIMAL "NUMBER" precision Nothing else
+                    if (scale < 0) || (scale > 37) then SQL_Type.Value Types.DECIMAL "NUMBER" Nothing Nothing else
+                        SQL_Type.Value Types.DECIMAL "NUMBER" precision scale
+
         Value_Type.Char size _ ->
             # Snowflake does not support fixed length strings, so we use VARCHAR.
             is_unbounded = case size of
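In effect, the new mapping clamps a requested `Decimal` type to what Snowflake's `NUMBER` accepts (precision 1-38, scale 0-37) and silently degrades to an unparameterized `NUMBER` otherwise. A rough restatement of that rule in Java (the `NumberType`/`clamp` names are hypothetical, not from the codebase):

    // Hedged sketch of the precision/scale rule above.
    final class SnowflakeNumberRule {
      record NumberType(Integer precision, Integer scale) {}

      static NumberType clamp(Integer precision, Integer scale) {
        // Without a precision, SQL cannot express a scale either.
        if (precision == null) return new NumberType(null, null);
        // Out-of-range precision drops both parameters.
        if (precision < 1 || precision > 38) return new NumberType(null, null);
        if (scale == null) return new NumberType(precision, null);
        // An invalid scale also drops both parameters.
        if (scale < 0 || scale > 37) return new NumberType(null, null);
        return new NumberType(precision, scale);
      }
    }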
@@ -118,6 +124,9 @@ type Snowflake_Type_Mapping
         case value_type of
             Value_Type.Time -> time_fetcher
             Value_Type.Date_Time _ -> date_time_fetcher
+            # If we encounter a Decimal column with scale 0 we will try to fetch it as Integer if the values fit.
+            Value_Type.Decimal _ 0 -> smart_integer_fetcher
+            # Other Decimal columns get the default behaviour - fetched as BigInteger or BigDecimal.
             _ -> Column_Fetcher_Module.default_fetcher_for_value_type value_type
 
 ## PRIVATE
@@ -212,6 +221,20 @@ date_time_fetcher =
         Column_Fetcher_Module.make_builder_from_java_object_builder java_builder
     Column_Fetcher.Value fetch_value make_builder
 
+## PRIVATE
+   A fetcher for Snowflake Decimal integer columns.
+   Integer columns in Snowflake are represented as `NUMBER(38, 0)`, meaning
+   there is no separate Integer type.
+
+   In Enso, using `Decimal` values incurs a significant overhead. Thus, when
+   fetching such an integer column from Snowflake, we try to first fetch it as
+   lightweight `Integer` and only fall back to `Decimal` if needed.
+smart_integer_fetcher =
+    make_builder initial_size _ =
+        java_builder = SnowflakeIntegerColumnMaterializer.new initial_size
+        Column_Fetcher_Module.make_builder_from_java_object_builder java_builder
+    Column_Fetcher.Value Column_Fetcher_Module.fetch_big_integer make_builder
+
 ## PRIVATE
   The actual SQL type that Snowflake uses for all integer types.
 integer_type = SQL_Type.Value Types.DECIMAL "NUMERIC" 38 0
@@ -0,0 +1,158 @@
+package org.enso.snowflake;
+
+import java.math.BigInteger;
+import java.util.Arrays;
+import java.util.BitSet;
+import org.enso.table.data.column.builder.Builder;
+import org.enso.table.data.column.storage.Storage;
+import org.enso.table.data.column.storage.numeric.BigIntegerStorage;
+import org.enso.table.data.column.storage.numeric.LongStorage;
+import org.enso.table.data.column.storage.type.BigIntegerType;
+import org.enso.table.data.column.storage.type.IntegerType;
+import org.enso.table.data.column.storage.type.StorageType;
+import org.enso.table.error.ValueTypeMismatchException;
+import org.graalvm.polyglot.Context;
+
+public class SnowflakeIntegerColumnMaterializer extends Builder {
+  private static final BigInteger LONG_MIN = BigInteger.valueOf(Long.MIN_VALUE);
+  private static final BigInteger LONG_MAX = BigInteger.valueOf(Long.MAX_VALUE);
+  // We start in integer mode and will switch to BigInteger mode if we encounter a value that
+  // exceeds the range
+  private long[] ints;
+  private BitSet intsMissing;
+  private BigInteger[] bigInts;
+  private int currentSize;
+  private Mode mode;
+
+  public SnowflakeIntegerColumnMaterializer(int initialCapacity) {
+    ints = new long[initialCapacity];
+    intsMissing = new BitSet();
+    bigInts = null;
+    currentSize = 0;
+    mode = Mode.LONG;
+  }
+
+  private void retypeToBigIntegers() {
+    assert mode == Mode.LONG;
+    Context context = Context.getCurrent();
+    bigInts = new BigInteger[ints.length];
+    for (int i = 0; i < currentSize; i++) {
+      if (intsMissing.get(i)) {
+        bigInts[i] = null;
+      } else {
+        bigInts[i] = BigInteger.valueOf(ints[i]);
+      }
+
+      context.safepoint();
+    }
+
+    ints = null;
+    intsMissing = null;
+    mode = Mode.BIG_INTEGER;
+  }
+
+  private boolean fitsInLong(BigInteger bigInteger) {
+    return bigInteger.compareTo(LONG_MIN) >= 0 && bigInteger.compareTo(LONG_MAX) <= 0;
+  }
+
+  @Override
+  public void appendNoGrow(Object o) {
+    switch (o) {
+      case BigInteger bigInteger -> {
+        switch (mode) {
+          case BIG_INTEGER -> bigInts[currentSize++] = bigInteger;
+
+          case LONG -> {
+            if (fitsInLong(bigInteger)) {
+              ints[currentSize++] = bigInteger.longValue();
+            } else {
+              retypeToBigIntegers();
+              bigInts[currentSize++] = bigInteger;
+            }
+          }
+        }
+      }
+
+      case null -> appendNulls(1);
+      default -> throw new ValueTypeMismatchException(BigIntegerType.INSTANCE, o);
+    }
+  }
+
+  @Override
+  public void append(Object o) {
+    if (currentSize >= capacity()) {
+      grow();
+    }
+
+    appendNoGrow(o);
+  }
+
+  @Override
+  public void appendNulls(int count) {
+    if (mode == Mode.LONG) {
+      intsMissing.set(currentSize, currentSize + count);
+    }
+
+    currentSize += count;
+  }
+
+  @Override
+  public void appendBulkStorage(Storage<?> storage) {
+    throw new IllegalStateException(
+        "SnowflakeIntegerColumnMaterializer.appendBulkStorage: Not supported.");
+  }
+
+  @Override
+  public int getCurrentSize() {
+    return currentSize;
+  }
+
+  @Override
+  public Storage<?> seal() {
+    resize(currentSize);
+    return switch (mode) {
+      case LONG -> new LongStorage(ints, currentSize, intsMissing, IntegerType.INT_64);
+      case BIG_INTEGER -> new BigIntegerStorage(bigInts, currentSize);
+    };
+  }
+
+  @Override
+  public StorageType getType() {
+    // The type of the builder can change over time, so we do not report any stable type here.
+    // Same as in InferredBuilder.
+    return null;
+  }
+
+  private int capacity() {
+    return mode == Mode.LONG ? ints.length : bigInts.length;
+  }
+
+  private void grow() {
+    int desiredCapacity = 3;
+    if (capacity() > 1) {
+      desiredCapacity = (capacity() * 3 / 2);
+    }
+
+    // It is possible for the `currentSize` to grow arbitrarily larger than
+    // the capacity, because when nulls are being added the array is not
+    // resized, only the counter is incremented. Thus, we need to ensure
+    // that we have allocated enough space for at least one element.
+    if (currentSize >= desiredCapacity) {
+      desiredCapacity = currentSize + 1;
+    }
+
+    resize(desiredCapacity);
+  }
+
+  private void resize(int desiredCapacity) {
+    switch (mode) {
+      case LONG -> ints = Arrays.copyOf(ints, desiredCapacity);
+      case BIG_INTEGER -> bigInts = Arrays.copyOf(bigInts, desiredCapacity);
+    }
+  }
+
+  private enum Mode {
+    LONG,
+    BIG_INTEGER
+  }
+}
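For intuition, here is a hedged usage sketch of the builder above (an illustrative driver, not part of the commit; `append` and `seal` are the methods defined in the new file):

    import java.math.BigInteger;
    import org.enso.snowflake.SnowflakeIntegerColumnMaterializer;
    import org.enso.table.data.column.storage.Storage;

    // Values that fit in a long keep the compact LongStorage; a single
    // oversized value flips the whole column to BigIntegerStorage.
    public class MaterializerDemo {
      public static void main(String[] args) {
        SnowflakeIntegerColumnMaterializer small = new SnowflakeIntegerColumnMaterializer(4);
        small.append(BigInteger.valueOf(1));
        small.append(null);                  // recorded in the null BitSet
        small.append(BigInteger.valueOf(42));
        Storage<?> s1 = small.seal();        // LongStorage with INT_64 type

        SnowflakeIntegerColumnMaterializer big = new SnowflakeIntegerColumnMaterializer(2);
        big.append(BigInteger.valueOf(7));
        big.append(BigInteger.TWO.pow(100)); // exceeds long range -> retype
        Storage<?> s2 = big.seal();          // BigIntegerStorage
      }
    }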
@@ -16,7 +16,6 @@ import org.enso.table.error.ValueTypeMismatchException;
 import org.enso.table.problems.ProblemAggregator;
 import org.graalvm.polyglot.Context;
 
-// For now the BigInteger builder is just a stub, reusing the ObjectBuilder and adding a warning.
 public class BigIntegerBuilder extends TypedBuilderImpl<BigInteger> {
   // The problem aggregator is only used so that when we are retyping, we can pass it on.
   private final ProblemAggregator problemAggregator;
@@ -204,13 +204,39 @@ snowflake_specific_spec suite_builder default_connection db_name setup =
             t1.at "big_ints" . value_type . should_equal (Value_Type.Decimal 38 0)
 
             in_memory = t1.read
+            # But when read back to in-memory, they are inferred as Integer type to avoid the BigInteger overhead
             in_memory.at "small_ints" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
-            in_memory.at "big_ints" . value_type . should_equal (Value_Type.Decimal 38 0)
+            # Unless the values are actually big, then the Decimal type is kept, but its precision is lost, as in-memory BigInteger does not store it.
+            in_memory.at "big_ints" . value_type . should_equal (Value_Type.Decimal Nothing 0)
 
             # Check correctness of values
             in_memory.at "small_ints" . to_vector . should_equal [1, 2, 3]
             in_memory.at "big_ints" . to_vector . should_equal [2^100, 2^110, 1]
 
+        group_builder.specify "correctly handles Decimal and Float types" <|
+            table_name = Name_Generator.random_name "DecimalFloat"
+            t1 = default_connection.get.create_table table_name [Column_Description.Value "d1" (Value_Type.Decimal 38 6), Column_Description.Value "d2" (Value_Type.Decimal 10 2), Column_Description.Value "d3" (Value_Type.Decimal 24 -3), Column_Description.Value "f" (Value_Type.Float)] primary_key=[]
+            t1.at "d1" . value_type . should_equal (Value_Type.Decimal 38 6)
+            t1.at "d2" . value_type . should_equal (Value_Type.Decimal 10 2)
+            # Negative scale is not supported, so we fall back to defaults:
+            t1.at "d3" . value_type . should_equal (Value_Type.Decimal 38 0)
+            t1.at "f" . value_type . should_equal Value_Type.Float
+
+            t1.update_rows (Table.new [["d1", [1.2345678910]], ["d2", [12.3456]], ["d3", [1234567.8910]], ["f", [1.5]]]) update_action=Update_Action.Insert . should_succeed
+
+            m1 = t1.read
+            # Currently in-memory does not support precision and scale in Decimals, so they are all changed to Nothing
+            m1.at "d1" . value_type . should_equal (Value_Type.Decimal Nothing Nothing)
+            m1.at "d2" . value_type . should_equal (Value_Type.Decimal Nothing Nothing)
+            # The `d3` column got coerced to `Value_Type.Decimal 38 0`, so given that the value is relatively small, it is now fetched as integer.
+            m1.at "d3" . value_type . should_equal Value_Type.Integer
+            m1.at "f" . value_type . should_equal Value_Type.Float
+
+            m1.at "d1" . to_vector . should_equal [Decimal.new "1.234568"]
+            m1.at "d2" . to_vector . should_equal [Decimal.new "12.35"]
+            m1.at "d3" . to_vector . should_equal [1234568]
+            m1.at "f" . to_vector . should_equal [1.5]
+
     suite_builder.group "[Snowflake] Dialect-specific codegen" group_builder->
         data = Snowflake_Info_Data.setup default_connection
 
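The expected read-back values follow from Snowflake rounding each inserted literal to its column's scale. The same numbers can be reproduced with plain `BigDecimal`, assuming half-up rounding matches Snowflake's behaviour here (the test values are positive, so half-up and half-away-from-zero agree):

    import java.math.BigDecimal;
    import java.math.RoundingMode;

    // Reproduces the expected values in the test above with plain BigDecimal.
    public class ScaleRoundingDemo {
      public static void main(String[] args) {
        System.out.println(new BigDecimal("1.2345678910").setScale(6, RoundingMode.HALF_UP)); // 1.234568
        System.out.println(new BigDecimal("12.3456").setScale(2, RoundingMode.HALF_UP));      // 12.35
        // d3 was coerced to NUMBER(38, 0), so the inserted 1234567.8910 rounds to an integer:
        System.out.println(new BigDecimal("1234567.8910").setScale(0, RoundingMode.HALF_UP)); // 1234568
      }
    }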