Refactoring the Unary operations so they are uncoupled from Storage. (#9090)

In order to allow clever masking, slicing, filtering and arrow backing stores...

- Adding ColumnStorage interface with the base API a storage will need.
- Refactored each of the unary operations to a new `UnaryOperation` interface which makes them responsible for deciding if they can be executed.
This commit is contained in:
James Dunkerley 2024-02-19 17:11:52 +00:00 committed by GitHub
parent 96082c3bae
commit ee66b9fb1d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
53 changed files with 1223 additions and 791 deletions

View File

@ -1455,7 +1455,7 @@ type DB_Column
Returns a column of `Integer` type.
year : DB_Column ! Invalid_Value_Type
year self = Value_Type.expect_has_date self <|
simple_unary_op self "year"
self.make_unary_op "year"
## GROUP Standard.Base.DateTime
ICON date_and_time
@ -1465,7 +1465,7 @@ type DB_Column
Returns a column of `Integer` type.
month : DB_Column ! Invalid_Value_Type
month self = Value_Type.expect_has_date self <|
simple_unary_op self "month"
self.make_unary_op "month"
## GROUP Standard.Base.DateTime
ICON date_and_time
@ -1476,7 +1476,7 @@ type DB_Column
Returns a column of `Integer` type.
day : DB_Column ! Invalid_Value_Type
day self = Value_Type.expect_has_date self <|
simple_unary_op self "day"
self.make_unary_op "day"
## GROUP Standard.Base.DateTime
ICON date_and_time
@ -1486,7 +1486,7 @@ type DB_Column
Returns a column of `Integer` type.
hour : DB_Column ! Invalid_Value_Type
hour self = Value_Type.expect_has_time self <|
simple_unary_op self "hour"
self.make_unary_op "hour"
## GROUP Standard.Base.DateTime
ICON date_and_time
@ -1496,7 +1496,7 @@ type DB_Column
Returns a column of `Integer` type.
minute : DB_Column ! Invalid_Value_Type
minute self = Value_Type.expect_has_time self <|
simple_unary_op self "minute"
self.make_unary_op "minute"
## GROUP Standard.Base.Selections
ICON select_row
@ -1506,7 +1506,7 @@ type DB_Column
Returns a column of `Integer` type.
second : DB_Column ! Invalid_Value_Type
second self = Value_Type.expect_has_time self <|
simple_unary_op self "second"
self.make_unary_op "second"
## GROUP Standard.Base.DateTime
ICON date_and_time
@ -1516,7 +1516,7 @@ type DB_Column
@period Date_Time_Helpers.make_period_selector_for_column
date_part : Date_Period | Time_Period -> DB_Column ! Invalid_Value_Type | Illegal_Argument
date_part self period =
Date_Time_Helpers.make_date_part_function self period simple_unary_op self.naming_helper
Date_Time_Helpers.make_date_part_function self period (column->op_name-> column.make_unary_op op_name) self.naming_helper
## GROUP Standard.Base.DateTime
ICON date_and_time
@ -1912,11 +1912,6 @@ adapt_unified_column column expected_type =
adapted = dialect.adapt_unified_column column.as_internal expected_type infer_return_type
DB_Column.Value name=column.name connection=column.connection sql_type_reference=adapted.sql_type_reference expression=adapted.expression context=column.context
## PRIVATE
A shorthand to be able to share the implementations between in-memory and
database.
simple_unary_op column op_kind = column.make_unary_op op_kind
## PRIVATE
Column.from (that:DB_Column) =
_ = [that]

View File

@ -33,7 +33,17 @@ from project.Internal.Column_Format import all
from project.Internal.Java_Exports import make_date_builder_adapter, make_string_builder
polyglot java import org.enso.base.Time_Utils
polyglot java import org.enso.table.data.column.operation.UnaryOperation
polyglot java import org.enso.table.data.column.operation.cast.CastProblemAggregator
polyglot java import org.enso.table.data.column.operation.unary.DatePartOperation
polyglot java import org.enso.table.data.column.operation.unary.IsEmptyOperation
polyglot java import org.enso.table.data.column.operation.unary.IsInfiniteOperation
polyglot java import org.enso.table.data.column.operation.unary.IsNaNOperation
polyglot java import org.enso.table.data.column.operation.unary.IsNothingOperation
polyglot java import org.enso.table.data.column.operation.unary.NotOperation
polyglot java import org.enso.table.data.column.operation.unary.TextLengthOperation
polyglot java import org.enso.table.data.column.operation.unary.TruncatedTimePartOperation
polyglot java import org.enso.table.data.column.operation.unary.UnaryRoundOperation
polyglot java import org.enso.table.data.column.storage.Storage as Java_Storage
polyglot java import org.enso.table.data.mask.OrderMask
polyglot java import org.enso.table.data.table.Column as Java_Column
@ -769,7 +779,7 @@ type Column
not self =
Value_Type.expect_boolean self <|
new_name = naming_helper.concat ["not", naming_helper.to_expression_text self]
run_vectorized_unary_op self Java_Storage.Maps.NOT new_name
apply_unary_operation self NotOperation.INSTANCE new_name
## ALIAS if
GROUP Standard.Base.Logical
@ -921,7 +931,7 @@ type Column
True ->
self.rename new_name
False ->
simple_unary_op self Java_Storage.Maps.TRUNCATE
apply_unary_operation self UnaryRoundOperation.TRUNCATE_INSTANCE
False -> case precise_value_type == Value_Type.Date_Time of
True ->
fun = _.date
@ -946,7 +956,7 @@ type Column
new_name = naming_helper.function_name "ceil" [self]
self.rename new_name
False ->
simple_unary_op self Java_Storage.Maps.CEIL
apply_unary_operation self UnaryRoundOperation.CEIL_INSTANCE
## GROUP Standard.Base.Rounding
ICON math
@ -966,7 +976,7 @@ type Column
new_name = naming_helper.function_name "floor" [self]
self.rename new_name
False ->
simple_unary_op self Java_Storage.Maps.FLOOR
apply_unary_operation self UnaryRoundOperation.FLOOR_INSTANCE
## GROUP Standard.Base.Logical
ICON operators
@ -1045,7 +1055,7 @@ type Column
is_nothing : Column
is_nothing self =
new_name = naming_helper.concat [naming_helper.to_expression_text self, "is Nothing"]
run_vectorized_unary_op self Java_Storage.Maps.IS_NOTHING new_name fallback_fn=(x-> x == Nothing) expected_result_type=Value_Type.Boolean skip_nulls=False
apply_unary_operation self IsNothingOperation.INSTANCE new_name
## GROUP Standard.Base.Math
ICON math
@ -1061,9 +1071,8 @@ type Column
new_name = naming_helper.function_name "is_nan" [self]
fallback x = case x of
_ : Float -> x.is_nan
Nothing -> Nothing
_ -> False
run_vectorized_unary_op self Java_Storage.Maps.IS_NAN new_name fallback_fn=fallback expected_result_type=Value_Type.Boolean
apply_unary_operation self IsNaNOperation.INSTANCE new_name if_unsupported=(apply_unary_map self new_name fallback Value_Type.Boolean)
## GROUP Standard.Base.Math
ICON math
@ -1075,9 +1084,8 @@ type Column
new_name = naming_helper.function_name "is_infinite" [self]
fallback x = case x of
_ : Float -> x.is_infinite
Nothing -> Nothing
_ -> False
run_vectorized_unary_op self Java_Storage.Maps.IS_INFINITE new_name fallback_fn=fallback expected_result_type=Value_Type.Boolean
apply_unary_operation self IsInfiniteOperation.INSTANCE new_name if_unsupported=(apply_unary_map self new_name fallback Value_Type.Boolean)
## PRIVATE
Returns a column of booleans, with `True` items at the positions where
@ -1094,7 +1102,7 @@ type Column
_ : Text -> x == ""
Nothing -> True
_ -> False
run_vectorized_unary_op self Java_Storage.Maps.IS_EMPTY new_name fallback_fn=fallback expected_result_type=Value_Type.Boolean skip_nulls=False
apply_unary_operation self IsEmptyOperation.INSTANCE new_name if_unsupported=(apply_unary_map self new_name fallback Value_Type.Boolean nothing_unchanged=False)
## GROUP Standard.Base.Logical
ICON preparation
@ -1287,7 +1295,7 @@ type Column
text_length : Column
text_length self =
Value_Type.expect_text self <|
simple_unary_op self Java_Storage.Maps.TEXT_LENGTH
apply_unary_operation self TextLengthOperation.INSTANCE
## GROUP Standard.Base.Text
ICON preparation
@ -1473,7 +1481,7 @@ type Column
Returns a column of `Integer` type.
year : Column ! Invalid_Value_Type
year self = Value_Type.expect_has_date self <|
simple_unary_op self Java_Storage.Maps.YEAR
apply_unary_operation self DatePartOperation.YEAR_INSTANCE
## GROUP Standard.Base.DateTime
ICON date_and_time
@ -1483,7 +1491,7 @@ type Column
Returns a column of `Integer` type.
month : Column ! Invalid_Value_Type
month self = Value_Type.expect_has_date self <|
simple_unary_op self Java_Storage.Maps.MONTH
apply_unary_operation self DatePartOperation.MONTH_INSTANCE
## GROUP Standard.Base.DateTime
ICON date_and_time
@ -1494,7 +1502,7 @@ type Column
Returns a column of `Integer` type.
day : Column ! Invalid_Value_Type
day self = Value_Type.expect_has_date self <|
simple_unary_op self Java_Storage.Maps.DAY
apply_unary_operation self DatePartOperation.DAY_INSTANCE
## GROUP Standard.Base.DateTime
ICON date_and_time
@ -1504,7 +1512,7 @@ type Column
Returns a column of `Integer` type.
hour : Column ! Invalid_Value_Type
hour self = Value_Type.expect_has_time self <|
simple_unary_op self Java_Storage.Maps.HOUR
apply_unary_operation self DatePartOperation.HOUR_INSTANCE
## GROUP Standard.Base.DateTime
ICON date_and_time
@ -1514,7 +1522,7 @@ type Column
Returns a column of `Integer` type.
minute : Column ! Invalid_Value_Type
minute self = Value_Type.expect_has_time self <|
simple_unary_op self Java_Storage.Maps.MINUTE
apply_unary_operation self DatePartOperation.MINUTE_INSTANCE
## GROUP Standard.Base.Selections
ICON select_row
@ -1524,7 +1532,7 @@ type Column
Returns a column of `Integer` type.
second : Column ! Invalid_Value_Type
second self = Value_Type.expect_has_time self <|
simple_unary_op self Java_Storage.Maps.SECOND
apply_unary_operation self DatePartOperation.SECOND_INSTANCE
## GROUP Standard.Base.DateTime
ICON date_and_time
@ -1534,7 +1542,21 @@ type Column
@period Date_Time_Helpers.make_period_selector_for_column
date_part : Date_Period | Time_Period -> Column ! Invalid_Value_Type | Illegal_Argument
date_part self period =
Date_Time_Helpers.make_date_part_function self period simple_unary_op naming_helper
date_part_operation col name =
operation = case name of
DatePartOperation.YEAR -> DatePartOperation.YEAR_INSTANCE
DatePartOperation.QUARTER -> DatePartOperation.QUARTER_INSTANCE
DatePartOperation.MONTH -> DatePartOperation.MONTH_INSTANCE
DatePartOperation.WEEK -> DatePartOperation.WEEK_INSTANCE
DatePartOperation.DAY -> DatePartOperation.DAY_INSTANCE
DatePartOperation.HOUR -> DatePartOperation.HOUR_INSTANCE
DatePartOperation.MINUTE -> DatePartOperation.MINUTE_INSTANCE
DatePartOperation.SECOND -> DatePartOperation.SECOND_INSTANCE
DatePartOperation.MILLISECOND -> DatePartOperation.MILLISECOND_INSTANCE
TruncatedTimePartOperation.MICROSECOND -> TruncatedTimePartOperation.MICROSECOND_INSTANCE
TruncatedTimePartOperation.NANOSECOND -> TruncatedTimePartOperation.NANOSECOND_INSTANCE
apply_unary_operation col operation
Date_Time_Helpers.make_date_part_function self period date_part_operation naming_helper
## GROUP Standard.Base.DateTime
ICON date_and_time
@ -1644,7 +1666,7 @@ type Column
run_vectorized_binary_op self op_name as_vector expected_result_type=Value_Type.Boolean skip_nulls=False new_name=result_name
False ->
set = Set.from_vector as_vector error_on_duplicates=False
run_unary_op self set.contains_relational new_name=result_name skip_nulls=False expected_result_type=Value_Type.Boolean
apply_unary_map self result_name set.contains_relational Value_Type.Boolean nothing_unchanged=False
## GROUP Standard.Base.Conversions
ICON convert
@ -2500,27 +2522,6 @@ run_binary_op column function operand new_name skip_nulls=True expected_result_t
s.binaryMap function operand skip_nulls storage_type problem_builder
Column.Value (Java_Column.new new_name new_storage)
## PRIVATE
Runs a unary operation over the provided column.
Arguments:
- column: The column to execute the operation over.
- function: The function to apply to each element of `column`.
- new_name: The name of the column created as the result of this operation.
- skip_nulls: Specifies if nulls should be skipped. If set to `True`, a null
value results in null without passing it to the function. If set to
`False`, the null values are passed as any other value and can have custom
handling logic.
- expected_result_type: The expected result type of the operation. If set to
`Nothing`, the result type is inferred from the values.
run_unary_op column function new_name skip_nulls=True expected_result_type=Nothing =
s = column.java_column.getStorage
storage_type = resolve_storage_type expected_result_type
Java_Problems.with_map_operation_problem_aggregator column.name Problem_Behavior.Report_Warning problem_builder->
new_storage = Polyglot_Helpers.handle_polyglot_dataflow_errors <|
s.unaryMap function skip_nulls storage_type problem_builder
Column.Value (Java_Column.new new_name new_storage)
## PRIVATE
Executes a vectorized binary operation over the provided column.
@ -2560,30 +2561,6 @@ run_vectorized_binary_op_with_fallback_problem_handling column name operand fall
s1.vectorizedOrFallbackBinaryMap name problem_builder applied_fn operand skip_nulls storage_type
Column.Value (Java_Column.new new_name rs)
## PRIVATE
Executes a vectorized unary operation over the provided column.
Arguments:
- column: The column to execute the operation over.
- name: The name of the vectorized operation.
- new_name: The name of the column created as the result of this operation.
- fallback_fn: The function to call if a vectorized operation is not
available. It should never raise dataflow errors.
- expected_result_type: The expected result type of the operation.
- skip_nulls: Specifies if nulls should be skipped. If set to `True`, a null
value results in null without passing it to the function. If set to
`False`, the null values are passed as any other value and can have custom
handling logic.
run_vectorized_unary_op : Column -> Text -> Text -> (Any -> Any) -> Value_Type -> Any -> Column
run_vectorized_unary_op column name new_name fallback_fn=Nothing expected_result_type=Nothing skip_nulls=True =
Java_Problems.with_map_operation_problem_aggregator column.name Problem_Behavior.Report_Warning problem_builder->
s = column.java_column.getStorage
storage_type = resolve_storage_type expected_result_type
rs = Polyglot_Helpers.handle_polyglot_dataflow_errors <|
s.vectorizedOrFallbackUnaryMap name problem_builder fallback_fn skip_nulls storage_type
Column.Value (Java_Column.new new_name rs)
## PRIVATE
Gets a textual representation of the item at position `ix` in `column`.
@ -2652,11 +2629,6 @@ run_vectorized_binary_case_text_op left op other case_sensitivity fallback new_n
See: https://www.pivotaltracker.com/n/projects/2539304/stories/184093260
run_binary_op left fallback other new_name=new_name skip_nulls=True expected_result_type=result_type
## PRIVATE
simple_unary_op column op_name =
new_name = naming_helper.function_name op_name [column]
run_vectorized_unary_op column op_name new_name
## PRIVATE
Converts the value type to a storage type, passing `Nothing` through.
@ -2719,3 +2691,34 @@ Column.from (that:Range) (name:Text="Range") = Column.from_vector name that.to_v
## PRIVATE
Conversion method to a Column from a Vector.
Column.from (that:Date_Range) (name:Text=that.default_column_name) = Column.from_vector name that.to_vector
## PRIVATE
Applies a UnaryOperation to a column.
Arguments:
- column: The column to apply the operation to.
- operation: The UnaryOperation to apply.
- new_name: The name of the column created as the result of this operation.
- if_unsupported: The result if the operation is not supported for the column.
apply_unary_operation column:Column operation:UnaryOperation new_name:Text|Nothing=Nothing ~if_unsupported=(Panic.throw (Illegal_State.Error "Unsupported unary operation for column called. This is a bug in the Table library.")) =
# When no `new_name` is supplied, a name is derived from the operation's own
# name and the column via `naming_helper.function_name`.
used_name = new_name.if_nothing (naming_helper.function_name operation.getName [column])
Java_Problems.with_map_operation_problem_aggregator column.name Problem_Behavior.Report_Warning java_problem_aggregator->
java_column = UnaryOperation.apply column.java_column operation used_name java_problem_aggregator
# The Java `UnaryOperation.apply` returns null (seen here as Nothing) when the
# operation cannot be applied to the column's storage; only in that case is
# `if_unsupported` used, which by default panics with an Illegal_State error.
if java_column.is_nothing then if_unsupported else Column.Value java_column
## PRIVATE
Applies a function to every row in the column.
Arguments:
- column: The column to apply the operation to.
- new_name: The name of the column created as the result of this operation.
- function: A function applied to every element. It should not raise errors.
- expected_result_type: The expected result type of the operation.
- nothing_unchanged: If set to `True`, `Nothing` values will be passed through
the operation without being changed. If set to `False`, `Nothing` values will
be passed to the function.
apply_unary_map column:Column new_name:Text function expected_result_type:Value_Type|Nothing=Nothing nothing_unchanged:Boolean=True =
# Convert the requested value type into the storage type expected by the Java
# side (`resolve_storage_type` passes `Nothing` through).
storage_type = resolve_storage_type expected_result_type
Java_Problems.with_map_operation_problem_aggregator column.name Problem_Behavior.Report_Warning java_problem_aggregator->
# The row-by-row mapping itself is done in Java by `UnaryOperation.mapFunction`.
map_column = UnaryOperation.mapFunction column.java_column function nothing_unchanged storage_type new_name java_problem_aggregator
Column.Value map_column

View File

@ -0,0 +1,178 @@
package org.enso.table.data.column.operation;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.LongConsumer;
import org.enso.base.polyglot.Polyglot_Utils;
import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.storage.ColumnBooleanStorage;
import org.enso.table.data.column.storage.ColumnDoubleStorage;
import org.enso.table.data.column.storage.ColumnLongStorage;
import org.enso.table.data.column.storage.ColumnStorage;
import org.enso.table.data.column.storage.ColumnStorageWithInferredStorage;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.type.StorageType;
import org.enso.table.data.table.Column;
import org.graalvm.polyglot.Context;
import org.graalvm.polyglot.Value;
/**
 * A UnaryOperation is an operation that can be applied to a single ColumnStorage.
 *
 * <p>Each operation reports whether it supports a given storage through {@link
 * #canApply(ColumnStorage)}; the static helpers in this interface drive the row-by-row iteration.
 */
public interface UnaryOperation {
  /**
   * Applies the operation to the given Column.
   *
   * <p>If the operation cannot be applied to the column's storage directly, but the storage can
   * provide an inferred storage (e.g. a Mixed column) that the operation does support, the
   * operation is applied to that inferred storage instead.
   *
   * @param column the column to apply the operation to.
   * @param operation the operation to apply.
   * @param newColumnName the name of the new column.
   * @param problemAggregator the problem aggregator to report problems to.
   * @return a new column holding the result, or {@code null} if the operation supports neither
   *     the column's storage nor its inferred storage.
   */
  static Column apply(
      Column column,
      UnaryOperation operation,
      String newColumnName,
      MapOperationProblemAggregator problemAggregator) {
    ColumnStorage storage = column.getStorage();

    // If the operation cannot handle the storage directly, but the storage exposes an inferred
    // storage (e.g. a Mixed column), try the inferred storage instead.
    if (!operation.canApply(storage)
        && storage instanceof ColumnStorageWithInferredStorage withInferredStorage) {
      var inferredStorage = withInferredStorage.getInferredStorage();
      if (inferredStorage != null && operation.canApply(inferredStorage)) {
        storage = inferredStorage;
      }
    }

    if (!operation.canApply(storage)) {
      return null;
    }

    // Run on the storage that passed canApply (possibly the inferred one), not on
    // column.getStorage(), which may be exactly the storage the operation has just rejected.
    var result = operation.apply(storage, problemAggregator);
    return new Column(newColumnName, (Storage<?>) result);
  }

  /**
   * Applies a function to every row in a column.
   *
   * @param column the column to apply the operation to.
   * @param function the function to apply.
   * @param nothingUnchanged whether to keep nothing values unchanged; if {@code true} the function
   *     is not called for such rows and a null is appended instead.
   * @param expectedResultType the expected type of the result.
   * @param newColumnName the name of the new column.
   * @param problemAggregator the problem aggregator to report problems to.
   * @return a new column built from the converted results of the function.
   */
  static Column mapFunction(
      Column column,
      Function<Object, Value> function,
      boolean nothingUnchanged,
      StorageType expectedResultType,
      String newColumnName,
      MapOperationProblemAggregator problemAggregator) {
    ColumnStorage storage = column.getStorage();
    // The builder is created for column.getSize() rows, hence appendNoGrow below.
    Builder storageBuilder =
        Builder.getForType(expectedResultType, column.getSize(), problemAggregator);
    applyStorageInner(
        storage,
        nothingUnchanged,
        storageBuilder,
        i -> {
          Value result = function.apply(storage.getItemAsObject(i));
          Object converted = Polyglot_Utils.convertPolyglotValue(result);
          storageBuilder.appendNoGrow(converted);
        });
    return new Column(newColumnName, storageBuilder.seal());
  }

  /** Gets the name of the Operation. */
  String getName();

  /** Can the operation be applied to the given Storage? */
  boolean canApply(ColumnStorage storage);

  /** Applies the operation to the given Storage. */
  ColumnStorage apply(ColumnStorage storage, MapOperationProblemAggregator problemAggregator);

  /**
   * Shared row loop: visits every index of the storage in order, either appending a null (when
   * {@code nothingUnchanged} is set and the row is nothing) or invoking the callback with the row
   * index. A safepoint is polled after every row.
   */
  private static void applyStorageInner(
      ColumnStorage columnStorage,
      boolean nothingUnchanged,
      Builder builder,
      LongConsumer callback) {
    Context context = Context.getCurrent();
    long size = columnStorage.getSize();
    for (long i = 0; i < size; i++) {
      if (nothingUnchanged && columnStorage.isNothing(i)) {
        builder.appendNulls(1);
      } else {
        callback.accept(i);
      }

      context.safepoint();
    }
  }

  /**
   * Iterates over the given storage, passing each row's value (as an Object) to the function.
   *
   * <p>When {@code nothingUnchanged} is {@code true}, rows that are nothing are not passed to the
   * function; a null is appended to the builder instead.
   */
  static void applyOverObjectStorage(
      ColumnStorage objectStorage,
      boolean nothingUnchanged,
      Builder builder,
      Consumer<Object> function) {
    applyStorageInner(
        objectStorage,
        nothingUnchanged,
        builder,
        i -> function.accept(objectStorage.getItemAsObject(i)));
  }

  /**
   * Iterates over the given Boolean Storage, passing each row to the function.
   *
   * <p>The function only ever sees {@code isNothing == true} when {@code nothingUnchanged} is
   * {@code false}; otherwise such rows are appended as nulls without calling it.
   */
  static void applyOverBooleanStorage(
      ColumnBooleanStorage booleanStorage,
      boolean nothingUnchanged,
      Builder builder,
      BooleanRowApplier function) {
    applyStorageInner(
        booleanStorage,
        nothingUnchanged,
        builder,
        i -> function.accept(booleanStorage.isNothing(i), booleanStorage.get(i)));
  }

  /** Callback receiving one row of a boolean storage. */
  @FunctionalInterface
  interface BooleanRowApplier {
    void accept(boolean isNothing, boolean value);
  }

  /**
   * Iterates over the given Long Storage, passing each row to the function.
   *
   * <p>The function only ever sees {@code isNothing == true} when {@code nothingUnchanged} is
   * {@code false}; otherwise such rows are appended as nulls without calling it.
   */
  static void applyOverLongStorage(
      ColumnLongStorage longStorage,
      boolean nothingUnchanged,
      Builder builder,
      LongRowApplier function) {
    applyStorageInner(
        longStorage,
        nothingUnchanged,
        builder,
        i -> function.accept(longStorage.isNothing(i), longStorage.get(i)));
  }

  /** Callback receiving one row of a long storage. */
  @FunctionalInterface
  interface LongRowApplier {
    void accept(boolean isNothing, long value);
  }

  /**
   * Iterates over the given Double Storage, passing each row to the function.
   *
   * <p>The function only ever sees {@code isNothing == true} when {@code nothingUnchanged} is
   * {@code false}; otherwise such rows are appended as nulls without calling it.
   */
  static void applyOverDoubleStorage(
      ColumnDoubleStorage doubleStorage,
      boolean nothingUnchanged,
      Builder builder,
      DoubleRowApplier function) {
    applyStorageInner(
        doubleStorage,
        nothingUnchanged,
        builder,
        i -> function.accept(doubleStorage.isNothing(i), doubleStorage.get(i)));
  }

  /** Callback receiving one row of a double storage. */
  @FunctionalInterface
  interface DoubleRowApplier {
    void accept(boolean isNothing, double value);
  }
}

View File

@ -11,37 +11,9 @@ import org.enso.table.data.column.storage.Storage;
* @param <S> the storage type handled by these operations.
*/
public class MapOperationStorage<T, S extends Storage<? super T>> {
private final Map<String, UnaryMapOperation<T, S>> unaryOps = new HashMap<>();
private final Map<String, BinaryMapOperation<T, S>> binaryOps = new HashMap<>();
private final Map<String, TernaryMapOperation<T, S>> ternaryOps = new HashMap<>();
/**
* Checks if a unary operation is supported by this set.
*
* @param n the operation name
* @return whether the operation is supported
*/
public boolean isSupportedUnary(String n) {
return n != null && unaryOps.get(n) != null;
}
/**
* Runs the specified unary operation in map node.
*
* @param n the operation name
* @param storage the storage to run operation on
* @param problemAggregator the aggregator allowing to report computation problems
* @return the result of running the operation
*/
public Storage<?> runUnaryMap(
String n, S storage, MapOperationProblemAggregator problemAggregator) {
if (!isSupportedUnary(n)) {
throw new IllegalStateException(
"Requested vectorized unary operation " + n + ", but no such operation is known.");
}
return unaryOps.get(n).runUnaryMap(storage, problemAggregator);
}
/**
* Checks if a binary operation is supported by this set.
*
@ -129,17 +101,6 @@ public class MapOperationStorage<T, S extends Storage<? super T>> {
return operation.runZip(storage, arg, problemAggregator);
}
/**
* Adds a new operation to this set.
*
* @param op the operation to add
* @return this operation set
*/
public MapOperationStorage<T, S> add(UnaryMapOperation<T, S> op) {
unaryOps.put(op.getName(), op);
return this;
}
/**
* Adds a new operation to this set.
*

View File

@ -1,27 +0,0 @@
package org.enso.table.data.column.operation.map;
import org.enso.table.data.column.storage.Storage;
/**
 * Base class for a named unary map-like operation over a storage.
 *
 * @param <T> the element type; the storage handled is a {@code Storage<? super T>}
 * @param <I> the supported storage type
 */
public abstract class UnaryMapOperation<T, I extends Storage<? super T>> {
  private final String name;

  /**
   * Creates the operation.
   *
   * @param operationName the name under which this operation is known
   */
  public UnaryMapOperation(String operationName) {
    name = operationName;
  }

  /**
   * @return the name of this operation
   */
  public String getName() {
    return this.name;
  }

  /** Run the unary operation. */
  protected abstract Storage<?> runUnaryMap(
      I storage, MapOperationProblemAggregator problemAggregator);
}

View File

@ -1,93 +0,0 @@
package org.enso.table.data.column.operation.map.datetime;
import java.time.temporal.ChronoField;
import java.time.temporal.IsoFields;
import java.time.temporal.Temporal;
import java.time.temporal.TemporalField;
import org.enso.table.data.column.operation.map.numeric.GenericUnaryIntegerOp;
import org.enso.table.data.column.storage.Storage;
public class DatePartExtractors {
public static <T extends Temporal, I extends Storage<T>>
GenericUnaryIntegerOp<Temporal, T, I> make_op(String name, TemporalField field) {
return new GenericUnaryIntegerOp<>(name) {
@Override
protected long doGenericOperation(Temporal value) {
return value.getLong(field);
}
};
}
public static <T extends Temporal, I extends Storage<T>>
GenericUnaryIntegerOp<Temporal, T, I> year() {
return make_op(Storage.Maps.YEAR, ChronoField.YEAR);
}
public static <T extends Temporal, I extends Storage<T>>
GenericUnaryIntegerOp<Temporal, T, I> quarter() {
return new GenericUnaryIntegerOp<>(Storage.Maps.QUARTER) {
@Override
protected long doGenericOperation(Temporal value) {
long month = value.get(ChronoField.MONTH_OF_YEAR);
return (month - 1) / 3 + 1;
}
};
}
public static <T extends Temporal, I extends Storage<T>>
GenericUnaryIntegerOp<Temporal, T, I> month() {
return make_op(Storage.Maps.MONTH, ChronoField.MONTH_OF_YEAR);
}
public static <T extends Temporal, I extends Storage<T>>
GenericUnaryIntegerOp<Temporal, T, I> week() {
return make_op(Storage.Maps.WEEK, IsoFields.WEEK_OF_WEEK_BASED_YEAR);
}
public static <T extends Temporal, I extends Storage<T>>
GenericUnaryIntegerOp<Temporal, T, I> day() {
return make_op(Storage.Maps.DAY, ChronoField.DAY_OF_MONTH);
}
public static <T extends Temporal, I extends Storage<T>>
GenericUnaryIntegerOp<Temporal, T, I> hour() {
return make_op(Storage.Maps.HOUR, ChronoField.HOUR_OF_DAY);
}
public static <T extends Temporal, I extends Storage<T>>
GenericUnaryIntegerOp<Temporal, T, I> minute() {
return make_op(Storage.Maps.MINUTE, ChronoField.MINUTE_OF_HOUR);
}
public static <T extends Temporal, I extends Storage<T>>
GenericUnaryIntegerOp<Temporal, T, I> second() {
return make_op(Storage.Maps.SECOND, ChronoField.SECOND_OF_MINUTE);
}
public static <T extends Temporal, I extends Storage<T>>
GenericUnaryIntegerOp<Temporal, T, I> millisecond() {
return make_op(Storage.Maps.MILLISECOND, ChronoField.MILLI_OF_SECOND);
}
public static <T extends Temporal, I extends Storage<T>>
GenericUnaryIntegerOp<Temporal, T, I> microsecond() {
return new GenericUnaryIntegerOp<>(Storage.Maps.MICROSECOND) {
@Override
protected long doGenericOperation(Temporal value) {
long micros = value.get(ChronoField.MICRO_OF_SECOND);
return micros % 1000;
}
};
}
public static <T extends Temporal, I extends Storage<T>>
GenericUnaryIntegerOp<Temporal, T, I> nanosecond() {
return new GenericUnaryIntegerOp<>(Storage.Maps.NANOSECOND) {
@Override
protected long doGenericOperation(Temporal value) {
long micros = value.get(ChronoField.NANO_OF_SECOND);
return micros % 1000;
}
};
}
}

View File

@ -1,45 +0,0 @@
package org.enso.table.data.column.operation.map.numeric;
import java.util.BitSet;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.operation.map.UnaryMapOperation;
import org.enso.table.data.column.storage.numeric.DoubleStorage;
import org.enso.table.data.column.storage.numeric.LongStorage;
import org.enso.table.data.column.storage.type.IntegerType;
import org.graalvm.polyglot.Context;
/**
 * A unary operation mapping a double storage to a long storage, where NaN and infinite inputs are
 * reported as arithmetic errors and produce a missing value instead of being passed to the
 * operation.
 */
public abstract class DoubleLongMapOpWithSpecialNumericHandling
    extends UnaryMapOperation<Double, DoubleStorage> {
  public DoubleLongMapOpWithSpecialNumericHandling(String name) {
    super(name);
  }

  /** Computes the result for a single finite (neither NaN nor infinite) input. */
  protected abstract long doOperation(double a);

  @Override
  public LongStorage runUnaryMap(
      DoubleStorage storage, MapOperationProblemAggregator problemAggregator) {
    Context context = Context.getCurrent();
    long[] results = new long[storage.size()];
    BitSet missing = new BitSet();
    for (int row = 0; row < storage.size(); row++) {
      if (storage.isNa(row)) {
        missing.set(row);
      } else {
        double item = storage.getItemAsDouble(row);
        if (Double.isFinite(item)) {
          results[row] = doOperation(item);
        } else {
          // NaN or +/-Infinity: report the problem and leave the row missing.
          problemAggregator.reportArithmeticError("Value is " + item, row);
          missing.set(row);
        }
      }

      context.safepoint();
    }
    return new LongStorage(results, storage.size(), missing, IntegerType.INT_64);
  }
}

View File

@ -1,17 +0,0 @@
package org.enso.table.data.column.operation.map.numeric;
import org.enso.table.data.column.storage.Storage;
/**
 * A {@link UnaryIntegerOp} whose logic is written against a supertype {@code U} of the stored item
 * type {@code T}; each item is forwarded unchanged to {@link #doGenericOperation}.
 */
public abstract class GenericUnaryIntegerOp<U, T extends U, I extends Storage<T>>
    extends UnaryIntegerOp<T, I> {
  public GenericUnaryIntegerOp(String name) {
    super(name);
  }

  /** Computes the integer result for a value seen through the more general type {@code U}. */
  protected abstract long doGenericOperation(U value);

  @Override
  protected long doOperation(T item) {
    U widened = item;
    return doGenericOperation(widened);
  }
}

View File

@ -1,39 +0,0 @@
package org.enso.table.data.column.operation.map.numeric;
import java.util.BitSet;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.operation.map.UnaryMapOperation;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.numeric.LongStorage;
import org.enso.table.data.column.storage.type.IntegerType;
import org.graalvm.polyglot.Context;
/**
 * An operation that maps each non-missing item of a storage to an integer, producing a 64-bit
 * {@link LongStorage}; missing items stay missing.
 */
public abstract class UnaryIntegerOp<T, I extends Storage<T>> extends UnaryMapOperation<T, I> {
  private static final IntegerType RESULT_TYPE = IntegerType.INT_64;

  public UnaryIntegerOp(String name) {
    super(name);
  }

  /** Computes the integer result for a single non-missing item. */
  protected abstract long doOperation(T value);

  @Override
  protected Storage<?> runUnaryMap(I storage, MapOperationProblemAggregator problemAggregator) {
    Context context = Context.getCurrent();
    BitSet missing = new BitSet();
    long[] results = new long[storage.size()];
    for (int row = 0; row < storage.size(); row++) {
      if (storage.isNa(row)) {
        missing.set(row);
      } else {
        results[row] = doOperation(storage.getItemBoxed(row));
      }

      context.safepoint();
    }
    return new LongStorage(results, results.length, missing, RESULT_TYPE);
  }
}

View File

@ -1,38 +0,0 @@
package org.enso.table.data.column.operation.map.numeric;
import java.util.BitSet;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.operation.map.UnaryMapOperation;
import org.enso.table.data.column.storage.numeric.AbstractLongStorage;
import org.enso.table.data.column.storage.numeric.LongStorage;
import org.graalvm.polyglot.Context;
/**
 * An operation that takes a single long argument and returns a long. Missing items stay missing,
 * and the result storage reuses the type of the input storage.
 */
public abstract class UnaryLongToLongOp extends UnaryMapOperation<Long, AbstractLongStorage> {
  public UnaryLongToLongOp(String name) {
    super(name);
  }

  /** Computes the result for a single non-missing item. */
  protected abstract long doOperation(long value);

  @Override
  protected LongStorage runUnaryMap(
      AbstractLongStorage storage, MapOperationProblemAggregator problemAggregator) {
    Context context = Context.getCurrent();
    BitSet missing = new BitSet();
    long[] results = new long[storage.size()];
    for (int row = 0; row < storage.size(); row++) {
      if (storage.isNa(row)) {
        missing.set(row);
      } else {
        results[row] = doOperation(storage.getItem(row));
      }

      context.safepoint();
    }

    // TODO is inheriting type ok? it may not be enough!
    return new LongStorage(results, results.length, missing, storage.getType());
  }
}

View File

@ -0,0 +1,38 @@
package org.enso.table.data.column.operation.unary;
import org.enso.table.data.column.builder.BoolBuilder;
import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.storage.ColumnStorage;
/**
 * Base class for unary operations whose result is a boolean column. It fixes the builder to a
 * {@link BoolBuilder} and exposes a {@code BoolBuilder}-typed row callback to subclasses.
 */
abstract class AbstractUnaryBooleanOperation extends AbstractUnaryOperation {
  /**
   * Creates a new AbstractUnaryBooleanOperation.
   *
   * @param name the name of the operation
   * @param nothingUnchanged whether the operation should return nothing if the input is nothing
   */
  protected AbstractUnaryBooleanOperation(String name, boolean nothingUnchanged) {
    super(name, nothingUnchanged);
  }

  /**
   * Creates a {@link BoolBuilder} sized to the input storage.
   *
   * @throws IllegalArgumentException if the storage holds more than {@code Integer.MAX_VALUE} rows
   */
  @Override
  protected BoolBuilder createBuilder(
      ColumnStorage storage, MapOperationProblemAggregator problemAggregator) {
    if (storage.getSize() <= Integer.MAX_VALUE) {
      return new BoolBuilder((int) storage.getSize());
    }
    throw new IllegalArgumentException(
        "Cannot currently operate on columns larger than " + Integer.MAX_VALUE + ".");
  }

  /** Narrows the generic builder to a {@link BoolBuilder} and forwards to the typed overload. */
  @Override
  protected final void applyObjectRow(
      Object value, Builder builder, MapOperationProblemAggregator problemAggregator) {
    BoolBuilder boolBuilder = (BoolBuilder) builder;
    applyObjectRow(value, boolBuilder, problemAggregator);
  }

  /**
   * Processes a single row value and appends the result to the builder.
   *
   * @param value the row value read from the storage
   * @param builder the builder receiving the boolean result
   * @param problemAggregator the aggregator for problems encountered while processing
   */
  protected abstract void applyObjectRow(
      Object value, BoolBuilder builder, MapOperationProblemAggregator problemAggregator);
}

View File

@ -0,0 +1,45 @@
package org.enso.table.data.column.operation.unary;
import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.builder.LongBuilder;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.storage.ColumnStorage;
import org.enso.table.data.column.storage.type.IntegerType;
/**
 * Base class for unary operations whose result is a long column. It fixes the builder to a
 * {@link LongBuilder} of a configured integer type and exposes a {@code LongBuilder}-typed row
 * callback to subclasses.
 */
abstract class AbstractUnaryLongOperation extends AbstractUnaryOperation {
  private final IntegerType returnType;

  /**
   * Creates a new AbstractUnaryLongOperation.
   *
   * @param name the name of the operation
   * @param nothingUnchanged whether the operation should return nothing if the input is nothing
   * @param returnType the type of the column that will be returned
   */
  protected AbstractUnaryLongOperation(
      String name, boolean nothingUnchanged, IntegerType returnType) {
    super(name, nothingUnchanged);
    this.returnType = returnType;
  }

  /**
   * Creates a {@link LongBuilder} of the configured return type, sized to the input storage.
   *
   * @throws IllegalArgumentException if the storage holds more than {@code Integer.MAX_VALUE} rows
   */
  @Override
  protected LongBuilder createBuilder(
      ColumnStorage storage, MapOperationProblemAggregator problemAggregator) {
    if (storage.getSize() <= Integer.MAX_VALUE) {
      return LongBuilder.createLongBuilder((int) storage.getSize(), returnType, problemAggregator);
    }
    throw new IllegalArgumentException(
        "Cannot currently operate on columns larger than " + Integer.MAX_VALUE + ".");
  }

  /** Narrows the generic builder to a {@link LongBuilder} and forwards to the typed overload. */
  @Override
  protected final void applyObjectRow(
      Object value, Builder builder, MapOperationProblemAggregator problemAggregator) {
    LongBuilder longBuilder = (LongBuilder) builder;
    applyObjectRow(value, longBuilder, problemAggregator);
  }

  /**
   * Processes a single row value and appends the result to the builder.
   *
   * @param value the row value read from the storage
   * @param builder the builder receiving the long result
   * @param problemAggregator the aggregator for problems encountered while processing
   */
  protected abstract void applyObjectRow(
      Object value, LongBuilder builder, MapOperationProblemAggregator problemAggregator);
}

View File

@ -0,0 +1,104 @@
package org.enso.table.data.column.operation.unary;
import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.builder.InferredBuilder;
import org.enso.table.data.column.operation.UnaryOperation;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.storage.ColumnBooleanStorage;
import org.enso.table.data.column.storage.ColumnDoubleStorage;
import org.enso.table.data.column.storage.ColumnLongStorage;
import org.enso.table.data.column.storage.ColumnStorage;
/**
 * Skeleton implementation of {@link UnaryOperation}. {@link #apply} creates a builder, dispatches
 * on the kind of storage (boolean, long, double or anything else) and seals the builder. Unless
 * overridden, every typed path falls back to the per-row object path.
 */
abstract class AbstractUnaryOperation implements UnaryOperation {
  private final String name;
  private final boolean nothingUnchanged;

  /**
   * Creates a new AbstractUnaryOperation.
   *
   * @param name the name of the operation
   * @param nothingUnchanged whether the operation should return nothing if the input is nothing
   */
  protected AbstractUnaryOperation(String name, boolean nothingUnchanged) {
    this.name = name;
    this.nothingUnchanged = nothingUnchanged;
  }

  @Override
  public String getName() {
    return name;
  }

  @Override
  public abstract boolean canApply(ColumnStorage storage);

  /**
   * Runs the operation over the whole storage.
   *
   * @param storage the input column storage
   * @param problemAggregator the aggregator for problems encountered while processing
   * @return the sealed result of the builder produced by {@link #createBuilder}
   */
  @Override
  public ColumnStorage apply(
      ColumnStorage storage, MapOperationProblemAggregator problemAggregator) {
    Builder target = createBuilder(storage, problemAggregator);
    // Case order matters if a storage implements more than one of the typed interfaces.
    switch (storage) {
      case ColumnBooleanStorage bools -> applyBoolean(bools, target, problemAggregator);
      case ColumnLongStorage longs -> applyLong(longs, target, problemAggregator);
      case ColumnDoubleStorage doubles -> applyDouble(doubles, target, problemAggregator);
      default -> applyObject(storage, target, problemAggregator);
    }
    return target.seal();
  }

  /**
   * Creates the builder that collects the results; by default an {@link InferredBuilder}.
   *
   * @throws IllegalArgumentException if the storage holds more than {@code Integer.MAX_VALUE} rows
   */
  protected Builder createBuilder(
      ColumnStorage storage, MapOperationProblemAggregator problemAggregator) {
    if (storage.getSize() <= Integer.MAX_VALUE) {
      return new InferredBuilder((int) storage.getSize(), problemAggregator);
    }
    throw new IllegalArgumentException(
        "Cannot currently operate on columns larger than " + Integer.MAX_VALUE + ".");
  }

  /** Applies the operation to a boolean storage; defaults to the object path. */
  protected void applyBoolean(
      ColumnBooleanStorage booleanStorage,
      Builder builder,
      MapOperationProblemAggregator problemAggregator) {
    applyObject(booleanStorage, builder, problemAggregator);
  }

  /** Applies the operation to a long storage; defaults to the object path. */
  protected void applyLong(
      ColumnLongStorage longStorage,
      Builder builder,
      MapOperationProblemAggregator problemAggregator) {
    applyObject(longStorage, builder, problemAggregator);
  }

  /** Applies the operation to a double storage; defaults to the object path. */
  protected void applyDouble(
      ColumnDoubleStorage doubleStorage,
      Builder builder,
      MapOperationProblemAggregator problemAggregator) {
    applyObject(doubleStorage, builder, problemAggregator);
  }

  /**
   * Applies the operation to any storage by handing each row to {@link #applyObjectRow} through
   * {@code UnaryOperation.applyOverObjectStorage}.
   */
  protected void applyObject(
      ColumnStorage objectStorage,
      Builder builder,
      MapOperationProblemAggregator problemAggregator) {
    UnaryOperation.applyOverObjectStorage(
        objectStorage,
        nothingUnchanged,
        builder,
        row -> applyObjectRow(row, builder, problemAggregator));
  }

  /**
   * Processes a single row value and appends the result to the builder.
   *
   * @param value the row value read from the storage
   * @param builder the builder receiving the result
   * @param problemAggregator the aggregator for problems encountered while processing
   */
  protected abstract void applyObjectRow(
      Object value, Builder builder, MapOperationProblemAggregator problemAggregator);
}

View File

@ -0,0 +1,75 @@
package org.enso.table.data.column.operation.unary;
import java.time.temporal.ChronoField;
import java.time.temporal.IsoFields;
import java.time.temporal.Temporal;
import java.time.temporal.TemporalField;
import org.enso.table.data.column.builder.LongBuilder;
import org.enso.table.data.column.operation.UnaryOperation;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.storage.ColumnStorage;
import org.enso.table.data.column.storage.type.IntegerType;
/**
 * Extracts a single {@link TemporalField} from each date/time value of a column and returns it as
 * a 64-bit integer column. One shared instance is provided per supported part.
 */
public class DatePartOperation extends AbstractUnaryLongOperation {
  public static final String YEAR = "year";
  public static final UnaryOperation YEAR_INSTANCE =
      new DatePartOperation(YEAR, ChronoField.YEAR, false);

  public static final String QUARTER = "quarter";
  public static final UnaryOperation QUARTER_INSTANCE =
      new DatePartOperation(QUARTER, IsoFields.QUARTER_OF_YEAR, false);

  public static final String MONTH = "month";
  public static final UnaryOperation MONTH_INSTANCE =
      new DatePartOperation(MONTH, ChronoField.MONTH_OF_YEAR, false);

  public static final String WEEK = "week";
  public static final UnaryOperation WEEK_INSTANCE =
      new DatePartOperation(WEEK, IsoFields.WEEK_OF_WEEK_BASED_YEAR, false);

  public static final String DAY = "day";
  public static final UnaryOperation DAY_INSTANCE =
      new DatePartOperation(DAY, ChronoField.DAY_OF_MONTH, false);

  public static final String HOUR = "hour";
  public static final UnaryOperation HOUR_INSTANCE =
      new DatePartOperation(HOUR, ChronoField.HOUR_OF_DAY, true);

  public static final String MINUTE = "minute";
  public static final UnaryOperation MINUTE_INSTANCE =
      new DatePartOperation(MINUTE, ChronoField.MINUTE_OF_HOUR, true);

  public static final String SECOND = "second";
  public static final UnaryOperation SECOND_INSTANCE =
      new DatePartOperation(SECOND, ChronoField.SECOND_OF_MINUTE, true);

  public static final String MILLISECOND = "millisecond";
  public static final UnaryOperation MILLISECOND_INSTANCE =
      new DatePartOperation(MILLISECOND, ChronoField.MILLI_OF_SECOND, true);

  /** The temporal field read from every value. */
  protected final TemporalField field;

  /** True when the field is a time-of-day part, false when it is a date part. */
  protected final boolean timeField;

  /**
   * Creates a new DatePartOperation.
   *
   * @param name the name of the operation
   * @param field the temporal field to extract
   * @param timeField whether the input type must have a time component (otherwise a date one)
   */
  protected DatePartOperation(String name, TemporalField field, boolean timeField) {
    super(name, true, IntegerType.INT_64);
    this.field = field;
    this.timeField = timeField;
  }

  /** Time parts need a storage type with a time component, date parts one with a date. */
  @Override
  public boolean canApply(ColumnStorage storage) {
    if (timeField) {
      return storage.getType().hasTime();
    }
    return storage.getType().hasDate();
  }

  /**
   * Appends the configured field of a temporal value.
   *
   * @throws IllegalArgumentException if the value is not a {@link Temporal}
   */
  @Override
  protected void applyObjectRow(
      Object value, LongBuilder builder, MapOperationProblemAggregator problemAggregator) {
    if (!(value instanceof Temporal temporal)) {
      throw new IllegalArgumentException(
          "Unsupported type: " + value.getClass() + " (expected date/time type).");
    }
    builder.appendLong(temporal.getLong(field));
  }
}

View File

@ -0,0 +1,39 @@
package org.enso.table.data.column.operation.unary;
import org.enso.table.data.column.builder.BoolBuilder;
import org.enso.table.data.column.operation.UnaryOperation;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.storage.ColumnStorage;
import org.enso.table.data.column.storage.type.TextType;
/**
 * An operation that checks if a column's row values are empty. Only supported on Text values.
 *
 * <p>A row holding nothing ({@code null}) is reported as empty.
 */
public class IsEmptyOperation extends AbstractUnaryBooleanOperation {
  // Declared final so the operation name cannot be reassigned at runtime, matching the other
  // unary operations.
  public static final String NAME = "is_empty";
  public static final UnaryOperation INSTANCE = new IsEmptyOperation();

  private IsEmptyOperation() {
    // nothingUnchanged is false: null rows are passed to applyObjectRow and mapped to true below.
    super(NAME, false);
  }

  @Override
  public boolean canApply(ColumnStorage storage) {
    return storage.getType() instanceof TextType;
  }

  /**
   * Appends {@code true} for a null or zero-length string, {@code false} for any other string.
   *
   * @param value the row value; may be null
   * @param builder the builder receiving the boolean result
   * @param problemAggregator unused by this operation
   * @throws IllegalArgumentException if the value is neither null nor a {@link String}
   */
  @Override
  protected void applyObjectRow(
      Object value, BoolBuilder builder, MapOperationProblemAggregator problemAggregator) {
    if (value == null) {
      builder.appendBoolean(true);
    } else if (value instanceof String s) {
      builder.appendBoolean(s.isEmpty());
    } else {
      throw new IllegalArgumentException(
          "Unsupported type: " + value.getClass() + " (expected text type).");
    }
  }
}

View File

@ -0,0 +1,71 @@
package org.enso.table.data.column.operation.unary;
import java.util.BitSet;
import org.enso.table.data.column.builder.BoolBuilder;
import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.operation.UnaryOperation;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.storage.*;
/**
 * An operation that checks, row by row, whether a numeric value is infinite. Integer values are
 * never infinite, so long columns always yield {@code false}.
 */
public class IsInfiniteOperation extends AbstractUnaryBooleanOperation {
  public static final String NAME = "is_infinite";
  public static final UnaryOperation INSTANCE = new IsInfiniteOperation();

  private IsInfiniteOperation() {
    super(NAME, true);
  }

  @Override
  public boolean canApply(ColumnStorage storage) {
    return storage.getType().isNumeric();
  }

  @Override
  public ColumnStorage apply(
      ColumnStorage storage, MapOperationProblemAggregator problemAggregator) {
    boolean isLongColumn = storage instanceof ColumnLongStorage;
    if (isLongColumn && storage instanceof ColumnStorageWithNothingMap nothingAware) {
      // A long column with a Nothing map needs no per-row work: no bit is set in the values,
      // and the missing rows are taken from the existing map.
      BitSet noneSet = new BitSet();
      return new BoolStorage(
          noneSet, nothingAware.getIsNothingMap(), (int) storage.getSize(), false);
    }
    return super.apply(storage, problemAggregator);
  }

  /** Long values are never infinite, so {@code false} is appended for every visited row. */
  @Override
  protected void applyLong(
      ColumnLongStorage longStorage,
      Builder builder,
      MapOperationProblemAggregator problemAggregator) {
    BoolBuilder out = (BoolBuilder) builder;
    UnaryOperation.applyOverLongStorage(
        longStorage, true, builder, (missing, ignoredValue) -> out.appendBoolean(false));
  }

  /** Tests each double with {@link Double#isInfinite(double)}. */
  @Override
  protected void applyDouble(
      ColumnDoubleStorage doubleStorage,
      Builder builder,
      MapOperationProblemAggregator problemAggregator) {
    BoolBuilder out = (BoolBuilder) builder;
    UnaryOperation.applyOverDoubleStorage(
        doubleStorage,
        true,
        builder,
        (missing, number) -> out.appendBoolean(Double.isInfinite(number)));
  }

  /**
   * Handles a boxed value from a generic storage.
   *
   * @throws IllegalArgumentException if the value is not a {@link Number}
   */
  @Override
  protected void applyObjectRow(
      Object value, BoolBuilder builder, MapOperationProblemAggregator problemAggregator) {
    // Null handled by base class
    boolean infinite =
        switch (value) {
          case Double d -> Double.isInfinite(d);
          case Float f -> Float.isInfinite(f);
          case Number ignored -> false;
          default -> throw new IllegalArgumentException(
              "Unsupported type: " + value.getClass() + " (expected numeric type).");
        };
    builder.appendBoolean(infinite);
  }
}

View File

@ -0,0 +1,75 @@
package org.enso.table.data.column.operation.unary;
import java.util.BitSet;
import org.enso.table.data.column.builder.BoolBuilder;
import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.operation.UnaryOperation;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.storage.BoolStorage;
import org.enso.table.data.column.storage.ColumnDoubleStorage;
import org.enso.table.data.column.storage.ColumnLongStorage;
import org.enso.table.data.column.storage.ColumnStorage;
import org.enso.table.data.column.storage.ColumnStorageWithNothingMap;
/**
 * An operation that checks, row by row, whether a numeric value is NaN. Integer values are never
 * NaN, so long columns always yield {@code false}.
 */
public class IsNaNOperation extends AbstractUnaryBooleanOperation {
  public static final String NAME = "is_nan";
  public static final UnaryOperation INSTANCE = new IsNaNOperation();

  private IsNaNOperation() {
    super(NAME, true);
  }

  @Override
  public boolean canApply(ColumnStorage storage) {
    return storage.getType().isNumeric();
  }

  @Override
  public ColumnStorage apply(
      ColumnStorage storage, MapOperationProblemAggregator problemAggregator) {
    boolean isLongColumn = storage instanceof ColumnLongStorage;
    if (isLongColumn && storage instanceof ColumnStorageWithNothingMap nothingAware) {
      // A long column with a Nothing map needs no per-row work: no bit is set in the values,
      // and the missing rows are taken from the existing map.
      BitSet noneSet = new BitSet();
      return new BoolStorage(
          noneSet, nothingAware.getIsNothingMap(), (int) storage.getSize(), false);
    }
    return super.apply(storage, problemAggregator);
  }

  /** Long values are never NaN, so {@code false} is appended for every visited row. */
  @Override
  protected void applyLong(
      ColumnLongStorage longStorage,
      Builder builder,
      MapOperationProblemAggregator problemAggregator) {
    BoolBuilder out = (BoolBuilder) builder;
    UnaryOperation.applyOverLongStorage(
        longStorage, true, builder, (missing, ignoredValue) -> out.appendBoolean(false));
  }

  /** Tests each double with {@link Double#isNaN(double)}. */
  @Override
  protected void applyDouble(
      ColumnDoubleStorage doubleStorage,
      Builder builder,
      MapOperationProblemAggregator problemAggregator) {
    BoolBuilder out = (BoolBuilder) builder;
    UnaryOperation.applyOverDoubleStorage(
        doubleStorage,
        true,
        builder,
        (missing, number) -> out.appendBoolean(Double.isNaN(number)));
  }

  /**
   * Handles a boxed value from a generic storage.
   *
   * @throws IllegalArgumentException if the value is not a {@link Number}
   */
  @Override
  protected void applyObjectRow(
      Object value, BoolBuilder builder, MapOperationProblemAggregator problemAggregator) {
    // Null handled by base class
    boolean nan =
        switch (value) {
          case Double d -> Double.isNaN(d);
          case Float f -> Float.isNaN(f);
          case Number ignored -> false;
          default -> throw new IllegalArgumentException(
              "Unsupported type: " + value.getClass() + " (expected numeric type).");
        };
    builder.appendBoolean(nan);
  }
}

View File

@ -0,0 +1,45 @@
package org.enso.table.data.column.operation.unary;
import java.util.BitSet;
import org.enso.table.data.column.builder.BoolBuilder;
import org.enso.table.data.column.operation.UnaryOperation;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.storage.BoolStorage;
import org.enso.table.data.column.storage.ColumnStorage;
import org.enso.table.data.column.storage.ColumnStorageWithNothingMap;
/**
 * An operation that reports, for every row of any column, whether the row holds nothing. The
 * result itself never contains nothing.
 */
public class IsNothingOperation extends AbstractUnaryBooleanOperation {
  public static final String NAME = "is_nothing";
  public static final UnaryOperation INSTANCE = new IsNothingOperation();

  private IsNothingOperation() {
    super(NAME, false);
  }

  /** Applicable to every storage. */
  @Override
  public boolean canApply(ColumnStorage storage) {
    return true;
  }

  @Override
  public ColumnStorage apply(
      ColumnStorage storage, MapOperationProblemAggregator problemAggregator) {
    if (storage instanceof ColumnStorageWithNothingMap nothingAware) {
      // The Nothing map already is the answer: it becomes the values, with an empty missing map.
      BitSet noneMissing = new BitSet();
      return new BoolStorage(
          nothingAware.getIsNothingMap(), noneMissing, (int) storage.getSize(), false);
    }

    // Otherwise ask the storage row by row.
    BoolBuilder result = createBuilder(storage, problemAggregator);
    long row = 0;
    while (row < storage.getSize()) {
      result.appendBoolean(storage.isNothing(row));
      row++;
    }
    return result.seal();
  }

  /** Never called: {@link #apply} is fully overridden and does not use the per-row path. */
  @Override
  protected void applyObjectRow(
      Object value, BoolBuilder builder, MapOperationProblemAggregator problemAggregator) {
    throw new UnsupportedOperationException();
  }
}

View File

@ -0,0 +1,63 @@
package org.enso.table.data.column.operation.unary;
import org.enso.table.data.column.builder.BoolBuilder;
import org.enso.table.data.column.operation.UnaryOperation;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.storage.BoolStorage;
import org.enso.table.data.column.storage.ColumnBooleanStorage;
import org.enso.table.data.column.storage.ColumnStorage;
import org.enso.table.data.column.storage.type.BooleanType;
/** An operation that negates every value of a boolean column. */
public class NotOperation extends AbstractUnaryBooleanOperation {
  public static final String NAME = "not";
  public static final UnaryOperation INSTANCE = new NotOperation();

  private NotOperation() {
    super(NAME, false);
  }

  @Override
  public boolean canApply(ColumnStorage storage) {
    return storage.getType() instanceof BooleanType;
  }

  @Override
  public ColumnStorage apply(
      ColumnStorage storage, MapOperationProblemAggregator problemAggregator) {
    if (storage instanceof BoolStorage bits) {
      // Reuse the same value and Nothing bit sets and only flip the `negated` flag.
      boolean flipped = !bits.isNegated();
      return new BoolStorage(bits.getValues(), bits.getIsNothingMap(), bits.size(), flipped);
    }

    BoolBuilder result = createBuilder(storage, problemAggregator);
    if (storage instanceof ColumnBooleanStorage typed) {
      UnaryOperation.applyOverBooleanStorage(
          typed, true, result, (missing, flag) -> result.appendBoolean(!flag));
    } else {
      UnaryOperation.applyOverObjectStorage(
          storage, true, result, row -> appendNegated(row, result));
    }
    return result.seal();
  }

  /**
   * Appends the negation of a boxed boolean row value.
   *
   * @throws IllegalArgumentException if the value is not a {@link Boolean}
   */
  private static void appendNegated(Object value, BoolBuilder target) {
    if (!(value instanceof Boolean flag)) {
      throw new IllegalArgumentException(
          "Unsupported type: " + value.getClass() + " (expected boolean type).");
    }
    target.appendBoolean(!flag);
  }

  /** Never called: {@link #apply} is fully overridden and does not use the per-row path. */
  @Override
  protected void applyObjectRow(
      Object value, BoolBuilder builder, MapOperationProblemAggregator problemAggregator) {
    throw new UnsupportedOperationException();
  }
}

View File

@ -0,0 +1,35 @@
package org.enso.table.data.column.operation.unary;
import org.enso.base.Text_Utils;
import org.enso.table.data.column.builder.LongBuilder;
import org.enso.table.data.column.operation.UnaryOperation;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.storage.ColumnStorage;
import org.enso.table.data.column.storage.type.IntegerType;
import org.enso.table.data.column.storage.type.TextType;
/**
 * An operation that returns the length of each text value, as computed by {@code
 * Text_Utils.grapheme_length}, in a 64-bit integer column.
 */
public class TextLengthOperation extends AbstractUnaryLongOperation {
  public static final String NAME = "text_length";
  public static final UnaryOperation INSTANCE = new TextLengthOperation();

  private TextLengthOperation() {
    super(NAME, true, IntegerType.INT_64);
  }

  @Override
  public boolean canApply(ColumnStorage storage) {
    return storage.getType() instanceof TextType;
  }

  /**
   * Appends the grapheme length of a text row value.
   *
   * @throws IllegalArgumentException if the value is not a {@link String}
   */
  @Override
  protected void applyObjectRow(
      Object value, LongBuilder builder, MapOperationProblemAggregator problemAggregator) {
    if (!(value instanceof String text)) {
      throw new IllegalArgumentException(
          "Unsupported type: " + value.getClass() + " (expected text type).");
    }
    builder.appendLong(Text_Utils.grapheme_length(text));
  }
}

View File

@ -0,0 +1,37 @@
package org.enso.table.data.column.operation.unary;
import java.time.temporal.ChronoField;
import java.time.temporal.Temporal;
import java.time.temporal.TemporalField;
import org.enso.table.data.column.builder.LongBuilder;
import org.enso.table.data.column.operation.UnaryOperation;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
/**
 * A time-part operation that reads a sub-second field and keeps only the remainder modulo a fixed
 * divisor, e.g. the micro-of-second value modulo 1000.
 */
public class TruncatedTimePartOperation extends DatePartOperation {
  public static final String MICROSECOND = "microsecond";
  public static final UnaryOperation MICROSECOND_INSTANCE =
      new TruncatedTimePartOperation(MICROSECOND, ChronoField.MICRO_OF_SECOND, 1000);

  public static final String NANOSECOND = "nanosecond";
  public static final UnaryOperation NANOSECOND_INSTANCE =
      new TruncatedTimePartOperation(NANOSECOND, ChronoField.NANO_OF_SECOND, 1000);

  /** The divisor whose remainder is kept. */
  private final int truncation;

  private TruncatedTimePartOperation(String name, TemporalField field, int truncation) {
    super(name, field, true);
    this.truncation = truncation;
  }

  /**
   * Appends the configured field of a temporal value, reduced modulo the truncation divisor.
   *
   * @throws IllegalArgumentException if the value is not a {@link Temporal}
   */
  @Override
  protected void applyObjectRow(
      Object value, LongBuilder builder, MapOperationProblemAggregator problemAggregator) {
    if (!(value instanceof Temporal temporal)) {
      throw new IllegalArgumentException(
          "Unsupported type: " + value.getClass() + " (expected date/time type).");
    }
    long fullValue = temporal.getLong(field);
    builder.appendLong(fullValue % truncation);
  }
}

View File

@ -0,0 +1,67 @@
package org.enso.table.data.column.operation.unary;
import java.util.function.DoubleToLongFunction;
import org.enso.table.data.column.builder.LongBuilder;
import org.enso.table.data.column.operation.UnaryOperation;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.storage.ColumnLongStorage;
import org.enso.table.data.column.storage.ColumnStorage;
import org.enso.table.data.column.storage.type.IntegerType;
/**
 * Rounding operations (ceil, floor, truncate) that turn a numeric column into a 64-bit integer
 * column.
 *
 * <p>Long storages are returned unchanged. NaN and infinite inputs are reported as arithmetic
 * errors and produce a nothing row.
 */
public class UnaryRoundOperation extends AbstractUnaryLongOperation {
  public static final String CEIL = "ceil";
  public static final UnaryOperation CEIL_INSTANCE =
      new UnaryRoundOperation(CEIL, d -> (long) Math.ceil(d));

  public static final String FLOOR = "floor";
  public static final UnaryOperation FLOOR_INSTANCE =
      new UnaryRoundOperation(FLOOR, d -> (long) Math.floor(d));

  // Declared final like CEIL and FLOOR so the operation name cannot be reassigned at runtime.
  public static final String TRUNCATE = "truncate";
  public static final UnaryOperation TRUNCATE_INSTANCE =
      new UnaryRoundOperation(TRUNCATE, d -> (long) d);

  /** The rounding function applied to finite floating-point values. */
  private final DoubleToLongFunction function;

  private UnaryRoundOperation(String name, DoubleToLongFunction function) {
    super(name, true, IntegerType.INT_64);
    this.function = function;
  }

  @Override
  public boolean canApply(ColumnStorage storage) {
    return storage.getType().isNumeric();
  }

  @Override
  public ColumnStorage apply(
      ColumnStorage storage, MapOperationProblemAggregator problemAggregator) {
    if (storage instanceof ColumnLongStorage longStorage) {
      // For a long storage, the operation is an identity operation.
      return longStorage;
    }
    return super.apply(storage, problemAggregator);
  }

  /**
   * Rounds a single boxed value and appends the result.
   *
   * @param value the row value; null is handled by the base class
   * @param builder the builder receiving the rounded value
   * @param problemAggregator receives an arithmetic error for NaN or infinite inputs
   * @throws IllegalArgumentException if the value is not a {@link Number}
   */
  @Override
  protected void applyObjectRow(
      Object value, LongBuilder builder, MapOperationProblemAggregator problemAggregator) {
    // Null handled by base class
    switch (value) {
      case Double d -> appendRounded(d, builder, problemAggregator);
      case Float f -> appendRounded(f.doubleValue(), builder, problemAggregator);
        // Rounding an integer is the identity. Appending it directly avoids the precision loss a
        // round trip through double would cause for magnitudes above 2^53.
      case Long l -> builder.appendLong(l.longValue());
      case Integer i -> builder.appendLong(i.longValue());
      case Short s -> builder.appendLong(s.longValue());
      case Byte b -> builder.appendLong(b.longValue());
      case Number n -> appendRounded(n.doubleValue(), builder, problemAggregator);
      default -> throw new IllegalArgumentException(
          "Unsupported type: " + value.getClass() + " (expected numeric type).");
    }
  }

  /** Rounds a floating-point value, reporting NaN and infinities instead of converting them. */
  private void appendRounded(
      double d, LongBuilder builder, MapOperationProblemAggregator problemAggregator) {
    if (Double.isNaN(d) || Double.isInfinite(d)) {
      String msg = "Value is " + d;
      problemAggregator.reportArithmeticError(msg, builder.getCurrentSize());
      builder.appendNulls(1);
    } else {
      // NOTE(review): finite values outside the long range are clamped by the (long) cast
      // without any report - confirm whether that is intended.
      builder.appendLong(function.applyAsLong(d));
    }
  }
}

View File

@ -8,7 +8,6 @@ import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.operation.map.BinaryMapOperation;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.operation.map.MapOperationStorage;
import org.enso.table.data.column.operation.map.UnaryMapOperation;
import org.enso.table.data.column.operation.map.bool.BooleanIsInOp;
import org.enso.table.data.column.storage.type.BooleanType;
import org.enso.table.data.column.storage.type.StorageType;
@ -22,7 +21,8 @@ import org.graalvm.polyglot.Context;
import org.graalvm.polyglot.Value;
/** A boolean column storage. */
public final class BoolStorage extends Storage<Boolean> {
public final class BoolStorage extends Storage<Boolean>
implements ColumnBooleanStorage, ColumnStorageWithNothingMap {
private static final MapOperationStorage<Boolean, BoolStorage> ops = buildOps();
private final BitSet values;
private final BitSet isMissing;
@ -69,17 +69,6 @@ public final class BoolStorage extends Storage<Boolean> {
return isMissing.get(idx) ? null : getItem(idx);
}
@Override
public boolean isUnaryOpVectorized(String name) {
return ops.isSupportedUnary(name);
}
@Override
public Storage<?> runVectorizedUnaryMap(
String name, MapOperationProblemAggregator problemAggregator) {
return ops.runUnaryMap(name, this, problemAggregator);
}
public boolean getItem(long idx) {
return negated != values.get((int) idx);
}
@ -256,15 +245,6 @@ public final class BoolStorage extends Storage<Boolean> {
private static MapOperationStorage<Boolean, BoolStorage> buildOps() {
MapOperationStorage<Boolean, BoolStorage> ops = new MapOperationStorage<>();
ops.add(
new UnaryMapOperation<>(Maps.NOT) {
@Override
protected BoolStorage runUnaryMap(
BoolStorage storage, MapOperationProblemAggregator problemAggregator) {
return new BoolStorage(
storage.values, storage.isMissing, storage.size, !storage.negated);
}
})
.add(
new BinaryMapOperation<>(Maps.EQ) {
@Override
public BoolStorage runBinaryMap(
@ -446,14 +426,6 @@ public final class BoolStorage extends Storage<Boolean> {
return new BoolStorage(out, missing, storage.size, negated);
}
})
.add(
new UnaryMapOperation<>(Maps.IS_NOTHING) {
@Override
public BoolStorage runUnaryMap(
BoolStorage storage, MapOperationProblemAggregator problemAggregator) {
return new BoolStorage(storage.isMissing, new BitSet(), storage.size, false);
}
})
.add(new BooleanIsInOp());
return ops;
}
@ -505,4 +477,17 @@ public final class BoolStorage extends Storage<Boolean> {
return new BoolStorage(newValues, newMissing, newSize, negated);
}
/** Returns the internal {@code isMissing} bit set itself, not a copy. */
@Override
public BitSet getIsNothingMap() {
return isMissing;
}
/**
 * Gets the primitive value at the given index.
 *
 * @throws ValueIsNothingException if the row at the index is nothing
 */
@Override
public boolean get(long index) throws ValueIsNothingException {
  if (!isNothing(index)) {
    return getItem(index);
  }
  throw new ValueIsNothingException(index);
}
}

View File

@ -0,0 +1,6 @@
package org.enso.table.data.column.storage;
/** A column storage whose values can be read as primitive booleans. */
public interface ColumnBooleanStorage extends ColumnStorage {
/** Gets the value at a given index. Throws ValueIsNothingException if the index is nothing. */
boolean get(long index) throws ValueIsNothingException;
}

View File

@ -0,0 +1,6 @@
package org.enso.table.data.column.storage;
/** A column storage whose values can be read as primitive doubles. */
public interface ColumnDoubleStorage extends ColumnStorage {
/** Gets the value at a given index. Throws ValueIsNothingException if the index is nothing. */
double get(long index) throws ValueIsNothingException;
}

View File

@ -0,0 +1,6 @@
package org.enso.table.data.column.storage;
/** A column storage whose values can be read as primitive longs. */
public interface ColumnLongStorage extends ColumnStorage {
/** Gets the value at a given index. Throws ValueIsNothingException if the index is nothing. */
long get(long index) throws ValueIsNothingException;
}

View File

@ -0,0 +1,18 @@
package org.enso.table.data.column.storage;
import org.enso.table.data.column.storage.type.StorageType;
/** Basic interface of a column storage. */
public interface ColumnStorage {
/** Gets the size of the storage. */
long getSize();
/** Gets the value type of the storage. */
StorageType getType();
/** Gets if a value is Nothing at a given index. */
boolean isNothing(long index);
/** Gets the value at a given index. */
Object getItemAsObject(long index);
}

View File

@ -0,0 +1,9 @@
package org.enso.table.data.column.storage;
import org.enso.table.data.column.storage.type.StorageType;
/**
 * Implemented by storages that can infer a more precise type than their declared one and offer a
 * storage of that inferred type.
 */
public interface ColumnStorageWithInferredStorage {
/** Infers the precise type of the stored values. */
StorageType inferPreciseType();
/**
 * Gets the storage corresponding to the inferred type.
 * NOTE(review): the result may be absent (null) when nothing more specific can be inferred -
 * confirm against the implementations.
 */
ColumnStorage getInferredStorage();
}

View File

@ -0,0 +1,8 @@
package org.enso.table.data.column.storage;
import java.util.BitSet;
/** A column storage that exposes which rows are nothing as a {@link BitSet}. */
public interface ColumnStorageWithNothingMap extends ColumnStorage {
/** Gets the isNothing map for the storage. */
BitSet getIsNothingMap();
}

View File

@ -17,7 +17,7 @@ import org.graalvm.polyglot.Context;
* more precise type if all values have a common type, and will allow operations on this more
* specific type.
*/
public final class MixedStorage extends ObjectStorage {
public final class MixedStorage extends ObjectStorage implements ColumnStorageWithInferredStorage {
private StorageType inferredType = null;
/**
@ -119,7 +119,7 @@ public final class MixedStorage extends ObjectStorage {
return specialized.inferPreciseTypeShrunk();
}
private Storage<?> getInferredStorage() {
public Storage<?> getInferredStorage() {
if (!hasSpecializedStorageBeenInferred) {
StorageType inferredType = inferPreciseType();
if (inferredType instanceof AnyObjectType) {
@ -147,37 +147,6 @@ public final class MixedStorage extends ObjectStorage {
AVAILABLE_IN_SUPER
}
/**
* The resolution depends on the following philosophy:
*
* <p>1. If the inferred storage is already cached, we prefer to use it since it will provide us
* with a more efficient implementation.
*
* <p>2. If it is not yet cached, we do not want to compute it (since it is costly) unless it is
* necessary - if our basic storage already provides the operation, we will use that
* implementation - even if it may not be as fast as a specialized one, the cost of computing the
* precise storage may just not be worth it. If our storage does not provide the operation, we now
* need to try getting the inferred storage, to check if it may provide it.
*/
private VectorizedOperationAvailability resolveUnaryOp(String name) {
// Shortcut - if the storage is already specialized - we prefer it.
if (cachedInferredStorage != null && cachedInferredStorage.isUnaryOpVectorized(name)) {
return VectorizedOperationAvailability.AVAILABLE_IN_SPECIALIZED_STORAGE;
}
// Otherwise, we try to avoid specializing if not yet necessary.
if (super.isUnaryOpVectorized(name)) {
return VectorizedOperationAvailability.AVAILABLE_IN_SUPER;
} else {
// But if our storage does not provide the operation, we have to try checking the other one.
if (getInferredStorage() != null && getInferredStorage().isUnaryOpVectorized(name)) {
return VectorizedOperationAvailability.AVAILABLE_IN_SPECIALIZED_STORAGE;
} else {
return VectorizedOperationAvailability.NOT_AVAILABLE;
}
}
}
/** {@see resolveUnaryOp} for explanations. */
private VectorizedOperationAvailability resolveBinaryOp(String name) {
// Shortcut - if the storage is already specialized - we prefer it.
@ -218,22 +187,6 @@ public final class MixedStorage extends ObjectStorage {
}
}
@Override
public boolean isUnaryOpVectorized(String name) {
return resolveUnaryOp(name) != VectorizedOperationAvailability.NOT_AVAILABLE;
}
@Override
public Storage<?> runVectorizedUnaryMap(
String name, MapOperationProblemAggregator problemAggregator) {
if (resolveUnaryOp(name) == VectorizedOperationAvailability.AVAILABLE_IN_SPECIALIZED_STORAGE) {
return getInferredStorage().runVectorizedUnaryMap(name, problemAggregator);
} else {
// Even if the operation is not available, we rely on super to report an exception.
return super.runVectorizedUnaryMap(name, problemAggregator);
}
}
@Override
public boolean isBinaryOpVectorized(String name) {
return resolveBinaryOp(name) != VectorizedOperationAvailability.NOT_AVAILABLE;

View File

@ -56,17 +56,6 @@ public class MixedStorageFacade extends Storage<Object> {
return underlyingStorage.getItemBoxed(idx);
}
@Override
public boolean isUnaryOpVectorized(String name) {
return underlyingStorage.isUnaryOpVectorized(name);
}
@Override
public Storage<?> runVectorizedUnaryMap(
String name, MapOperationProblemAggregator problemAggregator) {
return underlyingStorage.runVectorizedUnaryMap(name, problemAggregator);
}
@Override
public boolean isBinaryOpVectorized(String name) {
return underlyingStorage.isBinaryOpVectorized(name);

View File

@ -1,12 +1,8 @@
package org.enso.table.data.column.storage;
import java.util.BitSet;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.operation.map.MapOperationStorage;
import org.enso.table.data.column.operation.map.UnaryMapOperation;
import org.enso.table.data.column.storage.type.AnyObjectType;
import org.enso.table.data.column.storage.type.StorageType;
import org.graalvm.polyglot.Context;
/** A column storing arbitrary Java objects. */
public sealed class ObjectStorage extends SpecializedStorage<Object> permits MixedStorage {
@ -35,23 +31,6 @@ public sealed class ObjectStorage extends SpecializedStorage<Object> permits Mix
/**
 * Creates a fresh operation table containing the {@code Maps.IS_NOTHING} unary operation.
 *
 * <p>The operation marks every row whose slot in the storage's backing array is {@code null}.
 *
 * @return the newly created operation table
 */
public static <T, S extends SpecializedStorage<T>> MapOperationStorage<T, S> buildObjectOps() {
  MapOperationStorage<T, S> table = new MapOperationStorage<>();
  table.add(
      new UnaryMapOperation<>(Maps.IS_NOTHING) {
        @Override
        protected BoolStorage runUnaryMap(
            S storage, MapOperationProblemAggregator problemAggregator) {
          Context ctx = Context.getCurrent();
          BitSet nothingMask = new BitSet();
          int row = 0;
          while (row < storage.size) {
            if (storage.data[row] == null) {
              nothingMask.set(row);
            }
            // Poll the safepoint once per row, exactly as often as rows are visited.
            ctx.safepoint();
            row++;
          }
          return new BoolStorage(nothingMask, new BitSet(), storage.size, false);
        }
      });
  return table;
}
}

View File

@ -80,17 +80,6 @@ public abstract class SpecializedStorage<T> extends Storage<T> {
return data[(int) idx] == null;
}
/** Checks the storage's operation table for a unary operation with the given name. */
@Override
public boolean isUnaryOpVectorized(String name) {
  boolean registered = ops.isSupportedUnary(name);
  return registered;
}
/** Runs the named unary operation from the operation table against this storage. */
@Override
public Storage<?> runVectorizedUnaryMap(
    String name, MapOperationProblemAggregator problemAggregator) {
  Storage<?> mapped = ops.runUnaryMap(name, this, problemAggregator);
  return mapped;
}
@Override
public boolean isBinaryOpVectorized(String name) {
return ops.isSupportedBinary(name);

View File

@ -4,7 +4,6 @@ import java.util.BitSet;
import java.util.HashMap;
import java.util.List;
import java.util.function.BiFunction;
import java.util.function.Function;
import org.enso.base.polyglot.Polyglot_Utils;
import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.operation.cast.CastProblemAggregator;
@ -20,7 +19,7 @@ import org.graalvm.polyglot.Context;
import org.graalvm.polyglot.Value;
/** An abstract representation of a data column. */
public abstract class Storage<T> {
public abstract class Storage<T> implements ColumnStorage {
/** A constant representing the index of a missing value in a column. */
public static final int NOT_FOUND_INDEX = -1;
@ -99,45 +98,18 @@ public abstract class Storage<T> {
public static final String DIV = "/";
public static final String MOD = "%";
public static final String POWER = "^";
public static final String TRUNCATE = "truncate";
public static final String CEIL = "ceil";
public static final String FLOOR = "floor";
public static final String ROUND = "round";
public static final String NOT = "not";
public static final String AND = "&&";
public static final String OR = "||";
public static final String IS_NOTHING = "is_nothing";
public static final String IS_NAN = "is_nan";
public static final String IS_INFINITE = "is_infinite";
public static final String IS_EMPTY = "is_empty";
public static final String STARTS_WITH = "starts_with";
public static final String ENDS_WITH = "ends_with";
public static final String TEXT_LENGTH = "text_length";
public static final String TEXT_LEFT = "text_left";
public static final String TEXT_RIGHT = "text_right";
public static final String CONTAINS = "contains";
public static final String LIKE = "like";
public static final String IS_IN = "is_in";
public static final String YEAR = "year";
public static final String QUARTER = "quarter";
public static final String MONTH = "month";
public static final String WEEK = "week";
public static final String DAY = "day";
public static final String HOUR = "hour";
public static final String MINUTE = "minute";
public static final String SECOND = "second";
public static final String MILLISECOND = "millisecond";
public static final String MICROSECOND = "microsecond";
public static final String NANOSECOND = "nanosecond";
}
/**
 * Specifies if the given unary operation has a vectorized implementation available for this
 * storage.
 *
 * @param name the name of the unary operation
 * @return true if a vectorized implementation is available
 */
public abstract boolean isUnaryOpVectorized(String name);
/**
 * Runs a vectorized unary operation.
 *
 * @param name the name of the unary operation
 * @param problemAggregator the aggregator handed to the operation
 * @return the storage produced by the operation
 */
public abstract Storage<?> runVectorizedUnaryMap(
String name, MapOperationProblemAggregator problemAggregator);
/**
 * Specifies if the given binary operation has a vectorized implementation available for this
 * storage.
 *
 * @param name the name of the binary operation
 * @return true if a vectorized implementation is available
 */
public abstract boolean isBinaryOpVectorized(String name);
@ -166,37 +138,6 @@ public abstract class Storage<T> {
public abstract Storage<?> runVectorizedZip(
String name, Storage<?> argument, MapOperationProblemAggregator problemAggregator);
/**
 * Runs a unary function row by row over this storage and collects the results into a new
 * storage.
 *
 * @param function the function to run; it receives the boxed item of each row.
 * @param skipNa if true, the function is not called for missing values and a missing value is
 *     appended to the result instead; if false, the function is called for every row and
 *     receives {@code null} for missing values.
 * @param expectedResultType the type used to select the builder for the result storage
 * @param problemAggregator the aggregator passed to the result builder
 * @return the result of running the function on each row
 */
public final Storage<?> unaryMap(
Function<Object, Value> function,
boolean skipNa,
StorageType expectedResultType,
ProblemAggregator problemAggregator) {
// The builder is created for size() rows up front; rows are appended with appendNoGrow.
Builder storageBuilder = Builder.getForType(expectedResultType, size(), problemAggregator);
Context context = Context.getCurrent();
for (int i = 0; i < size(); i++) {
Object it = getItemBoxed(i);
if (skipNa && it == null) {
// Missing value with skipNa set: propagate it without calling the function.
storageBuilder.appendNulls(1);
} else {
Value result = function.apply(it);
// Convert the polyglot result before handing it to the builder.
Object converted = Polyglot_Utils.convertPolyglotValue(result);
storageBuilder.appendNoGrow(converted);
}
// Polled once per row.
context.safepoint();
}
return storageBuilder.seal();
}
/**
* Runs a 2-argument function on each element in this storage.
*
@ -269,34 +210,6 @@ public abstract class Storage<T> {
return storageBuilder.seal();
}
/**
 * Runs a unary operation.
 *
 * <p>If a vectorized implementation is available, it is used; otherwise {@code checkFallback} is
 * called and the fallback is applied row by row through {@code unaryMap}.
 *
 * @param name the name of the vectorized operation
 * @param problemAggregator the problem aggregator, used by whichever implementation runs
 * @param fallback the fallback Enso function to run if vectorized implementation is not
 *     available; it should never raise dataflow errors.
 * @param skipNa if true, the fallback is not called for missing values and the result is missing
 *     for those rows; it is only used on the fallback path.
 * @param expectedResultType the expected type for the result storage; it is ignored if the
 *     operation is vectorized
 * @return the result of running the operation on each row
 */
public final Storage<?> vectorizedOrFallbackUnaryMap(
String name,
MapOperationProblemAggregator problemAggregator,
Function<Object, Value> fallback,
boolean skipNa,
StorageType expectedResultType) {
if (isUnaryOpVectorized(name)) {
return runVectorizedUnaryMap(name, problemAggregator);
} else {
checkFallback(fallback, expectedResultType, name);
return unaryMap(fallback, skipNa, expectedResultType, problemAggregator);
}
}
/**
* Runs a binary operation with a scalar argument.
*
@ -532,4 +445,19 @@ public abstract class Storage<T> {
StorageConverter<?> converter = StorageConverter.fromStorageType(targetType);
return converter.cast(this, castProblemAggregator);
}
/** Returns the value of {@code size()}, exposed under the name required by the interface. */
@Override
public long getSize() {
return size();
}
/**
 * Reports whether the value at the given position is missing; delegates to {@code isNa}.
 *
 * @param index the position to check
 * @return the result of {@code isNa(index)}
 */
@Override
public boolean isNothing(long index) {
return isNa(index);
}
/**
 * Returns the item at the given position as a boxed object.
 *
 * @param index the position of the item
 * @return the result of {@code getItemBoxed} for that position
 * @throws ArithmeticException if {@code index} does not fit in an {@code int}
 */
@Override
public Object getItemAsObject(long index) {
  // A plain (int) cast would silently wrap an oversized index onto an unrelated row.
  return getItemBoxed(Math.toIntExact(index));
}
}

View File

@ -5,8 +5,6 @@ import org.enso.base.Text_Utils;
import org.enso.table.data.column.operation.map.BinaryMapOperation;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.operation.map.MapOperationStorage;
import org.enso.table.data.column.operation.map.UnaryMapOperation;
import org.enso.table.data.column.operation.map.numeric.UnaryIntegerOp;
import org.enso.table.data.column.operation.map.text.LikeOp;
import org.enso.table.data.column.operation.map.text.StringBooleanOp;
import org.enso.table.data.column.operation.map.text.StringIsInOp;
@ -91,24 +89,6 @@ public final class StringStorage extends SpecializedStorage<String> {
return new BoolStorage(r, missing, storage.size(), false);
}
});
t.add(
new UnaryMapOperation<>(Maps.IS_EMPTY) {
@Override
protected BoolStorage runUnaryMap(
SpecializedStorage<String> storage, MapOperationProblemAggregator problemAggregator) {
BitSet r = new BitSet();
Context context = Context.getCurrent();
for (int i = 0; i < storage.size; i++) {
String s = storage.data[i];
if (s == null || s.isEmpty()) {
r.set(i);
}
context.safepoint();
}
return new BoolStorage(r, new BitSet(), storage.size, false);
}
});
t.add(
new StringBooleanOp(Maps.STARTS_WITH) {
@Override
@ -123,13 +103,6 @@ public final class StringStorage extends SpecializedStorage<String> {
return Text_Utils.ends_with(a, b);
}
});
t.add(
new UnaryIntegerOp<>(Maps.TEXT_LENGTH) {
@Override
protected long doOperation(String a) {
return Text_Utils.grapheme_length(a);
}
});
t.add(
new StringLongToStringOp(Maps.TEXT_LEFT) {
@Override

View File

@ -0,0 +1,9 @@
package org.enso.table.data.column.storage;
/**
 * Signals that a value was requested at a position that holds Nothing.
 *
 * <p>The offending position is available through {@link #index} and is included in the message.
 */
public class ValueIsNothingException extends RuntimeException {
  /** The position whose value was requested. */
  public final long index;

  /**
   * Creates the exception for the given position.
   *
   * @param index the position that holds Nothing
   */
  public ValueIsNothingException(long index) {
    this.index = index;
  }

  /**
   * Builds the message on demand, so constructing the exception does no string work.
   *
   * @return a description naming the offending index
   */
  @Override
  public String getMessage() {
    return "The value at index " + index + " is Nothing.";
  }
}

View File

@ -2,7 +2,6 @@ package org.enso.table.data.column.storage.datetime;
import java.time.LocalDate;
import org.enso.table.data.column.operation.map.MapOperationStorage;
import org.enso.table.data.column.operation.map.datetime.DatePartExtractors;
import org.enso.table.data.column.operation.map.datetime.DateTimeIsInOp;
import org.enso.table.data.column.storage.ObjectStorage;
import org.enso.table.data.column.storage.SpecializedStorage;
@ -22,11 +21,6 @@ public final class DateStorage extends SpecializedStorage<LocalDate> {
MapOperationStorage<LocalDate, SpecializedStorage<LocalDate>> t =
ObjectStorage.buildObjectOps();
t.add(new DateTimeIsInOp<>(LocalDate.class));
t.add(DatePartExtractors.year());
t.add(DatePartExtractors.quarter());
t.add(DatePartExtractors.month());
t.add(DatePartExtractors.week());
t.add(DatePartExtractors.day());
return t;
}

View File

@ -6,7 +6,6 @@ import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.builder.ObjectBuilder;
import org.enso.table.data.column.operation.map.GenericBinaryObjectMapOperation;
import org.enso.table.data.column.operation.map.MapOperationStorage;
import org.enso.table.data.column.operation.map.datetime.DatePartExtractors;
import org.enso.table.data.column.operation.map.datetime.DateTimeIsInOp;
import org.enso.table.data.column.storage.ObjectStorage;
import org.enso.table.data.column.storage.SpecializedStorage;
@ -26,17 +25,6 @@ public final class DateTimeStorage extends SpecializedStorage<ZonedDateTime> {
MapOperationStorage<ZonedDateTime, SpecializedStorage<ZonedDateTime>> t =
ObjectStorage.buildObjectOps();
t.add(new DateTimeIsInOp<>(ZonedDateTime.class));
t.add(DatePartExtractors.year());
t.add(DatePartExtractors.quarter());
t.add(DatePartExtractors.month());
t.add(DatePartExtractors.week());
t.add(DatePartExtractors.day());
t.add(DatePartExtractors.hour());
t.add(DatePartExtractors.minute());
t.add(DatePartExtractors.second());
t.add(DatePartExtractors.millisecond());
t.add(DatePartExtractors.microsecond());
t.add(DatePartExtractors.nanosecond());
t.add(
new GenericBinaryObjectMapOperation<
ZonedDateTime, SpecializedStorage<ZonedDateTime>, Duration>(

View File

@ -6,7 +6,6 @@ import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.builder.ObjectBuilder;
import org.enso.table.data.column.operation.map.GenericBinaryObjectMapOperation;
import org.enso.table.data.column.operation.map.MapOperationStorage;
import org.enso.table.data.column.operation.map.datetime.DatePartExtractors;
import org.enso.table.data.column.operation.map.datetime.DateTimeIsInOp;
import org.enso.table.data.column.storage.ObjectStorage;
import org.enso.table.data.column.storage.SpecializedStorage;
@ -26,12 +25,6 @@ public final class TimeOfDayStorage extends SpecializedStorage<LocalTime> {
MapOperationStorage<LocalTime, SpecializedStorage<LocalTime>> t =
ObjectStorage.buildObjectOps();
t.add(new DateTimeIsInOp<>(LocalTime.class));
t.add(DatePartExtractors.hour());
t.add(DatePartExtractors.minute());
t.add(DatePartExtractors.second());
t.add(DatePartExtractors.millisecond());
t.add(DatePartExtractors.microsecond());
t.add(DatePartExtractors.nanosecond());
t.add(
new GenericBinaryObjectMapOperation<LocalTime, SpecializedStorage<LocalTime>, Duration>(
Maps.SUB, LocalTime.class, TimeOfDayStorage.class) {

View File

@ -3,9 +3,7 @@ package org.enso.table.data.column.storage.numeric;
import java.util.BitSet;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.operation.map.MapOperationStorage;
import org.enso.table.data.column.operation.map.UnaryMapOperation;
import org.enso.table.data.column.operation.map.numeric.LongRoundOp;
import org.enso.table.data.column.operation.map.numeric.UnaryLongToLongOp;
import org.enso.table.data.column.operation.map.numeric.arithmetic.AddOp;
import org.enso.table.data.column.operation.map.numeric.arithmetic.DivideOp;
import org.enso.table.data.column.operation.map.numeric.arithmetic.ModOp;
@ -19,29 +17,21 @@ import org.enso.table.data.column.operation.map.numeric.comparisons.LessComparis
import org.enso.table.data.column.operation.map.numeric.comparisons.LessOrEqualComparison;
import org.enso.table.data.column.operation.map.numeric.isin.LongIsInOp;
import org.enso.table.data.column.storage.BoolStorage;
import org.enso.table.data.column.storage.ColumnLongStorage;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.ValueIsNothingException;
import org.enso.table.data.column.storage.type.IntegerType;
import org.enso.table.data.column.storage.type.StorageType;
import org.graalvm.polyglot.Context;
public abstract class AbstractLongStorage extends NumericStorage<Long> {
public abstract class AbstractLongStorage extends NumericStorage<Long>
implements ColumnLongStorage {
public abstract long getItem(int idx);
public abstract BitSet getIsMissing();
private static final MapOperationStorage<Long, AbstractLongStorage> ops = buildOps();
/** Checks the shared operation table for a unary operation with the given name. */
@Override
public boolean isUnaryOpVectorized(String name) {
  boolean registered = ops.isSupportedUnary(name);
  return registered;
}
/** Runs the named unary operation from the shared operation table against this storage. */
@Override
public Storage<?> runVectorizedUnaryMap(
    String name, MapOperationProblemAggregator problemAggregator) {
  Storage<?> mapped = ops.runUnaryMap(name, this, problemAggregator);
  return mapped;
}
@Override
public boolean isBinaryOpVectorized(String name) {
return ops.isSupportedBinary(name);
@ -124,59 +114,12 @@ public abstract class AbstractLongStorage extends NumericStorage<Long> {
.add(new DivideOp<>())
.add(new ModOp<>())
.add(new PowerOp<>())
.add(
new UnaryLongToLongOp(Maps.TRUNCATE) {
@Override
protected long doOperation(long a) {
return a;
}
})
.add(
new UnaryLongToLongOp(Maps.CEIL) {
@Override
protected long doOperation(long a) {
return a;
}
})
.add(
new UnaryLongToLongOp(Maps.FLOOR) {
@Override
protected long doOperation(long a) {
return a;
}
})
.add(new LongRoundOp(Maps.ROUND))
.add(new LessComparison<>())
.add(new LessOrEqualComparison<>())
.add(new EqualsComparison<>())
.add(new GreaterOrEqualComparison<>())
.add(new GreaterComparison<>())
.add(
new UnaryMapOperation<>(Storage.Maps.IS_NOTHING) {
@Override
public BoolStorage runUnaryMap(
AbstractLongStorage storage, MapOperationProblemAggregator problemAggregator) {
return new BoolStorage(storage.getIsMissing(), new BitSet(), storage.size(), false);
}
})
.add(
new UnaryMapOperation<>(Storage.Maps.IS_NAN) {
@Override
public BoolStorage runUnaryMap(
AbstractLongStorage storage, MapOperationProblemAggregator problemAggregator) {
BitSet isNaN = new BitSet();
return new BoolStorage(isNaN, storage.getIsMissing(), storage.size(), false);
}
})
.add(
new UnaryMapOperation<>(Storage.Maps.IS_INFINITE) {
@Override
public BoolStorage runUnaryMap(
AbstractLongStorage storage, MapOperationProblemAggregator problemAggregator) {
BitSet isInfinite = new BitSet();
return new BoolStorage(isInfinite, storage.getIsMissing(), storage.size(), false);
}
})
.add(new LongIsInOp());
return ops;
}
@ -222,4 +165,12 @@ public abstract class AbstractLongStorage extends NumericStorage<Long> {
* <p>Ideally it should avoid copying the data, if it's possible.
*/
public abstract AbstractLongStorage widen(IntegerType widerType);
/**
 * Returns the primitive value stored at the given position.
 *
 * @param index the position of the value
 * @return the value returned by {@code getItem} for that position
 * @throws ValueIsNothingException if {@code isNothing(index)} is true
 * @throws ArithmeticException if {@code index} does not fit in an {@code int}
 */
@Override
public long get(long index) throws ValueIsNothingException {
  if (isNothing(index)) {
    throw new ValueIsNothingException(index);
  }
  // A plain (int) cast would silently wrap an oversized index onto an unrelated row.
  return getItem(Math.toIntExact(index));
}
}

View File

@ -6,8 +6,6 @@ import java.util.List;
import org.enso.table.data.column.builder.NumericBuilder;
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
import org.enso.table.data.column.operation.map.MapOperationStorage;
import org.enso.table.data.column.operation.map.UnaryMapOperation;
import org.enso.table.data.column.operation.map.numeric.DoubleLongMapOpWithSpecialNumericHandling;
import org.enso.table.data.column.operation.map.numeric.DoubleRoundOp;
import org.enso.table.data.column.operation.map.numeric.arithmetic.AddOp;
import org.enso.table.data.column.operation.map.numeric.arithmetic.DivideOp;
@ -23,6 +21,7 @@ import org.enso.table.data.column.operation.map.numeric.comparisons.LessOrEqualC
import org.enso.table.data.column.operation.map.numeric.helpers.DoubleArrayAdapter;
import org.enso.table.data.column.operation.map.numeric.isin.DoubleIsInOp;
import org.enso.table.data.column.storage.BoolStorage;
import org.enso.table.data.column.storage.ColumnStorageWithNothingMap;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.type.FloatType;
import org.enso.table.data.column.storage.type.IntegerType;
@ -35,7 +34,8 @@ import org.graalvm.polyglot.Context;
import org.graalvm.polyglot.Value;
/** A column containing floating point numbers. */
public final class DoubleStorage extends NumericStorage<Double> implements DoubleArrayAdapter {
public final class DoubleStorage extends NumericStorage<Double>
implements DoubleArrayAdapter, ColumnStorageWithNothingMap {
private final long[] data;
private final BitSet isMissing;
private final int size;
@ -88,17 +88,6 @@ public final class DoubleStorage extends NumericStorage<Double> implements Doubl
return isMissing.get(idx) ? null : Double.longBitsToDouble(data[idx]);
}
/** Checks the operation table for a unary operation with the given name. */
@Override
public boolean isUnaryOpVectorized(String name) {
  boolean registered = ops.isSupportedUnary(name);
  return registered;
}
/** Runs the named unary operation from the operation table against this storage. */
@Override
public Storage<?> runVectorizedUnaryMap(
    String name, MapOperationProblemAggregator problemAggregator) {
  Storage<?> mapped = ops.runUnaryMap(name, this, problemAggregator);
  return mapped;
}
/**
* @inheritDoc
*/
@ -309,75 +298,12 @@ public final class DoubleStorage extends NumericStorage<Double> implements Doubl
.add(new DivideOp<>())
.add(new ModOp<>())
.add(new PowerOp<>())
.add(
new DoubleLongMapOpWithSpecialNumericHandling(Maps.TRUNCATE) {
@Override
protected long doOperation(double a) {
return (long) a;
}
})
.add(
new DoubleLongMapOpWithSpecialNumericHandling(Maps.CEIL) {
@Override
protected long doOperation(double a) {
return (long) Math.ceil(a);
}
})
.add(
new DoubleLongMapOpWithSpecialNumericHandling(Maps.FLOOR) {
@Override
protected long doOperation(double a) {
return (long) Math.floor(a);
}
})
.add(new DoubleRoundOp(Maps.ROUND))
.add(new LessComparison<>())
.add(new LessOrEqualComparison<>())
.add(new EqualsComparison<>())
.add(new GreaterOrEqualComparison<>())
.add(new GreaterComparison<>())
.add(
new UnaryMapOperation<>(Maps.IS_NOTHING) {
@Override
public BoolStorage runUnaryMap(
DoubleStorage storage, MapOperationProblemAggregator problemAggregator) {
return new BoolStorage(storage.isMissing, new BitSet(), storage.size, false);
}
})
.add(
new UnaryMapOperation<>(Maps.IS_NAN) {
@Override
public BoolStorage runUnaryMap(
DoubleStorage storage, MapOperationProblemAggregator problemAggregator) {
BitSet nans = new BitSet();
Context context = Context.getCurrent();
for (int i = 0; i < storage.size; i++) {
if (!storage.isNa(i) && Double.isNaN(storage.getItemAsDouble(i))) {
nans.set(i);
}
context.safepoint();
}
return new BoolStorage(nans, storage.isMissing, storage.size, false);
}
})
.add(
new UnaryMapOperation<>(Maps.IS_INFINITE) {
@Override
public BoolStorage runUnaryMap(
DoubleStorage storage, MapOperationProblemAggregator problemAggregator) {
BitSet infintes = new BitSet();
Context context = Context.getCurrent();
for (int i = 0; i < storage.size; i++) {
if (!storage.isNa(i) && Double.isInfinite(storage.getItemAsDouble(i))) {
infintes.set(i);
}
context.safepoint();
}
return new BoolStorage(infintes, storage.isMissing, storage.size, false);
}
})
.add(new DoubleIsInOp());
return ops;
}
@ -484,4 +410,9 @@ public final class DoubleStorage extends NumericStorage<Double> implements Doubl
// And rely on its shrinking logic.
return longAdapter.inferPreciseTypeShrunk();
}
/**
 * Returns the bit set tracking missing values.
 *
 * <p>The {@code isMissing} field itself is returned, not a copy.
 */
@Override
public BitSet getIsNothingMap() {
return isMissing;
}
}

View File

@ -6,6 +6,7 @@ import java.util.List;
import org.enso.base.polyglot.NumericConverter;
import org.enso.table.data.column.builder.BigIntegerBuilder;
import org.enso.table.data.column.builder.NumericBuilder;
import org.enso.table.data.column.storage.ColumnStorageWithNothingMap;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.type.IntegerType;
import org.enso.table.data.column.storage.type.StorageType;
@ -17,7 +18,7 @@ import org.graalvm.polyglot.Context;
import org.graalvm.polyglot.Value;
/** A column storing 64-bit integers. */
public final class LongStorage extends AbstractLongStorage {
public final class LongStorage extends AbstractLongStorage implements ColumnStorageWithNothingMap {
// TODO [RW] at some point we will want to add separate storage classes for byte, short and int,
// for more compact storage and more efficient handling of smaller integers; for now we will be
// handling this just by checking the bounds
@ -259,4 +260,9 @@ public final class LongStorage extends AbstractLongStorage {
assert widerType.fits(type);
return new LongStorage(data, size, isMissing, widerType);
}
/**
 * Returns the bit set tracking missing values.
 *
 * <p>The {@code isMissing} field itself is returned, not a copy.
 */
@Override
public BitSet getIsNothingMap() {
return isMissing;
}
}

View File

@ -2,4 +2,19 @@ package org.enso.table.data.column.storage.type;
/** A storage type that is not numeric and has neither a date part nor a time part. */
public record AnyObjectType() implements StorageType {
/** A pre-built instance of this type. */
public static final AnyObjectType INSTANCE = new AnyObjectType();
/** Always {@code false} for this type. */
@Override
public boolean isNumeric() {
return false;
}
/** Always {@code false} for this type. */
@Override
public boolean hasDate() {
return false;
}
/** Always {@code false} for this type. */
@Override
public boolean hasTime() {
return false;
}
}

View File

@ -2,4 +2,19 @@ package org.enso.table.data.column.storage.type;
/** A numeric storage type that has neither a date part nor a time part. */
public record BigIntegerType() implements StorageType {
/** A pre-built instance of this type. */
public static final BigIntegerType INSTANCE = new BigIntegerType();
/** Always {@code true} for this type. */
@Override
public boolean isNumeric() {
return true;
}
/** Always {@code false} for this type. */
@Override
public boolean hasDate() {
return false;
}
/** Always {@code false} for this type. */
@Override
public boolean hasTime() {
return false;
}
}

View File

@ -2,4 +2,19 @@ package org.enso.table.data.column.storage.type;
/** A storage type that is not numeric and has neither a date part nor a time part. */
public record BooleanType() implements StorageType {
/** A pre-built instance of this type. */
public static final BooleanType INSTANCE = new BooleanType();
/** Always {@code false} for this type. */
@Override
public boolean isNumeric() {
return false;
}
/** Always {@code false} for this type. */
@Override
public boolean hasDate() {
return false;
}
/** Always {@code false} for this type. */
@Override
public boolean hasTime() {
return false;
}
}

View File

@ -2,4 +2,19 @@ package org.enso.table.data.column.storage.type;
/** A non-numeric storage type that has both a date part and a time part. */
public record DateTimeType() implements StorageType {
/** A pre-built instance of this type. */
public static final DateTimeType INSTANCE = new DateTimeType();
/** Always {@code false} for this type. */
@Override
public boolean isNumeric() {
return false;
}
/** Always {@code true} for this type. */
@Override
public boolean hasDate() {
return true;
}
/** Always {@code true} for this type. */
@Override
public boolean hasTime() {
return true;
}
}

View File

@ -2,4 +2,19 @@ package org.enso.table.data.column.storage.type;
/** A non-numeric storage type that has a date part but no time part. */
public record DateType() implements StorageType {
/** A pre-built instance of this type. */
public static final DateType INSTANCE = new DateType();
/** Always {@code false} for this type. */
@Override
public boolean isNumeric() {
return false;
}
/** Always {@code true} for this type. */
@Override
public boolean hasDate() {
return true;
}
/** Always {@code false} for this type. */
@Override
public boolean hasTime() {
return false;
}
}

View File

@ -8,4 +8,19 @@ public record FloatType(Bits bits) implements StorageType {
throw new IllegalArgumentException("Only 64-bit floats are currently supported.");
}
}
/** Always {@code true}: this type is numeric. */
@Override
public boolean isNumeric() {
return true;
}
/** Always {@code false}: this type has no date part. */
@Override
public boolean hasDate() {
return false;
}
/** Always {@code false}: this type has no time part. */
@Override
public boolean hasTime() {
return false;
}
}

View File

@ -17,6 +17,21 @@ public record IntegerType(Bits bits) implements StorageType {
};
}
/** Always {@code true}: this type is numeric. */
@Override
public boolean isNumeric() {
return true;
}
/** Always {@code false}: this type has no date part. */
@Override
public boolean hasDate() {
return false;
}
/** Always {@code false}: this type has no time part. */
@Override
public boolean hasTime() {
return false;
}
public long getMaxValue() {
return switch (bits) {
case BITS_8 -> Byte.MAX_VALUE;

View File

@ -50,4 +50,19 @@ public sealed interface StorageType
default -> AnyObjectType.INSTANCE;
};
}
/**
 * Reports whether this storage type is numeric.
 *
 * @return true if the storage type is numeric.
 */
boolean isNumeric();
/**
 * Reports whether this storage type carries a date part.
 *
 * @return true if the storage type has a date part.
 */
boolean hasDate();
/**
 * Reports whether this storage type carries a time part.
 *
 * @return true if the storage type has a time part.
 */
boolean hasTime();
}

View File

@ -10,6 +10,21 @@ public record TextType(long maxLength, boolean fixedLength) implements StorageTy
}
}
/** Always {@code false}: this type is not numeric. */
@Override
public boolean isNumeric() {
return false;
}
/** Always {@code false}: this type has no date part. */
@Override
public boolean hasDate() {
return false;
}
/** Always {@code false}: this type has no time part. */
@Override
public boolean hasTime() {
return false;
}
public static final TextType VARIABLE_LENGTH = new TextType(-1, false);
public static TextType fixedLength(long length) {

View File

@ -2,4 +2,19 @@ package org.enso.table.data.column.storage.type;
/** A non-numeric storage type that has a time part but no date part. */
public record TimeOfDayType() implements StorageType {
/** A pre-built instance of this type. */
public static final TimeOfDayType INSTANCE = new TimeOfDayType();
/** Always {@code false} for this type. */
@Override
public boolean isNumeric() {
return false;
}
/** Always {@code false} for this type. */
@Override
public boolean hasDate() {
return false;
}
/** Always {@code true} for this type. */
@Override
public boolean hasTime() {
return true;
}
}

View File

@ -60,22 +60,11 @@ public class ExplodingStorage extends Storage<Long> {
return getItem(idx);
}
/** Always {@code false}: this storage reports no vectorized unary operations. */
@Override
public boolean isUnaryOpVectorized(String name) {
return false;
}
/** Always {@code false}: this storage reports no vectorized binary operations. */
@Override
public boolean isBinaryOpVectorized(String name) {
return false;
}
/**
 * Returns {@code null} unconditionally; both arguments are ignored.
 *
 * <p>NOTE(review): {@code isUnaryOpVectorized} in this class always returns false, so callers
 * that check it first presumably never reach this method — confirm before relying on the null.
 */
@Override
public Storage<?> runVectorizedUnaryMap(
String name, MapOperationProblemAggregator problemAggregator) {
return null;
}
@Override
public Storage<?> runVectorizedBinaryMap(
String name, Object argument, MapOperationProblemAggregator problemAggregator) {