Mirror of https://github.com/enso-org/enso.git
Expanding Derived Columns and Expression Syntax (#3782)
- Added an expression ANTLR4 grammar and an sbt-based build step for it.
- Added expression support to `set` and `filter` on the Database and InMemory `Table`.
- Added expression support to `aggregate` on the Database and InMemory `Table`.
- Removed the old aggregate functions (`sum`, `max`, `min` and `mean`) from the `Column` types.
- Adjusted the database `Column` `+` operator to perform concatenation (`||`) for text types.
- Added the power operator `^` to both `Column` types.
- Adjusted `iif` to allow columns to be passed for the `when_true` and `when_false` parameters.
- Added `is_present` to the database `Column` type.
- Added `coalesce`, `min` and `max` functions to both `Column` types, performing row-based operations.
- Added support for `Date`, `Time_Of_Day` and `Date_Time` constants in the database.
- Added a `read` method to the InMemory `Column`, returning `self` (or a slice).

# Important Notes

- Moved the approximate type computation to `SQL_Type`.
- Fixed an issue in `LongNumericOp` where it was always casting to a double.
- Removed `head` from the InMemory `Table` (it still has the `first` method).
This commit is contained in: parent c2633bc137, commit 45276b243d
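To illustrate the new expression syntax end to end, here is a minimal sketch (not part of the diff): the table, its column names and the bracketed column-reference syntax are assumptions used for illustration only.

    import Standard.Examples

    # `table` stands for any Database or InMemory Table with numeric
    # "price" and "quantity" columns (hypothetical names).
    example_set table = table.set "total" "[price] * [quantity]"

    # The new `^` power operator, usable directly on columns.
    example_power = Examples.decimal_column ^ 2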
@ -230,6 +230,8 @@
- [Implemented `Table.rows` giving access to a vector of rows.][3827]
- [Define Enso epoch start as 15th October 1582][3804]
- [Implemented `Period` type][3818]
- [Implemented new functions on Column and added expression syntax support to
  create derived Columns.][3782]

[debug-shortcuts]:
  https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug

@ -368,6 +370,7 @@
[3818]: https://github.com/enso-org/enso/pull/3818
[3776]: https://github.com/enso-org/enso/pull/3776
[3836]: https://github.com/enso-org/enso/pull/3836
[3782]: https://github.com/enso-org/enso/pull/3782

#### Enso Compiler
11 build.sbt
@ -1872,17 +1872,26 @@ lazy val `std-base` = project

lazy val `std-table` = project
  .in(file("std-bits") / "table")
  .enablePlugins(Antlr4Plugin)
  .settings(
    frgaalJavaCompilerSetting,
    autoScalaLibrary := false,
    Compile / packageBin / artifactPath :=
      `table-polyglot-root` / "std-table.jar",
    Antlr4 / antlr4PackageName := Some("org.enso.table.expressions"),
    Antlr4 / antlr4Version := "4.10.1",
    Antlr4 / antlr4GenVisitor := true,
    Antlr4 / antlr4TreatWarningsAsErrors := true,
    Compile / managedSourceDirectories += {
      (Antlr4 / sourceManaged).value / "main" / "antlr4"
    },
    libraryDependencies ++= Seq(
      "org.graalvm.truffle" % "truffle-api" % graalVersion % "provided",
      "org.netbeans.api" % "org-openide-util-lookup" % netbeansApiVersion % "provided",
      "com.univocity" % "univocity-parsers" % "2.9.1",
      "org.apache.poi" % "poi-ooxml" % "5.2.2",
      "org.apache.xmlbeans" % "xmlbeans" % "5.1.0"
      "org.apache.xmlbeans" % "xmlbeans" % "5.1.0",
      "org.antlr" % "antlr4-runtime" % "4.10.1"
    ),
    Compile / packageBin := Def.task {
      val result = (Compile / packageBin).value
@ -8,7 +8,7 @@ import project.Data.SQL_Statement.SQL_Statement
|
||||
import project.Data.SQL_Type.SQL_Type
|
||||
import project.Data.Table as Database_Table
|
||||
import project.Internal.IR.Context.Context
|
||||
import project.Internal.IR.Expression.Expression
|
||||
import project.Internal.IR.SQL_Expression.SQL_Expression
|
||||
import project.Internal.IR.Query.Query
|
||||
|
||||
from project.Internal.Result_Set import read_column, result_set_to_table
|
||||
@ -182,7 +182,7 @@ type Connection
|
||||
|
||||
db_table = if create_table.is_error then create_table else self.query (SQL_Query.Table_Name name)
|
||||
if db_table.is_error.not then
|
||||
pairs = db_table.internal_columns.map col->[col.name, Expression.Constant col.sql_type Nothing]
|
||||
pairs = db_table.internal_columns.map col->[col.name, SQL_Expression.Constant col.sql_type Nothing]
|
||||
insert_query = self.dialect.generate_sql <| Query.Insert name pairs
|
||||
insert_template = insert_query.prepare.first
|
||||
self.jdbc_connection.load_table insert_template db_table table batch_size
|
||||
|
@ -9,7 +9,7 @@ import project.Data.SQL_Type.SQL_Type
|
||||
import project.Data.Table.Integrity_Error
|
||||
import project.Internal.Helpers
|
||||
import project.Internal.IR.Context.Context
|
||||
import project.Internal.IR.Expression.Expression
|
||||
import project.Internal.IR.SQL_Expression.SQL_Expression
|
||||
import project.Internal.IR.Internal_Column.Internal_Column
|
||||
import project.Internal.IR.Query.Query
|
||||
|
||||
@ -36,7 +36,7 @@ type Column
|
||||
which they come. Combined expressions must come from the same context -
|
||||
they must both have the same filtering, grouping etc. rules applied to be
|
||||
able to be combined.
|
||||
Value name:Text connection:Connection sql_type:SQL_Type expression:Expression context:Context
|
||||
Value name:Text connection:Connection sql_type:SQL_Type expression:SQL_Expression context:Context
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
@ -75,7 +75,7 @@ type Column
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Returns a materialized dataframe containing rows of this table.
|
||||
Returns a materialized column containing rows of this column.
|
||||
|
||||
Arguments:
|
||||
- max_rows: specifies a maximum amount of rows to fetch; if not set, all
|
||||
@ -135,12 +135,13 @@ type Column
|
||||
Error.throw <| Unsupported_Database_Operation_Error_Data "Cannot use columns coming from different contexts in one expression without a join."
|
||||
constant ->
|
||||
actual_operand_type = operand_type.if_nothing self.sql_type
|
||||
Expression.Constant actual_operand_type constant
|
||||
SQL_Expression.Constant actual_operand_type constant
|
||||
|
||||
actual_operand_types = operand_types.if_nothing (Vector.fill operands.length Nothing)
|
||||
expressions = operands.zip actual_operand_types prepare_operand
|
||||
|
||||
actual_new_type = new_type.if_nothing self.sql_type
|
||||
new_expr = Expression.Operation op_kind ([self.expression] + expressions)
|
||||
new_expr = SQL_Expression.Operation op_kind ([self.expression] + expressions)
|
||||
Column.Value self.name self.connection actual_new_type new_expr self.context
|
||||
|
||||
## PRIVATE
|
||||
@ -195,41 +196,6 @@ type Column
|
||||
join self other on=Nothing drop_unmatched=False left_suffix='_left' right_suffix='_right' =
|
||||
self.to_table.join other on drop_unmatched left_suffix right_suffix
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Sums the values in this column.
|
||||
sum : Any
|
||||
sum self = self.compute_aggregate "SUM"
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Computes the maximum element of this column.
|
||||
max : Any
|
||||
max self = self.compute_aggregate "MAX"
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Computes the minimum element of this column.
|
||||
min : Any
|
||||
min self = self.compute_aggregate "MIN"
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Computes the mean of non-missing elements of this column.
|
||||
mean : Any
|
||||
mean self = self.compute_aggregate "AVG"
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Computes an aggregate operator.
|
||||
|
||||
Arguments:
|
||||
- op_name: The name of the operator to compute.
|
||||
compute_aggregate : Text
|
||||
compute_aggregate self op_name =
|
||||
agg = make_aggregate self op_name
|
||||
agg.to_vector . at 0
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Returns the length of this column.
|
||||
@ -355,7 +321,12 @@ type Column
|
||||
of `self`. If `other` is a column, the operation is performed pairwise
|
||||
between corresponding elements of `self` and `other`.
|
||||
+ : Column | Any -> Column
|
||||
+ self other = self.make_binary_op "+" other
|
||||
+ self other =
|
||||
## TODO: Revisit this as part of the column value type work.
|
||||
op = case other of
|
||||
_ : Column -> if self.sql_type.is_definitely_numeric || other.sql_type.is_definitely_numeric then 'ADD_NUMBER' else 'ADD_TEXT'
|
||||
_ -> if self.sql_type.is_definitely_numeric then 'ADD_NUMBER' else 'ADD_TEXT'
|
||||
self.make_binary_op op other
|
||||
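A brief, hedged illustration of the adjusted `+` dispatch above (the example columns come from `Standard.Examples`; this snippet is not part of the diff):

    import Standard.Examples

    # Text operands route through "ADD_TEXT", i.e. SQL `||` concatenation.
    example_concat = Examples.text_column_1 + "!"

    # Numeric operands keep "ADD_NUMBER", i.e. ordinary addition.
    example_add = Examples.integer_column + 10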
|
||||
## UNSTABLE
|
||||
|
||||
@ -422,6 +393,34 @@ type Column
|
||||
% : Column | Any -> Column
|
||||
% self other = self.make_binary_op "%" other
|
||||
|
||||
## ALIAS Power
|
||||
|
||||
Element-wise raising to the power.
|
||||
|
||||
Arguments:
|
||||
- other: The exponent to raise `self` by. If `other` is a column, the
|
||||
power operation is performed pairwise between corresponding elements
|
||||
of `self` and `other`.
|
||||
|
||||
Returns a column containing the result of raising each element of `self`
|
||||
by `other`.
|
||||
|
||||
> Example
|
||||
Squares the elements of one column.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_div = Examples.decimal_column ^ 2
|
||||
|
||||
> Example
|
||||
Raises each value in a column by the value in another column.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_div = Examples.decimal_column ^ Examples.integer_column
|
||||
^ : Column | Any -> Column
|
||||
^ self other = self.make_binary_op '^' other
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Element-wise boolean conjunction.
|
||||
@ -456,12 +455,14 @@ type Column
|
||||
not : Column
|
||||
not self = self.make_unary_op "NOT"
|
||||
|
||||
## UNSTABLE
|
||||
## ALIAS IF
|
||||
|
||||
Replaces `True` values with `when_true` and `False` with `when_false`.
|
||||
Only meant for use with boolean columns.
|
||||
|
||||
TODO: Currently `when_true` and `when_false` need to be a single value.
|
||||
In the future the API will also support row-based IIF if they are columns.
|
||||
Arguments:
|
||||
- when_true: value or column when `self` is `True`.
|
||||
- when_false: value or column when `self` is `False`.
|
||||
iif : Any -> Any -> Column
|
||||
iif self when_true when_false =
|
||||
## TODO we should adjust new_type based on types when_true and
|
||||
@ -473,17 +474,68 @@ type Column
|
||||
when_false being either columns or regular values and rely on a
|
||||
mapping of Enso base types to SQL types, and a rule for extracting a
|
||||
common type.
|
||||
approximate_type x = case x of
|
||||
_ : Integer -> SQL_Type.integer
|
||||
_ : Decimal -> SQL_Type.real
|
||||
_ : Text -> SQL_Type.text
|
||||
_ : Boolean -> SQL_Type.boolean
|
||||
_ -> Error.throw (Illegal_Argument_Error_Data "Unsupported type.")
|
||||
left_type = approximate_type when_true
|
||||
right_type = approximate_type when_false
|
||||
left_type = get_approximate_type when_true self.sql_type
|
||||
right_type = get_approximate_type when_false self.sql_type
|
||||
if left_type != right_type then Error.throw (Illegal_Argument_Error_Data "when_true and when_false types do not match") else
|
||||
self.make_op "IIF" [when_true, when_false] new_type=left_type
|
||||
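A hedged usage sketch of the relaxed `iif` (column arguments are now accepted for `when_true` and `when_false`; the example columns are from `Standard.Examples` and their types are assumed to line up):

    import Standard.Examples

    # Pick the value from another column where the condition holds, else a constant.
    example_iif = Examples.bool_column_1.iif Examples.integer_column 0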
|
||||
## Returns a column of first non-`Nothing` value on each row of `self` and
|
||||
`values` list.
|
||||
|
||||
Arguments:
|
||||
- values: list of columns or values to coalesce with `self`.
|
||||
|
||||
> Example
|
||||
Get the first non-`Nothing` value in two columns.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_coalesce = Examples.decimal_column.coalesce Examples.integer_column
|
||||
coalesce : (Any | Vector Any) -> Column
|
||||
coalesce self values = case values of
|
||||
_ : Vector.Vector ->
|
||||
if values.any (v->(self.sql_type != get_approximate_type v self.sql_type)) then Error.throw (Illegal_Argument_Error_Data "self and values types do not all match") else
|
||||
self.make_op "COALESCE" values new_type=self.sql_type
|
||||
_ : Array -> self.coalesce (Vector.from_polyglot_array values)
|
||||
_ -> self.coalesce [values]
|
||||
|
||||
## Returns a column of minimum on each row of `self` and `values` list.
|
||||
|
||||
Arguments:
|
||||
- values: list of columns or values to minimum with `self`.
|
||||
|
||||
> Example
|
||||
Get the minimum value in two columns.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_min = Examples.decimal_column.min Examples.integer_column
|
||||
min : (Any | Vector Any) -> Column
|
||||
min self values = case values of
|
||||
_ : Vector.Vector ->
|
||||
if values.any (v->(self.sql_type != get_approximate_type v self.sql_type)) then Error.throw (Illegal_Argument_Error_Data "self and values types do not all match") else
|
||||
self.make_op "ROW_MIN" values new_type=self.sql_type
|
||||
_ : Array -> self.min (Vector.from_polyglot_array values)
|
||||
_ -> self.min [values]
|
||||
|
||||
## Returns a column of maximum on each row of `self` and `values` list.
|
||||
|
||||
Arguments:
|
||||
- values: list of columns or values to maximum with `self`.
|
||||
|
||||
> Example
|
||||
Get the maximum value in two columns.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_max = Examples.decimal_column.max Examples.integer_column
|
||||
max : (Any | Vector Any) -> Column
|
||||
max self values = case values of
|
||||
_ : Vector.Vector ->
|
||||
if values.any (v->(self.sql_type != get_approximate_type v self.sql_type)) then Error.throw (Illegal_Argument_Error_Data "self and values types do not all match") else
|
||||
self.make_op "ROW_MAX" values new_type=self.sql_type
|
||||
_ : Array -> self.max (Vector.from_polyglot_array values)
|
||||
_ -> self.max [values]
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
@ -504,6 +556,18 @@ type Column
|
||||
is_empty : Column
|
||||
is_empty self = self.make_unary_op "IS_EMPTY" new_type=SQL_Type.boolean
|
||||
|
||||
## Returns a column of booleans, with `True` items at the positions where
|
||||
this column does not contain a `Nothing`.
|
||||
|
||||
> Example
|
||||
Check a column for present values.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_is_present = Examples.decimal_column.is_present
|
||||
is_present : Column
|
||||
is_present self = self.is_missing.not
|
||||
|
||||
## PRIVATE
|
||||
Returns a column of booleans with `True` at the positions where this
|
||||
column contains a blank value.
|
||||
@ -667,6 +731,7 @@ type Column
|
||||
example_contains = Examples.text_column_1.is_in [1, 2, 5]
|
||||
is_in : Column | Vector -> Column
|
||||
is_in self vector = case vector of
|
||||
_ : Array -> self.is_in (Vector.from_polyglot_array vector)
|
||||
_ : Vector.Vector ->
|
||||
## This is slightly hacky - we don't provide operand types as we want to
|
||||
allow any type to get through and currently we do not have a mapping
|
||||
@ -695,18 +760,17 @@ type Column
|
||||
column : Column -> if Helpers.check_connection self column . not then (Error.throw (Integrity_Error.Error "Column "+column.name)) else
|
||||
## We slightly abuse the expression syntax putting a Query as one of
|
||||
the sub-expressions. Once type-checking is added, we may need to
|
||||
amend the signature of `Expression.Operation` to account for
|
||||
amend the signature of `SQL_Expression.Operation` to account for
|
||||
this. Also, unfortunately as `NULL IN (...)` is `NULL` in SQL, we
|
||||
need to do separate handling of nulls - we check if the target
|
||||
column has any nulls and if so, we will do `IS NULL` checks for
|
||||
our columns too. That is because, we want the containment check
|
||||
for `NULL` to work the same way as for any other value.
|
||||
in_subquery = Query.Select [Pair_Data column.name column.expression] column.context
|
||||
has_nulls_expression = Expression.Operation "BOOL_OR" [column.is_missing.expression]
|
||||
has_nulls_expression = SQL_Expression.Operation "BOOL_OR" [column.is_missing.expression]
|
||||
has_nulls_subquery = Query.Select [Pair_Data "has_nulls" has_nulls_expression] column.context
|
||||
new_type = SQL_Type.boolean
|
||||
new_expr = Expression.Operation "IS_IN_COLUMN" [self.expression, in_subquery, has_nulls_subquery]
|
||||
Column.Value self.name self.connection new_type new_expr self.context
|
||||
new_expr = SQL_Expression.Operation "IS_IN_COLUMN" [self.expression, in_subquery, has_nulls_subquery]
|
||||
Column.Value self.name self.connection SQL_Type.boolean new_expr self.context
|
||||
|
||||
## PRIVATE
|
||||
as_internal : Internal_Column
|
||||
@ -716,48 +780,14 @@ type Column
|
||||
to_text : Text
|
||||
to_text self = "(Database Column "+self.name.to_text+")"
|
||||
|
||||
## PRIVATE
|
||||
|
||||
A helper method for creating an aggregated column by applying some
|
||||
operation.
|
||||
|
||||
Arguments:
|
||||
- column: The column to aggregate.
|
||||
- operation: The name of the aggregation operation.
|
||||
- name_suffix: The suffix to apply to the name of the aggregate column.
|
||||
- new_type: The SQL type of the result column.
|
||||
make_aggregate : Column -> Text -> Text -> SQL_Type -> Column
|
||||
make_aggregate column operation name_suffix="_agg" new_type=Nothing =
|
||||
actual_new_type = new_type.if_nothing column.sql_type
|
||||
expr = Expression.Operation operation [column.expression]
|
||||
case Helpers.ensure_name_is_sane name_suffix of
|
||||
True ->
|
||||
new_name = column.name + name_suffix
|
||||
lift_aggregate new_name column.connection actual_new_type expr column.context
|
||||
## PRIVATE
|
||||
Helper for the expression engine to tell it which functions need a Vector.
|
||||
var_args_functions : Array
|
||||
var_args_functions = ['is_in', 'coalesce', 'min', 'max']
|
||||
|
||||
## PRIVATE
|
||||
|
||||
A helper function that lifts an aggregate query into a subquery to ensure
|
||||
correctness of further processing.
|
||||
|
||||
Argument:
|
||||
- new_name: The new name for the aggregate column.
|
||||
- connection: The connection with which the aggregate is associated.
|
||||
- expected_type: The expected SQL type of the column.
|
||||
- expr: The expression for the query.
|
||||
- context: The context in which the query exists.
|
||||
lift_aggregate : Text -> Connection -> SQL_Type -> Expression -> Context -> Column
|
||||
lift_aggregate new_name connection expected_type expr context =
|
||||
# TODO [RW] This is a simple workaround for #1643 - we always wrap the
|
||||
# aggregate into a subquery, thus making it safe to use it everywhere. A
|
||||
# more complex solution may be adopted at some point.
|
||||
ixes = freshen_columns [new_name] context.meta_index
|
||||
col = Internal_Column.Value new_name expected_type expr
|
||||
setup = context.as_subquery new_name+"_sub" [[col], ixes]
|
||||
subquery = setup.first
|
||||
cols = setup.second
|
||||
new_col = cols.first.first
|
||||
new_ixes = cols.second
|
||||
new_ctx = Context.for_subquery subquery . set_index new_ixes
|
||||
Column.Value new_name connection new_col.sql_type new_col.expression new_ctx
|
||||
|
||||
TODO: Revisit this as part of the column value type work.
|
||||
get_approximate_type value default = case value of
|
||||
_ : Column -> value.sql_type
|
||||
Nothing -> default
|
||||
_ -> SQL_Type.approximate_type value
|
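A small illustrative sketch of how this helper resolves types (values chosen purely for illustration):

    example_default = get_approximate_type Nothing SQL_Type.text   # returns the provided default, SQL_Type.text
    example_number = get_approximate_type 2.5 SQL_Type.text        # falls through to SQL_Type.approximate_type, giving SQL_Type.double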
@ -43,8 +43,12 @@ type SQL_Statement
|
||||
strings = self.internal_fragments . map <| case _ of
|
||||
SQL_Fragment.Code_Part code -> code
|
||||
# TODO at some point we may try more sophisticated serialization based on data type
|
||||
# TODO #183734954: date and time formatting is limited and will lose sub-second precision and timezone offset.
|
||||
SQL_Fragment.Interpolation _ obj -> case obj of
|
||||
Number -> obj.to_text
|
||||
Date_Time.Date_Time -> "'" + (obj.format "yyyy-MM-dd HH:mm:ss") + "'"
|
||||
Date.Date -> "'" + (obj.format "yyyy-MM-dd") + "'"
|
||||
Time_Of_Day.Time_Of_Day -> "'" + (obj.format "HH:mm:ss") + "'"
|
||||
_ -> "'" + obj.to_text.replace "'" "''" + "'"
|
||||
strings.join ""
|
||||
|
||||
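For reference, a hedged sketch of what the new temporal branches above produce for the preview text (sub-second precision and timezone offsets are dropped, as the TODO notes; the values are hypothetical):

    # Formatted with the patterns used above.
    example_date = (Date.new 2022 10 14) . format "yyyy-MM-dd"        # "2022-10-14"
    example_time = (Time_Of_Day.new 9 30 15) . format "HH:mm:ss"      # "09:30:15"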
|
@ -49,12 +49,12 @@ type SQL_Type
|
||||
numeric : SQL_Type
|
||||
numeric = SQL_Type.Value Types.NUMERIC "NUMERIC"
|
||||
|
||||
## The SQL type representing one of the suppported textual types.
|
||||
## The SQL type representing one of the supported textual types.
|
||||
varchar : SQL_Type
|
||||
varchar = SQL_Type.Value Types.VARCHAR "VARCHAR"
|
||||
|
||||
## UNSTABLE
|
||||
The SQL type representing one of the suppported textual types.
|
||||
The SQL type representing one of the supported textual types.
|
||||
|
||||
It seems that JDBC treats the `TEXT` and `VARCHAR` types as interchangeable.
|
||||
text : SQL_Type
|
||||
@ -64,6 +64,40 @@ type SQL_Type
|
||||
blob : SQL_Type
|
||||
blob = SQL_Type.Value Types.BLOB "BLOB"
|
||||
|
||||
## The SQL type representing a date type.
|
||||
date : SQL_Type
|
||||
date = SQL_Type.Value Types.DATE "DATE"
|
||||
|
||||
## The SQL type representing a time type.
|
||||
time : SQL_Type
|
||||
time = SQL_Type.Value Types.TIME "TIME"
|
||||
|
||||
## The SQL type representing a date and time type.
|
||||
date_time : SQL_Type
|
||||
date_time = SQL_Type.Value Types.TIMESTAMP_WITH_TIMEZONE "TIMESTAMP"
|
||||
|
||||
## ADVANCED
|
||||
Given an Enso value gets the approximate SQL type.
|
||||
approximate_type : Any -> SQL_Type ! Illegal_Argument_Error_Data
|
||||
approximate_type value = case value of
|
||||
_ : Boolean -> SQL_Type.boolean
|
||||
_ : Integer -> SQL_Type.integer
|
||||
_ : Decimal -> SQL_Type.double
|
||||
_ : Text -> SQL_Type.varchar
|
||||
_ : Date.Date -> SQL_Type.date
|
||||
_ : Time_Of_Day.Time_Of_Day -> SQL_Type.time_of_day
|
||||
_ : Date_Time.Date_Time -> SQL_Type.date_time
|
||||
_ -> Error.throw (Illegal_Argument_Error_Data "Unsupported type.")
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Returns True if this type represents an integer or a double.
|
||||
|
||||
It only handles the standard types so it may return false negatives for
|
||||
non-standard ones.
|
||||
is_definitely_numeric : Boolean
|
||||
is_definitely_numeric self = self.is_definitely_double || self.is_definitely_integer
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Returns True if this type represents an integer.
|
||||
|
@ -2,6 +2,8 @@ from Standard.Base import all
|
||||
from Standard.Base.Error.Problem_Behavior import Report_Warning
|
||||
|
||||
from Standard.Table import Auto_Detect, Aggregate_Column, Data_Formatter, Column_Name_Mapping, Column_Selector, Sort_Column_Selector, Sort_Column, Match_Columns, Position
|
||||
import Standard.Table.Data.Expression.Expression
|
||||
import Standard.Table.Data.Expression.Expression_Error
|
||||
import Standard.Table.Data.Table.Table as Materialized_Table
|
||||
import Standard.Table.Internal.Java_Exports
|
||||
import Standard.Table.Internal.Table_Helpers
|
||||
@ -18,7 +20,7 @@ import project.Data.SQL_Type.SQL_Type
|
||||
import project.Internal.Helpers
|
||||
import project.Internal.Aggregate_Helper
|
||||
import project.Internal.IR.Context.Context
|
||||
import project.Internal.IR.Expression.Expression
|
||||
import project.Internal.IR.SQL_Expression.SQL_Expression
|
||||
import project.Internal.IR.From_Spec.From_Spec
|
||||
import project.Internal.IR.Internal_Column.Internal_Column
|
||||
import project.Internal.IR.Join_Kind.Join_Kind
|
||||
@ -27,6 +29,7 @@ import project.Internal.IR.Query.Query
|
||||
from Standard.Database.Errors import Unsupported_Database_Operation_Error_Data
|
||||
|
||||
polyglot java import java.sql.JDBCType
|
||||
polyglot java import java.util.UUID
|
||||
|
||||
## Represents a column-oriented table data structure backed by a database.
|
||||
type Table
|
||||
@ -396,13 +399,20 @@ type Table
|
||||
on_problems.handle_errors fallback=self.with_no_rows <|
|
||||
mask (make_filter_column column filter)
|
||||
_ : Function -> Error.throw (Unsupported_Database_Operation_Error_Data "Filtering with a custom predicate is not supported in the database.")
|
||||
_ : Text ->
|
||||
table_at = self.at column
|
||||
if table_at.is_error.not then self.filter table_at filter on_problems else
|
||||
expression = self.evaluate column
|
||||
if expression.is_error.not then self.filter expression filter on_problems else
|
||||
pick_error = expression.catch Expression_Error.Syntax_Error (_->table_at)
|
||||
on_problems.handle_errors pick_error fallback=self
|
||||
_ -> case on_problems.handle_errors (self.at column) fallback=Nothing of
|
||||
Nothing -> self
|
||||
resolved_column -> self.filter resolved_column filter on_problems
|
||||
|
||||
## PRIVATE
|
||||
with_no_rows self =
|
||||
false_expression = Expression.Operation "=" [Expression.Constant SQL_Type.integer 1, Expression.Constant SQL_Type.integer 2]
|
||||
false_expression = SQL_Expression.Operation "=" [SQL_Expression.Constant SQL_Type.integer 1, SQL_Expression.Constant SQL_Type.integer 2]
|
||||
new_filters = self.context.where_filters + [false_expression]
|
||||
new_ctx = self.context.set_where_filters new_filters
|
||||
self.updated_context new_ctx
|
||||
@ -477,21 +487,35 @@ type Table
|
||||
|
||||
If a column with the given name already exists, it will be replaced.
|
||||
Otherwise a new column is added.
|
||||
set : Text -> Column -> Table
|
||||
set self name column = case Helpers.ensure_name_is_sane name of
|
||||
True ->
|
||||
is_used_in_index = self.context.meta_index.exists i-> i.name == name
|
||||
case is_used_in_index of
|
||||
True -> Error.throw <| Illegal_State_Error_Data "Cannot override column "+name+", because it is used as an index. Remove the index or use a different name."
|
||||
False ->
|
||||
new_col = Internal_Column.Value name column.sql_type column.expression
|
||||
replace = self.internal_columns.exists (c -> c.name == name)
|
||||
case replace of
|
||||
True ->
|
||||
new_cols = self.internal_columns.map (c -> if c.name == name then new_col else c)
|
||||
self.updated_columns new_cols
|
||||
False ->
|
||||
self.updated_columns (self.internal_columns + [new_col])
|
||||
set : Text -> Column | Text -> Problem_Behavior -> Table
|
||||
set self name column on_problems=Report_Warning = on_problems.handle_errors fallback=self <|
|
||||
case Helpers.ensure_name_is_sane name of
|
||||
True ->
|
||||
is_used_in_index = self.context.meta_index.exists i-> i.name == name
|
||||
case is_used_in_index of
|
||||
True -> Error.throw <| Illegal_State_Error_Data "Cannot override column "+name+", because it is used as an index. Remove the index or use a different name."
|
||||
False ->
|
||||
resolved = case column of
|
||||
_ : Text -> self.evaluate column
|
||||
_ -> column
|
||||
new_col = Internal_Column.Value name resolved.sql_type resolved.expression
|
||||
replace = self.internal_columns.exists (c -> c.name == name)
|
||||
case replace of
|
||||
True ->
|
||||
new_cols = self.internal_columns.map (c -> if c.name == name then new_col else c)
|
||||
self.updated_columns new_cols
|
||||
False ->
|
||||
self.updated_columns (self.internal_columns + [new_col])
|
||||
|
||||
## PRIVATE
|
||||
evaluate : Text -> Column
|
||||
evaluate self expression =
|
||||
get_column name = self.at name
|
||||
make_constant value =
|
||||
new_type = SQL_Type.approximate_type value
|
||||
other = SQL_Expression.Constant new_type value
|
||||
Column.Value ("Constant_" + UUID.randomUUID.to_text) self.connection new_type other self.context
|
||||
Expression.evaluate expression get_column make_constant "Standard.Database.Data.Column" "Column" Column.var_args_functions
|
||||
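A hypothetical call through the new `Text` overload of `set` (the table and column names are assumptions; note the added `on_problems` parameter defaults to `Report_Warning`):

    # Parses the expression via `evaluate` and adds (or replaces) the column.
    example_set table = table.set "price_with_tax" "[price] * 1.2"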
|
||||
## UNSTABLE
|
||||
|
||||
@ -732,7 +756,7 @@ type Table
|
||||
new_columns = left_renamed_columns + right_renamed_columns
|
||||
|
||||
on_exprs = left_new_join_index.zip right_new_join_index l-> r->
|
||||
Expression.Operation "=" [l.expression, r.expression]
|
||||
SQL_Expression.Operation "=" [l.expression, r.expression]
|
||||
new_from = From_Spec.Join kind left_subquery right_subquery on_exprs
|
||||
new_limit = Nothing
|
||||
new_ctx = Context.Value new_from [] [] [] new_index new_limit
|
||||
@ -814,7 +838,7 @@ type Table
|
||||
## Returns the amount of rows in this table.
|
||||
row_count : Integer
|
||||
row_count self = if self.internal_columns.is_empty then 0 else
|
||||
expr = Expression.Operation "COUNT_ROWS" []
|
||||
expr = SQL_Expression.Operation "COUNT_ROWS" []
|
||||
column_name = "row_count"
|
||||
## We need to keep some column in the subquery which will determine if
|
||||
the query is performing regular selection or aggregation. To avoid
|
||||
@ -889,7 +913,7 @@ type Table
|
||||
setup = self.context.as_subquery self.name [self.internal_columns]
|
||||
new_ctx = Context.for_subquery setup.first
|
||||
new_columns = setup.second.first.map column->
|
||||
[column.name, Expression.Operation "COUNT" [column.expression]]
|
||||
[column.name, SQL_Expression.Operation "COUNT" [column.expression]]
|
||||
query = Query.Select new_columns new_ctx
|
||||
self.connection.dialect.generate_sql query
|
||||
count_table = self.connection.read_statement count_query
|
||||
@ -971,7 +995,7 @@ type Table
|
||||
_ -> Error.throw <| Illegal_State_Error_Data "Inserting can only be performed on tables as returned by `query`, any further processing is not allowed."
|
||||
# TODO [RW] before removing the PRIVATE tag, add a check that no bad stuff was done to the table as described above
|
||||
pairs = self.internal_columns.zip values col-> value->
|
||||
[col.name, Expression.Constant col.sql_type value]
|
||||
[col.name, SQL_Expression.Constant col.sql_type value]
|
||||
query = self.connection.dialect.generate_sql <| Query.Insert table_name pairs
|
||||
affected_rows = self.connection.execute_update query
|
||||
case affected_rows == 1 of
|
||||
@ -1071,7 +1095,7 @@ type Integrity_Error
|
||||
# make_table : Connection -> Text -> Vector [Text, SQL_Type] -> Context -> Table
|
||||
make_table : Connection -> Text -> Vector -> Context -> Table
|
||||
make_table connection table_name columns ctx =
|
||||
cols = columns.map (p -> Internal_Column.Value p.first p.second (Expression.Column table_name p.first))
|
||||
cols = columns.map (p -> Internal_Column.Value p.first p.second (SQL_Expression.Column table_name p.first))
|
||||
Table.Value table_name connection cols ctx
|
||||
|
||||
## PRIVATE
|
||||
|
@ -3,7 +3,7 @@ from Standard.Base import all hiding First, Last
|
||||
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all
|
||||
|
||||
import project.Data.SQL_Type.SQL_Type
|
||||
import project.Internal.IR.Expression.Expression
|
||||
import project.Internal.IR.SQL_Expression.SQL_Expression
|
||||
import project.Internal.IR.Internal_Column.Internal_Column
|
||||
|
||||
from project.Errors import Unsupported_Database_Operation_Error_Data
|
||||
@ -23,48 +23,48 @@ make_aggregate_column table aggregate new_name =
|
||||
## PRIVATE
|
||||
Creates an Internal Representation of the expression that computes a
|
||||
requested statistic.
|
||||
make_expression : Aggregate_Column -> Dialect -> Expression
|
||||
make_expression : Aggregate_Column -> Dialect -> SQL_Expression
|
||||
make_expression aggregate dialect =
|
||||
is_non_empty_selector v = if v.is_nothing then False else v.columns.not_empty
|
||||
case aggregate of
|
||||
Group_By c _ -> c.expression
|
||||
Count _ -> Expression.Operation "COUNT_ROWS" []
|
||||
Count _ -> SQL_Expression.Operation "COUNT_ROWS" []
|
||||
Count_Distinct columns _ ignore_nothing -> if columns.is_empty then Error.throw (Illegal_Argument_Error_Data "Count_Distinct must have at least one column.") else
|
||||
case ignore_nothing of
|
||||
True -> Expression.Operation "COUNT_DISTINCT" (columns.map .expression)
|
||||
False -> Expression.Operation "COUNT_DISTINCT_INCLUDE_NULL" (columns.map .expression)
|
||||
Count_Not_Nothing c _ -> Expression.Operation "COUNT" [c.expression]
|
||||
Count_Nothing c _ -> Expression.Operation "COUNT_IS_NULL" [c.expression]
|
||||
Count_Not_Empty c _ -> Expression.Operation "COUNT_NOT_EMPTY" [c.expression]
|
||||
Count_Empty c _ -> Expression.Operation "COUNT_EMPTY" [c.expression]
|
||||
Percentile p c _ -> Expression.Operation "PERCENTILE" [Expression.Constant SQL_Type.double p, c.expression]
|
||||
Mode c _ -> Expression.Operation "MODE" [c.expression]
|
||||
True -> SQL_Expression.Operation "COUNT_DISTINCT" (columns.map .expression)
|
||||
False -> SQL_Expression.Operation "COUNT_DISTINCT_INCLUDE_NULL" (columns.map .expression)
|
||||
Count_Not_Nothing c _ -> SQL_Expression.Operation "COUNT" [c.expression]
|
||||
Count_Nothing c _ -> SQL_Expression.Operation "COUNT_IS_NULL" [c.expression]
|
||||
Count_Not_Empty c _ -> SQL_Expression.Operation "COUNT_NOT_EMPTY" [c.expression]
|
||||
Count_Empty c _ -> SQL_Expression.Operation "COUNT_EMPTY" [c.expression]
|
||||
Percentile p c _ -> SQL_Expression.Operation "PERCENTILE" [SQL_Expression.Constant SQL_Type.double p, c.expression]
|
||||
Mode c _ -> SQL_Expression.Operation "MODE" [c.expression]
|
||||
First c _ ignore_nothing order_by -> case is_non_empty_selector order_by of
|
||||
False -> Error.throw (Unsupported_Database_Operation_Error_Data "`First` aggregation requires at least one `order_by` column.")
|
||||
True ->
|
||||
order_bys = order_by.columns.map c-> dialect.prepare_order_descriptor c.column.as_internal c.direction Text_Ordering.Default
|
||||
case ignore_nothing of
|
||||
False -> Expression.Operation "FIRST" [c.expression]+order_bys
|
||||
True -> Expression.Operation "FIRST_NOT_NULL" [c.expression]+order_bys
|
||||
False -> SQL_Expression.Operation "FIRST" [c.expression]+order_bys
|
||||
True -> SQL_Expression.Operation "FIRST_NOT_NULL" [c.expression]+order_bys
|
||||
Last c _ ignore_nothing order_by -> case is_non_empty_selector order_by of
|
||||
False -> Error.throw (Unsupported_Database_Operation_Error_Data "`Last` aggregation requires at least one `order_by` column.")
|
||||
True ->
|
||||
order_bys = order_by.columns.map c-> dialect.prepare_order_descriptor c.column.as_internal c.direction Text_Ordering.Default
|
||||
case ignore_nothing of
|
||||
False -> Expression.Operation "LAST" [c.expression]+order_bys
|
||||
True -> Expression.Operation "LAST_NOT_NULL" [c.expression]+order_bys
|
||||
Maximum c _ -> Expression.Operation "MAX" [c.expression]
|
||||
Minimum c _ -> Expression.Operation "MIN" [c.expression]
|
||||
Shortest c _ -> Expression.Operation "SHORTEST" [c.expression]
|
||||
Longest c _ -> Expression.Operation "LONGEST" [c.expression]
|
||||
False -> SQL_Expression.Operation "LAST" [c.expression]+order_bys
|
||||
True -> SQL_Expression.Operation "LAST_NOT_NULL" [c.expression]+order_bys
|
||||
Maximum c _ -> SQL_Expression.Operation "MAX" [c.expression]
|
||||
Minimum c _ -> SQL_Expression.Operation "MIN" [c.expression]
|
||||
Shortest c _ -> SQL_Expression.Operation "SHORTEST" [c.expression]
|
||||
Longest c _ -> SQL_Expression.Operation "LONGEST" [c.expression]
|
||||
Standard_Deviation c _ population -> case population of
|
||||
True -> Expression.Operation "STDDEV_POP" [c.expression]
|
||||
False -> Expression.Operation "STDDEV_SAMP" [c.expression]
|
||||
True -> SQL_Expression.Operation "STDDEV_POP" [c.expression]
|
||||
False -> SQL_Expression.Operation "STDDEV_SAMP" [c.expression]
|
||||
Concatenate c _ separator prefix suffix quote_char ->
|
||||
base_args = [c.expression, Expression.Constant SQL_Type.text separator, Expression.Constant SQL_Type.text prefix, Expression.Constant SQL_Type.text suffix]
|
||||
base_args = [c.expression, SQL_Expression.Constant SQL_Type.text separator, SQL_Expression.Constant SQL_Type.text prefix, SQL_Expression.Constant SQL_Type.text suffix]
|
||||
case quote_char.is_empty of
|
||||
True -> Expression.Operation "CONCAT" base_args
|
||||
False -> Expression.Operation "CONCAT_QUOTE_IF_NEEDED" base_args+[Expression.Constant SQL_Type.text quote_char]
|
||||
Sum c _ -> Expression.Operation "SUM" [c.expression]
|
||||
Average c _ -> Expression.Operation "AVG" [c.expression]
|
||||
Median c _ -> Expression.Operation "MEDIAN" [c.expression]
|
||||
True -> SQL_Expression.Operation "CONCAT" base_args
|
||||
False -> SQL_Expression.Operation "CONCAT_QUOTE_IF_NEEDED" base_args+[SQL_Expression.Constant SQL_Type.text quote_char]
|
||||
Sum c _ -> SQL_Expression.Operation "SUM" [c.expression]
|
||||
Average c _ -> SQL_Expression.Operation "AVG" [c.expression]
|
||||
Median c _ -> SQL_Expression.Operation "MEDIAN" [c.expression]
|
||||
|
@ -3,7 +3,7 @@ from Standard.Base import all
|
||||
import project.Data.SQL
|
||||
import project.Data.SQL.Builder
|
||||
import project.Internal.IR.Context.Context
|
||||
import project.Internal.IR.Expression.Expression
|
||||
import project.Internal.IR.SQL_Expression.SQL_Expression
|
||||
import project.Internal.IR.From_Spec.From_Spec
|
||||
import project.Internal.IR.Join_Kind.Join_Kind
|
||||
import project.Internal.IR.Order_Descriptor.Order_Descriptor
|
||||
@ -169,15 +169,16 @@ base_dialect =
|
||||
unary = name -> [name, make_unary_op name]
|
||||
fun = name -> [name, make_function name]
|
||||
|
||||
arith = [bin "+", bin "-", bin "*", bin "/", bin "%"]
|
||||
arith = [["ADD_NUMBER", make_binary_op "+"], ["ADD_TEXT", make_binary_op "||"], bin "-", bin "*", bin "/", bin "%", ["^", make_function "POWER"]]
|
||||
logic = [bin "AND", bin "OR", unary "NOT", ["IIF", make_iif]]
|
||||
compare = [bin "=", bin "!=", bin "<", bin ">", bin "<=", bin ">=", ["BETWEEN", make_between]]
|
||||
functions = [["COALESCE", make_function "COALESCE"], ["ROW_MIN", make_function "MIN"], ["ROW_MAX", make_function "MAX"]]
|
||||
agg = [fun "MAX", fun "MIN", fun "AVG", fun "SUM"]
|
||||
counts = [fun "COUNT", ["COUNT_ROWS", make_constant "COUNT(*)"]]
|
||||
text = [is_empty, bin "LIKE"]
|
||||
nulls = [["IS_NULL", make_right_unary_op "IS NULL"], ["FILL_NULL", make_function "COALESCE"]]
|
||||
contains = [["IS_IN", make_is_in], ["IS_IN_COLUMN", make_is_in_column]]
|
||||
base_map = Map.from_vector (arith + logic + compare + agg + counts + text + nulls + contains)
|
||||
base_map = Map.from_vector (arith + logic + compare + functions + agg + counts + text + nulls + contains)
|
||||
Internal_Dialect.Value base_map wrap_in_quotes
|
||||
|
||||
## PRIVATE
|
||||
@ -242,12 +243,12 @@ make_is_in_column arguments = case arguments.length of
|
||||
Arguments:
|
||||
- dialect: The SQL dialect in which the expression is being generated.
|
||||
- expr: The expression to generate SQL code for.
|
||||
generate_expression : Internal_Dialect -> Expression | Order_Descriptor | Query -> Builder
|
||||
generate_expression : Internal_Dialect -> SQL_Expression | Order_Descriptor | Query -> Builder
|
||||
generate_expression dialect expr = case expr of
|
||||
Expression.Column origin name ->
|
||||
SQL_Expression.Column origin name ->
|
||||
dialect.wrap_identifier origin ++ '.' ++ dialect.wrap_identifier name
|
||||
Expression.Constant sql_type value -> SQL.interpolation sql_type value
|
||||
Expression.Operation kind arguments ->
|
||||
SQL_Expression.Constant sql_type value -> SQL.interpolation sql_type value
|
||||
SQL_Expression.Operation kind arguments ->
|
||||
op = dialect.operation_map.get_or_else kind (Error.throw <| Unsupported_Database_Operation_Error_Data kind)
|
||||
parsed_args = arguments.map (generate_expression dialect)
|
||||
op parsed_args
|
||||
|
@ -1,6 +1,6 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import project.Internal.IR.Expression.Expression
|
||||
import project.Internal.IR.SQL_Expression.SQL_Expression
|
||||
import project.Internal.IR.From_Spec.From_Spec
|
||||
import project.Internal.IR.Internal_Column.Internal_Column
|
||||
import project.Internal.IR.Order_Descriptor.Order_Descriptor
|
||||
@ -31,7 +31,7 @@ type Context
|
||||
- meta_index: a list of internal columns to use for joining or grouping.
|
||||
- limit: an optional maximum number of elements that the query should
|
||||
return.
|
||||
Value (from_spec : From_Spec) (where_filters : Vector Expression) (orders : Vector Order_Descriptor) (groups : Vector Expression) (meta_index : Vector Internal_Column) (limit : Nothing | Integer)
|
||||
Value (from_spec : From_Spec) (where_filters : Vector SQL_Expression) (orders : Vector Order_Descriptor) (groups : Vector SQL_Expression) (meta_index : Vector Internal_Column) (limit : Nothing | Integer)
|
||||
|
||||
## PRIVATE
|
||||
|
||||
@ -82,7 +82,7 @@ type Context
|
||||
|
||||
Arguments:
|
||||
- new_filters: The new filters to set in the query.
|
||||
set_where_filters : Vector Expression -> Context
|
||||
set_where_filters : Vector SQL_Expression -> Context
|
||||
set_where_filters self new_filters =
|
||||
Context.Value self.from_spec new_filters self.orders self.groups self.meta_index self.limit
|
||||
|
||||
@ -119,7 +119,7 @@ type Context
|
||||
|
||||
Arguments:
|
||||
- new_groups: The new grouping clauses to set in the query.
|
||||
set_groups : Vector Expression -> Context
|
||||
set_groups : Vector SQL_Expression -> Context
|
||||
set_groups self new_groups =
|
||||
Context.Value self.from_spec self.where_filters self.orders new_groups self.meta_index self.limit
|
||||
|
||||
@ -152,7 +152,7 @@ type Context
|
||||
as_subquery self alias column_lists =
|
||||
rewrite_internal_column : Internal_Column -> Internal_Column
|
||||
rewrite_internal_column column =
|
||||
Internal_Column.Value column.name column.sql_type (Expression.Column alias column.name)
|
||||
Internal_Column.Value column.name column.sql_type (SQL_Expression.Column alias column.name)
|
||||
|
||||
new_columns = column_lists.map columns->
|
||||
columns.map rewrite_internal_column
|
||||
|
@ -1,7 +1,7 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import project.Internal.IR.Context.Context
|
||||
import project.Internal.IR.Expression.Expression
|
||||
import project.Internal.IR.SQL_Expression.SQL_Expression
|
||||
import project.Internal.IR.Join_Kind.Join_Kind
|
||||
|
||||
## PRIVATE
|
||||
@ -44,7 +44,7 @@ type From_Spec
|
||||
- on: a list of expressions that will be used as join conditions, these
|
||||
are usually equalities between expressions from the left and right
|
||||
sources.
|
||||
Join (kind : Join_Kind) (left_spec : From_Spec) (right_spec : From_Spec) (on : Vector Expression)
|
||||
Join (kind : Join_Kind) (left_spec : From_Spec) (right_spec : From_Spec) (on : Vector SQL_Expression)
|
||||
|
||||
## PRIVATE
|
||||
|
||||
@ -57,4 +57,4 @@ type From_Spec
|
||||
- context: the context for the sub-query.
|
||||
- alias: the name upon which the results of this sub-query can be
|
||||
referred to in other parts of the query.
|
||||
Sub_Query (columns : Vector (Pair Text Expression)) (context : Context) (alias : Text)
|
||||
Sub_Query (columns : Vector (Pair Text SQL_Expression)) (context : Context) (alias : Text)
|
||||
|
@ -1,7 +1,7 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import project.Data.SQL_Type.SQL_Type
|
||||
import project.Internal.IR.Expression.Expression
|
||||
import project.Internal.IR.SQL_Expression.SQL_Expression
|
||||
|
||||
type Internal_Column
|
||||
## PRIVATE
|
||||
@ -12,7 +12,7 @@ type Internal_Column
|
||||
- name: The column name.
|
||||
- sql_type: The SQL type of the column.
|
||||
- expression: An expression for applying to the column.
|
||||
Value name:Text sql_type:SQL_Type expression:Expression
|
||||
Value name:Text sql_type:SQL_Type expression:SQL_Expression
|
||||
|
||||
## PRIVATE
|
||||
|
||||
|
@ -1,8 +1,8 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import project.Internal.IR.Expression.Expression
|
||||
import project.Internal.IR.SQL_Expression.SQL_Expression
|
||||
import project.Internal.IR.Nulls_Order.Nulls_Order
|
||||
|
||||
## PRIVATE
|
||||
type Order_Descriptor
|
||||
Value (expression : Expression) (direction : Sort_Direction) (nulls_order : Nothing | Nulls_Order = Nothing) (collation : Nothing | Text = Nothing)
|
||||
Value (expression : SQL_Expression) (direction : Sort_Direction) (nulls_order : Nothing | Nulls_Order = Nothing) (collation : Nothing | Text = Nothing)
|
||||
|
@ -1,7 +1,7 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import project.Internal.IR.Context.Context
|
||||
import project.Internal.IR.Expression.Expression
|
||||
import project.Internal.IR.SQL_Expression.SQL_Expression
|
||||
|
||||
## PRIVATE
|
||||
|
||||
@ -17,7 +17,7 @@ type Query
|
||||
is a pair whose first element is the name of the materialized column
|
||||
and the second element is the expression to compute.
|
||||
- context: The query context, see `Context` for more detail.
|
||||
Select (expressions : Vector (Pair Text Expression)) (context : Context)
|
||||
Select (expressions : Vector (Pair Text SQL_Expression)) (context : Context)
|
||||
|
||||
## PRIVATE
|
||||
|
||||
|
@ -7,7 +7,7 @@ import project.Data.SQL_Type.SQL_Type
|
||||
The internal representation of an SQL expression which can be a column
|
||||
reference, an interpolated constant or an operation that combines other
|
||||
expressions.
|
||||
type Expression
|
||||
type SQL_Expression
|
||||
## PRIVATE
|
||||
|
||||
The internal representation of an SQL expression that gets a value from a
|
||||
@ -43,4 +43,4 @@ type Expression
|
||||
dialect.
|
||||
- expressions: a list of expressions which are arguments to the operation
|
||||
different operations support different amounts of arguments.
|
||||
Operation (kind : Text) (expressions : Vector Expression)
|
||||
Operation (kind : Text) (expressions : Vector SQL_Expression)
|
@ -198,6 +198,9 @@ default_storage_type storage_type = case storage_type of
|
||||
Storage.Integer -> SQL_Type.integer
|
||||
Storage.Decimal -> SQL_Type.double
|
||||
Storage.Boolean -> SQL_Type.boolean
|
||||
Storage.Date -> SQL_Type.date
|
||||
Storage.Time_Of_Day -> SQL_Type.time_of_day
|
||||
Storage.Date_Time -> SQL_Type.date_time
|
||||
## Support for mixed type columns in Table upload is currently very limited,
|
||||
falling back to treating everything as text.
|
||||
Storage.Any -> SQL_Type.text
|
||||
|
@ -7,7 +7,7 @@ import project.Data.SQL
|
||||
import project.Data.SQL_Statement.SQL_Statement
|
||||
import project.Data.SQL_Type.SQL_Type
|
||||
import project.Internal.Base_Generator
|
||||
import project.Internal.IR.Expression.Expression
|
||||
import project.Internal.IR.SQL_Expression.SQL_Expression
|
||||
import project.Internal.IR.Internal_Column.Internal_Column
|
||||
import project.Internal.IR.Order_Descriptor.Order_Descriptor
|
||||
import project.Internal.IR.Nulls_Order.Nulls_Order
|
||||
@ -244,8 +244,8 @@ make_order_descriptor internal_column sort_direction text_ordering =
|
||||
False ->
|
||||
Error.throw (Unsupported_Database_Operation_Error_Data "Case insensitive ordering with custom locale is currently not supported. You may need to materialize the Table to perform this operation.")
|
||||
True ->
|
||||
upper = Expression.Operation "UPPER" [internal_column.expression]
|
||||
folded_expression = Expression.Operation "LOWER" [upper]
|
||||
upper = SQL_Expression.Operation "UPPER" [internal_column.expression]
|
||||
folded_expression = SQL_Expression.Operation "LOWER" [upper]
|
||||
Order_Descriptor.Value folded_expression sort_direction nulls_order=nulls collation=Nothing
|
||||
False ->
|
||||
Order_Descriptor.Value internal_column.expression sort_direction nulls_order=nulls collation=Nothing
|
||||
|
@ -26,6 +26,11 @@ The license file can be found at `licenses/APACHE2.0`.
|
||||
Copyright notices related to this dependency can be found in the directory `commons-io.commons-io-2.11.0`.
|
||||
|
||||
|
||||
'antlr4-runtime', licensed under the The BSD License, is distributed with the Table.
|
||||
The license file can be found at `licenses/BSD-3-Clause`.
|
||||
Copyright notices related to this dependency can be found in the directory `org.antlr.antlr4-runtime-4.10.1`.
|
||||
|
||||
|
||||
'commons-collections4', licensed under the Apache License, Version 2.0, is distributed with the Table.
|
||||
The license information can be found along with the copyright notices.
|
||||
Copyright notices related to this dependency can be found in the directory `org.apache.commons.commons-collections4-4.4`.
|
||||
|
@ -0,0 +1 @@
|
||||
Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
|
@ -400,6 +400,34 @@ type Column
|
||||
% : Column | Any -> Column
|
||||
% self other = run_vectorized_binary_op self '%' (%) other
|
||||
|
||||
## ALIAS Power Columns
|
||||
|
||||
Element-wise raising to the power.
|
||||
|
||||
Arguments:
|
||||
- other: The exponent to raise `self` by. If `other` is a column, the
|
||||
power operation is performed pairwise between corresponding elements
|
||||
of `self` and `other`.
|
||||
|
||||
Returns a column containing the result of raising each element of `self`
|
||||
by `other`.
|
||||
|
||||
> Example
|
||||
Squares the elements of one column.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_div = Examples.decimal_column ^ 2
|
||||
|
||||
> Example
|
||||
Raises each value in a column by the value in another column.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_div = Examples.decimal_column ^ Examples.integer_column
|
||||
^ : Column | Any -> Column
|
||||
^ self other = run_vectorized_binary_op self '^' (^) other
|
||||
|
||||
## ALIAS AND Columns
|
||||
|
||||
Element-wise boolean conjunction.
|
||||
@ -460,7 +488,7 @@ type Column
|
||||
|| self other =
|
||||
run_vectorized_binary_op self "||" (||) other
|
||||
|
||||
## ALIAS NOT Columns
|
||||
## ALIAS NOT
|
||||
|
||||
Boolean negation of each element in self column.
|
||||
|
||||
@ -473,21 +501,91 @@ type Column
|
||||
not : Column
|
||||
not self = run_vectorized_unary_op self "not" .not
|
||||
|
||||
## UNSTABLE
|
||||
## ALIAS IF
|
||||
|
||||
Replaces `True` values with `when_true` and `False` with `when_false`.
|
||||
Only meant for use with boolean columns.
|
||||
|
||||
TODO: Currently `when_true` and `when_false` need to be a single value.
|
||||
In the future the API will also support row-based IIF if they are columns.
|
||||
Arguments:
|
||||
- when_true: value or column when `self` is `True`.
|
||||
- when_false: value or column when `self` is `False`.
|
||||
|
||||
> Example
|
||||
If the value in a column is `True`, replace it with `1`, otherwise `0`.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_if = Examples.bool_column_1.iif 1 0
|
||||
iif : Any -> Any -> Column
|
||||
iif self when_true when_false = case self.storage_type of
|
||||
Storage.Boolean ->
|
||||
s = self.java_column.getStorage
|
||||
ix = self.java_column.getIndex
|
||||
rs = s.iif when_true when_false
|
||||
|
||||
true_val = case when_true of
|
||||
_ : Column -> when_true.java_column.getStorage
|
||||
_ -> when_true
|
||||
|
||||
false_val = case when_false of
|
||||
_ : Column -> when_false.java_column.getStorage
|
||||
_ -> when_false
|
||||
|
||||
rs = s.iif true_val false_val
|
||||
Column.Column_Data (Java_Column.new "Result" ix rs)
|
||||
_ -> Error.throw (Illegal_Argument_Error "`iif` can only be used with boolean columns.")
|
||||
|
||||
## Returns a column of first non-`Nothing` value on each row of `self` and
|
||||
`values` list.
|
||||
|
||||
Arguments:
|
||||
- values: list of columns or values to coalesce with `self`.
|
||||
|
||||
> Example
|
||||
Get the first non-`Nothing` value in two columns.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_coalesce = Examples.decimal_column.coalesce Examples.integer_column
|
||||
coalesce : (Any | Vector Any) -> Column
|
||||
coalesce self values =
|
||||
fallback a b = a.if_nothing b
|
||||
run_vectorized_many_op self "coalesce" fallback values
|
||||
|
||||
## Returns a column of minimum on each row of `self` and `values` list.
|
||||
|
||||
Arguments:
|
||||
- values: list of columns or values to minimum with `self`.
|
||||
|
||||
> Example
|
||||
Get the minimum value in two columns.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_min = Examples.decimal_column.min Examples.integer_column
|
||||
min : (Any | Vector Any) -> Column
|
||||
min self values =
|
||||
fallback a b = if a.is_nothing then b else
|
||||
if b.is_nothing then a else
|
||||
if b < a then b else a
|
||||
run_vectorized_many_op self "minimum" fallback values
|
||||
|
||||
## Returns a column of maximum on each row of `self` and `values` list.
|
||||
|
||||
Arguments:
|
||||
- values: list of columns or values to maximum with `self`.
|
||||
|
||||
> Example
|
||||
Get the maximum value in two columns.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_max = Examples.decimal_column.max Examples.integer_column
|
||||
max : (Any | Vector Any) -> Column
|
||||
max self values =
|
||||
fallback a b = if a.is_nothing then b else
|
||||
if b.is_nothing then a else
|
||||
if b > a then b else a
|
||||
run_vectorized_many_op self "maximum" fallback values
|
||||
|
||||
## Returns a column of booleans, with `True` items at the positions where
|
||||
this column contains a `Nothing`.
|
||||
@ -696,7 +794,11 @@ type Column
|
||||
True ->
|
||||
fallback_fn _ _ =
|
||||
Panic.throw (Illegal_State_Error_Data "Impossible: This is a bug in the Standard.Table library.")
|
||||
run_vectorized_binary_op self op_name fallback_fn vector skip_nulls=False new_name=result_name
|
||||
true_vector = case vector of
|
||||
_ : Array -> Vector.from_polyglot_array vector
|
||||
_ : Vector.Vector -> vector
|
||||
column : Column -> column.to_vector
|
||||
run_vectorized_binary_op self op_name fallback_fn true_vector skip_nulls=False new_name=result_name
|
||||
False ->
|
||||
## We have custom code for the non-vectorized case, because
|
||||
usually a vectorized binary op will apply the fallback
|
||||
@ -705,6 +807,7 @@ type Column
|
||||
against the whole other column, instead of just the
|
||||
corresponding row - so we need to go around a bit.
|
||||
true_vector = case vector of
|
||||
_ : Array -> Vector.from_polyglot_array vector
|
||||
_ : Vector.Vector -> vector
|
||||
## This does no copying, as `Column.to_vector` just returns
|
||||
a view of the storage.
|
||||
@ -881,6 +984,17 @@ type Column
|
||||
if storage.isNa index then Nothing else
|
||||
storage.getItem index
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Returns a column containing rows of this column.
|
||||
|
||||
Arguments:
|
||||
- max_rows: specifies a maximum amount of rows to fetch; if not set, all
|
||||
available rows are fetched.
|
||||
read : (Nothing | Integer) -> Column
|
||||
read self max_rows=Nothing =
|
||||
if max_rows.is_nothing then self else self.slice 0 max_rows
|
||||
|
||||
## Returns a vector containing all the elements in this column.
|
||||
|
||||
> Example
|
||||
@ -996,98 +1110,6 @@ type Column
|
||||
data = ['data', self.to_vector.take (First max_data)]
|
||||
Json.from_pairs [size, name, data] . to_text
|
||||
|
||||
## ALIAS Sum Columns
|
||||
|
||||
Sums the values in this column.
|
||||
|
||||
> Example
|
||||
Sum the values in a column.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_sum = Examples.integer_column.sum
|
||||
sum : Any
|
||||
sum self = self.java_column.aggregate 'sum' (x-> Vector.from_polyglot_array x . reduce (+)) True
|
||||
|
||||
## ALIAS Max Columns
|
||||
|
||||
Computes the maximum element of this column.
|
||||
|
||||
> Example
|
||||
Compute the maximum value of a column.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_max = Examples.integer_column.max
|
||||
max : Any
|
||||
max self =
|
||||
self.java_column.aggregate 'max' (x-> Vector.from_polyglot_array x . reduce Math.max) True
|
||||
|
||||
## ALIAS Min Columns
|
||||
|
||||
Computes the minimum element of this column.
|
||||
|
||||
> Example
|
||||
Compute the minimum value of a column.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_min = Examples.integer_column.min
|
||||
min : Any
|
||||
min self =
|
||||
self.java_column.aggregate 'min' (x-> Vector.from_polyglot_array x . reduce Math.min) True
|
||||
|
||||
## ALIAS Mean Columns
|
||||
|
||||
Computes the mean of non-missing elements of this column.
|
||||
|
||||
> Example
|
||||
Compute the mean value of a column.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_mean = Examples.integer_column.mean
|
||||
mean : Any
|
||||
mean self =
|
||||
vec_mean v = if v.length == 0 then Nothing else
|
||||
(Vector.from_polyglot_array v).reduce (+) / v.length
|
||||
self.java_column.aggregate 'mean' vec_mean True
|
||||
|
||||
## Computes the variance of the sample represented by this column.
|
||||
|
||||
Arguments:
|
||||
- degrees_of_freedom_correction: a correction to account for the
|
||||
missing degrees of freedom in the sample. The default value of `1`
|
||||
computes a sample variance. Setting it to `0` will compute population
|
||||
variance instead.
|
||||
variance self degrees_of_freedom_correction=1 =
|
||||
mean = self.mean
|
||||
shifted = self - mean
|
||||
sq = shifted * shifted
|
||||
sq.sum / (self.length - degrees_of_freedom_correction)
|
||||
|
||||
## Computes the standard deviation of the sample represented by this column.
|
||||
|
||||
Arguments:
|
||||
- degrees_of_freedom_correction: a correction to account for the
|
||||
missing degrees of freedom in the sample. The default value of `1`
|
||||
computes a sample standard deviation. Setting it to `0` will compute
|
||||
population standard deviation instead.
|
||||
standard_deviation self degrees_of_freedom_correction=1 =
|
||||
self.variance degrees_of_freedom_correction . sqrt
|
||||
|
||||
## Computes the coefficient of determination of a given prediction column.
|
||||
|
||||
Arguments:
|
||||
- predictions: the column predicting the values of this column.
|
||||
r_squared self predictions =
|
||||
prediction_diff = self - predictions
|
||||
ss_res = prediction_diff*prediction_diff . sum
|
||||
ss_tot_lin = self - self.mean
|
||||
ss_tot = ss_tot_lin*ss_tot_lin . sum
|
||||
1 - ss_res / ss_tot
|
||||
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Sorts the column according to the specified rules.
|
||||
@ -1181,22 +1203,6 @@ type Column
|
||||
first : Any ! Empty_Error
|
||||
first self = self.at 0 . catch Index_Out_Of_Bounds_Error_Data (_ -> Error.throw Empty_Error)
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Returns the first element in the column, if it exists.
|
||||
|
||||
If the column is empty, this method will return a dataflow error
|
||||
containing an `Empty_Error`.
|
||||
|
||||
> Example
|
||||
Get the first element of a column.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_head = Examples.integer_column.head
|
||||
head : Any ! Empty_Error
|
||||
head self = self.first
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Returns the last element in the column, if it exists.
|
||||
@ -1243,6 +1249,11 @@ type Column
|
||||
duplicate_count : Column
|
||||
duplicate_count self = Column_Data self.java_column.duplicateCount
|
||||
|
||||
## PRIVATE
|
||||
Helper for the expression engine to tell it which functions need a Vector.
|
||||
var_args_functions : Vector
|
||||
var_args_functions = ['is_in', 'coalesce', 'min', 'max']
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
An error for when the column contains no elements.
|
||||
@ -1253,6 +1264,33 @@ type Empty_Error
|
||||
to_display_text : Text
|
||||
to_display_text self = "The column is empty."
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Folds the vectorized operation over the provided column and values. When more
than one value is provided, the result is folded with subsequent values.
|
||||
|
||||
Arguments:
|
||||
- column: The column to execute the operation over.
|
||||
- name: The name of the vectorized operation.
|
||||
- fallback_fn: A function used if the vectorized operation isn't available.
|
||||
- operands: The vector of operands to apply to the function after `column`.
|
||||
- skip_nulls: Specifies if nulls should be skipped. If set to `True`, a null
|
||||
value results in null without passing it to the function. If set to
|
||||
`False`, the null values are passed as any other value and can have custom
|
||||
handling logic.
|
||||
- new_name: The name of the column created as the result of this operation.
|
||||
run_vectorized_many_op : Column -> Text -> (Any -> Any -> Any) -> Vector -> Boolean -> Text -> Column
|
||||
run_vectorized_many_op column name fallback_fn operands skip_nulls=False new_name=(name + "_" + column.name) =
|
||||
case operands of
|
||||
_ : Vector.Vector ->
|
||||
folded = operands.fold column.java_column.getStorage current-> operand->
|
||||
case operand of
|
||||
_ : Column -> current.zip name fallback_fn operand.java_column.getStorage skip_nulls
|
||||
_ -> current.bimap name fallback_fn operand skip_nulls
|
||||
Column.Column_Data (Java_Column.new new_name column.java_column.getIndex folded)
|
||||
_ : Array -> run_vectorized_many_op column name fallback_fn (Vector.from_polyglot_array operands) skip_nulls new_name
|
||||
_ -> run_vectorized_many_op column name fallback_fn [operands] skip_nulls new_name
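As an illustration only, a row-wise maximum of a column against another column
and a constant could be folded through this helper roughly like the sketch
below; the operation name, operands and fallback lambda are invented for the
example and are not taken from the real call sites.

    # Hypothetical call: "A" and "B" are assumed column names.
    example_row_max table =
        col_a = table.at "A"
        col_b = table.at "B"
        run_vectorized_many_op col_a "max" (x-> y-> Math.max x y) [col_b, 0] skip_nulls=True new_name="max_A"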
## PRIVATE
|
||||
|
||||
Executes a vectorized binary operation over the provided column.
|
||||
@ -1267,7 +1305,7 @@ type Empty_Error
|
||||
`False`, the null values are passed as any other value and can have custom
|
||||
handling logic.
|
||||
- new_name: The name of the column created as the result of this operation.
|
||||
run_vectorized_binary_op : Column -> Text -> (Any -> Any) -> Any -> Boolean -> Text -> Column
|
||||
run_vectorized_binary_op : Column -> Text -> (Any -> Any -> Any) -> Any -> Boolean -> Text -> Column
|
||||
run_vectorized_binary_op column name fallback_fn operand skip_nulls=True new_name="Result" = case operand of
|
||||
Column.Column_Data col2 ->
|
||||
s1 = column.java_column.getStorage
|
||||
|
@ -0,0 +1,49 @@
|
||||
from Standard.Base import all
|
||||
|
||||
polyglot java import org.enso.table.expressions.ExpressionVisitorImpl
|
||||
polyglot java import java.lang.IllegalArgumentException
|
||||
polyglot java import java.lang.UnsupportedOperationException
|
||||
|
||||
type Expression
|
||||
## Evaluates an expression and returns the result.
|
||||
|
||||
Arguments:
|
||||
- expression: the expression to evaluate
|
||||
- get_column: a function that takes a column name and returns the
|
||||
associated Column object.
|
||||
- make_constant: a function that takes an object and returns a
|
||||
constant Column object.
|
||||
- module_name: the name of the Column module that the expression is
|
||||
being evaluated against.
|
||||
- type_name: the name of the Column type that the expression is being
|
||||
evaluated against.
|
||||
- var_args_functions: a Vector of function names which take a single
|
||||
Vector argument but which should be exposed with variable parameters.
|
||||
evaluate : Text -> (Text -> Any) -> (Any -> Any) -> Text -> Text -> Vector Text -> Any
|
||||
evaluate expression get_column make_constant module_name type_name var_args_functions =
|
||||
handle_parse_error = Panic.catch_java ExpressionVisitorImpl.SyntaxErrorException handler=(cause-> Error.throw (Expression_Error.Syntax_Error cause.getMessage cause.getLine cause.getColumn))
|
||||
handle_unsupported = handle_java_error UnsupportedOperationException Expression_Error.Unsupported_Operation
|
||||
handle_arguments = handle_java_error IllegalArgumentException Expression_Error.Argument_Mismatch
|
||||
|
||||
handle_parse_error <| handle_unsupported <| handle_arguments <|
|
||||
ExpressionVisitorImpl.evaluate expression get_column make_constant module_name type_name var_args_functions.to_array
|
||||
|
||||
type Expression_Error
|
||||
## The expression supplied could not be parsed due to a syntax error.
|
||||
Syntax_Error message:Text line:Integer column:Integer
|
||||
|
||||
## Expression error when a function could not be found on the target type.
|
||||
Unsupported_Operation name:Text
|
||||
|
||||
## Expression error when the number of arguments for a function is incorrect.
|
||||
Argument_Mismatch message:Text
|
||||
|
||||
to_display_text : Text
|
||||
to_display_text self = case self of
|
||||
Expression_Error.Syntax_Error _ _ _ -> "Expression.Syntax_Error: " + self.message + " (line " + self.line.to_text + ", column " + self.column.to_text + ")."
|
||||
Expression_Error.Unsupported_Operation _ -> "Expression.Unsupported: " + self.name + " is not a supported method."
|
||||
Expression_Error.Argument_Mismatch _ -> "Expression.Argument_Mismatch: " + self.message
|
||||
|
||||
## PRIVATE
|
||||
handle_java_error java_type enso_constructor =
|
||||
Panic.catch_java java_type handler=(cause-> Error.throw (enso_constructor cause.getMessage))
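A hedged sketch of how these errors surface, assuming the default
`Report_Warning` problem behaviour used by `Table.set`, which falls back to
the unchanged table and reports the failure as a problem.

    # "Price" is an assumed column name; "2 +* [Price]" is deliberately malformed.
    unchanged = table.set "Derived" "2 +* [Price]"
    # The reported problem is an `Expression_Error.Syntax_Error`.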
@ -24,6 +24,8 @@ import project.Internal.Table_Helpers
|
||||
import project.Internal.Aggregate_Column_Helper
|
||||
import project.Internal.Parse_Values_Helper
|
||||
import project.Internal.Problem_Builder.Problem_Builder
|
||||
import project.Data.Expression.Expression
|
||||
import project.Data.Expression.Expression_Error
|
||||
|
||||
from project.Data.Column import get_item_string
|
||||
from project.Data.Column_Type_Selection import Column_Type_Selection, Auto
|
||||
@ -38,6 +40,7 @@ polyglot java import org.enso.table.data.table.Table as Java_Table
|
||||
polyglot java import org.enso.table.data.table.Column as Java_Column
|
||||
polyglot java import org.enso.table.operations.OrderBuilder
|
||||
polyglot java import org.enso.table.data.mask.OrderMask
|
||||
polyglot java import java.util.UUID
|
||||
|
||||
## Represents a column-oriented table data structure.
|
||||
type Table
|
||||
@ -865,6 +868,13 @@ type Table
|
||||
on_problems.handle_errors fallback=self.with_no_rows <|
|
||||
mask (make_filter_column column filter)
|
||||
_ : Function -> mask (column.map filter)
|
||||
_ : Text ->
|
||||
table_at = self.at column
|
||||
if table_at.is_error.not then self.filter table_at filter on_problems else
|
||||
expression = self.evaluate column
|
||||
if expression.is_error.not then self.filter expression filter on_problems else
|
||||
pick_error = expression.catch Expression_Error.Syntax_Error (_->table_at)
|
||||
on_problems.handle_errors pick_error fallback=self
|
||||
_ -> case on_problems.handle_errors (self.at column) fallback=Nothing of
|
||||
Nothing -> self
|
||||
resolved_column -> self.filter resolved_column filter on_problems
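A hedged example of the new `Text` dispatch: the argument is first tried as a
column name and only then evaluated as an expression. The column name below is
invented and the call assumes the default filter condition keeps rows where
the evaluated column is `True`.

    # Assumes the default filter condition (keep rows evaluating to True).
    example_low_stock = table.filter "[total_stock] < 10"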
@ -922,7 +932,10 @@ type Table
|
||||
|
||||
Arguments:
|
||||
- name: The name of the column to set the value of.
|
||||
- column: The new value for the column called `name`.
|
||||
- column: The new value for the column; either a `Column` or a `Text`
expression.
|
||||
- on_problems: Specifies how to handle if a problem occurs, raising as a
|
||||
warning by default.
|
||||
|
||||
If a column with the given name already exists, it will be replaced.
|
||||
Otherwise a new column is added.
|
||||
@ -937,13 +950,24 @@ type Table
|
||||
table = Examples.inventory_table
|
||||
double_inventory = table.at "total_stock" * 2
|
||||
table.set "total_stock" double_inventory
|
||||
set : Text -> Column | Vector.Vector -> Table
|
||||
set self name column = case column of
|
||||
_ : Vector.Vector ->
|
||||
self.set name (Column.from_vector name column)
|
||||
Column.Column_Data _ ->
|
||||
table.set "total_stock_expr" "2 * [total_stock]"
|
||||
set : Text -> Column | Vector.Vector | Text -> Problem_Behavior -> Table
|
||||
set self name column on_problems=Report_Warning = case column of
|
||||
_ : Text ->
|
||||
expression = self.evaluate column
|
||||
if expression.is_error.not then self.set name expression on_problems else
|
||||
on_problems.handle_errors expression fallback=self
|
||||
_ : Vector.Vector -> self.set name (Column.from_vector name column)
|
||||
_ : Column ->
|
||||
Table.Table_Data (self.java_table.addOrReplaceColumn (column.rename name . java_column))
|
||||
|
||||
## PRIVATE
|
||||
evaluate : Text -> Column
|
||||
evaluate self expression =
|
||||
get_column name = self.at name
|
||||
make_constant value = Column.from_vector (UUID.randomUUID.to_text) (Vector.new self.row_count _->value)
|
||||
Expression.evaluate expression get_column make_constant "Standard.Table.Data.Column" "Column" Column.var_args_functions
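Some illustrative expression strings this helper can evaluate; the table and
column names are invented, while the date literal and the variable-argument
`min` come from the expression syntax added in this change.

    # "Price", "Discounted_Price" and "Order_Date" are assumed column names.
    example_tax    = table.set "Price_With_Tax" "[Price] * 1.2"
    example_lowest = table.set "Lowest_Price" "min([Price], [Discounted_Price], 0)"
    example_recent = table.set "Recent" "[Order_Date] >= #2022-01-01#"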
## Returns the vector of columns contained in this table.
|
||||
|
||||
> Examples
|
||||
|
@ -192,10 +192,13 @@ type Table_Column_Helper
|
||||
resolve_column_helper : (Integer | Text | Column) -> Problem_Builder -> a | Nothing
|
||||
resolve_column_helper self selector problem_builder = case selector of
|
||||
_ : Text ->
|
||||
matched_columns = Matching.match_criteria_callback Text_Matcher.Case_Sensitive self.internal_columns [selector] reorder=True name_mapper=(_.name) problem_callback=problem_builder.report_missing_input_columns
|
||||
matched_columns = self.internal_columns.filter column->(column.name==selector)
|
||||
if matched_columns.length == 1 then matched_columns.first else
|
||||
if matched_columns.length == 0 then Nothing else
|
||||
Panic.throw (Illegal_State_Error_Data "A single exact match should never match more than one column. Perhaps the table breaks the invariant of unique column names?")
|
||||
if matched_columns.length != 0 then Panic.throw (Illegal_State_Error_Data "A single exact match should never match more than one column. Perhaps the table breaks the invariant of unique column names?") else
|
||||
expression = (self.table.evaluate selector).catch Any _->Nothing
|
||||
if expression != Nothing then expression else
|
||||
problem_builder.report_missing_input_columns [selector]
|
||||
Nothing
|
||||
_ : Integer -> case is_index_valid self.internal_columns.length selector of
|
||||
True -> self.internal_columns.at selector
|
||||
False ->
|
||||
|
@ -97,7 +97,7 @@ len_list list =
|
||||
|
||||
Arguments:
|
||||
- act: The action to perform `count` number of times.
|
||||
times : Integer-> List Any
|
||||
times : Integer -> (Integer -> Any) -> List Any
|
||||
times count act =
|
||||
go = results -> number -> if number == 0 then results else
|
||||
@Tail_Call go (Cons (act number) results) number-1
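A hedged sketch of the corrected signature in use, assuming the accumulator
starts from `Nil` as the recursion above suggests.

    # Builds the list by applying the action to 3, 2, 1.
    example_times = times 3 (i-> i*10)
    # example_times is Cons 10 (Cons 20 (Cons 30 Nil))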
@ -5,6 +5,7 @@ addSbtPlugin("org.scala-js" % "sbt-scalajs" % "1.10.1")
|
||||
addSbtPlugin("com.typesafe.sbt" % "sbt-license-report" % "1.2.0")
|
||||
addSbtPlugin("com.lightbend.sbt" % "sbt-java-formatter" % "0.7.0")
|
||||
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.6")
|
||||
addSbtPlugin("com.simplytyped" % "sbt-antlr4" % "0.8.3")
|
||||
|
||||
libraryDependencies += "io.circe" %% "circe-yaml" % "0.14.1"
|
||||
libraryDependencies += "commons-io" % "commons-io" % "2.11.0"
|
||||
|
std-bits/table/src/main/antlr4/Expression.g4 (new file, 126 lines)
@ -0,0 +1,126 @@
|
||||
grammar Expression;
|
||||
prog: expr EOF ;
|
||||
|
||||
expr: expr op=POWER expr # Power
|
||||
| expr op=(MULTIPLY|DIVIDE|MODULO) expr # MultDivMod
|
||||
| expr op=(ADD|MINUS) expr # AddSub
|
||||
| expr op=(EQUALS|NOT_EQUALS|LESS_THAN_OR_EQUAL|GREATER_THAN_OR_EQUAL|LESS_THAN|GREATER_THAN) expr # Compare
|
||||
| expr (IS_NULL|IS_EMPTY|IS_NOT_EMPTY|IS_NOT_NULL) # IsNull
|
||||
| expr (LIKE|NOT_LIKE) expr # Like
|
||||
| expr (IN|NOT_IN) '(' expr (',' expr)* ')' # In
|
||||
| expr (NOT_BETWEEN | BETWEEN) expr AND expr # Between
|
||||
| UNARY_NOT expr # UnaryNot
|
||||
| expr op=(AND | '&&') expr # And
|
||||
| expr op=(OR | '||') expr # Or
|
||||
| IF expr THEN expr ELSE expr END? # If
|
||||
| IDENTIFIER '(' (expr (',' expr)*)? ')' # Function // This allows for functions of 0 or more arguments within brackets (e.g. PI(), SIN(1), MOD(3,4) etc.)
|
||||
| '(' expr ')' # Paren
|
||||
| COLUMN_NAME # Column
|
||||
| MINUS expr # UnaryMinus
|
||||
| value # Literal
|
||||
;
|
||||
|
||||
POWER : '^';
|
||||
MULTIPLY : '*';
|
||||
DIVIDE : '/';
|
||||
MODULO : '%';
|
||||
ADD : '+';
|
||||
MINUS : '-';
|
||||
EQUALS : '==' | '=';
|
||||
NOT_EQUALS : '!=' | '<>';
|
||||
LESS_THAN_OR_EQUAL : '<=';
|
||||
GREATER_THAN_OR_EQUAL : '>=';
|
||||
LESS_THAN : '<';
|
||||
GREATER_THAN : '>';
|
||||
|
||||
WHITESPACE : [ \t\r\n]+ -> skip;
|
||||
|
||||
fragment A:[aA];
|
||||
fragment B:[bB];
|
||||
fragment C:[cC];
|
||||
fragment D:[dD];
|
||||
fragment E:[eE];
|
||||
fragment F:[fF];
|
||||
fragment G:[gG];
|
||||
fragment H:[hH];
|
||||
fragment I:[iI];
|
||||
fragment J:[jJ];
|
||||
fragment K:[kK];
|
||||
fragment L:[lL];
|
||||
fragment M:[mM];
|
||||
fragment N:[nN];
|
||||
fragment O:[oO];
|
||||
fragment P:[pP];
|
||||
fragment Q:[qQ];
|
||||
fragment R:[rR];
|
||||
fragment S:[sS];
|
||||
fragment T:[tT];
|
||||
fragment U:[uU];
|
||||
fragment V:[vV];
|
||||
fragment W:[wW];
|
||||
fragment X:[xX];
|
||||
fragment Y:[yY];
|
||||
fragment Z:[zZ];
|
||||
fragment LETTER : [A-Za-z];
|
||||
fragment DIGIT : [0-9];
|
||||
fragment HEX : [0-9a-fA-F];
|
||||
fragment IS : I S;
|
||||
fragment EMPTY : E M P T Y;
|
||||
|
||||
AND : A N D ;
|
||||
OR : O R ;
|
||||
NULL : N U L L;
|
||||
NOTHING : N O T H I N G;
|
||||
IS_NULL: IS ' ' (NOTHING | NULL);
|
||||
IS_NOT_NULL : IS ' ' N O T ' ' (NOTHING | NULL);
|
||||
IS_EMPTY: IS ' ' EMPTY;
|
||||
IS_NOT_EMPTY : IS ' ' N O T ' ' EMPTY;
|
||||
LIKE : L I K E;
|
||||
NOT_LIKE : N O T ' ' LIKE;
|
||||
IN : I N;
|
||||
NOT_IN : N O T ' ' IN;
|
||||
BETWEEN : B E T W E E N;
|
||||
NOT_BETWEEN : N O T ' ' BETWEEN;
|
||||
TRUE : T R U E;
|
||||
FALSE : F A L S E;
|
||||
IF : I F;
|
||||
THEN : T H E N;
|
||||
ELSE : E L S E;
|
||||
UNARY_NOT : (N O T) | '!';
|
||||
END : E N D IF?;
|
||||
|
||||
IDENTIFIER : LETTER (LETTER|DIGIT|'_')*;
|
||||
|
||||
EXCEL_STRING : '"' ('""'|~'"')* '"';
|
||||
|
||||
PYTHON_STRING : '\'' (ESC|~['])* '\'';
|
||||
fragment ESC : '\\' [abtnfrv"'\\] | '\\u' HEX HEX HEX HEX | '\\U' HEX HEX HEX HEX HEX HEX HEX HEX | '\\x' HEX HEX;
|
||||
|
||||
fragment YEAR : DIGIT DIGIT DIGIT DIGIT;
|
||||
fragment DATE_PART : '-' DIGIT DIGIT;
|
||||
fragment HOUR : DIGIT DIGIT;
|
||||
fragment TIME_PART : ':' DIGIT DIGIT;
|
||||
fragment NANO_PART : '.' DIGIT DIGIT? DIGIT? DIGIT? DIGIT? DIGIT? DIGIT? DIGIT? DIGIT?;
|
||||
fragment UTCOFFSET : ('Z' | ('+'|'-') HOUR TIME_PART?);
|
||||
fragment TIMEZONE : '[' (~']')+ ']';
|
||||
fragment INTEGER : '0' | [1-9] (DIGIT | '_')* ;
|
||||
fragment DECIMAL : '.' (DIGIT | '_')+;
|
||||
|
||||
DATE : YEAR DATE_PART DATE_PART ;
|
||||
TIME : HOUR TIME_PART (TIME_PART NANO_PART?)? ;
|
||||
DATE_TIME : YEAR DATE_PART DATE_PART ('T' | ' ') HOUR TIME_PART (TIME_PART NANO_PART?)? UTCOFFSET? TIMEZONE? ;
|
||||
|
||||
NUMBER : INTEGER DECIMAL? ;
|
||||
|
||||
value
|
||||
: (NULL | NOTHING) # nullOrNothing
|
||||
| (TRUE | FALSE) # boolean
|
||||
| '#' text=DATE '#' # date
|
||||
| '#' text=TIME '#' # time
|
||||
| '#' text=DATE_TIME '#' # datetime
|
||||
| NUMBER # number
|
||||
| EXCEL_STRING # excelString
|
||||
| PYTHON_STRING # pythonString
|
||||
;
|
||||
|
||||
COLUMN_NAME : '[' (']]'|~']')* ']';
@ -1,28 +0,0 @@
|
||||
package org.enso.table.data.column.operation.aggregate;
|
||||
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
/**
|
||||
* Represents a fold-like operation on a storage. An aggregator is usually created for a given
|
||||
* storage, then {@link #nextGroup(IntStream)} is repeatedly called and the aggregator is
|
||||
* responsible for collecting the results of such calls. After that, {@link #seal()} is called to
|
||||
* obtain a storage containing all the results.
|
||||
*/
|
||||
public abstract class Aggregator {
|
||||
/**
|
||||
* Requests the aggregator to append the result of aggregating the values at the specified
|
||||
* positions.
|
||||
*
|
||||
* @param positions the positions to aggregate in this round.
|
||||
*/
|
||||
public abstract void nextGroup(IntStream positions);
|
||||
|
||||
/**
|
||||
* Returns the results of all previous {@link #nextGroup(IntStream)} calls.
|
||||
*
|
||||
* @return the storage containing all aggregation results.
|
||||
*/
|
||||
public abstract Storage<?> seal();
|
||||
}
|
@ -1,33 +0,0 @@
|
||||
package org.enso.table.data.column.operation.aggregate;
|
||||
|
||||
import org.enso.table.data.column.storage.LongStorage;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
/** Aggregates a storage by counting the non-missing values in each group. */
|
||||
public class CountAggregator extends Aggregator {
|
||||
private final Storage<?> storage;
|
||||
private final long[] counts;
|
||||
private int position = 0;
|
||||
|
||||
/**
|
||||
* @param storage the storage used as data source
|
||||
* @param resultSize the exact number of times {@link Aggregator#nextGroup(IntStream)} will be
|
||||
* called.
|
||||
*/
|
||||
public CountAggregator(Storage<?> storage, int resultSize) {
|
||||
this.storage = storage;
|
||||
this.counts = new long[resultSize];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void nextGroup(IntStream positions) {
|
||||
counts[position++] = positions.filter(i -> !storage.isNa(i)).count();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage<Long> seal() {
|
||||
return new LongStorage(counts);
|
||||
}
|
||||
}
|
@ -1,59 +0,0 @@
|
||||
package org.enso.table.data.column.operation.aggregate;
|
||||
|
||||
import org.enso.base.polyglot.Polyglot_Utils;
|
||||
import org.enso.table.data.column.builder.object.InferredBuilder;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
import org.graalvm.polyglot.Value;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/** Aggregates the storage using a provided {@link Function}. */
|
||||
public class FunctionAggregator extends Aggregator {
|
||||
private final Function<List<Object>, Value> aggregateFunction;
|
||||
private final boolean skipNa;
|
||||
private final Storage<?> storage;
|
||||
private final InferredBuilder builder;
|
||||
|
||||
/**
|
||||
* @param aggregateFunction the function used to obtain aggregation of a group
|
||||
* @param storage the storage serving as data source
|
||||
* @param skipNa whether missing values should be passed to the function
|
||||
* @param resultSize the number of times {@link Aggregator#nextGroup(IntStream)} will be called
|
||||
*/
|
||||
public FunctionAggregator(
|
||||
Function<List<Object>, Value> aggregateFunction,
|
||||
Storage<?> storage,
|
||||
boolean skipNa,
|
||||
int resultSize) {
|
||||
this.aggregateFunction = aggregateFunction;
|
||||
this.storage = storage;
|
||||
this.skipNa = skipNa;
|
||||
this.builder = new InferredBuilder(resultSize);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void nextGroup(IntStream positions) {
|
||||
List<Object> items = getItems(positions);
|
||||
Value result = aggregateFunction.apply(items);
|
||||
Object converted = Polyglot_Utils.convertPolyglotValue(result);
|
||||
builder.appendNoGrow(converted);
|
||||
}
|
||||
|
||||
private List<Object> getItems(IntStream positions) {
|
||||
Stream<Object> items = positions.mapToObj(storage::getItemBoxed);
|
||||
if (skipNa) {
|
||||
items = items.filter(Objects::nonNull);
|
||||
}
|
||||
return items.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage<?> seal() {
|
||||
return builder.seal();
|
||||
}
|
||||
}
|
@ -1,59 +0,0 @@
|
||||
package org.enso.table.data.column.operation.aggregate.numeric;
|
||||
|
||||
import org.enso.table.data.column.operation.aggregate.Aggregator;
|
||||
import org.enso.table.data.column.storage.LongStorage;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.stream.IntStream;
|
||||
import java.util.stream.LongStream;
|
||||
|
||||
/** An aggregator consuming a {@link LongStorage} and returning a {@link LongStorage} */
|
||||
public abstract class LongToLongAggregator extends Aggregator {
|
||||
private final LongStorage storage;
|
||||
private final long[] items;
|
||||
private final BitSet missing;
|
||||
private int position = 0;
|
||||
|
||||
/**
|
||||
* @param storage the data source
|
||||
* @param resultSize the number of times {@link Aggregator#nextGroup(IntStream)} will be called
|
||||
*/
|
||||
public LongToLongAggregator(LongStorage storage, int resultSize) {
|
||||
this.storage = storage;
|
||||
this.items = new long[resultSize];
|
||||
this.missing = new BitSet();
|
||||
}
|
||||
|
||||
/** Used by subclasses to return a missing value from a given group. */
|
||||
protected void submitMissing() {
|
||||
missing.set(position++);
|
||||
}
|
||||
|
||||
/**
|
||||
* Used by subclasses to return a value from a given group.
|
||||
*
|
||||
* @param value the return value of a group
|
||||
*/
|
||||
protected void submit(long value) {
|
||||
items[position++] = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs the aggregation on a particular set of values.
|
||||
*
|
||||
* @param items the values contained in the current group
|
||||
*/
|
||||
protected abstract void runGroup(LongStream items);
|
||||
|
||||
@Override
|
||||
public void nextGroup(IntStream positions) {
|
||||
LongStream items = positions.filter(x -> !storage.isNa(x)).mapToLong(storage::getItem);
|
||||
runGroup(items);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage<Long> seal() {
|
||||
return new LongStorage(items, items.length, missing);
|
||||
}
|
||||
}
|
@ -1,78 +0,0 @@
|
||||
package org.enso.table.data.column.operation.aggregate.numeric;
|
||||
|
||||
import org.enso.table.data.column.operation.aggregate.Aggregator;
|
||||
import org.enso.table.data.column.storage.DoubleStorage;
|
||||
import org.enso.table.data.column.storage.NumericStorage;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.OptionalDouble;
|
||||
import java.util.stream.DoubleStream;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
/**
|
||||
* An aggregator sourcing data from any {@link NumericStorage} and returning a {@link
|
||||
* DoubleStorage}.
|
||||
*/
|
||||
public abstract class NumericAggregator extends Aggregator {
|
||||
private final NumericStorage<?> storage;
|
||||
private final long[] data;
|
||||
private final BitSet missing;
|
||||
private int position = 0;
|
||||
|
||||
/**
|
||||
* @param storage the data source
|
||||
* @param resultSize the number of times {@link Aggregator#nextGroup(IntStream)} will be called
|
||||
*/
|
||||
public NumericAggregator(NumericStorage<?> storage, int resultSize) {
|
||||
this.storage = storage;
|
||||
this.data = new long[resultSize];
|
||||
this.missing = new BitSet();
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs the aggregation on a particular set of values.
|
||||
*
|
||||
* @param elements the values contained in the current group
|
||||
*/
|
||||
protected abstract void runGroup(DoubleStream elements);
|
||||
|
||||
/**
|
||||
* Used by subclasses to return a value from a given group.
|
||||
*
|
||||
* @param value the return value of a group
|
||||
*/
|
||||
protected void submit(double value) {
|
||||
data[position++] = Double.doubleToRawLongBits(value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Used by subclasses to return a value from a given group.
|
||||
*
|
||||
* @param value the return value of a group
|
||||
*/
|
||||
protected void submit(OptionalDouble value) {
|
||||
if (value.isPresent()) {
|
||||
submit(value.getAsDouble());
|
||||
} else {
|
||||
submitMissing();
|
||||
}
|
||||
}
|
||||
|
||||
/** Used by subclasses to return a missing value from a given group. */
|
||||
protected void submitMissing() {
|
||||
missing.set(position++);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void nextGroup(IntStream positions) {
|
||||
DoubleStream elements =
|
||||
positions.filter(i -> !storage.isNa(i)).mapToDouble(storage::getItemDouble);
|
||||
runGroup(elements);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage<Double> seal() {
|
||||
return new DoubleStorage(data, data.length, missing);
|
||||
}
|
||||
}
|
@ -11,11 +11,11 @@ import java.util.BitSet;
|
||||
|
||||
/** An operation expecting a numeric argument and returning a boolean. */
|
||||
public abstract class LongNumericOp extends MapOperation<Long, LongStorage> {
|
||||
private final boolean alwaysCast;
|
||||
private final boolean alwaysCastToDouble;
|
||||
|
||||
public LongNumericOp(String name, boolean alwaysCast) {
|
||||
public LongNumericOp(String name, boolean alwaysCastToDouble) {
|
||||
super(name);
|
||||
this.alwaysCast = true;
|
||||
this.alwaysCastToDouble = alwaysCastToDouble;
|
||||
}
|
||||
|
||||
public LongNumericOp(String name) {
|
||||
@ -28,8 +28,7 @@ public abstract class LongNumericOp extends MapOperation<Long, LongStorage> {
|
||||
|
||||
@Override
|
||||
public NumericStorage<?> runMap(LongStorage storage, Object arg) {
|
||||
if (arg instanceof Long && !alwaysCast) {
|
||||
long x = (Long) arg;
|
||||
if (!alwaysCastToDouble && arg instanceof Long x) {
|
||||
long[] newVals = new long[storage.size()];
|
||||
for (int i = 0; i < storage.size(); i++) {
|
||||
if (!storage.isNa(i)) {
|
||||
@ -57,12 +56,16 @@ public abstract class LongNumericOp extends MapOperation<Long, LongStorage> {
|
||||
BitSet newMissing = new BitSet();
|
||||
for (int i = 0; i < storage.size(); i++) {
|
||||
if (!storage.isNa(i) && i < v.size() && !v.isNa(i)) {
|
||||
out[i] = doLong(storage.getItem(i), v.getItem(i));
|
||||
out[i] = alwaysCastToDouble
|
||||
? Double.doubleToRawLongBits(doDouble(storage.getItem(i), v.getItem(i)))
|
||||
: doLong(storage.getItem(i), v.getItem(i));
|
||||
} else {
|
||||
newMissing.set(i);
|
||||
}
|
||||
}
|
||||
return new LongStorage(out, storage.size(), newMissing);
|
||||
return alwaysCastToDouble
|
||||
? new DoubleStorage(out, storage.size(), newMissing)
|
||||
: new LongStorage(out, storage.size(), newMissing);
|
||||
} else if (arg instanceof DoubleStorage v) {
|
||||
long[] out = new long[storage.size()];
|
||||
BitSet newMissing = new BitSet();
|
||||
|
@ -2,6 +2,8 @@ package org.enso.table.data.column.storage;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.List;
|
||||
import java.util.function.IntFunction;
|
||||
|
||||
import org.enso.base.polyglot.Polyglot_Utils;
|
||||
import org.enso.table.data.column.builder.object.InferredBuilder;
|
||||
import org.enso.table.data.column.operation.map.MapOpStorage;
|
||||
@ -168,21 +170,29 @@ public final class BoolStorage extends Storage<Boolean> {
|
||||
}
|
||||
|
||||
public Storage<?> iif(Value when_true, Value when_false) {
|
||||
Object on_true = Polyglot_Utils.convertPolyglotValue(when_true);
|
||||
Object on_false = Polyglot_Utils.convertPolyglotValue(when_false);
|
||||
var on_true = makeRowProvider(when_true);
|
||||
var on_false = makeRowProvider(when_false);
|
||||
InferredBuilder builder = new InferredBuilder(size);
|
||||
for (int i = 0; i < size; i++) {
|
||||
if (isMissing.get(i)) {
|
||||
builder.append(null);
|
||||
} else if (getItem(i)) {
|
||||
builder.append(on_true);
|
||||
builder.append(on_true.apply(i));
|
||||
} else {
|
||||
builder.append(on_false);
|
||||
builder.append(on_false.apply(i));
|
||||
}
|
||||
}
|
||||
return builder.seal();
|
||||
}
|
||||
|
||||
private static IntFunction<Object> makeRowProvider(Value value) {
|
||||
if (value.isHostObject() && value.asHostObject() instanceof Storage<?> s) {
|
||||
return i->(Object)s.getItemBoxed(i);
|
||||
}
|
||||
var converted = Polyglot_Utils.convertPolyglotValue(value);
|
||||
return i->converted;
|
||||
}
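The row provider above is what lets `iif` accept whole columns for its
branches. At the Enso level this enables calls along the lines of the hedged
sketch below; the column names are invented.

    # "Is_Discounted", "Sale_Price" and "Full_Price" are assumed column names.
    example_iif = (table.at "Is_Discounted") . iif (table.at "Sale_Price") (table.at "Full_Price")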
private static MapOpStorage<Boolean, BoolStorage> buildOps() {
|
||||
MapOpStorage<Boolean, BoolStorage> ops = new MapOpStorage<>();
|
||||
ops.add(
|
||||
|
@ -1,12 +1,9 @@
|
||||
package org.enso.table.data.column.storage;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import org.enso.base.polyglot.NumericConverter;
|
||||
import org.enso.table.data.column.builder.object.NumericBuilder;
|
||||
import org.enso.table.data.column.operation.map.MapOpStorage;
|
||||
import org.enso.table.data.column.operation.map.SpecializedIsInOp;
|
||||
import org.enso.table.data.column.operation.map.UnaryMapOperation;
|
||||
import org.enso.table.data.column.operation.map.numeric.DoubleBooleanOp;
|
||||
import org.enso.table.data.column.operation.map.numeric.DoubleIsInOp;
|
||||
@ -209,6 +206,13 @@ public final class DoubleStorage extends NumericStorage<Double> {
|
||||
return a % b;
|
||||
}
|
||||
})
|
||||
.add(
|
||||
new DoubleNumericOp(Maps.POWER) {
|
||||
@Override
|
||||
protected double doDouble(double a, double b) {
|
||||
return Math.pow(a, b);
|
||||
}
|
||||
})
|
||||
.add(
|
||||
new DoubleBooleanOp(Maps.LT) {
|
||||
@Override
|
||||
|
@ -1,16 +1,9 @@
|
||||
package org.enso.table.data.column.storage;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.OptionalLong;
|
||||
import java.util.stream.LongStream;
|
||||
import org.enso.base.polyglot.NumericConverter;
|
||||
import org.enso.table.data.column.builder.object.NumericBuilder;
|
||||
import org.enso.table.data.column.operation.aggregate.Aggregator;
|
||||
import org.enso.table.data.column.operation.aggregate.numeric.LongToLongAggregator;
|
||||
import org.enso.table.data.column.operation.map.MapOpStorage;
|
||||
import org.enso.table.data.column.operation.map.SpecializedIsInOp;
|
||||
import org.enso.table.data.column.operation.map.UnaryMapOperation;
|
||||
import org.enso.table.data.column.operation.map.numeric.LongBooleanOp;
|
||||
import org.enso.table.data.column.operation.map.numeric.LongIsInOp;
|
||||
@ -43,17 +36,13 @@ public final class LongStorage extends NumericStorage<Long> {
|
||||
this(data, data.length, new BitSet());
|
||||
}
|
||||
|
||||
/**
|
||||
* @inheritDoc
|
||||
*/
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public int size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @inheritDoc
|
||||
*/
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public int countMissing() {
|
||||
return isMissing.cardinality();
|
||||
@ -77,17 +66,13 @@ public final class LongStorage extends NumericStorage<Long> {
|
||||
return isMissing.get(idx) ? null : data[idx];
|
||||
}
|
||||
|
||||
/**
|
||||
* @inheritDoc
|
||||
*/
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public int getType() {
|
||||
return Type.LONG;
|
||||
}
|
||||
|
||||
/**
|
||||
* @inheritDoc
|
||||
*/
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public boolean isNa(long idx) {
|
||||
return isMissing.get((int) idx);
|
||||
@ -108,46 +93,6 @@ public final class LongStorage extends NumericStorage<Long> {
|
||||
return ops.runZip(name, this, argument);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Aggregator getVectorizedAggregator(String name, int resultSize) {
|
||||
return switch (name) {
|
||||
case Aggregators.SUM -> new LongToLongAggregator(this, resultSize) {
|
||||
@Override
|
||||
protected void runGroup(LongStream items) {
|
||||
long[] elements = items.toArray();
|
||||
if (elements.length == 0) {
|
||||
submitMissing();
|
||||
} else {
|
||||
submit(LongStream.of(elements).sum());
|
||||
}
|
||||
}
|
||||
};
|
||||
case Aggregators.MAX -> new LongToLongAggregator(this, resultSize) {
|
||||
@Override
|
||||
protected void runGroup(LongStream items) {
|
||||
OptionalLong r = items.max();
|
||||
if (r.isPresent()) {
|
||||
submit(r.getAsLong());
|
||||
} else {
|
||||
submitMissing();
|
||||
}
|
||||
}
|
||||
};
|
||||
case Aggregators.MIN -> new LongToLongAggregator(this, resultSize) {
|
||||
@Override
|
||||
protected void runGroup(LongStream items) {
|
||||
OptionalLong r = items.min();
|
||||
if (r.isPresent()) {
|
||||
submit(r.getAsLong());
|
||||
} else {
|
||||
submitMissing();
|
||||
}
|
||||
}
|
||||
};
|
||||
default -> super.getVectorizedAggregator(name, resultSize);
|
||||
};
|
||||
}
|
||||
|
||||
private Storage<?> fillMissingDouble(double arg) {
|
||||
final var builder = NumericBuilder.createDoubleBuilder(size());
|
||||
long rawArg = Double.doubleToRawLongBits(arg);
|
||||
@ -291,6 +236,19 @@ public final class LongStorage extends NumericStorage<Long> {
|
||||
return in % arg;
|
||||
}
|
||||
})
|
||||
.add(
|
||||
new LongNumericOp(Maps.POWER, true) {
|
||||
@Override
|
||||
public double doDouble(long in, double arg) {
|
||||
return Math.pow(in, arg);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long doLong(long in, long arg) {
|
||||
throw new IllegalStateException(
|
||||
"Internal error: Power operation should cast to double.");
|
||||
}
|
||||
})
|
||||
.add(
|
||||
new LongNumericOp(Maps.DIV, true) {
|
||||
@Override
|
||||
@ -300,7 +258,7 @@ public final class LongStorage extends NumericStorage<Long> {
|
||||
|
||||
@Override
|
||||
public long doLong(long in, long arg) {
|
||||
return in / arg;
|
||||
throw new UnsupportedOperationException("Divide operation should cast to double.");
|
||||
}
|
||||
})
|
||||
.add(
|
||||
|
@ -1,9 +1,5 @@
|
||||
package org.enso.table.data.column.storage;
|
||||
|
||||
import java.util.stream.DoubleStream;
|
||||
import org.enso.table.data.column.operation.aggregate.Aggregator;
|
||||
import org.enso.table.data.column.operation.aggregate.numeric.NumericAggregator;
|
||||
|
||||
/** A storage containing items representable as a {@code double}. */
|
||||
public abstract class NumericStorage<T> extends Storage<T> {
|
||||
/**
|
||||
@ -14,45 +10,4 @@ public abstract class NumericStorage<T> extends Storage<T> {
|
||||
* @return the value associated with {@code idx}
|
||||
*/
|
||||
public abstract double getItemDouble(int idx);
|
||||
|
||||
@Override
|
||||
protected Aggregator getVectorizedAggregator(String name, int resultSize) {
|
||||
switch (name) {
|
||||
case Aggregators.MAX:
|
||||
return new NumericAggregator(this, resultSize) {
|
||||
@Override
|
||||
protected void runGroup(DoubleStream elements) {
|
||||
submit(elements.max());
|
||||
}
|
||||
};
|
||||
case Aggregators.MIN:
|
||||
return new NumericAggregator(this, resultSize) {
|
||||
@Override
|
||||
protected void runGroup(DoubleStream elements) {
|
||||
submit(elements.min());
|
||||
}
|
||||
};
|
||||
case Aggregators.SUM:
|
||||
return new NumericAggregator(this, resultSize) {
|
||||
@Override
|
||||
protected void runGroup(DoubleStream elements) {
|
||||
double[] its = elements.toArray();
|
||||
if (its.length == 0) {
|
||||
submitMissing();
|
||||
} else {
|
||||
submit(DoubleStream.of(its).sum());
|
||||
}
|
||||
}
|
||||
};
|
||||
case Aggregators.MEAN:
|
||||
return new NumericAggregator(this, resultSize) {
|
||||
@Override
|
||||
protected void runGroup(DoubleStream elements) {
|
||||
submit(elements.average());
|
||||
}
|
||||
};
|
||||
default:
|
||||
return super.getVectorizedAggregator(name, resultSize);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -9,9 +9,6 @@ import org.enso.base.polyglot.Polyglot_Utils;
|
||||
import org.enso.table.data.column.builder.object.Builder;
|
||||
import org.enso.table.data.column.builder.object.InferredBuilder;
|
||||
import org.enso.table.data.column.builder.object.ObjectBuilder;
|
||||
import org.enso.table.data.column.operation.aggregate.Aggregator;
|
||||
import org.enso.table.data.column.operation.aggregate.CountAggregator;
|
||||
import org.enso.table.data.column.operation.aggregate.FunctionAggregator;
|
||||
import org.enso.table.data.mask.OrderMask;
|
||||
import org.enso.table.data.mask.SliceRange;
|
||||
import org.graalvm.polyglot.Value;
|
||||
@ -73,6 +70,7 @@ public abstract class Storage<T> {
|
||||
public static final String SUB = "-";
|
||||
public static final String DIV = "/";
|
||||
public static final String MOD = "%";
|
||||
public static final String POWER = "^";
|
||||
public static final String NOT = "not";
|
||||
public static final String AND = "&&";
|
||||
public static final String OR = "||";
|
||||
@ -86,14 +84,6 @@ public abstract class Storage<T> {
|
||||
public static final String IS_IN = "is_in";
|
||||
}
|
||||
|
||||
public static final class Aggregators {
|
||||
public static final String SUM = "sum";
|
||||
public static final String MEAN = "mean";
|
||||
public static final String MAX = "max";
|
||||
public static final String MIN = "min";
|
||||
public static final String COUNT = "count";
|
||||
}
|
||||
|
||||
/**
|
||||
* Specifies if the given operation has a vectorized implementation available for this storage.
|
||||
*/
|
||||
@ -137,36 +127,6 @@ public abstract class Storage<T> {
|
||||
return builder.seal();
|
||||
}
|
||||
|
||||
protected Aggregator getVectorizedAggregator(String name, int resultSize) {
|
||||
if (name.equals(Aggregators.COUNT)) {
|
||||
return new CountAggregator(this, resultSize);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an aggregator created based on the provided parameters.
|
||||
*
|
||||
* @param name name of a vectorized operation that can be used if possible. If null is passed,
|
||||
* this parameter is unused.
|
||||
* @param fallback the function to use if a vectorized operation is not available.
|
||||
* @param skipNa whether missing values should be passed to the {@code fallback} function.
|
||||
* @param resultSize the number of times the {@link
|
||||
* Aggregator#nextGroup(java.util.stream.IntStream)} method will be called.
|
||||
* @return an aggregator satisfying the above properties.
|
||||
*/
|
||||
public final Aggregator getAggregator(
|
||||
String name, Function<List<Object>, Value> fallback, boolean skipNa, int resultSize) {
|
||||
Aggregator result = null;
|
||||
if (name != null) {
|
||||
result = getVectorizedAggregator(name, resultSize);
|
||||
}
|
||||
if (result == null) {
|
||||
result = new FunctionAggregator(fallback, this, skipNa, resultSize);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs a function on each non-missing element in this storage and gathers the results.
|
||||
*
|
||||
|
@ -2,7 +2,6 @@ package org.enso.table.data.table;
|
||||
|
||||
import org.enso.base.polyglot.Polyglot_Utils;
|
||||
import org.enso.table.data.column.builder.object.InferredBuilder;
|
||||
import org.enso.table.data.column.operation.aggregate.Aggregator;
|
||||
import org.enso.table.data.column.storage.BoolStorage;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
import org.enso.table.data.index.DefaultIndex;
|
||||
@ -15,8 +14,6 @@ import org.graalvm.polyglot.Value;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.List;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
/** A representation of a column. Consists of a column name and the underlying storage. */
|
||||
public class Column {
|
||||
@ -89,11 +86,11 @@ public class Column {
|
||||
* @return the result of masking this column with the provided column
|
||||
*/
|
||||
public Column mask(Column maskCol) {
|
||||
if (!(maskCol.getStorage() instanceof BoolStorage storage)) {
|
||||
if (!(maskCol.getStorage() instanceof BoolStorage boolStorage)) {
|
||||
throw new UnexpectedColumnTypeException("Boolean");
|
||||
}
|
||||
|
||||
var mask = BoolStorage.toMask(storage);
|
||||
var mask = BoolStorage.toMask(boolStorage);
|
||||
var localStorageMask = new BitSet();
|
||||
localStorageMask.set(0, getStorage().size());
|
||||
mask.and(localStorageMask);
|
||||
@ -156,25 +153,6 @@ public class Column {
|
||||
return index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Aggregates the values in this column, using a given aggregation operation.
|
||||
*
|
||||
* @param aggName name of a vectorized operation that can be used if possible. If null is passed,
|
||||
* this parameter is unused.
|
||||
* @param aggregatorFunction the function to use if a vectorized operation is not available.
|
||||
* @param skipNa whether missing values should be passed to the {@code fallback} function.
|
||||
* @return a column indexed by the unique index of this aggregate, storing results of applying the
|
||||
* specified operation.
|
||||
*/
|
||||
public Object aggregate(
|
||||
String aggName, Function<List<Object>, Value> aggregatorFunction, boolean skipNa) {
|
||||
Aggregator aggregator = storage.getAggregator(aggName, aggregatorFunction, skipNa, 1);
|
||||
|
||||
IntStream ixes = IntStream.range(0, storage.size());
|
||||
aggregator.nextGroup(ixes);
|
||||
return aggregator.seal().getItemBoxed(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param mask the reordering to apply
|
||||
* @return a new column, resulting from reordering this column according to {@code mask}.
|
||||
|
@ -0,0 +1,373 @@
|
||||
package org.enso.table.expressions;
|
||||
|
||||
import org.antlr.v4.runtime.BaseErrorListener;
|
||||
import org.antlr.v4.runtime.CharStreams;
|
||||
import org.antlr.v4.runtime.CommonTokenStream;
|
||||
import org.antlr.v4.runtime.RecognitionException;
|
||||
import org.antlr.v4.runtime.Recognizer;
|
||||
|
||||
import org.graalvm.polyglot.Context;
|
||||
import org.graalvm.polyglot.PolyglotException;
|
||||
import org.graalvm.polyglot.Value;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalTime;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.ZonedDateTime;
|
||||
import java.time.ZoneId;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.format.DateTimeParseException;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.function.Function;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class ExpressionVisitorImpl extends ExpressionBaseVisitor<Value> {
|
||||
private static class ThrowOnErrorListener extends BaseErrorListener {
|
||||
public static final ThrowOnErrorListener INSTANCE = new ThrowOnErrorListener();
|
||||
|
||||
@Override
|
||||
public void syntaxError(
|
||||
Recognizer<?, ?> recognizer,
|
||||
Object offendingSymbol,
|
||||
int line,
|
||||
int charPositionInLine,
|
||||
String msg,
|
||||
RecognitionException e)
|
||||
throws SyntaxErrorException {
|
||||
throw new SyntaxErrorException(msg, line, charPositionInLine);
|
||||
}
|
||||
}
|
||||
|
||||
public static class SyntaxErrorException extends RuntimeException {
|
||||
private final int line;
|
||||
private final int column;
|
||||
|
||||
public SyntaxErrorException(String message, int line, int column) {
|
||||
super(message);
|
||||
this.line = line;
|
||||
this.column = column;
|
||||
}
|
||||
|
||||
public int getLine() {
|
||||
return line;
|
||||
}
|
||||
|
||||
public int getColumn() {
|
||||
return column;
|
||||
}
|
||||
}
|
||||
|
||||
public static Value evaluate(
|
||||
String expression,
|
||||
Function<String, Value> getColumn,
|
||||
Function<Object, Value> makeConstantColumn,
|
||||
String moduleName,
|
||||
String typeName,
|
||||
String[] variableArgumentFunctions)
|
||||
throws UnsupportedOperationException, IllegalArgumentException {
|
||||
var lexer = new ExpressionLexer(CharStreams.fromString(expression));
|
||||
lexer.removeErrorListeners();
|
||||
lexer.addErrorListener(ThrowOnErrorListener.INSTANCE);
|
||||
|
||||
var tokens = new CommonTokenStream(lexer);
|
||||
var parser = new ExpressionParser(tokens);
|
||||
parser.removeErrorListeners();
|
||||
parser.addErrorListener(ThrowOnErrorListener.INSTANCE);
|
||||
|
||||
var visitor =
|
||||
new ExpressionVisitorImpl(
|
||||
getColumn, makeConstantColumn, moduleName, typeName, variableArgumentFunctions);
|
||||
|
||||
var expr = parser.prog();
|
||||
return visitor.visit(expr);
|
||||
}
|
||||
|
||||
private final Function<String, Value> getColumn;
|
||||
private final Function<Object, Value> makeConstantColumn;
|
||||
private final Function<String, Value> getMethod;
|
||||
private final Set<String> variableArgumentFunctions;
|
||||
|
||||
private ExpressionVisitorImpl(
|
||||
Function<String, Value> getColumn,
|
||||
Function<Object, Value> makeConstantColumn,
|
||||
String moduleName,
|
||||
String typeName,
|
||||
String[] variableArgumentFunctions) {
|
||||
this.getColumn = getColumn;
|
||||
this.makeConstantColumn = makeConstantColumn;
|
||||
|
||||
final Value module =
|
||||
Context.getCurrent().getBindings("enso").invokeMember("get_module", moduleName);
|
||||
final Value type = module.invokeMember("get_type", typeName);
|
||||
this.getMethod = name -> module.invokeMember("get_method", type, name);
|
||||
|
||||
this.variableArgumentFunctions = new HashSet<>(Arrays.asList(variableArgumentFunctions));
|
||||
}
|
||||
|
||||
private Value wrapAsColumn(Value value) {
|
||||
if (value.isNull()) {
|
||||
return makeConstantColumn.apply(value);
|
||||
}
|
||||
|
||||
var metaObject = value.getMetaObject();
|
||||
return metaObject != null && metaObject.asHostObject() instanceof Class<?>
|
||||
? makeConstantColumn.apply(value)
|
||||
: value;
|
||||
}
|
||||
|
||||
private Value executeMethod(String name, Value... args) {
|
||||
Value method = getMethod.apply(name);
|
||||
if (!method.canExecute()) {
|
||||
throw new UnsupportedOperationException(name);
|
||||
}
|
||||
|
||||
Object[] objects;
|
||||
if (this.variableArgumentFunctions.contains(name)) {
|
||||
objects = new Object[2];
|
||||
objects[0] = args[0];
|
||||
objects[1] = Arrays.copyOfRange(args, 1, args.length, Object[].class);
|
||||
} else {
|
||||
objects = Arrays.copyOf(args, args.length, Object[].class);
|
||||
}
|
||||
objects[0] = wrapAsColumn(args[0]);
|
||||
|
||||
try {
|
||||
var result = method.execute(objects);
|
||||
if (result.canExecute()) {
|
||||
throw new IllegalArgumentException("Insufficient arguments for method " + name + ".");
|
||||
}
|
||||
return result;
|
||||
} catch (PolyglotException e) {
|
||||
if (e.getMessage().startsWith("Type error: expected a function")) {
|
||||
throw new IllegalArgumentException("Too many arguments for method " + name + ".");
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Value visitProg(ExpressionParser.ProgContext ctx) {
|
||||
Value base = visit(ctx.expr());
|
||||
return wrapAsColumn(base);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Value visitColumn(ExpressionParser.ColumnContext ctx) {
|
||||
var text = ctx.getText();
|
||||
return getColumn.apply(text.substring(1, text.length() - 1).replace("]]", "]"));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Value visitPower(ExpressionParser.PowerContext ctx) {
|
||||
return executeMethod("^", visit(ctx.expr(0)), visit(ctx.expr(1)));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Value visitMultDivMod(ExpressionParser.MultDivModContext ctx) {
|
||||
return executeMethod(ctx.op.getText(), visit(ctx.expr(0)), visit(ctx.expr(1)));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Value visitCompare(ExpressionParser.CompareContext ctx) {
|
||||
var op = ctx.op.getText();
|
||||
if (op.equals("=")) {
|
||||
op = "==";
|
||||
}
|
||||
if (op.equals("<>")) {
|
||||
op = "!=";
|
||||
}
|
||||
|
||||
return executeMethod(op, visit(ctx.expr(0)), visit(ctx.expr(1)));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Value visitLike(ExpressionParser.LikeContext ctx) {
|
||||
var condition = executeMethod("like", visit(ctx.expr(0)), visit(ctx.expr(1)));
|
||||
return ctx.NOT_LIKE() != null ? executeMethod("not", condition) : condition;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Value visitIsNull(ExpressionParser.IsNullContext ctx) {
|
||||
var op = ctx.IS_NULL() != null || ctx.IS_NOT_NULL() != null ? "is_missing" : "is_empty";
|
||||
var condition = executeMethod(op, visit(ctx.expr()));
|
||||
return ctx.IS_NOT_NULL() != null || ctx.IS_NOT_EMPTY() != null
|
||||
? executeMethod("not", condition)
|
||||
: condition;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Value visitIf(ExpressionParser.IfContext ctx) {
|
||||
return executeMethod("iif", visit(ctx.expr(0)), visit(ctx.expr(1)), visit(ctx.expr(2)));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Value visitAddSub(ExpressionParser.AddSubContext ctx) {
|
||||
return executeMethod(ctx.op.getText(), visit(ctx.expr(0)), visit(ctx.expr(1)));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Value visitAnd(ExpressionParser.AndContext ctx) {
|
||||
return executeMethod("&&", visit(ctx.expr(0)), visit(ctx.expr(1)));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Value visitOr(ExpressionParser.OrContext ctx) {
|
||||
return executeMethod("||", visit(ctx.expr(0)), visit(ctx.expr(1)));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Value visitUnaryNot(ExpressionParser.UnaryNotContext ctx) {
|
||||
return executeMethod("not", visit(ctx.expr()));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Value visitUnaryMinus(ExpressionParser.UnaryMinusContext ctx) {
|
||||
return executeMethod("*", visit(ctx.expr()), Value.asValue(-1));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Value visitNullOrNothing(ExpressionParser.NullOrNothingContext ctx) {
|
||||
return Value.asValue(null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Value visitBoolean(ExpressionParser.BooleanContext ctx) {
|
||||
return Value.asValue(ctx.TRUE() != null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Value visitNumber(ExpressionParser.NumberContext ctx) {
|
||||
var text = ctx.getText().replace("_", "");
|
||||
if (text.contains(".")) {
|
||||
return Value.asValue(Double.parseDouble(text));
|
||||
} else {
|
||||
return Value.asValue(Long.parseLong(text));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Value visitExcelString(ExpressionParser.ExcelStringContext ctx) {
|
||||
var text = ctx.getText();
|
||||
return Value.asValue(text.substring(1, text.length() - 1).replace("\"\"", "\""));
|
||||
}
|
||||
|
||||
private static final Pattern pythonRegex = Pattern.compile("(\\\\[abtnfrv\"'\\\\])|(\\\\(x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{4}))|\\\\|([^\\\\]+)");
|
||||
|
||||
private static String unescapePython(String text) {
|
||||
var matcher = pythonRegex.matcher(text);
|
||||
var builder = new StringBuilder(text.length());
|
||||
while (matcher.find()) {
|
||||
if (matcher.group(1) != null) {
|
||||
builder.append(switch (matcher.group(1).charAt(1)) {
|
||||
case 'a' -> (char) 0x07;
|
||||
case 'f' -> (char) 0x0c;
|
||||
case 'b' -> '\b';
|
||||
case 't' -> '\t';
|
||||
case 'r' -> '\r';
|
||||
case 'n' -> '\n';
|
||||
case 'v' -> (char) 0x0b;
|
||||
case '\\' -> '\\';
|
||||
case '\'' -> '\'';
|
||||
case '"' -> '"';
|
||||
default -> throw new IllegalArgumentException("Unknown Python escape sequence.");
|
||||
});
|
||||
} else if (matcher.group(2) != null) {
|
||||
builder.append((char) Integer.parseInt(matcher.group(2).substring(2), 16));
|
||||
} else {
|
||||
builder.append(matcher.group(0));
|
||||
}
|
||||
}
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Value visitPythonString(ExpressionParser.PythonStringContext ctx) {
|
||||
var text = ctx.getText();
|
||||
return Value.asValue(unescapePython(text.substring(1, text.length() - 1)));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Value visitDate(ExpressionParser.DateContext ctx) {
|
||||
var text = ctx.text.getText();
|
||||
try {
|
||||
return Value.asValue(LocalDate.parse(ctx.text.getText()));
|
||||
} catch (DateTimeParseException e) {
|
||||
throw new SyntaxErrorException(
|
||||
"Invalid Date format: " + text,
|
||||
ctx.getStart().getLine(),
|
||||
ctx.getStart().getCharPositionInLine());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Value visitTime(ExpressionParser.TimeContext ctx) {
|
||||
var text = ctx.text.getText();
|
||||
try {
|
||||
return Value.asValue(LocalTime.parse(ctx.text.getText()));
|
||||
} catch (DateTimeParseException e) {
|
||||
throw new SyntaxErrorException(
|
||||
"Invalid Time format: " + text,
|
||||
ctx.getStart().getLine(),
|
||||
ctx.getStart().getCharPositionInLine());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Value visitDatetime(ExpressionParser.DatetimeContext ctx) {
|
||||
var text = ctx.text.getText().replace(' ', 'T');
|
||||
var timezone = text.contains("[") ? text.substring(text.indexOf('[')) : "";
|
||||
text = text.substring(0, text.length() - timezone.length());
|
||||
|
||||
var zoneId =
|
||||
timezone.equals("")
|
||||
? ZoneId.systemDefault()
|
||||
: ZoneId.of(timezone.substring(1, timezone.length() - 1));
|
||||
|
||||
try {
|
||||
var zonedDateTime =
|
||||
ZonedDateTime.parse(text, DateTimeFormatter.ISO_OFFSET_DATE_TIME.withZone(zoneId));
|
||||
return Value.asValue(zonedDateTime);
|
||||
} catch (DateTimeParseException ignored) {
|
||||
}
|
||||
|
||||
try {
|
||||
var localDateTime = LocalDateTime.parse(text);
|
||||
return Value.asValue(localDateTime.atZone(zoneId));
|
||||
} catch (DateTimeParseException e) {
|
||||
throw new SyntaxErrorException(
|
||||
"Invalid Date_Time format: " + text,
|
||||
ctx.getStart().getLine(),
|
||||
ctx.getStart().getCharPositionInLine());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Value visitParen(ExpressionParser.ParenContext ctx) {
|
||||
return visit(ctx.expr());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Value visitBetween(ExpressionParser.BetweenContext ctx) {
|
||||
var self = visit(ctx.expr(0));
|
||||
var lower = visit(ctx.expr(1));
|
||||
var upper = visit(ctx.expr(2));
|
||||
var condition = executeMethod("between", self, lower, upper);
|
||||
return ctx.NOT_BETWEEN() != null ? executeMethod("not", condition) : condition;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Value visitIn(ExpressionParser.InContext ctx) {
|
||||
var args = ctx.expr().stream().map(this::visit).toArray(Value[]::new);
|
||||
var condition = executeMethod("is_in", args);
|
||||
return ctx.NOT_IN() != null ? executeMethod("not", condition) : condition;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Value visitFunction(ExpressionParser.FunctionContext ctx) {
|
||||
var name = ctx.IDENTIFIER().getText().toLowerCase();
|
||||
var args = ctx.expr().stream().map(this::visit).toArray(Value[]::new);
|
||||
return executeMethod(name, args);
|
||||
}
|
||||
}
|
@ -53,11 +53,9 @@ spec = Test.group "Columns" <|
        test_column.take (Last 7) . to_vector . should_equal expected_2.to_vector
        test_column.take (Last 0) . to_vector . should_equal expected_3.to_vector

    Test.specify "should be able to get the first / head element" <|
    Test.specify "should be able to get the first element" <|
        test_column.first . should_equal 1
        test_column.head . should_equal 1
        empty_column.first.should_fail_with Empty_Error
        empty_column.head.should_fail_with Empty_Error

    Test.specify "should be able to get the last element" <|
        test_column.last . should_equal 6
@ -85,25 +83,12 @@ spec = Test.group "Columns" <|
        col = Examples.decimal_column.set_index Examples.integer_column
        col.index.to_vector . should_equal Examples.integer_column.to_vector

    Test.specify "should allow computing variance and standard deviation" <|
        const = Column.from_vector 'const' [1, 1, 1, 1, 1]
        const.variance . should_equal 0
        const.standard_deviation.should_equal 0

        rand = Column.from_vector 'random' [10.0, 4.2, 6.8, 6.2, 7.2]
        rand.variance . should_equal 4.372
        rand.variance degrees_of_freedom_correction=0 . should_equal 3.4976
        rand.standard_deviation . should_equal 2.090932806 epsilon=(10 ^ -6)
        rand.standard_deviation degrees_of_freedom_correction=0 . should_equal 1.870187156 epsilon=(10 ^ -6)

    Test.specify "should allow computing the R² score of a prediction" <|
        sample = Column.from_vector 'sample' [1,2,3,4,5]
        mean_pred = Column.from_vector 'mean' [3,3,3,3,3]
        perfect_pred = Column.from_vector 'perfect' [1,2,3,4,5]
        bad_pred = Column.from_vector 'bad' [5,4,3,2,1]

        sample.r_squared mean_pred . should_equal 0
        sample.r_squared perfect_pred . should_equal 1
        sample.r_squared bad_pred . should_equal -3
    Test.specify "should result in correct Storage if operation allows it" <|
        another = Column.from_vector "Test" [10, 20, 30, 40, 50, 60]
        (test_column + 1).storage_type . should_equal Storage.Integer
        (test_column - 1).storage_type . should_equal Storage.Integer
        (test_column * 2).storage_type . should_equal Storage.Integer
        (test_column * 1.5).storage_type . should_equal Storage.Decimal
        (test_column + another).storage_type . should_equal Storage.Integer

main = Test_Suite.run_main spec
@ -79,7 +79,7 @@ spec =
    Test.specify "should support simple text operations" <|
        b = t1.at "B"
        add = b + "SUFFIX"
        add.to_sql.prepare . should_equal ['SELECT ("T1"."B" + ?) AS "B" FROM "T1" AS "T1"', [["SUFFIX", str]]]
        add.to_sql.prepare . should_equal ['SELECT ("T1"."B" || ?) AS "B" FROM "T1" AS "T1"', [["SUFFIX", str]]]

        ends = b.ends_with "suf"
        starts = b.starts_with "pref"
@ -12,8 +12,6 @@ from Standard.Test import Test, Problems
import project.Database.Helpers.Name_Generator

spec prefix connection pending=Nothing =
    eps=0.000001

    tables_to_clean = Vector.new_builder
    upload prefix data =
        name = Name_Generator.random_name prefix
@ -185,16 +183,6 @@ spec prefix connection pending=Nothing =
            col.count . should_equal 3
            col.count_missing . should_equal 2

    Test.group prefix+"Column-wide statistics" pending=pending <|
        Test.specify 'should allow computing basic column-wide stats' <|
            t7 = upload "T7" <|
                Table.new [['price', [0.4, 3.5, Nothing, 6.7, Nothing, 97, Nothing]]]
            price = t7.at 'price'
            price.sum.should_equal 107.6 epsilon=eps
            price.min.should_equal 0.4 epsilon=eps
            price.max.should_equal 97 epsilon=eps
            price.mean.should_equal 26.9 epsilon=eps

    Test.group prefix+"Sorting" pending=pending <|
        df = upload "clothes" <|
            Table.new [["id", [1,2,3,4,5,6]], ["name", ["shoes","trousers","dress","skirt","blouse","t-shirt"]], ["quantity", [20,10,20,10,30,30]], ["rating", [3.0,Nothing,7.3,3.0,2.2,Nothing]], ["price", [37.2,42.1,64.1,87.4,13.5,64.2]]]
290
test/Table_Tests/src/Expression_Spec.enso
Normal file
@ -0,0 +1,290 @@
from Standard.Base import all
from Standard.Base.Error.Problem_Behavior import Report_Error

from Standard.Table import Table, Column, Sort_Column, Column_Selector, Sort_Column_Selector, Aggregate_Column
import Standard.Table.Data.Expression.Expression_Error

import Standard.Visualization

from Standard.Test import Test, Test_Suite, Problems

import project.Common_Table_Spec
from project.Util import all

spec detailed=False =
    # Create Test Table
    column_a = ["A", [1, 2, 3, 4, 5]]
    column_b = ["B", [1.0, 1.5, 2.5, 4, 6]]
    column_c = ["C", ["Hello", "World", "Hello World!", "", Nothing]]
    column_odd = ["Bad] Name", [True, False, True, False, True]]
    test_table = Table.new [column_a, column_b, column_c, column_odd]

    epsilon=0.0000000001

    tester expression value =
        new_table = test_table.set "NEW_COL" expression
        new_table.column_count . should_equal (test_table.column_count + 1)
        new_column = new_table.at -1
        new_column.name . should_equal "NEW_COL"
        expected = case value of
            _ : Vector.Vector -> value
            _ -> Vector.new test_table.row_count _->value

        values = new_column.to_vector
        values . each_with_index i->v->
            e = expected.at i
            match = case e of
                _ : Number -> e.equals v epsilon
                _ -> e == v
            if match.not then values.should_equal expected

    specify_test label action expression_test=tester =
        case detailed of
            True ->
                specify_tester expression value =
                    Test.specify (label + ": " + expression) <|
                        expression_test expression value
                action specify_tester
            False ->
                Test.specify label (action expression_test)

    Test.group "Expression Integer literals" <|
        specify_test "should be able to add an integer column" expression_test->
            expression_test "1" 1
            expression_test "-3" -3
            expression_test "1_000" 1000

    Test.group "Expression Decimal literals" <|
        specify_test "should be able to add an decimal column" expression_test->
            expression_test "1.23" 1.23
            expression_test "-3.1415" -3.1415
            expression_test "1_000.456" 1000.456

    Test.group "Expression Boolean literals" <|
        specify_test "should be able to add a boolean column" expression_test->
            expression_test "True" True
            expression_test "true" True
            expression_test "TRUE" True
            expression_test "tRuE" True
            expression_test "False" False
            expression_test "false" False
            expression_test "FALSE" False
            expression_test "FaLsE" False

    Test.group "Expression Text literals" <|
        specify_test "should be able to add a text column" expression_test->
            expression_test "'Hello World'" 'Hello World'
            expression_test "'Hello \'World\''" "Hello 'World'"
            expression_test '"Hello World"' 'Hello World'
            expression_test '"Hello ""World"""' 'Hello "World"'
            expression_test '"Hello \\""World"""' 'Hello \\"World"'
            expression_test "'Alpha\r\n\gBeta'" 'Alpha\r\n\\gBeta'

    Test.group "Expression Text literals" <|
        specify_test "should be able to get a Column" expression_test->
            expression_test "[A]" (column_a.at 1)
            expression_test "[Bad]] Name]" (column_odd.at 1)

    Test.group "Expression Nothing literals" <|
        specify_test "should be able to add an nothing column" expression_test->
            expression_test "null" Nothing
            expression_test "nUlL" Nothing
            expression_test "Nothing" Nothing
            expression_test "NOTHING" Nothing

    Test.group "Expression Date and Time literals" <|
        specify_test "should be able to add a date or time column" expression_test->
            expression_test "#2020-12-23#" (Date.new 2020 12 23)
            expression_test "#12:34#" (Time_Of_Day.new 12 34)
            expression_test "#12:34:56#" (Time_Of_Day.new 12 34 56)
            expression_test "#12:34:56.789#" (Time_Of_Day.new 12 34 56 789000000)
            expression_test "#12:34:56.789000123#" (Time_Of_Day.new 12 34 56 789000123)
            expression_test "#2020-12-23 12:34#" (Date_Time.new 2020 12 23 12 34)
            expression_test "#2020-12-23 12:34:56#" (Date_Time.new 2020 12 23 12 34 56)
            expression_test "#2020-12-23 12:34:56Z[UTC]#" (Date_Time.new 2020 12 23 12 34 56 zone=Time_Zone.utc)
            expression_test "#2020-12-23 12:34:56+02:30[UTC]#" (Date_Time.new 2020 12 23 10 04 56 zone=Time_Zone.utc)
            expression_test "#2020-12-23 12:34:56.157+01[UTC]#" (Date_Time.new 2020 12 23 11 34 56 157000000 zone=Time_Zone.utc)
            expression_test "#2020-12-23T12:34[Europe/Warsaw]#" (Date_Time.new 2020 12 23 12 34 zone=Time_Zone.parse("Europe/Warsaw"))

    Test.group "Expression Arithmetic" <|
        specify_test "should be able to do basic arithmetic" expression_test->
            expression_test "1+1" 2
            expression_test "23-15" 8
            expression_test "2.5*4.2" 10.5
            expression_test "1_000.456/2" 500.228
            expression_test "2^4" 16
            expression_test "11%3" 2

        specify_test "should be able to do basic arithmetic with order" expression_test->
            expression_test "1+1*2+2" 5
            expression_test "23-15/3+6" 24
            expression_test "52.92/4.2^2" 3
            expression_test "(1+1)*2+2" 6

        specify_test "should be able to do basic arithmetic with whitespace" expression_test->
            expression_test "1 + 1" 2
            expression_test " 23 -15 " 8
            expression_test "2.5* 4.2" 10.5
            expression_test "1_000.456/ 2" 500.228
            expression_test " 2 ^ 4 " 16
            expression_test " 11 % 3 " 2
            expression_test "1+1 * 2" 3
            expression_test "1 + 1*2" 3

    Test.group "Column Arithmetic" <|
        specify_test "should be able to perform arithmetic on columns" expression_test->
            expression_test "[A] + 2" [3, 4, 5, 6, 7]
            expression_test "[B] - 2" [-1, -0.5, 0.5, 2, 4]
            expression_test "[A] * 4" [4, 8, 12, 16, 20]
            expression_test "[B] / 2" [0.5, 0.75, 1.25, 2, 3]
            expression_test "[A] + [B]" [2, 3.5, 5.5, 8, 11]
            expression_test "[A] - [B]" [0, 0.5, 0.5, 0, -1]
            expression_test "[A] * [B]" [1, 3, 7.5, 16, 30]
            expression_test "[B] / [A]" [1, 0.75, 0.8333333333333334, 1, 1.2]
            expression_test "[A] ^ [B]" [1, 2.8284271247461903, 15.588457268119896, 256, 15625]
            expression_test "[A] % [B]" [0, 0.5, 0.5, 0, 5]
            expression_test "[A] + [B] + [A]" [3, 5.5, 8.5, 12, 16]
            expression_test "[A] - [B] - [A]" [-1, -1.5, -2.5, -4, -6]
            expression_test "[A] * [B] * [A]" [1, 6, 22.5, 64, 150]
            expression_test "[A] / [B] / [A]" [1.0, 0.6666666666666667, 0.39999999999999999, 0.25, 0.16666666666666667]
            expression_test "[A] ^ [B] * [A]" [1, 5.65685424949238, 46.7653718043597, 1024, 78125]
            expression_test "[A] % [B] % [A]" [0, 0.5, 0.5, 0, 0]

        specify_test "should be able to perform arithmetic on columns with order" expression_test->
            expression_test "([A] + [B]) * 3" [6, 10.5, 16.5, 24, 33]
            expression_test "[A] * (4 + [B])" [5, 11, 19.5, 32, 50]
            expression_test "[A] * [B] + [A]" [2, 5, 10.5, 20, 35]
            expression_test "[A] + [B] * [B]" [2, 4.25, 9.25, 20, 41]
            expression_test "([A] + [B]) / [A]" [2, 1.75, 1.83333333333333, 2, 2.2]
            expression_test "[A] / [B] + 2" [3, 3.33333333333333, 3.2, 3, 2.83333333333333]
            expression_test "([A] + [B]) % 4" [2, 3.5, 1.5, 0, 3]
            expression_test "[A] % [B] + 2" [2, 2.5, 2.5, 2, 7]
            expression_test "([A] - [B]) ^ [A]" [0, 0.25, 0.125, 0, -1]
            expression_test "[A] ^ ([B] - [A])" [1, 0.707106781186547, 0.577350269189626, 1, 5]

    Test.group "Comparison Operators" <|
        specify_test "should be able to compare equality" expression_test->
            expression_test "2 = 1 + 1" True
            expression_test "2 == 1 + 1" True
            expression_test "[A] = 2" [False, True, False, False, False]
            expression_test "[A] == 2" [False, True, False, False, False]
            expression_test "3 != 1 + 1" True
            expression_test "3 <> 1 + 1" True
            expression_test "[A] != 2" [True, False, True, True, True]
            expression_test "[A] <> 2" [True, False, True, True, True]

        specify_test "should be able to compare ordering" expression_test->
            expression_test "1 > 2" False
            expression_test "1 < 2" True
            expression_test "[A] > 2" [False, False, True, True, True]
            expression_test "[A] >= 2" [False, True, True, True, True]
            expression_test "[A] < 2" [True, False, False, False, False]
            expression_test "[A] <= 2" [True, True, False, False, False]

        specify_test "should be able to use between" expression_test->
            expression_test "1 + 1 BETWEEN 1 AND 3" True
            expression_test "1 + 1 between 2 AND 3" True
            expression_test "1 + 1 bETWEEN 1 AND 2" True
            expression_test "[A] between 2 AND 3" [False, True, True, False, False]
            expression_test "1 + 1 NOT BETWEEN 1 AND 3" False
            expression_test "[A] not between 2 AND 3" [True, False, False, True, True]

        specify_test "should be able to use in" expression_test->
            expression_test "1 + 1 IN (2, 4, 6)" True
            expression_test "[A] IN (2, 4, 6)" [False, True, False, True, False]
            expression_test "1 + 1 NOT IN (2, 4, 6)" False
            expression_test "[A] NOT IN (2, 4, 6)" [True, False, True, False, True]
            expression_test "[A] IN (3)" [False, False, True, False, False]
            expression_test "[A] NOT IN (3)" [True, True, False, True, True]

        specify_test "should be able to check null" expression_test->
            expression_test "1 IS NULL" False
            expression_test "1 IS NoTHing" False
            expression_test "Nothing IS NULL" True
            expression_test "1 IS NOT NULL" True
            expression_test "Nothing IS NOT NULL" False
            expression_test "[A] IS NULL" [False, False, False, False, False]
            expression_test "[C] IS NULL" [False, False, False, False, True]
            expression_test "[A] IS NOT NULL" [True, True, True, True, True]
            expression_test "[C] IS NOT NULL" [True, True, True, True, False]

        specify_test "should be able to check empty" expression_test->
            expression_test "'Hello World' IS EMPTY" False
            expression_test "'' IS EMPTY" True
            expression_test "Nothing IS EMPTY" True
            expression_test "'Hello World' IS NOT EMPTY" True
            expression_test "'' IS NOT EMPTY" False
            expression_test "Nothing IS NOT EMPTY" False

    Test.group "Text Operators" <|
        specify_test "should be able to concatenate text" expression_test->
            expression_test "'Hello ' + 'World'" "Hello World"
            expression_test "[C] + ' World'" ["Hello World", "World World", "Hello World! World", " World", Nothing]
            expression_test "'Hello ' + [C]" ["Hello Hello", "Hello World", "Hello Hello World!", "Hello ", Nothing]
            expression_test "[C] + [C]" ["HelloHello", "WorldWorld", "Hello World!Hello World!", "", Nothing]

        specify_test "should be able to use like" expression_test->
            expression_test "'Hello World' LIKE 'Hello%'" True
            expression_test "'Hello' LIKE 'H_llo'" True
            expression_test "'Hello' LIKE 'H_l%'" True
            expression_test "'Hello' LIKE 'H___o'" True
            expression_test "'World' LIKE 'H___o'" False
            expression_test "'Hello World' NOT LIKE 'Hello%'" False
            expression_test "[C] LIKE 'Hello%'" [True, False, True, False, Nothing]
            expression_test "[C] NOT LIKE 'Hello%'" [False, True, False, True, Nothing]

    Test.group "Boolean Operators" <|
        specify_test "should be able to AND booleans" expression_test->
            expression_test "True && TRUE" True
            expression_test "True AND False" False
            expression_test "True && [Bad]] Name]" [True, False, True, False, True]
            expression_test "False AND [Bad]] Name]" False

        specify_test "should be able to OR booleans" expression_test->
            expression_test "True || TRUE" True
            expression_test "True OR False" True
            expression_test "False OR False" False
            expression_test "True OR [Bad]] Name]" True
            expression_test "False || [Bad]] Name]" [True, False, True, False, True]

        specify_test "should be able to NOT booleans" expression_test->
            expression_test "!TRUE" False
            expression_test "Not False" True
            expression_test "NOT [Bad]] Name]" [False, True, False, True, False]

        specify_test "should be able to use IF" expression_test->
            expression_test "IF True THEN 1 ELSE 0" 1
            expression_test "IF False THEN 'A' ELSE 'B' END" 'B'
            expression_test "IF [Bad]] Name] THEN [A] ELSE [B] ENDIF" [1, 1.5, 3, 4, 5]

    Test.group "Function invocation" <|
        specify_test "should be able to call a function with arguments" expression_test->
            expression_test "Not(True)" False
            expression_test "not(False)" True
            expression_test "iif(True, 1, 3)" 1
            expression_test "iif([Bad]] Name], 2, 3)" [2, 3, 2, 3, 2]

        specify_test "should be able to call a variable args function" expression_test->
            expression_test "min(10, 3, 8)" 3
            expression_test "max([A], [B], 3)" [3, 3, 3, 4, 6]

    Test.group "Errors should be handled" <|
        error_tester expression fail_type =
            test_table.set "NEW_COL" expression on_problems=Problem_Behavior.Report_Error . should_fail_with fail_type
            test_table.set "NEW_COL" expression . column_count . should_equal test_table.column_count

        specify_test "should fail with Syntax_Error if badly formed" expression_test=error_tester expression_test->
            expression_test "IIF [A] THEN 1 ELSE 2" Expression_Error.Syntax_Error
            expression_test "A + B" Expression_Error.Syntax_Error
            expression_test "#2022-31-21#" Expression_Error.Syntax_Error

        specify_test "should fail with Unsupported_Operation if not sufficient arguments" expression_test=error_tester expression_test->
            expression_test "unknown([C])" Expression_Error.Unsupported_Operation

        specify_test "should fail with Argument_Mismatch if not sufficient arguments" expression_test=error_tester expression_test->
            expression_test "starts_with([C])" Expression_Error.Argument_Mismatch

        specify_test "should fail with Argument_Mismatch if too many arguments" expression_test=error_tester expression_test->
            expression_test "starts_with([C], 'Hello', 'World')" Expression_Error.Argument_Mismatch

main = Test_Suite.run_main (spec True)
@ -5,8 +5,10 @@ from Standard.Test import Test_Suite
import project.In_Memory_Tests
import project.Database.Main as Database_Tests
import project.Data_Formatter_Spec
import project.Expression_Spec

main = Test_Suite.run_main <|
    In_Memory_Tests.in_memory_spec
    Database_Tests.databases_spec
    Data_Formatter_Spec.spec
    Expression_Spec.spec
    Database_Tests.databases_spec
@ -425,14 +425,6 @@ spec =
        i.at "Items Count" . to_vector . should_equal [3, 2, 4]
        i.at "Storage Type" . to_vector . should_equal [Storage.Text, Storage.Integer, Storage.Any]

    Test.group "Column-wide statistics" <|
        Test.specify 'should allow computing basic column-wide stats' <|
            price = Column.from_vector 'price' [0.4, 3.5, Nothing, 6.7, Nothing, 97, Nothing]
            price.sum.should_equal 107.6
            price.min.should_equal 0.4
            price.max.should_equal 97
            price.mean.should_equal 26.9

    Test.group "Sorting Tables" <|
        df = (enso_project.data / "clothes.csv").read

@ -0,0 +1 @@
~ Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
@ -0,0 +1 @@
Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
@ -1,3 +1,3 @@
3D20F317407799FC2002CA1A005A2F5CDBFE3A082AD7BA59D08F04270EF9B88C
0DF140BB506529B02B8A79B1E32040D7B4515E690EB2C8F32B7F74DD0E821719
840031EDBA6D7166EE1BABF8D1AB65F7219F5258683A2D487D12D3D4B8387BD7
4BC5787A7330388C3B8BF8C5955FEFB57E57CB47DFAA243180AF0DA066E3D0D6
0