Expanding Derived Columns and Expression Syntax (#3782)

- Added expression ANTLR4 grammar and sbt based build.
- Added expression support to `set` and `filter` on the Database and InMemory `Table`.
- Added expression support to `aggregate` on the Database and InMemory `Table`.
- Removed old aggregate functions (`sum`, `max`, `min` and `mean`) from `Column` types.
- Adjusted database `Column` `+` operator to do concatenation (`||`) when applied to text types.
- Added power operator `^` to both `Column` types.
- Adjusted `iif` to allow for columns to be passed for the `when_true` and `when_false` parameters.
- Added `is_present` to database `Column` type.
- Added `coalesce`, `min` and `max` functions to both `Column` types, performing row-based operations.
- Added support for `Date`, `Time_Of_Day` and `Date_Time` constants in database.
- Added `read` method to InMemory `Column` returning `self` (or a slice).

# Important Notes
- Moved approximate type computation to `SQL_Type`.
- Fixed issue in `LongNumericOp` where it was always casting to a double.
- Removed `head` from InMemory Table (still has `first` method).
This commit is contained in:
James Dunkerley 2022-11-08 15:57:59 +00:00 committed by GitHub
parent c2633bc137
commit 45276b243d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
48 changed files with 1391 additions and 793 deletions

View File

@ -230,6 +230,8 @@
- [Implemented `Table.rows` giving access to a vector of rows.][3827]
- [Define Enso epoch start as 15th October 1582][3804]
- [Implemented `Period` type][3818]
- [Implemented new functions on Column and added expression syntax support to
create derived Columns.][3782]
[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -368,6 +370,7 @@
[3818]: https://github.com/enso-org/enso/pull/3818
[3776]: https://github.com/enso-org/enso/pull/3776
[3836]: https://github.com/enso-org/enso/pull/3836
[3782]: https://github.com/enso-org/enso/pull/3782
#### Enso Compiler

View File

@ -1872,17 +1872,26 @@ lazy val `std-base` = project
lazy val `std-table` = project
.in(file("std-bits") / "table")
.enablePlugins(Antlr4Plugin)
.settings(
frgaalJavaCompilerSetting,
autoScalaLibrary := false,
Compile / packageBin / artifactPath :=
`table-polyglot-root` / "std-table.jar",
Antlr4 / antlr4PackageName := Some("org.enso.table.expressions"),
Antlr4 / antlr4Version := "4.10.1",
Antlr4 / antlr4GenVisitor := true,
Antlr4 / antlr4TreatWarningsAsErrors := true,
Compile / managedSourceDirectories += {
(Antlr4 / sourceManaged).value / "main" / "antlr4"
},
libraryDependencies ++= Seq(
"org.graalvm.truffle" % "truffle-api" % graalVersion % "provided",
"org.netbeans.api" % "org-openide-util-lookup" % netbeansApiVersion % "provided",
"com.univocity" % "univocity-parsers" % "2.9.1",
"org.apache.poi" % "poi-ooxml" % "5.2.2",
"org.apache.xmlbeans" % "xmlbeans" % "5.1.0"
"org.apache.xmlbeans" % "xmlbeans" % "5.1.0",
"org.antlr" % "antlr4-runtime" % "4.10.1"
),
Compile / packageBin := Def.task {
val result = (Compile / packageBin).value

View File

@ -8,7 +8,7 @@ import project.Data.SQL_Statement.SQL_Statement
import project.Data.SQL_Type.SQL_Type
import project.Data.Table as Database_Table
import project.Internal.IR.Context.Context
import project.Internal.IR.Expression.Expression
import project.Internal.IR.SQL_Expression.SQL_Expression
import project.Internal.IR.Query.Query
from project.Internal.Result_Set import read_column, result_set_to_table
@ -182,7 +182,7 @@ type Connection
db_table = if create_table.is_error then create_table else self.query (SQL_Query.Table_Name name)
if db_table.is_error.not then
pairs = db_table.internal_columns.map col->[col.name, Expression.Constant col.sql_type Nothing]
pairs = db_table.internal_columns.map col->[col.name, SQL_Expression.Constant col.sql_type Nothing]
insert_query = self.dialect.generate_sql <| Query.Insert name pairs
insert_template = insert_query.prepare.first
self.jdbc_connection.load_table insert_template db_table table batch_size

View File

@ -9,7 +9,7 @@ import project.Data.SQL_Type.SQL_Type
import project.Data.Table.Integrity_Error
import project.Internal.Helpers
import project.Internal.IR.Context.Context
import project.Internal.IR.Expression.Expression
import project.Internal.IR.SQL_Expression.SQL_Expression
import project.Internal.IR.Internal_Column.Internal_Column
import project.Internal.IR.Query.Query
@ -36,7 +36,7 @@ type Column
which they come. Combined expressions must come from the same context -
they must both have the same filtering, grouping etc. rules applied to be
able to be combined.
Value name:Text connection:Connection sql_type:SQL_Type expression:Expression context:Context
Value name:Text connection:Connection sql_type:SQL_Type expression:SQL_Expression context:Context
## UNSTABLE
@ -75,7 +75,7 @@ type Column
## UNSTABLE
Returns a materialized dataframe containing rows of this table.
Returns a materialized column containing rows of this column.
Arguments:
- max_rows: specifies a maximum amount of rows to fetch; if not set, all
@ -135,12 +135,13 @@ type Column
Error.throw <| Unsupported_Database_Operation_Error_Data "Cannot use columns coming from different contexts in one expression without a join."
constant ->
actual_operand_type = operand_type.if_nothing self.sql_type
Expression.Constant actual_operand_type constant
SQL_Expression.Constant actual_operand_type constant
actual_operand_types = operand_types.if_nothing (Vector.fill operands.length Nothing)
expressions = operands.zip actual_operand_types prepare_operand
actual_new_type = new_type.if_nothing self.sql_type
new_expr = Expression.Operation op_kind ([self.expression] + expressions)
new_expr = SQL_Expression.Operation op_kind ([self.expression] + expressions)
Column.Value self.name self.connection actual_new_type new_expr self.context
## PRIVATE
@ -195,41 +196,6 @@ type Column
join self other on=Nothing drop_unmatched=False left_suffix='_left' right_suffix='_right' =
self.to_table.join other on drop_unmatched left_suffix right_suffix
## UNSTABLE
Sums the values in this column.
sum : Any
sum self = self.compute_aggregate "SUM"
## UNSTABLE
Computes the maximum element of this column.
max : Any
max self = self.compute_aggregate "MAX"
## UNSTABLE
Computes the minimum element of this column.
min : Any
min self = self.compute_aggregate "MIN"
## UNSTABLE
Computes the mean of non-missing elements of this column.
mean : Any
mean self = self.compute_aggregate "AVG"
## PRIVATE
Computes an aggregate operator.
Arguments:
- op_name: The name of the operator to compute.
compute_aggregate : Text
compute_aggregate self op_name =
agg = make_aggregate self op_name
agg.to_vector . at 0
## UNSTABLE
Returns the length of this column.
@ -355,7 +321,12 @@ type Column
of `self`. If `other` is a column, the operation is performed pairwise
between corresponding elements of `self` and `other`.
+ : Column | Any -> Column
+ self other = self.make_binary_op "+" other
+ self other =
## TODO: Revisit this as part of the column value type work.
op = case other of
_ : Column -> if self.sql_type.is_definitely_numeric || other.sql_type.is_definitely_numeric then 'ADD_NUMBER' else 'ADD_TEXT'
_ -> if self.sql_type.is_definitely_numeric then 'ADD_NUMBER' else 'ADD_TEXT'
self.make_binary_op op other
## UNSTABLE
@ -422,6 +393,34 @@ type Column
% : Column | Any -> Column
% self other = self.make_binary_op "%" other
## ALIAS Power
Element-wise raising to the power.
Arguments:
- other: The exponent to raise `self` by. If `other` is a column, the
power operation is performed pairwise between corresponding elements
of `self` and `other`.
Returns a column containing the result of raising each element of `self`
by `other`.
> Example
Squares the elements of one column.
import Standard.Examples
example_div = Examples.decimal_column ^ 2
> Example
Raises each value in a column by the value in another column.
import Standard.Examples
example_div = Examples.decimal_column ^ Examples.integer_column
^ : Column | Any -> Column
^ self other = self.make_binary_op '^' other
## UNSTABLE
Element-wise boolean conjunction.
@ -456,12 +455,14 @@ type Column
not : Column
not self = self.make_unary_op "NOT"
## UNSTABLE
## ALIAS IF
Replaces `True` values with `when_true` and `False` with `when_false`.
Only meant for use with boolean columns.
TODO: Currently `when_true` and `when_false` need to be a single value.
In the future the API will also support row-based IIF if they are columns.
Arguments:
- when_true: value or column when `self` is `True`.
- when_false: value or column when `self` is `False`.
iif : Any -> Any -> Column
iif self when_true when_false =
## TODO we should adjust new_type based on types when_true and
@ -473,17 +474,68 @@ type Column
when_false being either columns or regular values and rely on a
mapping of Enso base types to SQL types, and a rule for extracting a
common type.
approximate_type x = case x of
_ : Integer -> SQL_Type.integer
_ : Decimal -> SQL_Type.real
_ : Text -> SQL_Type.text
_ : Boolean -> SQL_Type.boolean
_ -> Error.throw (Illegal_Argument_Error_Data "Unsupported type.")
left_type = approximate_type when_true
right_type = approximate_type when_false
left_type = get_approximate_type when_true self.sql_type
right_type = get_approximate_type when_false self.sql_type
if left_type != right_type then Error.throw (Illegal_Argument_Error_Data "when_true and when_false types do not match") else
self.make_op "IIF" [when_true, when_false] new_type=left_type
## Returns a column of first non-`Nothing` value on each row of `self` and
`values` list.
Arguments:
- values: list of columns or values to coalesce with `self`.
> Example
Get the first non-`Nothing` value in two columns.
import Standard.Examples
example_coalesce = Examples.decimal_column.coalesce Examples.integer_column
coalesce : (Any | Vector Any) -> Column
coalesce self values = case values of
_ : Vector.Vector ->
if values.any (v->(self.sql_type != get_approximate_type v self.sql_type)) then Error.throw (Illegal_Argument_Error_Data "self and values types do not all match") else
self.make_op "COALESCE" values new_type=self.sql_type
_ : Array -> self.coalesce (Vector.from_polyglot_array values)
_ -> self.coalesce [values]
## Returns a column of minimum on each row of `self` and `values` list.
Arguments:
- values: list of columns or values to minimum with `self`.
> Example
Get the minimum value in two columns.
import Standard.Examples
example_min = Examples.decimal_column.min Examples.integer_column
min : (Any | Vector Any) -> Column
min self values = case values of
_ : Vector.Vector ->
if values.any (v->(self.sql_type != get_approximate_type v self.sql_type)) then Error.throw (Illegal_Argument_Error_Data "self and values types do not all match") else
self.make_op "ROW_MIN" values new_type=self.sql_type
_ : Array -> self.min (Vector.from_polyglot_array values)
_ -> self.min [values]
## Returns a column of maximum on each row of `self` and `values` list.
Arguments:
- values: list of columns or values to maximum with `self`.
> Example
Get the maximum value in two columns.
import Standard.Examples
example_max = Examples.decimal_column.max Examples.integer_column
max : (Any | Vector Any) -> Column
max self values = case values of
_ : Vector.Vector ->
if values.any (v->(self.sql_type != get_approximate_type v self.sql_type)) then Error.throw (Illegal_Argument_Error_Data "self and values types do not all match") else
self.make_op "ROW_MAX" values new_type=self.sql_type
_ : Array -> self.max (Vector.from_polyglot_array values)
_ -> self.max [values]
## UNSTABLE
@ -504,6 +556,18 @@ type Column
is_empty : Column
is_empty self = self.make_unary_op "IS_EMPTY" new_type=SQL_Type.boolean
## Returns a column of booleans, with `True` items at the positions where
this column does not contain a `Nothing`.
> Example
Check a column for present values.
import Standard.Examples
example_is_present = Examples.decimal_column.is_present
is_present : Column
is_present self = self.is_missing.not
## PRIVATE
Returns a column of booleans with `True` at the positions where this
column contains a blank value.
@ -667,6 +731,7 @@ type Column
example_contains = Examples.text_column_1.is_in [1, 2, 5]
is_in : Column | Vector -> Column
is_in self vector = case vector of
_ : Array -> self.is_in (Vector.from_polyglot_array vector)
_ : Vector.Vector ->
## This is slightly hacky - we don't provide operand types as we want to
allow any type to get through and currently we do not have a mapping
@ -695,18 +760,17 @@ type Column
column : Column -> if Helpers.check_connection self column . not then (Error.throw (Integrity_Error.Error "Column "+column.name)) else
## We slightly abuse the expression syntax putting a Query as one of
the sub-expressions. Once type-checking is added, we may need to
amend the signature of `Expression.Operation` to account for
amend the signature of `SQL_Expression.Operation` to account for
this. Also, unfortunately as `NULL IN (...)` is `NULL` in SQL, we
need to do separate handling of nulls - we check if the target
column has any nulls and if so, we will do `IS NULL` checks for
our columns too. That is because, we want the containment check
for `NULL` to work the same way as for any other value.
in_subquery = Query.Select [Pair_Data column.name column.expression] column.context
has_nulls_expression = Expression.Operation "BOOL_OR" [column.is_missing.expression]
has_nulls_expression = SQL_Expression.Operation "BOOL_OR" [column.is_missing.expression]
has_nulls_subquery = Query.Select [Pair_Data "has_nulls" has_nulls_expression] column.context
new_type = SQL_Type.boolean
new_expr = Expression.Operation "IS_IN_COLUMN" [self.expression, in_subquery, has_nulls_subquery]
Column.Value self.name self.connection new_type new_expr self.context
new_expr = SQL_Expression.Operation "IS_IN_COLUMN" [self.expression, in_subquery, has_nulls_subquery]
Column.Value self.name self.connection SQL_Type.boolean new_expr self.context
## PRIVATE
as_internal : Internal_Column
@ -716,48 +780,14 @@ type Column
to_text : Text
to_text self = "(Database Column "+self.name.to_text+")"
## PRIVATE
A helper method for creating an aggregated column by applying some
operation.
Arguments:
- column: The column to aggregate.
- operation: The name of the aggregation operation.
- name_suffix: The suffix to apply to the name of the aggregate column.
- new_type: The SQL type of the result column.
make_aggregate : Column -> Text -> Text -> SQL_Type -> Column
make_aggregate column operation name_suffix="_agg" new_type=Nothing =
actual_new_type = new_type.if_nothing column.sql_type
expr = Expression.Operation operation [column.expression]
case Helpers.ensure_name_is_sane name_suffix of
True ->
new_name = column.name + name_suffix
lift_aggregate new_name column.connection actual_new_type expr column.context
## PRIVATE
Helper for the expression to tell it which functions needs a Vector.
var_args_functions : Array
var_args_functions = ['is_in', 'coalesce', 'min', 'max']
## PRIVATE
A helper function that lifts an aggregate query into a subquery to ensure
correctness of further processing.
Argument:
- new_name: The new name for the aggregate column.
- connection: The connection with which the aggregate is associated.
- expected_type: The expected SQL type of the column.
- expr: The expression for the query.
- context: The context in which the query exists.
lift_aggregate : Text -> Connection -> SQL_Type -> Expression -> Context -> Column
lift_aggregate new_name connection expected_type expr context =
# TODO [RW] This is a simple workaround for #1643 - we always wrap the
# aggregate into a subquery, thus making it safe to use it everywhere. A
# more complex solution may be adopted at some point.
ixes = freshen_columns [new_name] context.meta_index
col = Internal_Column.Value new_name expected_type expr
setup = context.as_subquery new_name+"_sub" [[col], ixes]
subquery = setup.first
cols = setup.second
new_col = cols.first.first
new_ixes = cols.second
new_ctx = Context.for_subquery subquery . set_index new_ixes
Column.Value new_name connection new_col.sql_type new_col.expression new_ctx
TODO: Revisit this as part of the column value type work.
get_approximate_type value default = case value of
_ : Column -> value.sql_type
Nothing -> default
_ -> SQL_Type.approximate_type value

View File

@ -43,8 +43,12 @@ type SQL_Statement
strings = self.internal_fragments . map <| case _ of
SQL_Fragment.Code_Part code -> code
# TODO at some point we may try more sophisticated serialization based on data type
# TODO #183734954: date and time formatting is limited and will lose sub-second precision and timezone offset.
SQL_Fragment.Interpolation _ obj -> case obj of
Number -> obj.to_text
Date_Time.Date_Time -> "'" + (obj.format "yyyy-MM-dd HH:mm:ss") + "'"
Date.Date -> "'" + (obj.format "yyyy-MM-dd") + "'"
Time_Of_Day.Time_Of_Day -> "'" + (obj.format "HH:mm:ss") + "'"
_ -> "'" + obj.to_text.replace "'" "''" + "'"
strings.join ""

View File

@ -49,12 +49,12 @@ type SQL_Type
numeric : SQL_Type
numeric = SQL_Type.Value Types.NUMERIC "NUMERIC"
## The SQL type representing one of the suppported textual types.
## The SQL type representing one of the supported textual types.
varchar : SQL_Type
varchar = SQL_Type.Value Types.VARCHAR "VARCHAR"
## UNSTABLE
The SQL type representing one of the suppported textual types.
The SQL type representing one of the supported textual types.
It seems that JDBC treats the `TEXT` and `VARCHAR` types as interchangeable.
text : SQL_Type
@ -64,6 +64,40 @@ type SQL_Type
blob : SQL_Type
blob = SQL_Type.Value Types.BLOB "BLOB"
## The SQL type representing a date type.
date : SQL_Type
date = SQL_Type.Value Types.DATE "DATE"
## The SQL type representing a time type.
time : SQL_Type
time = SQL_Type.Value Types.TIME "TIME"
## The SQL type representing a time type.
date_time : SQL_Type
date_time = SQL_Type.Value Types.TIMESTAMP_WITH_TIMEZONE "TIMESTAMP"
## ADVANCED
Given an Enso value gets the approximate SQL type.
approximate_type : Any -> SQL_Type ! Illegal_Argument_Error_Data
approximate_type value = case value of
_ : Boolean -> SQL_Type.boolean
_ : Integer -> SQL_Type.integer
_ : Decimal -> SQL_Type.double
_ : Text -> SQL_Type.varchar
_ : Date.Date -> SQL_Type.date
_ : Time_Of_Day.Time_Of_Day -> SQL_Type.time_of_day
_ : Date_Time.Date_Time -> SQL_Type.date_time
_ -> Error.throw (Illegal_Argument_Error_Data "Unsupported type.")
## PRIVATE
Returns True if this type represents an integer or a double.
It only handles the standard types so it may return false negatives for
non-standard ones.
is_definitely_numeric : Boolean
is_definitely_numeric self = self.is_definitely_double || self.is_definitely_integer
## PRIVATE
Returns True if this type represents an integer.

View File

@ -2,6 +2,8 @@ from Standard.Base import all
from Standard.Base.Error.Problem_Behavior import Report_Warning
from Standard.Table import Auto_Detect, Aggregate_Column, Data_Formatter, Column_Name_Mapping, Column_Selector, Sort_Column_Selector, Sort_Column, Match_Columns, Position
import Standard.Table.Data.Expression.Expression
import Standard.Table.Data.Expression.Expression_Error
import Standard.Table.Data.Table.Table as Materialized_Table
import Standard.Table.Internal.Java_Exports
import Standard.Table.Internal.Table_Helpers
@ -18,7 +20,7 @@ import project.Data.SQL_Type.SQL_Type
import project.Internal.Helpers
import project.Internal.Aggregate_Helper
import project.Internal.IR.Context.Context
import project.Internal.IR.Expression.Expression
import project.Internal.IR.SQL_Expression.SQL_Expression
import project.Internal.IR.From_Spec.From_Spec
import project.Internal.IR.Internal_Column.Internal_Column
import project.Internal.IR.Join_Kind.Join_Kind
@ -27,6 +29,7 @@ import project.Internal.IR.Query.Query
from Standard.Database.Errors import Unsupported_Database_Operation_Error_Data
polyglot java import java.sql.JDBCType
polyglot java import java.util.UUID
## Represents a column-oriented table data structure backed by a database.
type Table
@ -396,13 +399,20 @@ type Table
on_problems.handle_errors fallback=self.with_no_rows <|
mask (make_filter_column column filter)
_ : Function -> Error.throw (Unsupported_Database_Operation_Error_Data "Filtering with a custom predicate is not supported in the database.")
_ : Text ->
table_at = self.at column
if table_at.is_error.not then self.filter table_at filter on_problems else
expression = self.evaluate column
if expression.is_error.not then self.filter expression filter on_problems else
pick_error = expression.catch Expression_Error.Syntax_Error (_->table_at)
on_problems.handle_errors pick_error fallback=self
_ -> case on_problems.handle_errors (self.at column) fallback=Nothing of
Nothing -> self
resolved_column -> self.filter resolved_column filter on_problems
## PRIVATE
with_no_rows self =
false_expression = Expression.Operation "=" [Expression.Constant SQL_Type.integer 1, Expression.Constant SQL_Type.integer 2]
false_expression = SQL_Expression.Operation "=" [SQL_Expression.Constant SQL_Type.integer 1, SQL_Expression.Constant SQL_Type.integer 2]
new_filters = self.context.where_filters + [false_expression]
new_ctx = self.context.set_where_filters new_filters
self.updated_context new_ctx
@ -477,21 +487,35 @@ type Table
If a column with the given name already exists, it will be replaced.
Otherwise a new column is added.
set : Text -> Column -> Table
set self name column = case Helpers.ensure_name_is_sane name of
True ->
is_used_in_index = self.context.meta_index.exists i-> i.name == name
case is_used_in_index of
True -> Error.throw <| Illegal_State_Error_Data "Cannot override column "+name+", because it is used as an index. Remove the index or use a different name."
False ->
new_col = Internal_Column.Value name column.sql_type column.expression
replace = self.internal_columns.exists (c -> c.name == name)
case replace of
True ->
new_cols = self.internal_columns.map (c -> if c.name == name then new_col else c)
self.updated_columns new_cols
False ->
self.updated_columns (self.internal_columns + [new_col])
set : Text -> Column | Text -> Problem_Behavior -> Table
set self name column on_problems=Report_Warning = on_problems.handle_errors fallback=self <|
case Helpers.ensure_name_is_sane name of
True ->
is_used_in_index = self.context.meta_index.exists i-> i.name == name
case is_used_in_index of
True -> Error.throw <| Illegal_State_Error_Data "Cannot override column "+name+", because it is used as an index. Remove the index or use a different name."
False ->
resolved = case column of
_ : Text -> self.evaluate column
_ -> column
new_col = Internal_Column.Value name resolved.sql_type resolved.expression
replace = self.internal_columns.exists (c -> c.name == name)
case replace of
True ->
new_cols = self.internal_columns.map (c -> if c.name == name then new_col else c)
self.updated_columns new_cols
False ->
self.updated_columns (self.internal_columns + [new_col])
## PRIVATE
evaluate : Text -> Column
evaluate self expression =
get_column name = self.at name
make_constant value =
new_type = SQL_Type.approximate_type value
other = SQL_Expression.Constant new_type value
Column.Value ("Constant_" + UUID.randomUUID.to_text) self.connection new_type other self.context
Expression.evaluate expression get_column make_constant "Standard.Database.Data.Column" "Column" Column.var_args_functions
## UNSTABLE
@ -732,7 +756,7 @@ type Table
new_columns = left_renamed_columns + right_renamed_columns
on_exprs = left_new_join_index.zip right_new_join_index l-> r->
Expression.Operation "=" [l.expression, r.expression]
SQL_Expression.Operation "=" [l.expression, r.expression]
new_from = From_Spec.Join kind left_subquery right_subquery on_exprs
new_limit = Nothing
new_ctx = Context.Value new_from [] [] [] new_index new_limit
@ -814,7 +838,7 @@ type Table
## Returns the amount of rows in this table.
row_count : Integer
row_count self = if self.internal_columns.is_empty then 0 else
expr = Expression.Operation "COUNT_ROWS" []
expr = SQL_Expression.Operation "COUNT_ROWS" []
column_name = "row_count"
## We need to keep some column in the subquery which will determine if
the query is performing regular selection or aggregation. To avoid
@ -889,7 +913,7 @@ type Table
setup = self.context.as_subquery self.name [self.internal_columns]
new_ctx = Context.for_subquery setup.first
new_columns = setup.second.first.map column->
[column.name, Expression.Operation "COUNT" [column.expression]]
[column.name, SQL_Expression.Operation "COUNT" [column.expression]]
query = Query.Select new_columns new_ctx
self.connection.dialect.generate_sql query
count_table = self.connection.read_statement count_query
@ -971,7 +995,7 @@ type Table
_ -> Error.throw <| Illegal_State_Error_Data "Inserting can only be performed on tables as returned by `query`, any further processing is not allowed."
# TODO [RW] before removing the PRIVATE tag, add a check that no bad stuff was done to the table as described above
pairs = self.internal_columns.zip values col-> value->
[col.name, Expression.Constant col.sql_type value]
[col.name, SQL_Expression.Constant col.sql_type value]
query = self.connection.dialect.generate_sql <| Query.Insert table_name pairs
affected_rows = self.connection.execute_update query
case affected_rows == 1 of
@ -1071,7 +1095,7 @@ type Integrity_Error
# make_table : Connection -> Text -> Vector [Text, SQL_Type] -> Context -> Table
make_table : Connection -> Text -> Vector -> Context -> Table
make_table connection table_name columns ctx =
cols = columns.map (p -> Internal_Column.Value p.first p.second (Expression.Column table_name p.first))
cols = columns.map (p -> Internal_Column.Value p.first p.second (SQL_Expression.Column table_name p.first))
Table.Value table_name connection cols ctx
## PRIVATE

View File

@ -3,7 +3,7 @@ from Standard.Base import all hiding First, Last
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all
import project.Data.SQL_Type.SQL_Type
import project.Internal.IR.Expression.Expression
import project.Internal.IR.SQL_Expression.SQL_Expression
import project.Internal.IR.Internal_Column.Internal_Column
from project.Errors import Unsupported_Database_Operation_Error_Data
@ -23,48 +23,48 @@ make_aggregate_column table aggregate new_name =
## PRIVATE
Creates an Internal Representation of the expression that computes a
requested statistic.
make_expression : Aggregate_Column -> Dialect -> Expression
make_expression : Aggregate_Column -> Dialect -> SQL_Expression
make_expression aggregate dialect =
is_non_empty_selector v = if v.is_nothing then False else v.columns.not_empty
case aggregate of
Group_By c _ -> c.expression
Count _ -> Expression.Operation "COUNT_ROWS" []
Count _ -> SQL_Expression.Operation "COUNT_ROWS" []
Count_Distinct columns _ ignore_nothing -> if columns.is_empty then Error.throw (Illegal_Argument_Error_Data "Count_Distinct must have at least one column.") else
case ignore_nothing of
True -> Expression.Operation "COUNT_DISTINCT" (columns.map .expression)
False -> Expression.Operation "COUNT_DISTINCT_INCLUDE_NULL" (columns.map .expression)
Count_Not_Nothing c _ -> Expression.Operation "COUNT" [c.expression]
Count_Nothing c _ -> Expression.Operation "COUNT_IS_NULL" [c.expression]
Count_Not_Empty c _ -> Expression.Operation "COUNT_NOT_EMPTY" [c.expression]
Count_Empty c _ -> Expression.Operation "COUNT_EMPTY" [c.expression]
Percentile p c _ -> Expression.Operation "PERCENTILE" [Expression.Constant SQL_Type.double p, c.expression]
Mode c _ -> Expression.Operation "MODE" [c.expression]
True -> SQL_Expression.Operation "COUNT_DISTINCT" (columns.map .expression)
False -> SQL_Expression.Operation "COUNT_DISTINCT_INCLUDE_NULL" (columns.map .expression)
Count_Not_Nothing c _ -> SQL_Expression.Operation "COUNT" [c.expression]
Count_Nothing c _ -> SQL_Expression.Operation "COUNT_IS_NULL" [c.expression]
Count_Not_Empty c _ -> SQL_Expression.Operation "COUNT_NOT_EMPTY" [c.expression]
Count_Empty c _ -> SQL_Expression.Operation "COUNT_EMPTY" [c.expression]
Percentile p c _ -> SQL_Expression.Operation "PERCENTILE" [SQL_Expression.Constant SQL_Type.double p, c.expression]
Mode c _ -> SQL_Expression.Operation "MODE" [c.expression]
First c _ ignore_nothing order_by -> case is_non_empty_selector order_by of
False -> Error.throw (Unsupported_Database_Operation_Error_Data "`First` aggregation requires at least one `order_by` column.")
True ->
order_bys = order_by.columns.map c-> dialect.prepare_order_descriptor c.column.as_internal c.direction Text_Ordering.Default
case ignore_nothing of
False -> Expression.Operation "FIRST" [c.expression]+order_bys
True -> Expression.Operation "FIRST_NOT_NULL" [c.expression]+order_bys
False -> SQL_Expression.Operation "FIRST" [c.expression]+order_bys
True -> SQL_Expression.Operation "FIRST_NOT_NULL" [c.expression]+order_bys
Last c _ ignore_nothing order_by -> case is_non_empty_selector order_by of
False -> Error.throw (Unsupported_Database_Operation_Error_Data "`Last` aggregation requires at least one `order_by` column.")
True ->
order_bys = order_by.columns.map c-> dialect.prepare_order_descriptor c.column.as_internal c.direction Text_Ordering.Default
case ignore_nothing of
False -> Expression.Operation "LAST" [c.expression]+order_bys
True -> Expression.Operation "LAST_NOT_NULL" [c.expression]+order_bys
Maximum c _ -> Expression.Operation "MAX" [c.expression]
Minimum c _ -> Expression.Operation "MIN" [c.expression]
Shortest c _ -> Expression.Operation "SHORTEST" [c.expression]
Longest c _ -> Expression.Operation "LONGEST" [c.expression]
False -> SQL_Expression.Operation "LAST" [c.expression]+order_bys
True -> SQL_Expression.Operation "LAST_NOT_NULL" [c.expression]+order_bys
Maximum c _ -> SQL_Expression.Operation "MAX" [c.expression]
Minimum c _ -> SQL_Expression.Operation "MIN" [c.expression]
Shortest c _ -> SQL_Expression.Operation "SHORTEST" [c.expression]
Longest c _ -> SQL_Expression.Operation "LONGEST" [c.expression]
Standard_Deviation c _ population -> case population of
True -> Expression.Operation "STDDEV_POP" [c.expression]
False -> Expression.Operation "STDDEV_SAMP" [c.expression]
True -> SQL_Expression.Operation "STDDEV_POP" [c.expression]
False -> SQL_Expression.Operation "STDDEV_SAMP" [c.expression]
Concatenate c _ separator prefix suffix quote_char ->
base_args = [c.expression, Expression.Constant SQL_Type.text separator, Expression.Constant SQL_Type.text prefix, Expression.Constant SQL_Type.text suffix]
base_args = [c.expression, SQL_Expression.Constant SQL_Type.text separator, SQL_Expression.Constant SQL_Type.text prefix, SQL_Expression.Constant SQL_Type.text suffix]
case quote_char.is_empty of
True -> Expression.Operation "CONCAT" base_args
False -> Expression.Operation "CONCAT_QUOTE_IF_NEEDED" base_args+[Expression.Constant SQL_Type.text quote_char]
Sum c _ -> Expression.Operation "SUM" [c.expression]
Average c _ -> Expression.Operation "AVG" [c.expression]
Median c _ -> Expression.Operation "MEDIAN" [c.expression]
True -> SQL_Expression.Operation "CONCAT" base_args
False -> SQL_Expression.Operation "CONCAT_QUOTE_IF_NEEDED" base_args+[SQL_Expression.Constant SQL_Type.text quote_char]
Sum c _ -> SQL_Expression.Operation "SUM" [c.expression]
Average c _ -> SQL_Expression.Operation "AVG" [c.expression]
Median c _ -> SQL_Expression.Operation "MEDIAN" [c.expression]

View File

@ -3,7 +3,7 @@ from Standard.Base import all
import project.Data.SQL
import project.Data.SQL.Builder
import project.Internal.IR.Context.Context
import project.Internal.IR.Expression.Expression
import project.Internal.IR.SQL_Expression.SQL_Expression
import project.Internal.IR.From_Spec.From_Spec
import project.Internal.IR.Join_Kind.Join_Kind
import project.Internal.IR.Order_Descriptor.Order_Descriptor
@ -169,15 +169,16 @@ base_dialect =
unary = name -> [name, make_unary_op name]
fun = name -> [name, make_function name]
arith = [bin "+", bin "-", bin "*", bin "/", bin "%"]
arith = [["ADD_NUMBER", make_binary_op "+"], ["ADD_TEXT", make_binary_op "||"], bin "-", bin "*", bin "/", bin "%", ["^", make_function "POWER"]]
logic = [bin "AND", bin "OR", unary "NOT", ["IIF", make_iif]]
compare = [bin "=", bin "!=", bin "<", bin ">", bin "<=", bin ">=", ["BETWEEN", make_between]]
functions = [["COALESCE", make_function "COALESCE"], ["ROW_MIN", make_function "MIN"], ["ROW_MAX", make_function "MAX"]]
agg = [fun "MAX", fun "MIN", fun "AVG", fun "SUM"]
counts = [fun "COUNT", ["COUNT_ROWS", make_constant "COUNT(*)"]]
text = [is_empty, bin "LIKE"]
nulls = [["IS_NULL", make_right_unary_op "IS NULL"], ["FILL_NULL", make_function "COALESCE"]]
contains = [["IS_IN", make_is_in], ["IS_IN_COLUMN", make_is_in_column]]
base_map = Map.from_vector (arith + logic + compare + agg + counts + text + nulls + contains)
base_map = Map.from_vector (arith + logic + compare + functions + agg + counts + text + nulls + contains)
Internal_Dialect.Value base_map wrap_in_quotes
## PRIVATE
@ -242,12 +243,12 @@ make_is_in_column arguments = case arguments.length of
Arguments:
- dialect: The SQL dialect in which the expression is being generated.
- expr: The expression to generate SQL code for.
generate_expression : Internal_Dialect -> Expression | Order_Descriptor | Query -> Builder
generate_expression : Internal_Dialect -> SQL_Expression | Order_Descriptor | Query -> Builder
generate_expression dialect expr = case expr of
Expression.Column origin name ->
SQL_Expression.Column origin name ->
dialect.wrap_identifier origin ++ '.' ++ dialect.wrap_identifier name
Expression.Constant sql_type value -> SQL.interpolation sql_type value
Expression.Operation kind arguments ->
SQL_Expression.Constant sql_type value -> SQL.interpolation sql_type value
SQL_Expression.Operation kind arguments ->
op = dialect.operation_map.get_or_else kind (Error.throw <| Unsupported_Database_Operation_Error_Data kind)
parsed_args = arguments.map (generate_expression dialect)
op parsed_args

View File

@ -1,6 +1,6 @@
from Standard.Base import all
import project.Internal.IR.Expression.Expression
import project.Internal.IR.SQL_Expression.SQL_Expression
import project.Internal.IR.From_Spec.From_Spec
import project.Internal.IR.Internal_Column.Internal_Column
import project.Internal.IR.Order_Descriptor.Order_Descriptor
@ -31,7 +31,7 @@ type Context
- meta_index: a list of internal columns to use for joining or grouping.
- limit: an optional maximum number of elements that the equery should
return.
Value (from_spec : From_Spec) (where_filters : Vector Expression) (orders : Vector Order_Descriptor) (groups : Vector Expression) (meta_index : Vector Internal_Column) (limit : Nothing | Integer)
Value (from_spec : From_Spec) (where_filters : Vector SQL_Expression) (orders : Vector Order_Descriptor) (groups : Vector SQL_Expression) (meta_index : Vector Internal_Column) (limit : Nothing | Integer)
## PRIVATE
@ -82,7 +82,7 @@ type Context
Arguments:
- new_filters: The new filters to set in the query.
set_where_filters : Vector Expression -> Context
set_where_filters : Vector SQL_Expression -> Context
set_where_filters self new_filters =
Context.Value self.from_spec new_filters self.orders self.groups self.meta_index self.limit
@ -119,7 +119,7 @@ type Context
Arguments:
- new_groups: The new grouping clauses to set in the query.
set_groups : Vector Expression -> Context
set_groups : Vector SQL_Expression -> Context
set_groups self new_groups =
Context.Value self.from_spec self.where_filters self.orders new_groups self.meta_index self.limit
@ -152,7 +152,7 @@ type Context
as_subquery self alias column_lists =
rewrite_internal_column : Internal_Column -> Internal_Column
rewrite_internal_column column =
Internal_Column.Value column.name column.sql_type (Expression.Column alias column.name)
Internal_Column.Value column.name column.sql_type (SQL_Expression.Column alias column.name)
new_columns = column_lists.map columns->
columns.map rewrite_internal_column

View File

@ -1,7 +1,7 @@
from Standard.Base import all
import project.Internal.IR.Context.Context
import project.Internal.IR.Expression.Expression
import project.Internal.IR.SQL_Expression.SQL_Expression
import project.Internal.IR.Join_Kind.Join_Kind
## PRIVATE
@ -44,7 +44,7 @@ type From_Spec
- on: a list of expressions that will be used as join conditions, these
are usually be equalities between expressions from the left and right
sources.
Join (kind : Join_Kind) (left_spec : From_Spec) (right_spec : From_Spec) (on : Vector Expression)
Join (kind : Join_Kind) (left_spec : From_Spec) (right_spec : From_Spec) (on : Vector SQL_Expression)
## PRIVATE
@ -57,4 +57,4 @@ type From_Spec
- context: the context for the sub-query.
- alias: the name upon which the results of this sub-query can be
referred to in other parts of the query.
Sub_Query (columns : Vector (Pair Text Expression)) (context : Context) (alias : Text)
Sub_Query (columns : Vector (Pair Text SQL_Expression)) (context : Context) (alias : Text)

View File

@ -1,7 +1,7 @@
from Standard.Base import all
import project.Data.SQL_Type.SQL_Type
import project.Internal.IR.Expression.Expression
import project.Internal.IR.SQL_Expression.SQL_Expression
type Internal_Column
## PRIVATE
@ -12,7 +12,7 @@ type Internal_Column
- name: The column name.
- sql_type: The SQL type of the column.
- expression: An expression for applying to the column.
Value name:Text sql_type:SQL_Type expression:Expression
Value name:Text sql_type:SQL_Type expression:SQL_Expression
## PRIVATE

View File

@ -1,8 +1,8 @@
from Standard.Base import all
import project.Internal.IR.Expression.Expression
import project.Internal.IR.SQL_Expression.SQL_Expression
import project.Internal.IR.Nulls_Order.Nulls_Order
## PRIVATE
type Order_Descriptor
Value (expression : Expression) (direction : Sort_Direction) (nulls_order : Nothing | Nulls_Order = Nothing) (collation : Nothing | Text = Nothing)
Value (expression : SQL_Expression) (direction : Sort_Direction) (nulls_order : Nothing | Nulls_Order = Nothing) (collation : Nothing | Text = Nothing)

View File

@ -1,7 +1,7 @@
from Standard.Base import all
import project.Internal.IR.Context.Context
import project.Internal.IR.Expression.Expression
import project.Internal.IR.SQL_Expression.SQL_Expression
## PRIVATE
@ -17,7 +17,7 @@ type Query
is a pair whose first element is the name of the materialized column
and the second element is the expression to compute.
- context: The query context, see `Context` for more detail.
Select (expressions : Vector (Pair Text Expression)) (context : Context)
Select (expressions : Vector (Pair Text SQL_Expression)) (context : Context)
## PRIVATE

View File

@ -7,7 +7,7 @@ import project.Data.SQL_Type.SQL_Type
The internal representation of an SQL expression which can be a column
reference, an interpolated constant or an operation that combines other
expressions.
type Expression
type SQL_Expression
## PRIVATE
The internal representation of an SQL expression that gets a value from a
@ -43,4 +43,4 @@ type Expression
dialect.
- expressions: a list of expressions which are arguments to the operation
different operations support different amounts of arguments.
Operation (kind : Text) (expressions : Vector Expression)
Operation (kind : Text) (expressions : Vector SQL_Expression)

View File

@ -198,6 +198,9 @@ default_storage_type storage_type = case storage_type of
Storage.Integer -> SQL_Type.integer
Storage.Decimal -> SQL_Type.double
Storage.Boolean -> SQL_Type.boolean
Storage.Date -> SQL_Type.date
Storage.Time_Of_Day -> SQL_Type.time_of_day
Storage.Date_Time -> SQL_Type.date_time
## Support for mixed type columns in Table upload is currently very limited,
falling back to treating everything as text.
Storage.Any -> SQL_Type.text

View File

@ -7,7 +7,7 @@ import project.Data.SQL
import project.Data.SQL_Statement.SQL_Statement
import project.Data.SQL_Type.SQL_Type
import project.Internal.Base_Generator
import project.Internal.IR.Expression.Expression
import project.Internal.IR.SQL_Expression.SQL_Expression
import project.Internal.IR.Internal_Column.Internal_Column
import project.Internal.IR.Order_Descriptor.Order_Descriptor
import project.Internal.IR.Nulls_Order.Nulls_Order
@ -244,8 +244,8 @@ make_order_descriptor internal_column sort_direction text_ordering =
False ->
Error.throw (Unsupported_Database_Operation_Error_Data "Case insensitive ordering with custom locale is currently not supported. You may need to materialize the Table to perform this operation.")
True ->
upper = Expression.Operation "UPPER" [internal_column.expression]
folded_expression = Expression.Operation "LOWER" [upper]
upper = SQL_Expression.Operation "UPPER" [internal_column.expression]
folded_expression = SQL_Expression.Operation "LOWER" [upper]
Order_Descriptor.Value folded_expression sort_direction nulls_order=nulls collation=Nothing
False ->
Order_Descriptor.Value internal_column.expression sort_direction nulls_order=nulls collation=Nothing

View File

@ -26,6 +26,11 @@ The license file can be found at `licenses/APACHE2.0`.
Copyright notices related to this dependency can be found in the directory `commons-io.commons-io-2.11.0`.
'antlr4-runtime', licensed under the The BSD License, is distributed with the Table.
The license file can be found at `licenses/BSD-3-Clause`.
Copyright notices related to this dependency can be found in the directory `org.antlr.antlr4-runtime-4.10.1`.
'commons-collections4', licensed under the Apache License, Version 2.0, is distributed with the Table.
The license information can be found along with the copyright notices.
Copyright notices related to this dependency can be found in the directory `org.apache.commons.commons-collections4-4.4`.

View File

@ -0,0 +1 @@
Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.

View File

@ -400,6 +400,34 @@ type Column
% : Column | Any -> Column
% self other = run_vectorized_binary_op self '%' (%) other
## ALIAS Power Columns

   Element-wise raising to the power.

   Arguments:
   - other: The exponent to raise `self` by. If `other` is a column, the
     power operation is performed pairwise between corresponding elements
     of `self` and `other`.

   Returns a column containing the result of raising each element of `self`
   by `other`.

   > Example
     Squares the elements of one column.

         import Standard.Examples

         example_pow = Examples.decimal_column ^ 2

   > Example
     Raises each value in a column by the value in another column.

         import Standard.Examples

         example_pow = Examples.decimal_column ^ Examples.integer_column
^ : Column | Any -> Column
^ self other = run_vectorized_binary_op self '^' (^) other
## ALIAS AND Columns
Element-wise boolean conjunction.
@ -460,7 +488,7 @@ type Column
|| self other =
run_vectorized_binary_op self "||" (||) other
## ALIAS NOT Columns
## ALIAS NOT
Boolean negation of each element in self column.
@ -473,21 +501,91 @@ type Column
not : Column
not self = run_vectorized_unary_op self "not" .not
## UNSTABLE
## ALIAS IF
Replaces `True` values with `when_true` and `False` with `when_false`.
Only meant for use with boolean columns.
TODO: Currently `when_true` and `when_false` need to be a single value.
In the future the API will also support row-based IIF if they are columns.
Arguments:
- when_true: value or column when `self` is `True`.
- when_false: value or column when `self` is `False`.
> Example
If the value in a column is `True`, replace it with `1`, otherwise `0`.
import Standard.Examples
example_if = Examples.bool_column_1.iif 1 0
iif : Any -> Any -> Column
iif self when_true when_false = case self.storage_type of
Storage.Boolean ->
s = self.java_column.getStorage
ix = self.java_column.getIndex
rs = s.iif when_true when_false
true_val = case when_true of
_ : Column -> when_true.java_column.getStorage
_ -> when_true
false_val = case when_false of
_ : Column -> when_false.java_column.getStorage
_ -> when_false
rs = s.iif true_val false_val
Column.Column_Data (Java_Column.new "Result" ix rs)
_ -> Error.throw (Illegal_Argument_Error "`iif` can only be used with boolean columns.")
## Returns a column of first non-`Nothing` value on each row of `self` and
   `values` list.

   Arguments:
   - values: list of columns or values to coalesce with `self`.

   > Example
     Get the first non-`Nothing` value in two columns.

         import Standard.Examples

         example_coalesce = Examples.decimal_column.coalesce Examples.integer_column
coalesce : (Any | Vector Any) -> Column
coalesce self values =
    # Keep the left value unless it is `Nothing`, in which case fall through
    # to the right one.
    first_non_nothing a b = a.if_nothing b
    run_vectorized_many_op self "coalesce" first_non_nothing values
## Returns a column of minimum on each row of `self` and `values` list.

   Arguments:
   - values: list of columns or values to minimum with `self`.

   > Example
     Get the minimum value in two columns.

         import Standard.Examples

         example_min = Examples.decimal_column.min Examples.integer_column
min : (Any | Vector Any) -> Column
min self values =
    # `Nothing` never wins: a missing side yields the other side; otherwise
    # the strictly smaller value is kept.
    smaller a b = case a.is_nothing of
        True -> b
        False -> if b.is_nothing then a else
            if b < a then b else a
    run_vectorized_many_op self "minimum" smaller values
## Returns a column of maximum on each row of `self` and `values` list.

   Arguments:
   - values: list of columns or values to maximum with `self`.

   > Example
     Get the maximum value in two columns.

         import Standard.Examples

         example_max = Examples.decimal_column.max Examples.integer_column
max : (Any | Vector Any) -> Column
max self values =
    # `Nothing` never wins: a missing side yields the other side; otherwise
    # the strictly larger value is kept.
    larger a b = case a.is_nothing of
        True -> b
        False -> if b.is_nothing then a else
            if b > a then b else a
    run_vectorized_many_op self "maximum" larger values
## Returns a column of booleans, with `True` items at the positions where
this column contains a `Nothing`.
@ -696,7 +794,11 @@ type Column
True ->
fallback_fn _ _ =
Panic.throw (Illegal_State_Error_Data "Impossible: This is a bug in the Standard.Table library.")
run_vectorized_binary_op self op_name fallback_fn vector skip_nulls=False new_name=result_name
true_vector = case vector of
_ : Array -> Vector.from_polyglot_array vector
_ : Vector.Vector -> vector
column : Column -> column.to_vector
run_vectorized_binary_op self op_name fallback_fn true_vector skip_nulls=False new_name=result_name
False ->
## We have custom code for the non-vectorized case, because
usually a vectorized binary op will apply the fallback
@ -705,6 +807,7 @@ type Column
against the whole other column, instead of just the
corresponding row - so we need to go around a bit.
true_vector = case vector of
_ : Array -> Vector.from_polyglot_array vector
_ : Vector.Vector -> vector
## This does no copying, as `Column.to_vector` just returns
a view of the storage.
@ -881,6 +984,17 @@ type Column
if storage.isNa index then Nothing else
storage.getItem index
## UNSTABLE

   Returns a column containing rows of this column.

   Arguments:
   - max_rows: specifies a maximum amount of rows to fetch; if not set, all
     available rows are fetched.
read : (Nothing | Integer) -> Column
read self max_rows=Nothing = case max_rows of
    Nothing -> self
    _ -> self.slice 0 max_rows
## Returns a vector containing all the elements in this column.
> Example
@ -996,98 +1110,6 @@ type Column
data = ['data', self.to_vector.take (First max_data)]
Json.from_pairs [size, name, data] . to_text
## ALIAS Sum Columns
Sums the values in this column.
> Example
Sum the values in a column.
import Standard.Examples
example_sum = Examples.integer_column.sum
sum : Any
sum self = self.java_column.aggregate 'sum' (x-> Vector.from_polyglot_array x . reduce (+)) True
## ALIAS Max Columns
Computes the maximum element of this column.
> Example
Compute the maximum value of a column.
import Standard.Examples
example_max = Examples.integer_column.max
max : Any
max self =
self.java_column.aggregate 'max' (x-> Vector.from_polyglot_array x . reduce Math.max) True
## ALIAS Min Columns
Computes the minimum element of this column.
> Example
Compute the minimum value of a column.
import Standard.Examples
example_min = Examples.integer_column.min
min : Any
min self =
self.java_column.aggregate 'min' (x-> Vector.from_polyglot_array x . reduce Math.min) True
## ALIAS Mean Columns
Computes the mean of non-missing elements of this column.
> Example
Compute the mean value of a column.
import Standard.Examples
example_mean = Examples.integer_column.mean
mean : Any
mean self =
vec_mean v = if v.length == 0 then Nothing else
(Vector.from_polyglot_array v).reduce (+) / v.length
self.java_column.aggregate 'mean' vec_mean True
## Computes the variance of the sample represented by this column.
Arguments:
- degrees_of_freedom_correction: a correction to account for the
missing degrees of freedom in the sample. The default value of `1`
computes a sample variance. Setting it to `0` will compute population
variance instead.
variance self degrees_of_freedom_correction=1 =
mean = self.mean
shifted = self - mean
sq = shifted * shifted
sq.sum / (self.length - degrees_of_freedom_correction)
## Computes the standard deviation of the sample represented by this column.
Arguments:
- degrees_of_freedom_correction: a correction to account for the
missing degrees of freedom in the sample. The default value of `1`
computes a sample standard deviation. Setting it to `0` will compute
population standard deviation instead.
standard_deviation self degrees_of_freedom_correction=1 =
self.variance degrees_of_freedom_correction . sqrt
## Computes the coefficient of determination of a given prediction column.
Arguments:
- predictions: the column predicting the values of this column.
r_squared self predictions =
prediction_diff = self - predictions
ss_res = prediction_diff*prediction_diff . sum
ss_tot_lin = self - self.mean
ss_tot = ss_tot_lin*ss_tot_lin . sum
1 - ss_res / ss_tot
## UNSTABLE
Sorts the column according to the specified rules.
@ -1181,22 +1203,6 @@ type Column
first : Any ! Empty_Error
first self = self.at 0 . catch Index_Out_Of_Bounds_Error_Data (_ -> Error.throw Empty_Error)
## UNSTABLE
Returns the first element in the column, if it exists.
If the column is empty, this method will return a dataflow error
containing an `Empty_Error`.
> Example
Get the first element of a column.
import Standard.Examples
example_head = Examples.integer_column.head
head : Any ! Empty_Error
head self = self.first
## UNSTABLE
Returns the last element in the column, if it exists.
@ -1243,6 +1249,11 @@ type Column
duplicate_count : Column
duplicate_count self = Column_Data self.java_column.duplicateCount
## PRIVATE
   Helper for the expression engine: names of functions which take a single
   Vector argument but which should be exposed to expressions with variable
   parameters (their trailing arguments are collected into one Vector).
var_args_functions : Vector
var_args_functions = ['is_in', 'coalesce', 'min', 'max']
## UNSTABLE
An error for when the column contains no elements.
@ -1253,6 +1264,33 @@ type Empty_Error
to_display_text : Text
to_display_text self = "The column is empty."
## PRIVATE
   Folds a vectorized operation over the provided column and values. When
   more than one operand is provided, the intermediate result is folded with
   each subsequent operand in turn.

   Arguments:
   - column: The column to execute the operation over.
   - name: The name of the vectorized operation.
   - fallback_fn: A function used if the vectorized operation isn't available.
   - operands: The vector of operands to apply to the function after `column`.
   - skip_nulls: Specifies if nulls should be skipped. If set to `True`, a null
     value results in null without passing it to the function. If set to
     `False`, the null values are passed as any other value and can have custom
     handling logic.
   - new_name: The name of the column created as the result of this operation.
run_vectorized_many_op : Column -> Text -> (Any -> Any -> Any) -> Vector -> Boolean -> Text -> Column
run_vectorized_many_op column name fallback_fn operands skip_nulls=False new_name=(name + "_" + column.name) =
    case operands of
        _ : Vector.Vector ->
            # Column operands are combined storage-to-storage with `zip`,
            # scalar operands are broadcast with `bimap`.
            apply_operand current operand = case operand of
                _ : Column -> current.zip name fallback_fn operand.java_column.getStorage skip_nulls
                _ -> current.bimap name fallback_fn operand skip_nulls
            folded = operands.fold column.java_column.getStorage apply_operand
            Column.Column_Data (Java_Column.new new_name column.java_column.getIndex folded)
        # Normalize non-Vector inputs and retry with a proper Vector.
        _ : Array -> run_vectorized_many_op column name fallback_fn (Vector.from_polyglot_array operands) skip_nulls new_name
        _ -> run_vectorized_many_op column name fallback_fn [operands] skip_nulls new_name
## PRIVATE
Executes a vectorized binary operation over the provided column.
@ -1267,7 +1305,7 @@ type Empty_Error
`False`, the null values are passed as any other value and can have custom
handling logic.
- new_name: The name of the column created as the result of this operation.
run_vectorized_binary_op : Column -> Text -> (Any -> Any) -> Any -> Boolean -> Text -> Column
run_vectorized_binary_op : Column -> Text -> (Any -> Any -> Any) -> Any -> Boolean -> Text -> Column
run_vectorized_binary_op column name fallback_fn operand skip_nulls=True new_name="Result" = case operand of
Column.Column_Data col2 ->
s1 = column.java_column.getStorage

View File

@ -0,0 +1,49 @@
from Standard.Base import all
polyglot java import org.enso.table.expressions.ExpressionVisitorImpl
polyglot java import java.lang.IllegalArgumentException
polyglot java import java.lang.UnsupportedOperationException
type Expression
    ## Evaluates an expression and returns the result.

       Arguments:
       - expression: the expression to evaluate.
       - get_column: a function that takes a column name and returns the
         associated Column object.
       - make_constant: a function that takes an object and returns a
         constant Column object.
       - module_name: the name of the Column module that the expression is
         being evaluated against.
       - type_name: the name of the Column type that the expression is being
         evaluated against.
       - var_args_functions: a Vector of function names which take a single
         Vector argument but which should be exposed with variable parameters.
    evaluate : Text -> (Text -> Any) -> (Any -> Any) -> Text -> Text -> Vector Text -> Any
    evaluate expression get_column make_constant module_name type_name var_args_functions =
        # Translate the Java exceptions raised by the ANTLR-based visitor into
        # dataflow errors carrying the corresponding `Expression_Error`.
        handle_parse_error = Panic.catch_java ExpressionVisitorImpl.SyntaxErrorException handler=(cause-> Error.throw (Expression_Error.Syntax_Error cause.getMessage cause.getLine cause.getColumn))
        handle_unsupported = handle_java_error UnsupportedOperationException Expression_Error.Unsupported_Operation
        handle_arguments = handle_java_error IllegalArgumentException Expression_Error.Argument_Mismatch
        handle_parse_error <| handle_unsupported <| handle_arguments <|
            ExpressionVisitorImpl.evaluate expression get_column make_constant module_name type_name var_args_functions.to_array
## Errors that can occur while parsing or evaluating an expression.
type Expression_Error
    ## The expression supplied could not be parsed due to a syntax error.
    Syntax_Error message:Text line:Integer column:Integer

    ## Expression error when a function could not be found on the target type.
    Unsupported_Operation name:Text

    ## Expression error when the number of arguments for a function is incorrect.
    Argument_Mismatch message:Text

    ## Provides a human-readable representation of the error.
    to_display_text : Text
    to_display_text self = case self of
        Expression_Error.Syntax_Error _ _ _ -> "Expression.Syntax_Error: " + self.message + " (line " + self.line.to_text + ", column " + self.column.to_text + ")."
        Expression_Error.Unsupported_Operation _ -> "Expression.Unsupported: " + self.name + " is not a supported method."
        Expression_Error.Argument_Mismatch _ -> "Expression.Argument_Mismatch: " + self.message
## PRIVATE
   Builds a panic handler that converts a Java exception of `java_type` into
   an Enso dataflow error built by `enso_constructor` from the exception's
   message.
handle_java_error java_type enso_constructor =
    wrap_cause = cause-> Error.throw (enso_constructor cause.getMessage)
    Panic.catch_java java_type handler=wrap_cause

View File

@ -24,6 +24,8 @@ import project.Internal.Table_Helpers
import project.Internal.Aggregate_Column_Helper
import project.Internal.Parse_Values_Helper
import project.Internal.Problem_Builder.Problem_Builder
import project.Data.Expression.Expression
import project.Data.Expression.Expression_Error
from project.Data.Column import get_item_string
from project.Data.Column_Type_Selection import Column_Type_Selection, Auto
@ -38,6 +40,7 @@ polyglot java import org.enso.table.data.table.Table as Java_Table
polyglot java import org.enso.table.data.table.Column as Java_Column
polyglot java import org.enso.table.operations.OrderBuilder
polyglot java import org.enso.table.data.mask.OrderMask
polyglot java import java.util.UUID
## Represents a column-oriented table data structure.
type Table
@ -865,6 +868,13 @@ type Table
on_problems.handle_errors fallback=self.with_no_rows <|
mask (make_filter_column column filter)
_ : Function -> mask (column.map filter)
_ : Text ->
table_at = self.at column
if table_at.is_error.not then self.filter table_at filter on_problems else
expression = self.evaluate column
if expression.is_error.not then self.filter expression filter on_problems else
pick_error = expression.catch Expression_Error.Syntax_Error (_->table_at)
on_problems.handle_errors pick_error fallback=self
_ -> case on_problems.handle_errors (self.at column) fallback=Nothing of
Nothing -> self
resolved_column -> self.filter resolved_column filter on_problems
@ -922,7 +932,10 @@ type Table
Arguments:
- name: The name of the column to set the value of.
- column: The new value for the column called `name`.
- column: The new value for the column either a `Column` or `Text` of an
expression.
- on_problems: Specifies how to handle if a problem occurs, raising as a
warning by default.
If a column with the given name already exists, it will be replaced.
Otherwise a new column is added.
@ -937,13 +950,24 @@ type Table
table = Examples.inventory_table
double_inventory = table.at "total_stock" * 2
table.set "total_stock" double_inventory
set : Text -> Column | Vector.Vector -> Table
set self name column = case column of
_ : Vector.Vector ->
self.set name (Column.from_vector name column)
Column.Column_Data _ ->
table.set "total_stock_expr" "2 * [total_stock]"
set : Text -> Column | Vector.Vector | Text -> Problem_Behavior -> Table
set self name column on_problems=Report_Warning = case column of
_ : Text ->
expression = self.evaluate column
if expression.is_error.not then self.set name expression on_problems else
on_problems.handle_errors expression fallback=self
_ : Vector.Vector -> self.set name (Column.from_vector name column)
_ : Column ->
Table.Table_Data (self.java_table.addOrReplaceColumn (column.rename name . java_column))
## PRIVATE
   Evaluates an expression string against this table, producing a `Column`.

   Column references in the expression are resolved via `self.at`; constant
   values are expanded into a column of `self.row_count` repeated values whose
   name is a random UUID (presumably to avoid clashing with real column
   names — TODO confirm).
evaluate : Text -> Column
evaluate self expression =
    get_column name = self.at name
    make_constant value = Column.from_vector (UUID.randomUUID.to_text) (Vector.new self.row_count _->value)
    Expression.evaluate expression get_column make_constant "Standard.Table.Data.Column" "Column" Column.var_args_functions
## Returns the vector of columns contained in this table.
> Examples

View File

@ -192,10 +192,13 @@ type Table_Column_Helper
resolve_column_helper : (Integer | Text | Column) -> Problem_Builder -> a | Nothing
resolve_column_helper self selector problem_builder = case selector of
_ : Text ->
matched_columns = Matching.match_criteria_callback Text_Matcher.Case_Sensitive self.internal_columns [selector] reorder=True name_mapper=(_.name) problem_callback=problem_builder.report_missing_input_columns
matched_columns = self.internal_columns.filter column->(column.name==selector)
if matched_columns.length == 1 then matched_columns.first else
if matched_columns.length == 0 then Nothing else
Panic.throw (Illegal_State_Error_Data "A single exact match should never match more than one column. Perhaps the table breaks the invariant of unique column names?")
if matched_columns.length != 0 then Panic.throw (Illegal_State_Error_Data "A single exact match should never match more than one column. Perhaps the table breaks the invariant of unique column names?") else
expression = (self.table.evaluate selector).catch Any _->Nothing
if expression != Nothing then expression else
problem_builder.report_missing_input_columns [selector]
Nothing
_ : Integer -> case is_index_valid self.internal_columns.length selector of
True -> self.internal_columns.at selector
False ->

View File

@ -97,7 +97,7 @@ len_list list =
Arguments:
- act: The action to perform `count` number of times.
times : Integer-> List Any
times : Integer -> (Integer -> Any) -> List Any
times count act =
go = results -> number -> if number == 0 then results else
@Tail_Call go (Cons (act number) results) number-1

View File

@ -5,6 +5,7 @@ addSbtPlugin("org.scala-js" % "sbt-scalajs" % "1.10.1")
addSbtPlugin("com.typesafe.sbt" % "sbt-license-report" % "1.2.0")
addSbtPlugin("com.lightbend.sbt" % "sbt-java-formatter" % "0.7.0")
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.6")
addSbtPlugin("com.simplytyped" % "sbt-antlr4" % "0.8.3")
libraryDependencies += "io.circe" %% "circe-yaml" % "0.14.1"
libraryDependencies += "commons-io" % "commons-io" % "2.11.0"

View File

@ -0,0 +1,126 @@
// ANTLR4 grammar for the expression syntax used to create derived columns.
grammar Expression;

// Entry point: a single expression followed by end-of-input.
prog: expr EOF ;

// Alternatives are tried in order, so this listing fixes operator precedence
// (POWER binds tightest, OR loosest). Each `# Label` names the generated
// visitor method for that alternative.
expr: expr op=POWER expr # Power
| expr op=(MULTIPLY|DIVIDE|MODULO) expr # MultDivMod
| expr op=(ADD|MINUS) expr # AddSub
| expr op=(EQUALS|NOT_EQUALS|LESS_THAN_OR_EQUAL|GREATER_THAN_OR_EQUAL|LESS_THAN|GREATER_THAN) expr # Compare
| expr (IS_NULL|IS_EMPTY|IS_NOT_EMPTY|IS_NOT_NULL) # IsNull
| expr (LIKE|NOT_LIKE) expr # Like
| expr (IN|NOT_IN) '(' expr (',' expr)* ')' # In
| expr (NOT_BETWEEN | BETWEEN) expr AND expr # Between
| UNARY_NOT expr # UnaryNot
| expr op=(AND | '&&') expr # And
| expr op=(OR | '||') expr # Or
| IF expr THEN expr ELSE expr END? # If
| IDENTIFIER '(' (expr (',' expr)*)? ')' # Function // This allows for functions of 0 or more arguments within brackets (e.g. PI(), SIN(1), MOD(3,4) etc.)
| '(' expr ')' # Paren
| COLUMN_NAME # Column
| MINUS expr # UnaryMinus
| value # Literal
;

// Operator tokens. EQUALS and NOT_EQUALS each accept two spellings.
POWER : '^';
MULTIPLY : '*';
DIVIDE : '/';
MODULO : '%';
ADD : '+';
MINUS : '-';

EQUALS : '==' | '=';
NOT_EQUALS : '!=' | '<>';
LESS_THAN_OR_EQUAL : '<=';
GREATER_THAN_OR_EQUAL : '>=';
LESS_THAN : '<';
GREATER_THAN : '>';

WHITESPACE : [ \t\r\n]+ -> skip;

// Single-letter fragments accepting either case, used to build the
// case-insensitive keyword tokens below.
fragment A:[aA];
fragment B:[bB];
fragment C:[cC];
fragment D:[dD];
fragment E:[eE];
fragment F:[fF];
fragment G:[gG];
fragment H:[hH];
fragment I:[iI];
fragment J:[jJ];
fragment K:[kK];
fragment L:[lL];
fragment M:[mM];
fragment N:[nN];
fragment O:[oO];
fragment P:[pP];
fragment Q:[qQ];
fragment R:[rR];
fragment S:[sS];
fragment T:[tT];
fragment U:[uU];
fragment V:[vV];
fragment W:[wW];
fragment X:[xX];
fragment Y:[yY];
fragment Z:[zZ];
fragment LETTER : [A-Za-z];
fragment DIGIT : [0-9];
fragment HEX : [0-9a-fA-F];

// Case-insensitive keywords. Multi-word forms use a single literal space
// between words.
fragment IS : I S;
fragment EMPTY : E M P T Y;

AND : A N D ;
OR : O R ;
NULL : N U L L;
NOTHING : N O T H I N G;
IS_NULL: IS ' ' (NOTHING | NULL);
IS_NOT_NULL : IS ' ' N O T ' ' (NOTHING | NULL);
IS_EMPTY: IS ' ' EMPTY;
IS_NOT_EMPTY : IS ' ' N O T ' ' EMPTY;
LIKE : L I K E;
NOT_LIKE : N O T ' ' LIKE;
IN : I N;
NOT_IN : N O T ' ' IN;
BETWEEN : B E T W E E N;
NOT_BETWEEN : N O T ' ' BETWEEN;
TRUE : T R U E;
FALSE : F A L S E;
IF : I F;
THEN : T H E N;
ELSE : E L S E;
UNARY_NOT : (N O T) | '!';
END : E N D IF?;

IDENTIFIER : LETTER (LETTER|DIGIT|'_')*;

// Two string syntaxes: Excel-style double-quoted (a doubled "" escapes a
// quote) and Python-style single-quoted (backslash escapes via ESC).
EXCEL_STRING : '"' ('""'|~'"')* '"';
PYTHON_STRING : '\'' (ESC|~['])* '\'';
fragment ESC : '\\' [abtnfrv"'\\] | '\\u' HEX HEX HEX HEX | '\\U' HEX HEX HEX HEX HEX HEX HEX HEX | '\\x' HEX HEX;

// Building blocks for ISO-8601-style date/time literals.
fragment YEAR : DIGIT DIGIT DIGIT DIGIT;
fragment DATE_PART : '-' DIGIT DIGIT;
fragment HOUR : DIGIT DIGIT;
fragment TIME_PART : ':' DIGIT DIGIT;
fragment NANO_PART : '.' DIGIT DIGIT? DIGIT? DIGIT? DIGIT? DIGIT? DIGIT? DIGIT? DIGIT?;
fragment UTCOFFSET : ('Z' | ('+'|'-') HOUR TIME_PART?);
fragment TIMEZONE : '[' (~']')+ ']';

// Numbers allow '_' as a digit separator after the first digit.
fragment INTEGER : '0' | [1-9] (DIGIT | '_')* ;
fragment DECIMAL : '.' (DIGIT | '_')+;

DATE : YEAR DATE_PART DATE_PART ;
TIME : HOUR TIME_PART (TIME_PART NANO_PART?)? ;
DATE_TIME : YEAR DATE_PART DATE_PART ('T' | ' ') HOUR TIME_PART (TIME_PART NANO_PART?)? UTCOFFSET? TIMEZONE? ;

NUMBER : INTEGER DECIMAL? ;

// Literal values; date/time literals are wrapped in '#' delimiters.
value
: (NULL | NOTHING) # nullOrNothing
| (TRUE | FALSE) # boolean
| '#' text=DATE '#' # date
| '#' text=TIME '#' # time
| '#' text=DATE_TIME '#' # datetime
| NUMBER # number
| EXCEL_STRING # excelString
| PYTHON_STRING # pythonString
;

// Column references are bracketed; a doubled ]] escapes a literal ']'.
COLUMN_NAME : '[' (']]'|~']')* ']';

View File

@ -1,28 +0,0 @@
package org.enso.table.data.column.operation.aggregate;
import org.enso.table.data.column.storage.Storage;
import java.util.stream.IntStream;
/**
 * A fold-style consumer of storage values. An aggregator is created for a
 * particular storage; {@link #nextGroup(IntStream)} is then invoked once per
 * group of row positions, and the accumulated results are finally retrieved as
 * a single storage via {@link #seal()}.
 */
public abstract class Aggregator {
  /**
   * Aggregates the values found at the given row positions and records the
   * result as the next entry of the output.
   *
   * @param positions the row indices belonging to the current group.
   */
  public abstract void nextGroup(IntStream positions);

  /**
   * Finalizes the aggregation.
   *
   * @return a storage holding one result per {@link #nextGroup(IntStream)}
   *     call, in call order.
   */
  public abstract Storage<?> seal();
}

View File

@ -1,33 +0,0 @@
package org.enso.table.data.column.operation.aggregate;
import org.enso.table.data.column.storage.LongStorage;
import org.enso.table.data.column.storage.Storage;
import java.util.stream.IntStream;
/** Aggregates a storage by counting the non-missing values in each group. */
public class CountAggregator extends Aggregator {
  // Data source whose present values are counted.
  private final Storage<?> source;
  // One count per group, filled in nextGroup call order.
  private final long[] results;
  // Index of the next unfilled slot in results.
  private int nextIndex = 0;

  /**
   * @param storage the storage used as data source
   * @param resultSize the exact number of times {@link Aggregator#nextGroup(IntStream)} will be
   *     called.
   */
  public CountAggregator(Storage<?> storage, int resultSize) {
    this.source = storage;
    this.results = new long[resultSize];
  }

  @Override
  public void nextGroup(IntStream positions) {
    // Count a row as 1 when present and 0 when missing, then sum the group.
    results[nextIndex] = positions.mapToLong(row -> source.isNa(row) ? 0L : 1L).sum();
    nextIndex++;
  }

  @Override
  public Storage<Long> seal() {
    return new LongStorage(results);
  }
}

View File

@ -1,59 +0,0 @@
package org.enso.table.data.column.operation.aggregate;
import org.enso.base.polyglot.Polyglot_Utils;
import org.enso.table.data.column.builder.object.InferredBuilder;
import org.enso.table.data.column.storage.Storage;
import org.graalvm.polyglot.Value;
import java.util.List;
import java.util.Objects;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
/** Aggregates the storage using a provided {@link Function}. */
public class FunctionAggregator extends Aggregator {
  // Callback computing one aggregate value from the items of a group.
  private final Function<List<Object>, Value> function;
  // When true, missing (null) values are removed before the function is applied.
  private final boolean dropMissing;
  // Data source for the aggregation.
  private final Storage<?> source;
  // Collects the (possibly heterogeneous) per-group results.
  private final InferredBuilder results;

  /**
   * @param aggregateFunction the function used to obtain the aggregate of a group
   * @param storage the storage serving as data source
   * @param skipNa if true, missing (null) values are filtered out before the function is applied
   * @param resultSize the number of times {@link Aggregator#nextGroup(IntStream)} will be called
   */
  public FunctionAggregator(
      Function<List<Object>, Value> aggregateFunction,
      Storage<?> storage,
      boolean skipNa,
      int resultSize) {
    this.function = aggregateFunction;
    this.source = storage;
    this.dropMissing = skipNa;
    this.results = new InferredBuilder(resultSize);
  }

  @Override
  public void nextGroup(IntStream positions) {
    Stream<Object> values = positions.mapToObj(source::getItemBoxed);
    if (dropMissing) {
      values = values.filter(Objects::nonNull);
    }
    List<Object> groupItems = values.collect(Collectors.toList());
    // The function runs in guest (Enso) code, so its result must be converted
    // back into a plain Java value before being stored.
    Object converted = Polyglot_Utils.convertPolyglotValue(function.apply(groupItems));
    results.appendNoGrow(converted);
  }

  @Override
  public Storage<?> seal() {
    return results.seal();
  }
}

View File

@ -1,59 +0,0 @@
package org.enso.table.data.column.operation.aggregate.numeric;
import org.enso.table.data.column.operation.aggregate.Aggregator;
import org.enso.table.data.column.storage.LongStorage;
import org.enso.table.data.column.storage.Storage;
import java.util.BitSet;
import java.util.stream.IntStream;
import java.util.stream.LongStream;
/** An aggregator consuming a {@link LongStorage} and returning a {@link LongStorage} */
public abstract class LongToLongAggregator extends Aggregator {
  // Data source for the aggregation.
  private final LongStorage source;
  // Per-group results, filled sequentially; slots flagged in missingMask are unset.
  private final long[] results;
  // Marks result slots whose group produced no value.
  private final BitSet missingMask;
  // Index of the next unfilled slot in results.
  private int cursor = 0;

  /**
   * @param storage the data source
   * @param resultSize the number of times {@link Aggregator#nextGroup(IntStream)} will be called
   */
  public LongToLongAggregator(LongStorage storage, int resultSize) {
    this.source = storage;
    this.results = new long[resultSize];
    this.missingMask = new BitSet();
  }

  /** Used by subclasses to return a missing value from a given group. */
  protected void submitMissing() {
    missingMask.set(cursor);
    cursor++;
  }

  /**
   * Used by subclasses to return a value from a given group.
   *
   * @param value the return value of a group
   */
  protected void submit(long value) {
    results[cursor] = value;
    cursor++;
  }

  /**
   * Runs the aggregation on a particular set of values.
   *
   * @param items the values contained in the current group
   */
  protected abstract void runGroup(LongStream items);

  @Override
  public void nextGroup(IntStream positions) {
    // Missing rows are removed up front, so subclasses only ever see real values.
    runGroup(positions.filter(row -> !source.isNa(row)).mapToLong(source::getItem));
  }

  @Override
  public Storage<Long> seal() {
    return new LongStorage(results, results.length, missingMask);
  }
}

View File

@ -1,78 +0,0 @@
package org.enso.table.data.column.operation.aggregate.numeric;
import org.enso.table.data.column.operation.aggregate.Aggregator;
import org.enso.table.data.column.storage.DoubleStorage;
import org.enso.table.data.column.storage.NumericStorage;
import org.enso.table.data.column.storage.Storage;
import java.util.BitSet;
import java.util.OptionalDouble;
import java.util.stream.DoubleStream;
import java.util.stream.IntStream;
/**
 * An aggregator sourcing data from any {@link NumericStorage} and returning a {@link
 * DoubleStorage}.
 */
public abstract class NumericAggregator extends Aggregator {
  // Data source for the aggregation.
  private final NumericStorage<?> source;
  // Per-group results stored as raw IEEE-754 bit patterns, the layout
  // DoubleStorage expects.
  private final long[] bits;
  // Marks result slots whose group produced no value.
  private final BitSet missingMask;
  // Index of the next unfilled slot in bits.
  private int cursor = 0;

  /**
   * @param storage the data source
   * @param resultSize the number of times {@link Aggregator#nextGroup(IntStream)} will be called
   */
  public NumericAggregator(NumericStorage<?> storage, int resultSize) {
    this.source = storage;
    this.bits = new long[resultSize];
    this.missingMask = new BitSet();
  }

  /**
   * Runs the aggregation on a particular set of values.
   *
   * @param elements the values contained in the current group
   */
  protected abstract void runGroup(DoubleStream elements);

  /**
   * Used by subclasses to return a value from a given group.
   *
   * @param value the return value of a group
   */
  protected void submit(double value) {
    bits[cursor] = Double.doubleToRawLongBits(value);
    cursor++;
  }

  /**
   * Used by subclasses to return an optional value from a given group; an empty
   * optional is recorded as missing.
   *
   * @param value the return value of a group
   */
  protected void submit(OptionalDouble value) {
    value.ifPresentOrElse(this::submit, this::submitMissing);
  }

  /** Used by subclasses to return a missing value from a given group. */
  protected void submitMissing() {
    missingMask.set(cursor);
    cursor++;
  }

  @Override
  public void nextGroup(IntStream positions) {
    // Missing rows are removed up front, so subclasses only ever see real values.
    runGroup(positions.filter(row -> !source.isNa(row)).mapToDouble(source::getItemDouble));
  }

  @Override
  public Storage<Double> seal() {
    return new DoubleStorage(bits, bits.length, missingMask);
  }
}

View File

@ -11,11 +11,11 @@ import java.util.BitSet;
/** An operation expecting a numeric argument and returning a boolean. */
public abstract class LongNumericOp extends MapOperation<Long, LongStorage> {
private final boolean alwaysCast;
private final boolean alwaysCastToDouble;
public LongNumericOp(String name, boolean alwaysCast) {
public LongNumericOp(String name, boolean alwaysCastToDouble) {
super(name);
this.alwaysCast = true;
this.alwaysCastToDouble = alwaysCastToDouble;
}
public LongNumericOp(String name) {
@ -28,8 +28,7 @@ public abstract class LongNumericOp extends MapOperation<Long, LongStorage> {
@Override
public NumericStorage<?> runMap(LongStorage storage, Object arg) {
if (arg instanceof Long && !alwaysCast) {
long x = (Long) arg;
if (!alwaysCastToDouble && arg instanceof Long x) {
long[] newVals = new long[storage.size()];
for (int i = 0; i < storage.size(); i++) {
if (!storage.isNa(i)) {
@ -57,12 +56,16 @@ public abstract class LongNumericOp extends MapOperation<Long, LongStorage> {
BitSet newMissing = new BitSet();
for (int i = 0; i < storage.size(); i++) {
if (!storage.isNa(i) && i < v.size() && !v.isNa(i)) {
out[i] = doLong(storage.getItem(i), v.getItem(i));
out[i] = alwaysCastToDouble
? Double.doubleToRawLongBits(doDouble(storage.getItem(i), v.getItem(i)))
: doLong(storage.getItem(i), v.getItem(i));
} else {
newMissing.set(i);
}
}
return new LongStorage(out, storage.size(), newMissing);
return alwaysCastToDouble
? new DoubleStorage(out, storage.size(), newMissing)
: new LongStorage(out, storage.size(), newMissing);
} else if (arg instanceof DoubleStorage v) {
long[] out = new long[storage.size()];
BitSet newMissing = new BitSet();

View File

@ -2,6 +2,8 @@ package org.enso.table.data.column.storage;
import java.util.BitSet;
import java.util.List;
import java.util.function.IntFunction;
import org.enso.base.polyglot.Polyglot_Utils;
import org.enso.table.data.column.builder.object.InferredBuilder;
import org.enso.table.data.column.operation.map.MapOpStorage;
@ -168,21 +170,29 @@ public final class BoolStorage extends Storage<Boolean> {
}
public Storage<?> iif(Value when_true, Value when_false) {
Object on_true = Polyglot_Utils.convertPolyglotValue(when_true);
Object on_false = Polyglot_Utils.convertPolyglotValue(when_false);
var on_true = makeRowProvider(when_true);
var on_false = makeRowProvider(when_false);
InferredBuilder builder = new InferredBuilder(size);
for (int i = 0; i < size; i++) {
if (isMissing.get(i)) {
builder.append(null);
} else if (getItem(i)) {
builder.append(on_true);
builder.append(on_true.apply(i));
} else {
builder.append(on_false);
builder.append(on_false.apply(i));
}
}
return builder.seal();
}
private static IntFunction<Object> makeRowProvider(Value value) {
if (value.isHostObject() && value.asHostObject() instanceof Storage<?> s) {
return i->(Object)s.getItemBoxed(i);
}
var converted = Polyglot_Utils.convertPolyglotValue(value);
return i->converted;
}
private static MapOpStorage<Boolean, BoolStorage> buildOps() {
MapOpStorage<Boolean, BoolStorage> ops = new MapOpStorage<>();
ops.add(

View File

@ -1,12 +1,9 @@
package org.enso.table.data.column.storage;
import java.util.BitSet;
import java.util.HashSet;
import java.util.List;
import org.enso.base.polyglot.NumericConverter;
import org.enso.table.data.column.builder.object.NumericBuilder;
import org.enso.table.data.column.operation.map.MapOpStorage;
import org.enso.table.data.column.operation.map.SpecializedIsInOp;
import org.enso.table.data.column.operation.map.UnaryMapOperation;
import org.enso.table.data.column.operation.map.numeric.DoubleBooleanOp;
import org.enso.table.data.column.operation.map.numeric.DoubleIsInOp;
@ -209,6 +206,13 @@ public final class DoubleStorage extends NumericStorage<Double> {
return a % b;
}
})
.add(
new DoubleNumericOp(Maps.POWER) {
@Override
protected double doDouble(double a, double b) {
return Math.pow(a, b);
}
})
.add(
new DoubleBooleanOp(Maps.LT) {
@Override

View File

@ -1,16 +1,9 @@
package org.enso.table.data.column.storage;
import java.util.BitSet;
import java.util.HashSet;
import java.util.List;
import java.util.OptionalLong;
import java.util.stream.LongStream;
import org.enso.base.polyglot.NumericConverter;
import org.enso.table.data.column.builder.object.NumericBuilder;
import org.enso.table.data.column.operation.aggregate.Aggregator;
import org.enso.table.data.column.operation.aggregate.numeric.LongToLongAggregator;
import org.enso.table.data.column.operation.map.MapOpStorage;
import org.enso.table.data.column.operation.map.SpecializedIsInOp;
import org.enso.table.data.column.operation.map.UnaryMapOperation;
import org.enso.table.data.column.operation.map.numeric.LongBooleanOp;
import org.enso.table.data.column.operation.map.numeric.LongIsInOp;
@ -43,17 +36,13 @@ public final class LongStorage extends NumericStorage<Long> {
this(data, data.length, new BitSet());
}
/**
* @inheritDoc
*/
/** @inheritDoc */
@Override
public int size() {
return size;
}
/**
* @inheritDoc
*/
/** @inheritDoc */
@Override
public int countMissing() {
return isMissing.cardinality();
@ -77,17 +66,13 @@ public final class LongStorage extends NumericStorage<Long> {
return isMissing.get(idx) ? null : data[idx];
}
/**
* @inheritDoc
*/
/** @inheritDoc */
@Override
public int getType() {
return Type.LONG;
}
/**
* @inheritDoc
*/
/** @inheritDoc */
@Override
public boolean isNa(long idx) {
return isMissing.get((int) idx);
@ -108,46 +93,6 @@ public final class LongStorage extends NumericStorage<Long> {
return ops.runZip(name, this, argument);
}
@Override
protected Aggregator getVectorizedAggregator(String name, int resultSize) {
return switch (name) {
case Aggregators.SUM -> new LongToLongAggregator(this, resultSize) {
@Override
protected void runGroup(LongStream items) {
long[] elements = items.toArray();
if (elements.length == 0) {
submitMissing();
} else {
submit(LongStream.of(elements).sum());
}
}
};
case Aggregators.MAX -> new LongToLongAggregator(this, resultSize) {
@Override
protected void runGroup(LongStream items) {
OptionalLong r = items.max();
if (r.isPresent()) {
submit(r.getAsLong());
} else {
submitMissing();
}
}
};
case Aggregators.MIN -> new LongToLongAggregator(this, resultSize) {
@Override
protected void runGroup(LongStream items) {
OptionalLong r = items.min();
if (r.isPresent()) {
submit(r.getAsLong());
} else {
submitMissing();
}
}
};
default -> super.getVectorizedAggregator(name, resultSize);
};
}
private Storage<?> fillMissingDouble(double arg) {
final var builder = NumericBuilder.createDoubleBuilder(size());
long rawArg = Double.doubleToRawLongBits(arg);
@ -291,6 +236,19 @@ public final class LongStorage extends NumericStorage<Long> {
return in % arg;
}
})
.add(
new LongNumericOp(Maps.POWER, true) {
@Override
public double doDouble(long in, double arg) {
return Math.pow(in, arg);
}
@Override
public long doLong(long in, long arg) {
throw new IllegalStateException(
"Internal error: Power operation should cast to double.");
}
})
.add(
new LongNumericOp(Maps.DIV, true) {
@Override
@ -300,7 +258,7 @@ public final class LongStorage extends NumericStorage<Long> {
@Override
public long doLong(long in, long arg) {
return in / arg;
throw new UnsupportedOperationException("Divide operation should cast to double.");
}
})
.add(

View File

@ -1,9 +1,5 @@
package org.enso.table.data.column.storage;
import java.util.stream.DoubleStream;
import org.enso.table.data.column.operation.aggregate.Aggregator;
import org.enso.table.data.column.operation.aggregate.numeric.NumericAggregator;
/** A storage containing items representable as a {@code double}. */
public abstract class NumericStorage<T> extends Storage<T> {
/**
@ -14,45 +10,4 @@ public abstract class NumericStorage<T> extends Storage<T> {
* @return the value associated with {@code idx}
*/
public abstract double getItemDouble(int idx);
@Override
protected Aggregator getVectorizedAggregator(String name, int resultSize) {
switch (name) {
case Aggregators.MAX:
return new NumericAggregator(this, resultSize) {
@Override
protected void runGroup(DoubleStream elements) {
submit(elements.max());
}
};
case Aggregators.MIN:
return new NumericAggregator(this, resultSize) {
@Override
protected void runGroup(DoubleStream elements) {
submit(elements.min());
}
};
case Aggregators.SUM:
return new NumericAggregator(this, resultSize) {
@Override
protected void runGroup(DoubleStream elements) {
double[] its = elements.toArray();
if (its.length == 0) {
submitMissing();
} else {
submit(DoubleStream.of(its).sum());
}
}
};
case Aggregators.MEAN:
return new NumericAggregator(this, resultSize) {
@Override
protected void runGroup(DoubleStream elements) {
submit(elements.average());
}
};
default:
return super.getVectorizedAggregator(name, resultSize);
}
}
}

View File

@ -9,9 +9,6 @@ import org.enso.base.polyglot.Polyglot_Utils;
import org.enso.table.data.column.builder.object.Builder;
import org.enso.table.data.column.builder.object.InferredBuilder;
import org.enso.table.data.column.builder.object.ObjectBuilder;
import org.enso.table.data.column.operation.aggregate.Aggregator;
import org.enso.table.data.column.operation.aggregate.CountAggregator;
import org.enso.table.data.column.operation.aggregate.FunctionAggregator;
import org.enso.table.data.mask.OrderMask;
import org.enso.table.data.mask.SliceRange;
import org.graalvm.polyglot.Value;
@ -73,6 +70,7 @@ public abstract class Storage<T> {
public static final String SUB = "-";
public static final String DIV = "/";
public static final String MOD = "%";
public static final String POWER = "^";
public static final String NOT = "not";
public static final String AND = "&&";
public static final String OR = "||";
@ -86,14 +84,6 @@ public abstract class Storage<T> {
public static final String IS_IN = "is_in";
}
public static final class Aggregators {
public static final String SUM = "sum";
public static final String MEAN = "mean";
public static final String MAX = "max";
public static final String MIN = "min";
public static final String COUNT = "count";
}
/**
* Specifies if the given operation has a vectorized implementation available for this storage.
*/
@ -137,36 +127,6 @@ public abstract class Storage<T> {
return builder.seal();
}
protected Aggregator getVectorizedAggregator(String name, int resultSize) {
if (name.equals(Aggregators.COUNT)) {
return new CountAggregator(this, resultSize);
}
return null;
}
/**
* Returns an aggregator created based on the provided parameters.
*
* @param name name of a vectorized operation that can be used if possible. If null is passed,
* this parameter is unused.
* @param fallback the function to use if a vectorized operation is not available.
* @param skipNa whether missing values should be passed to the {@code fallback} function.
* @param resultSize the number of times the {@link
* Aggregator#nextGroup(java.util.stream.IntStream)} method will be called.
* @return an aggregator satisfying the above properties.
*/
public final Aggregator getAggregator(
String name, Function<List<Object>, Value> fallback, boolean skipNa, int resultSize) {
Aggregator result = null;
if (name != null) {
result = getVectorizedAggregator(name, resultSize);
}
if (result == null) {
result = new FunctionAggregator(fallback, this, skipNa, resultSize);
}
return result;
}
/**
* Runs a function on each non-missing element in this storage and gathers the results.
*

View File

@ -2,7 +2,6 @@ package org.enso.table.data.table;
import org.enso.base.polyglot.Polyglot_Utils;
import org.enso.table.data.column.builder.object.InferredBuilder;
import org.enso.table.data.column.operation.aggregate.Aggregator;
import org.enso.table.data.column.storage.BoolStorage;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.index.DefaultIndex;
@ -15,8 +14,6 @@ import org.graalvm.polyglot.Value;
import java.util.BitSet;
import java.util.List;
import java.util.function.Function;
import java.util.stream.IntStream;
/** A representation of a column. Consists of a column name and the underlying storage. */
public class Column {
@ -89,11 +86,11 @@ public class Column {
* @return the result of masking this column with the provided column
*/
public Column mask(Column maskCol) {
if (!(maskCol.getStorage() instanceof BoolStorage storage)) {
if (!(maskCol.getStorage() instanceof BoolStorage boolStorage)) {
throw new UnexpectedColumnTypeException("Boolean");
}
var mask = BoolStorage.toMask(storage);
var mask = BoolStorage.toMask(boolStorage);
var localStorageMask = new BitSet();
localStorageMask.set(0, getStorage().size());
mask.and(localStorageMask);
@ -156,25 +153,6 @@ public class Column {
return index;
}
/**
* Aggregates the values in this column, using a given aggregation operation.
*
* @param aggName name of a vectorized operation that can be used if possible. If null is passed,
* this parameter is unused.
* @param aggregatorFunction the function to use if a vectorized operation is not available.
* @param skipNa whether missing values should be passed to the {@code fallback} function.
* @return a column indexed by the unique index of this aggregate, storing results of applying the
* specified operation.
*/
public Object aggregate(
String aggName, Function<List<Object>, Value> aggregatorFunction, boolean skipNa) {
Aggregator aggregator = storage.getAggregator(aggName, aggregatorFunction, skipNa, 1);
IntStream ixes = IntStream.range(0, storage.size());
aggregator.nextGroup(ixes);
return aggregator.seal().getItemBoxed(0);
}
/**
* @param mask the reordering to apply
* @return a new column, resulting from reordering this column according to {@code mask}.

View File

@ -0,0 +1,373 @@
package org.enso.table.expressions;
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Recognizer;
import org.graalvm.polyglot.Context;
import org.graalvm.polyglot.PolyglotException;
import org.graalvm.polyglot.Value;
import java.time.LocalDate;
import java.time.LocalTime;
import java.time.LocalDateTime;
import java.time.ZonedDateTime;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.function.Function;
import java.util.regex.Pattern;
public class ExpressionVisitorImpl extends ExpressionBaseVisitor<Value> {
private static class ThrowOnErrorListener extends BaseErrorListener {
public static final ThrowOnErrorListener INSTANCE = new ThrowOnErrorListener();
@Override
public void syntaxError(
Recognizer<?, ?> recognizer,
Object offendingSymbol,
int line,
int charPositionInLine,
String msg,
RecognitionException e)
throws SyntaxErrorException {
throw new SyntaxErrorException(msg, line, charPositionInLine);
}
}
public static class SyntaxErrorException extends RuntimeException {
private final int line;
private final int column;
public SyntaxErrorException(String message, int line, int column) {
super(message);
this.line = line;
this.column = column;
}
public int getLine() {
return line;
}
public int getColumn() {
return column;
}
}
public static Value evaluate(
String expression,
Function<String, Value> getColumn,
Function<Object, Value> makeConstantColumn,
String moduleName,
String typeName,
String[] variableArgumentFunctions)
throws UnsupportedOperationException, IllegalArgumentException {
var lexer = new ExpressionLexer(CharStreams.fromString(expression));
lexer.removeErrorListeners();
lexer.addErrorListener(ThrowOnErrorListener.INSTANCE);
var tokens = new CommonTokenStream(lexer);
var parser = new ExpressionParser(tokens);
parser.removeErrorListeners();
parser.addErrorListener(ThrowOnErrorListener.INSTANCE);
var visitor =
new ExpressionVisitorImpl(
getColumn, makeConstantColumn, moduleName, typeName, variableArgumentFunctions);
var expr = parser.prog();
return visitor.visit(expr);
}
private final Function<String, Value> getColumn;
private final Function<Object, Value> makeConstantColumn;
private final Function<String, Value> getMethod;
private final Set<String> variableArgumentFunctions;
private ExpressionVisitorImpl(
Function<String, Value> getColumn,
Function<Object, Value> makeConstantColumn,
String moduleName,
String typeName,
String[] variableArgumentFunctions) {
this.getColumn = getColumn;
this.makeConstantColumn = makeConstantColumn;
final Value module =
Context.getCurrent().getBindings("enso").invokeMember("get_module", moduleName);
final Value type = module.invokeMember("get_type", typeName);
this.getMethod = name -> module.invokeMember("get_method", type, name);
this.variableArgumentFunctions = new HashSet<>(Arrays.asList(variableArgumentFunctions));
}
private Value wrapAsColumn(Value value) {
if (value.isNull()) {
return makeConstantColumn.apply(value);
}
var metaObject = value.getMetaObject();
return metaObject != null && metaObject.asHostObject() instanceof Class<?>
? makeConstantColumn.apply(value)
: value;
}
private Value executeMethod(String name, Value... args) {
Value method = getMethod.apply(name);
if (!method.canExecute()) {
throw new UnsupportedOperationException(name);
}
Object[] objects;
if (this.variableArgumentFunctions.contains(name)) {
objects = new Object[2];
objects[0] = args[0];
objects[1] = Arrays.copyOfRange(args, 1, args.length, Object[].class);
} else {
objects = Arrays.copyOf(args, args.length, Object[].class);
}
objects[0] = wrapAsColumn(args[0]);
try {
var result = method.execute(objects);
if (result.canExecute()) {
throw new IllegalArgumentException("Insufficient arguments for method " + name + ".");
}
return result;
} catch (PolyglotException e) {
if (e.getMessage().startsWith("Type error: expected a function")) {
throw new IllegalArgumentException("Too many arguments for method " + name + ".");
}
throw e;
}
}
@Override
public Value visitProg(ExpressionParser.ProgContext ctx) {
Value base = visit(ctx.expr());
return wrapAsColumn(base);
}
@Override
public Value visitColumn(ExpressionParser.ColumnContext ctx) {
var text = ctx.getText();
return getColumn.apply(text.substring(1, text.length() - 1).replace("]]", "]"));
}
@Override
public Value visitPower(ExpressionParser.PowerContext ctx) {
return executeMethod("^", visit(ctx.expr(0)), visit(ctx.expr(1)));
}
@Override
public Value visitMultDivMod(ExpressionParser.MultDivModContext ctx) {
return executeMethod(ctx.op.getText(), visit(ctx.expr(0)), visit(ctx.expr(1)));
}
@Override
public Value visitCompare(ExpressionParser.CompareContext ctx) {
var op = ctx.op.getText();
if (op.equals("=")) {
op = "==";
}
if (op.equals("<>")) {
op = "!=";
}
return executeMethod(op, visit(ctx.expr(0)), visit(ctx.expr(1)));
}
@Override
public Value visitLike(ExpressionParser.LikeContext ctx) {
var condition = executeMethod("like", visit(ctx.expr(0)), visit(ctx.expr(1)));
return ctx.NOT_LIKE() != null ? executeMethod("not", condition) : condition;
}
@Override
public Value visitIsNull(ExpressionParser.IsNullContext ctx) {
var op = ctx.IS_NULL() != null || ctx.IS_NOT_NULL() != null ? "is_missing" : "is_empty";
var condition = executeMethod(op, visit(ctx.expr()));
return ctx.IS_NOT_NULL() != null || ctx.IS_NOT_EMPTY() != null
? executeMethod("not", condition)
: condition;
}
@Override
public Value visitIf(ExpressionParser.IfContext ctx) {
return executeMethod("iif", visit(ctx.expr(0)), visit(ctx.expr(1)), visit(ctx.expr(2)));
}
@Override
public Value visitAddSub(ExpressionParser.AddSubContext ctx) {
return executeMethod(ctx.op.getText(), visit(ctx.expr(0)), visit(ctx.expr(1)));
}
@Override
public Value visitAnd(ExpressionParser.AndContext ctx) {
return executeMethod("&&", visit(ctx.expr(0)), visit(ctx.expr(1)));
}
@Override
public Value visitOr(ExpressionParser.OrContext ctx) {
return executeMethod("||", visit(ctx.expr(0)), visit(ctx.expr(1)));
}
@Override
public Value visitUnaryNot(ExpressionParser.UnaryNotContext ctx) {
return executeMethod("not", visit(ctx.expr()));
}
@Override
public Value visitUnaryMinus(ExpressionParser.UnaryMinusContext ctx) {
return executeMethod("*", visit(ctx.expr()), Value.asValue(-1));
}
@Override
public Value visitNullOrNothing(ExpressionParser.NullOrNothingContext ctx) {
return Value.asValue(null);
}
@Override
public Value visitBoolean(ExpressionParser.BooleanContext ctx) {
return Value.asValue(ctx.TRUE() != null);
}
@Override
public Value visitNumber(ExpressionParser.NumberContext ctx) {
var text = ctx.getText().replace("_", "");
if (text.contains(".")) {
return Value.asValue(Double.parseDouble(text));
} else {
return Value.asValue(Long.parseLong(text));
}
}
@Override
public Value visitExcelString(ExpressionParser.ExcelStringContext ctx) {
var text = ctx.getText();
return Value.asValue(text.substring(1, text.length() - 1).replace("\"\"", "\""));
}
private static final Pattern pythonRegex = Pattern.compile("(\\\\[abtnfrv\"'\\\\])|(\\\\(x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{4}))|\\\\|([^\\\\]+)");
private static String unescapePython(String text) {
var matcher = pythonRegex.matcher(text);
var builder = new StringBuilder(text.length());
while (matcher.find()) {
if (matcher.group(1) != null) {
builder.append(switch (matcher.group(1).charAt(1)) {
case 'a' -> (char) 0x07;
case 'f' -> (char) 0x0c;
case 'b' -> '\b';
case 't' -> '\t';
case 'r' -> '\r';
case 'n' -> '\n';
case 'v' -> (char) 0x0b;
case '\\' -> '\\';
case '\'' -> '\'';
case '"' -> '"';
default -> throw new IllegalArgumentException("Unknown Python escape sequence.");
});
} else if (matcher.group(2) != null) {
builder.append((char) Integer.parseInt(matcher.group(2).substring(2), 16));
} else {
builder.append(matcher.group(0));
}
}
return builder.toString();
}
@Override
public Value visitPythonString(ExpressionParser.PythonStringContext ctx) {
var text = ctx.getText();
return Value.asValue(unescapePython(text.substring(1, text.length() - 1)));
}
@Override
public Value visitDate(ExpressionParser.DateContext ctx) {
var text = ctx.text.getText();
try {
return Value.asValue(LocalDate.parse(ctx.text.getText()));
} catch (DateTimeParseException e) {
throw new SyntaxErrorException(
"Invalid Date format: " + text,
ctx.getStart().getLine(),
ctx.getStart().getCharPositionInLine());
}
}
@Override
public Value visitTime(ExpressionParser.TimeContext ctx) {
var text = ctx.text.getText();
try {
return Value.asValue(LocalTime.parse(ctx.text.getText()));
} catch (DateTimeParseException e) {
throw new SyntaxErrorException(
"Invalid Time format: " + text,
ctx.getStart().getLine(),
ctx.getStart().getCharPositionInLine());
}
}
@Override
public Value visitDatetime(ExpressionParser.DatetimeContext ctx) {
var text = ctx.text.getText().replace(' ', 'T');
var timezone = text.contains("[") ? text.substring(text.indexOf('[')) : "";
text = text.substring(0, text.length() - timezone.length());
var zoneId =
timezone.equals("")
? ZoneId.systemDefault()
: ZoneId.of(timezone.substring(1, timezone.length() - 1));
try {
var zonedDateTime =
ZonedDateTime.parse(text, DateTimeFormatter.ISO_OFFSET_DATE_TIME.withZone(zoneId));
return Value.asValue(zonedDateTime);
} catch (DateTimeParseException ignored) {
}
try {
var localDateTime = LocalDateTime.parse(text);
return Value.asValue(localDateTime.atZone(zoneId));
} catch (DateTimeParseException e) {
throw new SyntaxErrorException(
"Invalid Date_Time format: " + text,
ctx.getStart().getLine(),
ctx.getStart().getCharPositionInLine());
}
}
@Override
public Value visitParen(ExpressionParser.ParenContext ctx) {
return visit(ctx.expr());
}
@Override
public Value visitBetween(ExpressionParser.BetweenContext ctx) {
var self = visit(ctx.expr(0));
var lower = visit(ctx.expr(1));
var upper = visit(ctx.expr(2));
var condition = executeMethod("between", self, lower, upper);
return ctx.NOT_BETWEEN() != null ? executeMethod("not", condition) : condition;
}
@Override
public Value visitIn(ExpressionParser.InContext ctx) {
var args = ctx.expr().stream().map(this::visit).toArray(Value[]::new);
var condition = executeMethod("is_in", args);
return ctx.NOT_IN() != null ? executeMethod("not", condition) : condition;
}
@Override
public Value visitFunction(ExpressionParser.FunctionContext ctx) {
var name = ctx.IDENTIFIER().getText().toLowerCase();
var args = ctx.expr().stream().map(this::visit).toArray(Value[]::new);
return executeMethod(name, args);
}
}

View File

@ -53,11 +53,9 @@ spec = Test.group "Columns" <|
test_column.take (Last 7) . to_vector . should_equal expected_2.to_vector
test_column.take (Last 0) . to_vector . should_equal expected_3.to_vector
Test.specify "should be able to get the first / head element" <|
Test.specify "should be able to get the first element" <|
test_column.first . should_equal 1
test_column.head . should_equal 1
empty_column.first.should_fail_with Empty_Error
empty_column.head.should_fail_with Empty_Error
Test.specify "should be able to get the last element" <|
test_column.last . should_equal 6
@ -85,25 +83,12 @@ spec = Test.group "Columns" <|
col = Examples.decimal_column.set_index Examples.integer_column
col.index.to_vector . should_equal Examples.integer_column.to_vector
Test.specify "should allow computing variance and standard deviation" <|
const = Column.from_vector 'const' [1, 1, 1, 1, 1]
const.variance . should_equal 0
const.standard_deviation.should_equal 0
rand = Column.from_vector 'random' [10.0, 4.2, 6.8, 6.2, 7.2]
rand.variance . should_equal 4.372
rand.variance degrees_of_freedom_correction=0 . should_equal 3.4976
rand.standard_deviation . should_equal 2.090932806 epsilon=(10 ^ -6)
rand.standard_deviation degrees_of_freedom_correction=0 . should_equal 1.870187156 epsilon=(10 ^ -6)
Test.specify "should allow computing the R² score of a prediction" <|
sample = Column.from_vector 'sample' [1,2,3,4,5]
mean_pred = Column.from_vector 'mean' [3,3,3,3,3]
perfect_pred = Column.from_vector 'perfect' [1,2,3,4,5]
bad_pred = Column.from_vector 'bad' [5,4,3,2,1]
sample.r_squared mean_pred . should_equal 0
sample.r_squared perfect_pred . should_equal 1
sample.r_squared bad_pred . should_equal -3
Test.specify "should result in correct Storage if operation allows it" <|
another = Column.from_vector "Test" [10, 20, 30, 40, 50, 60]
(test_column + 1).storage_type . should_equal Storage.Integer
(test_column - 1).storage_type . should_equal Storage.Integer
(test_column * 2).storage_type . should_equal Storage.Integer
(test_column * 1.5).storage_type . should_equal Storage.Decimal
(test_column + another).storage_type . should_equal Storage.Integer
main = Test_Suite.run_main spec

View File

@ -79,7 +79,7 @@ spec =
Test.specify "should support simple text operations" <|
b = t1.at "B"
add = b + "SUFFIX"
add.to_sql.prepare . should_equal ['SELECT ("T1"."B" + ?) AS "B" FROM "T1" AS "T1"', [["SUFFIX", str]]]
add.to_sql.prepare . should_equal ['SELECT ("T1"."B" || ?) AS "B" FROM "T1" AS "T1"', [["SUFFIX", str]]]
ends = b.ends_with "suf"
starts = b.starts_with "pref"

View File

@ -12,8 +12,6 @@ from Standard.Test import Test, Problems
import project.Database.Helpers.Name_Generator
spec prefix connection pending=Nothing =
eps=0.000001
tables_to_clean = Vector.new_builder
upload prefix data =
name = Name_Generator.random_name prefix
@ -185,16 +183,6 @@ spec prefix connection pending=Nothing =
col.count . should_equal 3
col.count_missing . should_equal 2
Test.group prefix+"Column-wide statistics" pending=pending <|
Test.specify 'should allow computing basic column-wide stats' <|
t7 = upload "T7" <|
Table.new [['price', [0.4, 3.5, Nothing, 6.7, Nothing, 97, Nothing]]]
price = t7.at 'price'
price.sum.should_equal 107.6 epsilon=eps
price.min.should_equal 0.4 epsilon=eps
price.max.should_equal 97 epsilon=eps
price.mean.should_equal 26.9 epsilon=eps
Test.group prefix+"Sorting" pending=pending <|
df = upload "clothes" <|
Table.new [["id", [1,2,3,4,5,6]], ["name", ["shoes","trousers","dress","skirt","blouse","t-shirt"]], ["quantity", [20,10,20,10,30,30]], ["rating", [3.0,Nothing,7.3,3.0,2.2,Nothing]], ["price", [37.2,42.1,64.1,87.4,13.5,64.2]]]

View File

@ -0,0 +1,290 @@
from Standard.Base import all
from Standard.Base.Error.Problem_Behavior import Report_Error
from Standard.Table import Table, Column, Sort_Column, Column_Selector, Sort_Column_Selector, Aggregate_Column
import Standard.Table.Data.Expression.Expression_Error
import Standard.Visualization
from Standard.Test import Test, Test_Suite, Problems
import project.Common_Table_Spec
from project.Util import all
spec detailed=False =
# Create Test Table
column_a = ["A", [1, 2, 3, 4, 5]]
column_b = ["B", [1.0, 1.5, 2.5, 4, 6]]
column_c = ["C", ["Hello", "World", "Hello World!", "", Nothing]]
column_odd = ["Bad] Name", [True, False, True, False, True]]
test_table = Table.new [column_a, column_b, column_c, column_odd]
epsilon=0.0000000001
tester expression value =
new_table = test_table.set "NEW_COL" expression
new_table.column_count . should_equal (test_table.column_count + 1)
new_column = new_table.at -1
new_column.name . should_equal "NEW_COL"
expected = case value of
_ : Vector.Vector -> value
_ -> Vector.new test_table.row_count _->value
values = new_column.to_vector
values . each_with_index i->v->
e = expected.at i
match = case e of
_ : Number -> e.equals v epsilon
_ -> e == v
if match.not then values.should_equal expected
specify_test label action expression_test=tester =
case detailed of
True ->
specify_tester expression value =
Test.specify (label + ": " + expression) <|
expression_test expression value
action specify_tester
False ->
Test.specify label (action expression_test)
Test.group "Expression Integer literals" <|
specify_test "should be able to add an integer column" expression_test->
expression_test "1" 1
expression_test "-3" -3
expression_test "1_000" 1000
Test.group "Expression Decimal literals" <|
specify_test "should be able to add a decimal column" expression_test->
expression_test "1.23" 1.23
expression_test "-3.1415" -3.1415
expression_test "1_000.456" 1000.456
Test.group "Expression Boolean literals" <|
specify_test "should be able to add a boolean column" expression_test->
expression_test "True" True
expression_test "true" True
expression_test "TRUE" True
expression_test "tRuE" True
expression_test "False" False
expression_test "false" False
expression_test "FALSE" False
expression_test "FaLsE" False
Test.group "Expression Text literals" <|
specify_test "should be able to add a text column" expression_test->
expression_test "'Hello World'" 'Hello World'
expression_test "'Hello \'World\''" "Hello 'World'"
expression_test '"Hello World"' 'Hello World'
expression_test '"Hello ""World"""' 'Hello "World"'
expression_test '"Hello \\""World"""' 'Hello \\"World"'
expression_test "'Alpha\r\n\gBeta'" 'Alpha\r\n\\gBeta'
Test.group "Expression Column literals" <|
specify_test "should be able to get a Column" expression_test->
expression_test "[A]" (column_a.at 1)
expression_test "[Bad]] Name]" (column_odd.at 1)
Test.group "Expression Nothing literals" <|
specify_test "should be able to add a nothing column" expression_test->
expression_test "null" Nothing
expression_test "nUlL" Nothing
expression_test "Nothing" Nothing
expression_test "NOTHING" Nothing
Test.group "Expression Date and Time literals" <|
specify_test "should be able to add a date or time column" expression_test->
expression_test "#2020-12-23#" (Date.new 2020 12 23)
expression_test "#12:34#" (Time_Of_Day.new 12 34)
expression_test "#12:34:56#" (Time_Of_Day.new 12 34 56)
expression_test "#12:34:56.789#" (Time_Of_Day.new 12 34 56 789000000)
expression_test "#12:34:56.789000123#" (Time_Of_Day.new 12 34 56 789000123)
expression_test "#2020-12-23 12:34#" (Date_Time.new 2020 12 23 12 34)
expression_test "#2020-12-23 12:34:56#" (Date_Time.new 2020 12 23 12 34 56)
expression_test "#2020-12-23 12:34:56Z[UTC]#" (Date_Time.new 2020 12 23 12 34 56 zone=Time_Zone.utc)
expression_test "#2020-12-23 12:34:56+02:30[UTC]#" (Date_Time.new 2020 12 23 10 04 56 zone=Time_Zone.utc)
expression_test "#2020-12-23 12:34:56.157+01[UTC]#" (Date_Time.new 2020 12 23 11 34 56 157000000 zone=Time_Zone.utc)
expression_test "#2020-12-23T12:34[Europe/Warsaw]#" (Date_Time.new 2020 12 23 12 34 zone=Time_Zone.parse("Europe/Warsaw"))
Test.group "Expression Arithmetic" <|
specify_test "should be able to do basic arithmetic" expression_test->
expression_test "1+1" 2
expression_test "23-15" 8
expression_test "2.5*4.2" 10.5
expression_test "1_000.456/2" 500.228
expression_test "2^4" 16
expression_test "11%3" 2
specify_test "should be able to do basic arithmetic with order" expression_test->
expression_test "1+1*2+2" 5
expression_test "23-15/3+6" 24
expression_test "52.92/4.2^2" 3
expression_test "(1+1)*2+2" 6
specify_test "should be able to do basic arithmetic with whitespace" expression_test->
expression_test "1 + 1" 2
expression_test " 23 -15 " 8
expression_test "2.5* 4.2" 10.5
expression_test "1_000.456/ 2" 500.228
expression_test " 2 ^ 4 " 16
expression_test " 11 % 3 " 2
expression_test "1+1 * 2" 3
expression_test "1 + 1*2" 3
Test.group "Column Arithmetic" <|
specify_test "should be able to perform arithmetic on columns" expression_test->
expression_test "[A] + 2" [3, 4, 5, 6, 7]
expression_test "[B] - 2" [-1, -0.5, 0.5, 2, 4]
expression_test "[A] * 4" [4, 8, 12, 16, 20]
expression_test "[B] / 2" [0.5, 0.75, 1.25, 2, 3]
expression_test "[A] + [B]" [2, 3.5, 5.5, 8, 11]
expression_test "[A] - [B]" [0, 0.5, 0.5, 0, -1]
expression_test "[A] * [B]" [1, 3, 7.5, 16, 30]
expression_test "[B] / [A]" [1, 0.75, 0.8333333333333334, 1, 1.2]
expression_test "[A] ^ [B]" [1, 2.8284271247461903, 15.588457268119896, 256, 15625]
expression_test "[A] % [B]" [0, 0.5, 0.5, 0, 5]
expression_test "[A] + [B] + [A]" [3, 5.5, 8.5, 12, 16]
expression_test "[A] - [B] - [A]" [-1, -1.5, -2.5, -4, -6]
expression_test "[A] * [B] * [A]" [1, 6, 22.5, 64, 150]
expression_test "[A] / [B] / [A]" [1.0, 0.6666666666666667, 0.39999999999999999, 0.25, 0.16666666666666667]
expression_test "[A] ^ [B] * [A]" [1, 5.65685424949238, 46.7653718043597, 1024, 78125]
expression_test "[A] % [B] % [A]" [0, 0.5, 0.5, 0, 0]
specify_test "should be able to perform arithmetic on columns with order" expression_test->
expression_test "([A] + [B]) * 3" [6, 10.5, 16.5, 24, 33]
expression_test "[A] * (4 + [B])" [5, 11, 19.5, 32, 50]
expression_test "[A] * [B] + [A]" [2, 5, 10.5, 20, 35]
expression_test "[A] + [B] * [B]" [2, 4.25, 9.25, 20, 41]
expression_test "([A] + [B]) / [A]" [2, 1.75, 1.83333333333333, 2, 2.2]
expression_test "[A] / [B] + 2" [3, 3.33333333333333, 3.2, 3, 2.83333333333333]
expression_test "([A] + [B]) % 4" [2, 3.5, 1.5, 0, 3]
expression_test "[A] % [B] + 2" [2, 2.5, 2.5, 2, 7]
expression_test "([A] - [B]) ^ [A]" [0, 0.25, 0.125, 0, -1]
expression_test "[A] ^ ([B] - [A])" [1, 0.707106781186547, 0.577350269189626, 1, 5]
Test.group "Comparison Operators" <|
specify_test "should be able to compare equality" expression_test->
expression_test "2 = 1 + 1" True
expression_test "2 == 1 + 1" True
expression_test "[A] = 2" [False, True, False, False, False]
expression_test "[A] == 2" [False, True, False, False, False]
expression_test "3 != 1 + 1" True
expression_test "3 <> 1 + 1" True
expression_test "[A] != 2" [True, False, True, True, True]
expression_test "[A] <> 2" [True, False, True, True, True]
specify_test "should be able to compare ordering" expression_test->
expression_test "1 > 2" False
expression_test "1 < 2" True
expression_test "[A] > 2" [False, False, True, True, True]
expression_test "[A] >= 2" [False, True, True, True, True]
expression_test "[A] < 2" [True, False, False, False, False]
expression_test "[A] <= 2" [True, True, False, False, False]
specify_test "should be able to use between" expression_test->
expression_test "1 + 1 BETWEEN 1 AND 3" True
expression_test "1 + 1 between 2 AND 3" True
expression_test "1 + 1 bETWEEN 1 AND 2" True
expression_test "[A] between 2 AND 3" [False, True, True, False, False]
expression_test "1 + 1 NOT BETWEEN 1 AND 3" False
expression_test "[A] not between 2 AND 3" [True, False, False, True, True]
specify_test "should be able to use in" expression_test->
expression_test "1 + 1 IN (2, 4, 6)" True
expression_test "[A] IN (2, 4, 6)" [False, True, False, True, False]
expression_test "1 + 1 NOT IN (2, 4, 6)" False
expression_test "[A] NOT IN (2, 4, 6)" [True, False, True, False, True]
expression_test "[A] IN (3)" [False, False, True, False, False]
expression_test "[A] NOT IN (3)" [True, True, False, True, True]
specify_test "should be able to check null" expression_test->
expression_test "1 IS NULL" False
expression_test "1 IS NoTHing" False
expression_test "Nothing IS NULL" True
expression_test "1 IS NOT NULL" True
expression_test "Nothing IS NOT NULL" False
expression_test "[A] IS NULL" [False, False, False, False, False]
expression_test "[C] IS NULL" [False, False, False, False, True]
expression_test "[A] IS NOT NULL" [True, True, True, True, True]
expression_test "[C] IS NOT NULL" [True, True, True, True, False]
specify_test "should be able to check empty" expression_test->
expression_test "'Hello World' IS EMPTY" False
expression_test "'' IS EMPTY" True
expression_test "Nothing IS EMPTY" True
expression_test "'Hello World' IS NOT EMPTY" True
expression_test "'' IS NOT EMPTY" False
expression_test "Nothing IS NOT EMPTY" False
Test.group "Text Operators" <|
specify_test "should be able to concatenate text" expression_test->
expression_test "'Hello ' + 'World'" "Hello World"
expression_test "[C] + ' World'" ["Hello World", "World World", "Hello World! World", " World", Nothing]
expression_test "'Hello ' + [C]" ["Hello Hello", "Hello World", "Hello Hello World!", "Hello ", Nothing]
expression_test "[C] + [C]" ["HelloHello", "WorldWorld", "Hello World!Hello World!", "", Nothing]
specify_test "should be able to use like" expression_test->
expression_test "'Hello World' LIKE 'Hello%'" True
expression_test "'Hello' LIKE 'H_llo'" True
expression_test "'Hello' LIKE 'H_l%'" True
expression_test "'Hello' LIKE 'H___o'" True
expression_test "'World' LIKE 'H___o'" False
expression_test "'Hello World' NOT LIKE 'Hello%'" False
expression_test "[C] LIKE 'Hello%'" [True, False, True, False, Nothing]
expression_test "[C] NOT LIKE 'Hello%'" [False, True, False, True, Nothing]
Test.group "Boolean Operators" <|
specify_test "should be able to AND booleans" expression_test->
expression_test "True && TRUE" True
expression_test "True AND False" False
expression_test "True && [Bad]] Name]" [True, False, True, False, True]
expression_test "False AND [Bad]] Name]" False
specify_test "should be able to OR booleans" expression_test->
expression_test "True || TRUE" True
expression_test "True OR False" True
expression_test "False OR False" False
expression_test "True OR [Bad]] Name]" True
expression_test "False || [Bad]] Name]" [True, False, True, False, True]
specify_test "should be able to NOT booleans" expression_test->
expression_test "!TRUE" False
expression_test "Not False" True
expression_test "NOT [Bad]] Name]" [False, True, False, True, False]
specify_test "should be able to use IF" expression_test->
expression_test "IF True THEN 1 ELSE 0" 1
expression_test "IF False THEN 'A' ELSE 'B' END" 'B'
expression_test "IF [Bad]] Name] THEN [A] ELSE [B] ENDIF" [1, 1.5, 3, 4, 5]
Test.group "Function invocation" <|
specify_test "should be able to call a function with arguments" expression_test->
expression_test "Not(True)" False
expression_test "not(False)" True
expression_test "iif(True, 1, 3)" 1
expression_test "iif([Bad]] Name], 2, 3)" [2, 3, 2, 3, 2]
specify_test "should be able to call a variable args function" expression_test->
expression_test "min(10, 3, 8)" 3
expression_test "max([A], [B], 3)" [3, 3, 3, 4, 6]
Test.group "Errors should be handled" <|
error_tester expression fail_type =
test_table.set "NEW_COL" expression on_problems=Problem_Behavior.Report_Error . should_fail_with fail_type
test_table.set "NEW_COL" expression . column_count . should_equal test_table.column_count
specify_test "should fail with Syntax_Error if badly formed" expression_test=error_tester expression_test->
expression_test "IIF [A] THEN 1 ELSE 2" Expression_Error.Syntax_Error
expression_test "A + B" Expression_Error.Syntax_Error
expression_test "#2022-31-21#" Expression_Error.Syntax_Error
specify_test "should fail with Unsupported_Operation if the function is unknown" expression_test=error_tester expression_test->
expression_test "unknown([C])" Expression_Error.Unsupported_Operation
specify_test "should fail with Argument_Mismatch if not sufficient arguments" expression_test=error_tester expression_test->
expression_test "starts_with([C])" Expression_Error.Argument_Mismatch
specify_test "should fail with Argument_Mismatch if too many arguments" expression_test=error_tester expression_test->
expression_test "starts_with([C], 'Hello', 'World')" Expression_Error.Argument_Mismatch
main = Test_Suite.run_main (spec True)

View File

@ -5,8 +5,10 @@ from Standard.Test import Test_Suite
import project.In_Memory_Tests
import project.Database.Main as Database_Tests
import project.Data_Formatter_Spec
import project.Expression_Spec
main = Test_Suite.run_main <|
In_Memory_Tests.in_memory_spec
Database_Tests.databases_spec
Data_Formatter_Spec.spec
Expression_Spec.spec
Database_Tests.databases_spec

View File

@ -425,14 +425,6 @@ spec =
i.at "Items Count" . to_vector . should_equal [3, 2, 4]
i.at "Storage Type" . to_vector . should_equal [Storage.Text, Storage.Integer, Storage.Any]
Test.group "Column-wide statistics" <|
Test.specify 'should allow computing basic column-wide stats' <|
price = Column.from_vector 'price' [0.4, 3.5, Nothing, 6.7, Nothing, 97, Nothing]
price.sum.should_equal 107.6
price.min.should_equal 0.4
price.max.should_equal 97
price.mean.should_equal 26.9
Test.group "Sorting Tables" <|
df = (enso_project.data / "clothes.csv").read

View File

@ -0,0 +1 @@
~ Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.

View File

@ -0,0 +1 @@
Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.

View File

@ -1,3 +1,3 @@
3D20F317407799FC2002CA1A005A2F5CDBFE3A082AD7BA59D08F04270EF9B88C
0DF140BB506529B02B8A79B1E32040D7B4515E690EB2C8F32B7F74DD0E821719
840031EDBA6D7166EE1BABF8D1AB65F7219F5258683A2D487D12D3D4B8387BD7
4BC5787A7330388C3B8BF8C5955FEFB57E57CB47DFAA243180AF0DA066E3D0D6
0