diff --git a/CHANGELOG.md b/CHANGELOG.md index f7a1b9e1fc9..342ff3ec046 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -230,6 +230,8 @@ - [Implemented `Table.rows` giving access to a vector of rows.][3827] - [Define Enso epoch start as 15th October 1582][3804] - [Implemented `Period` type][3818] +- [Implemented new functions on Column and added expression syntax support to + create derived Columns.][3782] [debug-shortcuts]: https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug @@ -368,6 +370,7 @@ [3818]: https://github.com/enso-org/enso/pull/3818 [3776]: https://github.com/enso-org/enso/pull/3776 [3836]: https://github.com/enso-org/enso/pull/3836 +[3782]: https://github.com/enso-org/enso/pull/3782 #### Enso Compiler diff --git a/build.sbt b/build.sbt index 5e01534dba7..30225f2a4ca 100644 --- a/build.sbt +++ b/build.sbt @@ -1872,17 +1872,26 @@ lazy val `std-base` = project lazy val `std-table` = project .in(file("std-bits") / "table") + .enablePlugins(Antlr4Plugin) .settings( frgaalJavaCompilerSetting, autoScalaLibrary := false, Compile / packageBin / artifactPath := `table-polyglot-root` / "std-table.jar", + Antlr4 / antlr4PackageName := Some("org.enso.table.expressions"), + Antlr4 / antlr4Version := "4.10.1", + Antlr4 / antlr4GenVisitor := true, + Antlr4 / antlr4TreatWarningsAsErrors := true, + Compile / managedSourceDirectories += { + (Antlr4 / sourceManaged).value / "main" / "antlr4" + }, libraryDependencies ++= Seq( "org.graalvm.truffle" % "truffle-api" % graalVersion % "provided", "org.netbeans.api" % "org-openide-util-lookup" % netbeansApiVersion % "provided", "com.univocity" % "univocity-parsers" % "2.9.1", "org.apache.poi" % "poi-ooxml" % "5.2.2", - "org.apache.xmlbeans" % "xmlbeans" % "5.1.0" + "org.apache.xmlbeans" % "xmlbeans" % "5.1.0", + "org.antlr" % "antlr4-runtime" % "4.10.1" ), Compile / packageBin := Def.task { val result = (Compile / packageBin).value diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso index 48c1935f501..c7381378e62 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso @@ -8,7 +8,7 @@ import project.Data.SQL_Statement.SQL_Statement import project.Data.SQL_Type.SQL_Type import project.Data.Table as Database_Table import project.Internal.IR.Context.Context -import project.Internal.IR.Expression.Expression +import project.Internal.IR.SQL_Expression.SQL_Expression import project.Internal.IR.Query.Query from project.Internal.Result_Set import read_column, result_set_to_table @@ -182,7 +182,7 @@ type Connection db_table = if create_table.is_error then create_table else self.query (SQL_Query.Table_Name name) if db_table.is_error.not then - pairs = db_table.internal_columns.map col->[col.name, Expression.Constant col.sql_type Nothing] + pairs = db_table.internal_columns.map col->[col.name, SQL_Expression.Constant col.sql_type Nothing] insert_query = self.dialect.generate_sql <| Query.Insert name pairs insert_template = insert_query.prepare.first self.jdbc_connection.load_table insert_template db_table table batch_size diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso index a5013664aed..f30a28c0837 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso +++ 
b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso @@ -9,7 +9,7 @@ import project.Data.SQL_Type.SQL_Type import project.Data.Table.Integrity_Error import project.Internal.Helpers import project.Internal.IR.Context.Context -import project.Internal.IR.Expression.Expression +import project.Internal.IR.SQL_Expression.SQL_Expression import project.Internal.IR.Internal_Column.Internal_Column import project.Internal.IR.Query.Query @@ -36,7 +36,7 @@ type Column which they come. Combined expressions must come from the same context - they must both have the same filtering, grouping etc. rules applied to be able to be combined. - Value name:Text connection:Connection sql_type:SQL_Type expression:Expression context:Context + Value name:Text connection:Connection sql_type:SQL_Type expression:SQL_Expression context:Context ## UNSTABLE @@ -75,7 +75,7 @@ type Column ## UNSTABLE - Returns a materialized dataframe containing rows of this table. + Returns a materialized column containing rows of this column. Arguments: - max_rows: specifies a maximum amount of rows to fetch; if not set, all @@ -135,12 +135,13 @@ type Column Error.throw <| Unsupported_Database_Operation_Error_Data "Cannot use columns coming from different contexts in one expression without a join." constant -> actual_operand_type = operand_type.if_nothing self.sql_type - Expression.Constant actual_operand_type constant + SQL_Expression.Constant actual_operand_type constant + actual_operand_types = operand_types.if_nothing (Vector.fill operands.length Nothing) expressions = operands.zip actual_operand_types prepare_operand actual_new_type = new_type.if_nothing self.sql_type - new_expr = Expression.Operation op_kind ([self.expression] + expressions) + new_expr = SQL_Expression.Operation op_kind ([self.expression] + expressions) Column.Value self.name self.connection actual_new_type new_expr self.context ## PRIVATE @@ -195,41 +196,6 @@ type Column join self other on=Nothing drop_unmatched=False left_suffix='_left' right_suffix='_right' = self.to_table.join other on drop_unmatched left_suffix right_suffix - ## UNSTABLE - - Sums the values in this column. - sum : Any - sum self = self.compute_aggregate "SUM" - - ## UNSTABLE - - Computes the maximum element of this column. - max : Any - max self = self.compute_aggregate "MAX" - - ## UNSTABLE - - Computes the minimum element of this column. - min : Any - min self = self.compute_aggregate "MIN" - - ## UNSTABLE - - Computes the mean of non-missing elements of this column. - mean : Any - mean self = self.compute_aggregate "AVG" - - ## PRIVATE - - Computes an aggregate operator. - - Arguments: - - op_name: The name of the operator to compute. - compute_aggregate : Text - compute_aggregate self op_name = - agg = make_aggregate self op_name - agg.to_vector . at 0 - ## UNSTABLE Returns the length of this column. @@ -355,7 +321,12 @@ type Column of `self`. If `other` is a column, the operation is performed pairwise between corresponding elements of `self` and `other`. + : Column | Any -> Column - + self other = self.make_binary_op "+" other + + self other = + ## TODO: Revisit this as part of the column value type work. 
+ op = case other of + _ : Column -> if self.sql_type.is_definitely_numeric || other.sql_type.is_definitely_numeric then 'ADD_NUMBER' else 'ADD_TEXT' + _ -> if self.sql_type.is_definitely_numeric then 'ADD_NUMBER' else 'ADD_TEXT' + self.make_binary_op op other ## UNSTABLE @@ -422,6 +393,34 @@ type Column % : Column | Any -> Column % self other = self.make_binary_op "%" other + ## ALIAS Power + + Element-wise raising to the power. + + Arguments: + - other: The exponent to raise `self` by. If `other` is a column, the + power operation is performed pairwise between corresponding elements + of `self` and `other`. + + Returns a column containing the result of raising each element of `self` + by `other`. + + > Example + Squares the elements of one column. + + import Standard.Examples + + example_div = Examples.decimal_column ^ 2 + + > Example + Raises each value in a column by the value in another column. + + import Standard.Examples + + example_div = Examples.decimal_column ^ Examples.integer_column + ^ : Column | Any -> Column + ^ self other = self.make_binary_op '^' other + ## UNSTABLE Element-wise boolean conjunction. @@ -456,12 +455,14 @@ type Column not : Column not self = self.make_unary_op "NOT" - ## UNSTABLE + ## ALIAS IF + Replaces `True` values with `when_true` and `False` with `when_false`. Only meant for use with boolean columns. - TODO: Currently `when_true` and `when_false` need to be a single value. - In the future the API will also support row-based IIF if they are columns. + Arguments: + - when_true: value or column when `self` is `True`. + - when_false: value or column when `self` is `False`. iif : Any -> Any -> Column iif self when_true when_false = ## TODO we should adjust new_type based on types when_true and @@ -473,17 +474,68 @@ type Column when_false being either columns or regular values and rely on a mapping of Enso base types to SQL types, and a rule for extracting a common type. - approximate_type x = case x of - _ : Integer -> SQL_Type.integer - _ : Decimal -> SQL_Type.real - _ : Text -> SQL_Type.text - _ : Boolean -> SQL_Type.boolean - _ -> Error.throw (Illegal_Argument_Error_Data "Unsupported type.") - left_type = approximate_type when_true - right_type = approximate_type when_false + left_type = get_approximate_type when_true self.sql_type + right_type = get_approximate_type when_false self.sql_type if left_type != right_type then Error.throw (Illegal_Argument_Error_Data "when_true and when_false types do not match") else self.make_op "IIF" [when_true, when_false] new_type=left_type + ## Returns a column of first non-`Nothing` value on each row of `self` and + `values` list. + + Arguments: + - values: list of columns or values to coalesce with `self`. + + > Example + Get the first non-`Nothing` value in two columns. + + import Standard.Examples + + example_coalesce = Examples.decimal_column.coalesce Examples.integer_column + coalesce : (Any | Vector Any) -> Column + coalesce self values = case values of + _ : Vector.Vector -> + if values.any (v->(self.sql_type != get_approximate_type v self.sql_type)) then Error.throw (Illegal_Argument_Error_Data "self and values types do not all match") else + self.make_op "COALESCE" values new_type=self.sql_type + _ : Array -> self.coalesce (Vector.from_polyglot_array values) + _ -> self.coalesce [values] + + ## Returns a column of minimum on each row of `self` and `values` list. + + Arguments: + - values: list of columns or values to minimum with `self`. + + > Example + Get the minimum value in two columns. 
+ + import Standard.Examples + + example_min = Examples.decimal_column.min Examples.integer_column + min : (Any | Vector Any) -> Column + min self values = case values of + _ : Vector.Vector -> + if values.any (v->(self.sql_type != get_approximate_type v self.sql_type)) then Error.throw (Illegal_Argument_Error_Data "self and values types do not all match") else + self.make_op "ROW_MIN" values new_type=self.sql_type + _ : Array -> self.min (Vector.from_polyglot_array values) + _ -> self.min [values] + + ## Returns a column of maximum on each row of `self` and `values` list. + + Arguments: + - values: list of columns or values to maximum with `self`. + + > Example + Get the maximum value in two columns. + + import Standard.Examples + + example_max = Examples.decimal_column.max Examples.integer_column + max : (Any | Vector Any) -> Column + max self values = case values of + _ : Vector.Vector -> + if values.any (v->(self.sql_type != get_approximate_type v self.sql_type)) then Error.throw (Illegal_Argument_Error_Data "self and values types do not all match") else + self.make_op "ROW_MAX" values new_type=self.sql_type + _ : Array -> self.max (Vector.from_polyglot_array values) + _ -> self.max [values] ## UNSTABLE @@ -504,6 +556,18 @@ type Column is_empty : Column is_empty self = self.make_unary_op "IS_EMPTY" new_type=SQL_Type.boolean + ## Returns a column of booleans, with `True` items at the positions where + this column does not contain a `Nothing`. + + > Example + Check a column for present values. + + import Standard.Examples + + example_is_present = Examples.decimal_column.is_present + is_present : Column + is_present self = self.is_missing.not + ## PRIVATE Returns a column of booleans with `True` at the positions where this column contains a blank value. @@ -667,6 +731,7 @@ type Column example_contains = Examples.text_column_1.is_in [1, 2, 5] is_in : Column | Vector -> Column is_in self vector = case vector of + _ : Array -> self.is_in (Vector.from_polyglot_array vector) _ : Vector.Vector -> ## This is slightly hacky - we don't provide operand types as we want to allow any type to get through and currently we do not have a mapping @@ -695,18 +760,17 @@ type Column column : Column -> if Helpers.check_connection self column . not then (Error.throw (Integrity_Error.Error "Column "+column.name)) else ## We slightly abuse the expression syntax putting a Query as one of the sub-expressions. Once type-checking is added, we may need to - amend the signature of `Expression.Operation` to account for + amend the signature of `SQL_Expression.Operation` to account for this. Also, unfortunately as `NULL IN (...)` is `NULL` in SQL, we need to do separate handling of nulls - we check if the target column has any nulls and if so, we will do `IS NULL` checks for our columns too. That is because, we want the containment check for `NULL` to work the same way as for any other value. 
in_subquery = Query.Select [Pair_Data column.name column.expression] column.context - has_nulls_expression = Expression.Operation "BOOL_OR" [column.is_missing.expression] + has_nulls_expression = SQL_Expression.Operation "BOOL_OR" [column.is_missing.expression] has_nulls_subquery = Query.Select [Pair_Data "has_nulls" has_nulls_expression] column.context - new_type = SQL_Type.boolean - new_expr = Expression.Operation "IS_IN_COLUMN" [self.expression, in_subquery, has_nulls_subquery] - Column.Value self.name self.connection new_type new_expr self.context + new_expr = SQL_Expression.Operation "IS_IN_COLUMN" [self.expression, in_subquery, has_nulls_subquery] + Column.Value self.name self.connection SQL_Type.boolean new_expr self.context ## PRIVATE as_internal : Internal_Column @@ -716,48 +780,14 @@ type Column to_text : Text to_text self = "(Database Column "+self.name.to_text+")" -## PRIVATE - - A helper method for creating an aggregated column by applying some - operation. - - Arguments: - - column: The column to aggregate. - - operation: The name of the aggregation operation. - - name_suffix: The suffix to apply to the name of the aggregate column. - - new_type: The SQL type of the result column. -make_aggregate : Column -> Text -> Text -> SQL_Type -> Column -make_aggregate column operation name_suffix="_agg" new_type=Nothing = - actual_new_type = new_type.if_nothing column.sql_type - expr = Expression.Operation operation [column.expression] - case Helpers.ensure_name_is_sane name_suffix of - True -> - new_name = column.name + name_suffix - lift_aggregate new_name column.connection actual_new_type expr column.context + ## PRIVATE + Helper for the expression to tell it which functions needs a Vector. + var_args_functions : Array + var_args_functions = ['is_in', 'coalesce', 'min', 'max'] ## PRIVATE - - A helper function that lifts an aggregate query into a subquery to ensure - correctness of further processing. - - Argument: - - new_name: The new name for the aggregate column. - - connection: The connection with which the aggregate is associated. - - expected_type: The expected SQL type of the column. - - expr: The expression for the query. - - context: The context in which the query exists. -lift_aggregate : Text -> Connection -> SQL_Type -> Expression -> Context -> Column -lift_aggregate new_name connection expected_type expr context = - # TODO [RW] This is a simple workaround for #1643 - we always wrap the - # aggregate into a subquery, thus making it safe to use it everywhere. A - # more complex solution may be adopted at some point. - ixes = freshen_columns [new_name] context.meta_index - col = Internal_Column.Value new_name expected_type expr - setup = context.as_subquery new_name+"_sub" [[col], ixes] - subquery = setup.first - cols = setup.second - new_col = cols.first.first - new_ixes = cols.second - new_ctx = Context.for_subquery subquery . set_index new_ixes - Column.Value new_name connection new_col.sql_type new_col.expression new_ctx - + TODO: Revisit this as part of the column value type work. 
+get_approximate_type value default = case value of + _ : Column -> value.sql_type + Nothing -> default + _ -> SQL_Type.approximate_type value \ No newline at end of file diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/SQL_Statement.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/SQL_Statement.enso index ad3e6a67bfb..eee8e7bf875 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/SQL_Statement.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/SQL_Statement.enso @@ -43,8 +43,12 @@ type SQL_Statement strings = self.internal_fragments . map <| case _ of SQL_Fragment.Code_Part code -> code # TODO at some point we may try more sophisticated serialization based on data type + # TODO #183734954: date and time formatting is limited and will lose sub-second precision and timezone offset. SQL_Fragment.Interpolation _ obj -> case obj of Number -> obj.to_text + Date_Time.Date_Time -> "'" + (obj.format "yyyy-MM-dd HH:mm:ss") + "'" + Date.Date -> "'" + (obj.format "yyyy-MM-dd") + "'" + Time_Of_Day.Time_Of_Day -> "'" + (obj.format "HH:mm:ss") + "'" _ -> "'" + obj.to_text.replace "'" "''" + "'" strings.join "" diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/SQL_Type.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/SQL_Type.enso index 55b97825496..c7a27bfc8aa 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/SQL_Type.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/SQL_Type.enso @@ -49,12 +49,12 @@ type SQL_Type numeric : SQL_Type numeric = SQL_Type.Value Types.NUMERIC "NUMERIC" - ## The SQL type representing one of the suppported textual types. + ## The SQL type representing one of the supported textual types. varchar : SQL_Type varchar = SQL_Type.Value Types.VARCHAR "VARCHAR" ## UNSTABLE - The SQL type representing one of the suppported textual types. + The SQL type representing one of the supported textual types. It seems that JDBC treats the `TEXT` and `VARCHAR` types as interchangeable. text : SQL_Type @@ -64,6 +64,40 @@ type SQL_Type blob : SQL_Type blob = SQL_Type.Value Types.BLOB "BLOB" + ## The SQL type representing a date type. + date : SQL_Type + date = SQL_Type.Value Types.DATE "DATE" + + ## The SQL type representing a time type. + time : SQL_Type + time = SQL_Type.Value Types.TIME "TIME" + + ## The SQL type representing a date and time (timestamp) type. + date_time : SQL_Type + date_time = SQL_Type.Value Types.TIMESTAMP_WITH_TIMEZONE "TIMESTAMP" + + ## ADVANCED + Given an Enso value, returns the approximate SQL type. + approximate_type : Any -> SQL_Type ! Illegal_Argument_Error_Data + approximate_type value = case value of + _ : Boolean -> SQL_Type.boolean + _ : Integer -> SQL_Type.integer + _ : Decimal -> SQL_Type.double + _ : Text -> SQL_Type.varchar + _ : Date.Date -> SQL_Type.date + _ : Time_Of_Day.Time_Of_Day -> SQL_Type.time_of_day + _ : Date_Time.Date_Time -> SQL_Type.date_time + _ -> Error.throw (Illegal_Argument_Error_Data "Unsupported type.") + + ## PRIVATE + + Returns True if this type represents an integer or a double. + + It only handles the standard types so it may return false negatives for + non-standard ones. + is_definitely_numeric : Boolean + is_definitely_numeric self = self.is_definitely_double || self.is_definitely_integer + ## PRIVATE Returns True if this type represents an integer.
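The interpolation branches added above inline date and time values as quoted SQL literals with fixed patterns, which is why sub-second precision and timezone offsets are lost (tracked by TODO #183734954). As a minimal illustrative sketch (not part of the patch; the helper name is hypothetical), the literal produced for a `Date` value looks like this:

    render_date_literal date =
        # Mirrors the new Date.Date branch: quote the ISO-formatted date.
        "'" + (date.format "yyyy-MM-dd") + "'"

    # render_date_literal (Date.new 1582 10 15) produces "'1582-10-15'"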
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index ec9ba718788..c53fac4886f 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -2,6 +2,8 @@ from Standard.Base import all from Standard.Base.Error.Problem_Behavior import Report_Warning from Standard.Table import Auto_Detect, Aggregate_Column, Data_Formatter, Column_Name_Mapping, Column_Selector, Sort_Column_Selector, Sort_Column, Match_Columns, Position +import Standard.Table.Data.Expression.Expression +import Standard.Table.Data.Expression.Expression_Error import Standard.Table.Data.Table.Table as Materialized_Table import Standard.Table.Internal.Java_Exports import Standard.Table.Internal.Table_Helpers @@ -18,7 +20,7 @@ import project.Data.SQL_Type.SQL_Type import project.Internal.Helpers import project.Internal.Aggregate_Helper import project.Internal.IR.Context.Context -import project.Internal.IR.Expression.Expression +import project.Internal.IR.SQL_Expression.SQL_Expression import project.Internal.IR.From_Spec.From_Spec import project.Internal.IR.Internal_Column.Internal_Column import project.Internal.IR.Join_Kind.Join_Kind @@ -27,6 +29,7 @@ import project.Internal.IR.Query.Query from Standard.Database.Errors import Unsupported_Database_Operation_Error_Data polyglot java import java.sql.JDBCType +polyglot java import java.util.UUID ## Represents a column-oriented table data structure backed by a database. type Table @@ -396,13 +399,20 @@ type Table on_problems.handle_errors fallback=self.with_no_rows <| mask (make_filter_column column filter) _ : Function -> Error.throw (Unsupported_Database_Operation_Error_Data "Filtering with a custom predicate is not supported in the database.") + _ : Text -> + table_at = self.at column + if table_at.is_error.not then self.filter table_at filter on_problems else + expression = self.evaluate column + if expression.is_error.not then self.filter expression filter on_problems else + pick_error = expression.catch Expression_Error.Syntax_Error (_->table_at) + on_problems.handle_errors pick_error fallback=self _ -> case on_problems.handle_errors (self.at column) fallback=Nothing of Nothing -> self resolved_column -> self.filter resolved_column filter on_problems ## PRIVATE with_no_rows self = - false_expression = Expression.Operation "=" [Expression.Constant SQL_Type.integer 1, Expression.Constant SQL_Type.integer 2] + false_expression = SQL_Expression.Operation "=" [SQL_Expression.Constant SQL_Type.integer 1, SQL_Expression.Constant SQL_Type.integer 2] new_filters = self.context.where_filters + [false_expression] new_ctx = self.context.set_where_filters new_filters self.updated_context new_ctx @@ -477,21 +487,35 @@ type Table If a column with the given name already exists, it will be replaced. Otherwise a new column is added. - set : Text -> Column -> Table - set self name column = case Helpers.ensure_name_is_sane name of - True -> - is_used_in_index = self.context.meta_index.exists i-> i.name == name - case is_used_in_index of - True -> Error.throw <| Illegal_State_Error_Data "Cannot override column "+name+", because it is used as an index. Remove the index or use a different name." 
- False -> - new_col = Internal_Column.Value name column.sql_type column.expression - replace = self.internal_columns.exists (c -> c.name == name) - case replace of - True -> - new_cols = self.internal_columns.map (c -> if c.name == name then new_col else c) - self.updated_columns new_cols - False -> - self.updated_columns (self.internal_columns + [new_col]) + set : Text -> Column | Text -> Problem_Behavior -> Table + set self name column on_problems=Report_Warning = on_problems.handle_errors fallback=self <| + case Helpers.ensure_name_is_sane name of + True -> + is_used_in_index = self.context.meta_index.exists i-> i.name == name + case is_used_in_index of + True -> Error.throw <| Illegal_State_Error_Data "Cannot override column "+name+", because it is used as an index. Remove the index or use a different name." + False -> + resolved = case column of + _ : Text -> self.evaluate column + _ -> column + new_col = Internal_Column.Value name resolved.sql_type resolved.expression + replace = self.internal_columns.exists (c -> c.name == name) + case replace of + True -> + new_cols = self.internal_columns.map (c -> if c.name == name then new_col else c) + self.updated_columns new_cols + False -> + self.updated_columns (self.internal_columns + [new_col]) + + ## PRIVATE + evaluate : Text -> Column + evaluate self expression = + get_column name = self.at name + make_constant value = + new_type = SQL_Type.approximate_type value + other = SQL_Expression.Constant new_type value + Column.Value ("Constant_" + UUID.randomUUID.to_text) self.connection new_type other self.context + Expression.evaluate expression get_column make_constant "Standard.Database.Data.Column" "Column" Column.var_args_functions ## UNSTABLE @@ -732,7 +756,7 @@ type Table new_columns = left_renamed_columns + right_renamed_columns on_exprs = left_new_join_index.zip right_new_join_index l-> r-> - Expression.Operation "=" [l.expression, r.expression] + SQL_Expression.Operation "=" [l.expression, r.expression] new_from = From_Spec.Join kind left_subquery right_subquery on_exprs new_limit = Nothing new_ctx = Context.Value new_from [] [] [] new_index new_limit @@ -814,7 +838,7 @@ type Table ## Returns the amount of rows in this table. row_count : Integer row_count self = if self.internal_columns.is_empty then 0 else - expr = Expression.Operation "COUNT_ROWS" [] + expr = SQL_Expression.Operation "COUNT_ROWS" [] column_name = "row_count" ## We need to keep some column in the subquery which will determine if the query is performing regular selection or aggregation. To avoid @@ -889,7 +913,7 @@ type Table setup = self.context.as_subquery self.name [self.internal_columns] new_ctx = Context.for_subquery setup.first new_columns = setup.second.first.map column-> - [column.name, Expression.Operation "COUNT" [column.expression]] + [column.name, SQL_Expression.Operation "COUNT" [column.expression]] query = Query.Select new_columns new_ctx self.connection.dialect.generate_sql query count_table = self.connection.read_statement count_query @@ -971,7 +995,7 @@ type Table _ -> Error.throw <| Illegal_State_Error_Data "Inserting can only be performed on tables as returned by `query`, any further processing is not allowed." 
# TODO [RW] before removing the PRIVATE tag, add a check that no bad stuff was done to the table as described above pairs = self.internal_columns.zip values col-> value-> - [col.name, Expression.Constant col.sql_type value] + [col.name, SQL_Expression.Constant col.sql_type value] query = self.connection.dialect.generate_sql <| Query.Insert table_name pairs affected_rows = self.connection.execute_update query case affected_rows == 1 of @@ -1071,7 +1095,7 @@ type Integrity_Error # make_table : Connection -> Text -> Vector [Text, SQL_Type] -> Context -> Table make_table : Connection -> Text -> Vector -> Context -> Table make_table connection table_name columns ctx = - cols = columns.map (p -> Internal_Column.Value p.first p.second (Expression.Column table_name p.first)) + cols = columns.map (p -> Internal_Column.Value p.first p.second (SQL_Expression.Column table_name p.first)) Table.Value table_name connection cols ctx ## PRIVATE diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Aggregate_Helper.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Aggregate_Helper.enso index 0adfa75645d..f0d82f24de5 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Aggregate_Helper.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Aggregate_Helper.enso @@ -3,7 +3,7 @@ from Standard.Base import all hiding First, Last from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all import project.Data.SQL_Type.SQL_Type -import project.Internal.IR.Expression.Expression +import project.Internal.IR.SQL_Expression.SQL_Expression import project.Internal.IR.Internal_Column.Internal_Column from project.Errors import Unsupported_Database_Operation_Error_Data @@ -23,48 +23,48 @@ make_aggregate_column table aggregate new_name = ## PRIVATE Creates an Internal Representation of the expression that computes a requested statistic. 
-make_expression : Aggregate_Column -> Dialect -> Expression +make_expression : Aggregate_Column -> Dialect -> SQL_Expression make_expression aggregate dialect = is_non_empty_selector v = if v.is_nothing then False else v.columns.not_empty case aggregate of Group_By c _ -> c.expression - Count _ -> Expression.Operation "COUNT_ROWS" [] + Count _ -> SQL_Expression.Operation "COUNT_ROWS" [] Count_Distinct columns _ ignore_nothing -> if columns.is_empty then Error.throw (Illegal_Argument_Error_Data "Count_Distinct must have at least one column.") else case ignore_nothing of - True -> Expression.Operation "COUNT_DISTINCT" (columns.map .expression) - False -> Expression.Operation "COUNT_DISTINCT_INCLUDE_NULL" (columns.map .expression) - Count_Not_Nothing c _ -> Expression.Operation "COUNT" [c.expression] - Count_Nothing c _ -> Expression.Operation "COUNT_IS_NULL" [c.expression] - Count_Not_Empty c _ -> Expression.Operation "COUNT_NOT_EMPTY" [c.expression] - Count_Empty c _ -> Expression.Operation "COUNT_EMPTY" [c.expression] - Percentile p c _ -> Expression.Operation "PERCENTILE" [Expression.Constant SQL_Type.double p, c.expression] - Mode c _ -> Expression.Operation "MODE" [c.expression] + True -> SQL_Expression.Operation "COUNT_DISTINCT" (columns.map .expression) + False -> SQL_Expression.Operation "COUNT_DISTINCT_INCLUDE_NULL" (columns.map .expression) + Count_Not_Nothing c _ -> SQL_Expression.Operation "COUNT" [c.expression] + Count_Nothing c _ -> SQL_Expression.Operation "COUNT_IS_NULL" [c.expression] + Count_Not_Empty c _ -> SQL_Expression.Operation "COUNT_NOT_EMPTY" [c.expression] + Count_Empty c _ -> SQL_Expression.Operation "COUNT_EMPTY" [c.expression] + Percentile p c _ -> SQL_Expression.Operation "PERCENTILE" [SQL_Expression.Constant SQL_Type.double p, c.expression] + Mode c _ -> SQL_Expression.Operation "MODE" [c.expression] First c _ ignore_nothing order_by -> case is_non_empty_selector order_by of False -> Error.throw (Unsupported_Database_Operation_Error_Data "`First` aggregation requires at least one `order_by` column.") True -> order_bys = order_by.columns.map c-> dialect.prepare_order_descriptor c.column.as_internal c.direction Text_Ordering.Default case ignore_nothing of - False -> Expression.Operation "FIRST" [c.expression]+order_bys - True -> Expression.Operation "FIRST_NOT_NULL" [c.expression]+order_bys + False -> SQL_Expression.Operation "FIRST" [c.expression]+order_bys + True -> SQL_Expression.Operation "FIRST_NOT_NULL" [c.expression]+order_bys Last c _ ignore_nothing order_by -> case is_non_empty_selector order_by of False -> Error.throw (Unsupported_Database_Operation_Error_Data "`Last` aggregation requires at least one `order_by` column.") True -> order_bys = order_by.columns.map c-> dialect.prepare_order_descriptor c.column.as_internal c.direction Text_Ordering.Default case ignore_nothing of - False -> Expression.Operation "LAST" [c.expression]+order_bys - True -> Expression.Operation "LAST_NOT_NULL" [c.expression]+order_bys - Maximum c _ -> Expression.Operation "MAX" [c.expression] - Minimum c _ -> Expression.Operation "MIN" [c.expression] - Shortest c _ -> Expression.Operation "SHORTEST" [c.expression] - Longest c _ -> Expression.Operation "LONGEST" [c.expression] + False -> SQL_Expression.Operation "LAST" [c.expression]+order_bys + True -> SQL_Expression.Operation "LAST_NOT_NULL" [c.expression]+order_bys + Maximum c _ -> SQL_Expression.Operation "MAX" [c.expression] + Minimum c _ -> SQL_Expression.Operation "MIN" [c.expression] + Shortest c _ -> 
SQL_Expression.Operation "SHORTEST" [c.expression] + Longest c _ -> SQL_Expression.Operation "LONGEST" [c.expression] Standard_Deviation c _ population -> case population of - True -> Expression.Operation "STDDEV_POP" [c.expression] - False -> Expression.Operation "STDDEV_SAMP" [c.expression] + True -> SQL_Expression.Operation "STDDEV_POP" [c.expression] + False -> SQL_Expression.Operation "STDDEV_SAMP" [c.expression] Concatenate c _ separator prefix suffix quote_char -> - base_args = [c.expression, Expression.Constant SQL_Type.text separator, Expression.Constant SQL_Type.text prefix, Expression.Constant SQL_Type.text suffix] + base_args = [c.expression, SQL_Expression.Constant SQL_Type.text separator, SQL_Expression.Constant SQL_Type.text prefix, SQL_Expression.Constant SQL_Type.text suffix] case quote_char.is_empty of - True -> Expression.Operation "CONCAT" base_args - False -> Expression.Operation "CONCAT_QUOTE_IF_NEEDED" base_args+[Expression.Constant SQL_Type.text quote_char] - Sum c _ -> Expression.Operation "SUM" [c.expression] - Average c _ -> Expression.Operation "AVG" [c.expression] - Median c _ -> Expression.Operation "MEDIAN" [c.expression] + True -> SQL_Expression.Operation "CONCAT" base_args + False -> SQL_Expression.Operation "CONCAT_QUOTE_IF_NEEDED" base_args+[SQL_Expression.Constant SQL_Type.text quote_char] + Sum c _ -> SQL_Expression.Operation "SUM" [c.expression] + Average c _ -> SQL_Expression.Operation "AVG" [c.expression] + Median c _ -> SQL_Expression.Operation "MEDIAN" [c.expression] diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso index 864cf2bea82..486b200e2f0 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso @@ -3,7 +3,7 @@ from Standard.Base import all import project.Data.SQL import project.Data.SQL.Builder import project.Internal.IR.Context.Context -import project.Internal.IR.Expression.Expression +import project.Internal.IR.SQL_Expression.SQL_Expression import project.Internal.IR.From_Spec.From_Spec import project.Internal.IR.Join_Kind.Join_Kind import project.Internal.IR.Order_Descriptor.Order_Descriptor @@ -169,15 +169,16 @@ base_dialect = unary = name -> [name, make_unary_op name] fun = name -> [name, make_function name] - arith = [bin "+", bin "-", bin "*", bin "/", bin "%"] + arith = [["ADD_NUMBER", make_binary_op "+"], ["ADD_TEXT", make_binary_op "||"], bin "-", bin "*", bin "/", bin "%", ["^", make_function "POWER"]] logic = [bin "AND", bin "OR", unary "NOT", ["IIF", make_iif]] compare = [bin "=", bin "!=", bin "<", bin ">", bin "<=", bin ">=", ["BETWEEN", make_between]] + functions = [["COALESCE", make_function "COALESCE"], ["ROW_MIN", make_function "MIN"], ["ROW_MAX", make_function "MAX"]] agg = [fun "MAX", fun "MIN", fun "AVG", fun "SUM"] counts = [fun "COUNT", ["COUNT_ROWS", make_constant "COUNT(*)"]] text = [is_empty, bin "LIKE"] nulls = [["IS_NULL", make_right_unary_op "IS NULL"], ["FILL_NULL", make_function "COALESCE"]] contains = [["IS_IN", make_is_in], ["IS_IN_COLUMN", make_is_in_column]] - base_map = Map.from_vector (arith + logic + compare + agg + counts + text + nulls + contains) + base_map = Map.from_vector (arith + logic + compare + functions + agg + counts + text + nulls + contains) Internal_Dialect.Value base_map wrap_in_quotes ## PRIVATE @@ -242,12 +243,12 @@ make_is_in_column arguments = case 
arguments.length of Arguments: - dialect: The SQL dialect in which the expression is being generated. - expr: The expression to generate SQL code for. -generate_expression : Internal_Dialect -> Expression | Order_Descriptor | Query -> Builder +generate_expression : Internal_Dialect -> SQL_Expression | Order_Descriptor | Query -> Builder generate_expression dialect expr = case expr of - Expression.Column origin name -> + SQL_Expression.Column origin name -> dialect.wrap_identifier origin ++ '.' ++ dialect.wrap_identifier name - Expression.Constant sql_type value -> SQL.interpolation sql_type value - Expression.Operation kind arguments -> + SQL_Expression.Constant sql_type value -> SQL.interpolation sql_type value + SQL_Expression.Operation kind arguments -> op = dialect.operation_map.get_or_else kind (Error.throw <| Unsupported_Database_Operation_Error_Data kind) parsed_args = arguments.map (generate_expression dialect) op parsed_args diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/Context.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/Context.enso index a4006d48a58..f3b002986c0 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/Context.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/Context.enso @@ -1,6 +1,6 @@ from Standard.Base import all -import project.Internal.IR.Expression.Expression +import project.Internal.IR.SQL_Expression.SQL_Expression import project.Internal.IR.From_Spec.From_Spec import project.Internal.IR.Internal_Column.Internal_Column import project.Internal.IR.Order_Descriptor.Order_Descriptor @@ -31,7 +31,7 @@ type Context - meta_index: a list of internal columns to use for joining or grouping. - limit: an optional maximum number of elements that the equery should return. - Value (from_spec : From_Spec) (where_filters : Vector Expression) (orders : Vector Order_Descriptor) (groups : Vector Expression) (meta_index : Vector Internal_Column) (limit : Nothing | Integer) + Value (from_spec : From_Spec) (where_filters : Vector SQL_Expression) (orders : Vector Order_Descriptor) (groups : Vector SQL_Expression) (meta_index : Vector Internal_Column) (limit : Nothing | Integer) ## PRIVATE @@ -82,7 +82,7 @@ type Context Arguments: - new_filters: The new filters to set in the query. - set_where_filters : Vector Expression -> Context + set_where_filters : Vector SQL_Expression -> Context set_where_filters self new_filters = Context.Value self.from_spec new_filters self.orders self.groups self.meta_index self.limit @@ -119,7 +119,7 @@ type Context Arguments: - new_groups: The new grouping clauses to set in the query. 
- set_groups : Vector Expression -> Context + set_groups : Vector SQL_Expression -> Context set_groups self new_groups = Context.Value self.from_spec self.where_filters self.orders new_groups self.meta_index self.limit @@ -152,7 +152,7 @@ type Context as_subquery self alias column_lists = rewrite_internal_column : Internal_Column -> Internal_Column rewrite_internal_column column = - Internal_Column.Value column.name column.sql_type (Expression.Column alias column.name) + Internal_Column.Value column.name column.sql_type (SQL_Expression.Column alias column.name) new_columns = column_lists.map columns-> columns.map rewrite_internal_column diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/From_Spec.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/From_Spec.enso index 40ba84fb8f1..0825ec42542 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/From_Spec.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/From_Spec.enso @@ -1,7 +1,7 @@ from Standard.Base import all import project.Internal.IR.Context.Context -import project.Internal.IR.Expression.Expression +import project.Internal.IR.SQL_Expression.SQL_Expression import project.Internal.IR.Join_Kind.Join_Kind ## PRIVATE @@ -44,7 +44,7 @@ type From_Spec - on: a list of expressions that will be used as join conditions, these are usually be equalities between expressions from the left and right sources. - Join (kind : Join_Kind) (left_spec : From_Spec) (right_spec : From_Spec) (on : Vector Expression) + Join (kind : Join_Kind) (left_spec : From_Spec) (right_spec : From_Spec) (on : Vector SQL_Expression) ## PRIVATE @@ -57,4 +57,4 @@ type From_Spec - context: the context for the sub-query. - alias: the name upon which the results of this sub-query can be referred to in other parts of the query. - Sub_Query (columns : Vector (Pair Text Expression)) (context : Context) (alias : Text) + Sub_Query (columns : Vector (Pair Text SQL_Expression)) (context : Context) (alias : Text) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/Internal_Column.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/Internal_Column.enso index db734304d81..85253612a27 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/Internal_Column.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/Internal_Column.enso @@ -1,7 +1,7 @@ from Standard.Base import all import project.Data.SQL_Type.SQL_Type -import project.Internal.IR.Expression.Expression +import project.Internal.IR.SQL_Expression.SQL_Expression type Internal_Column ## PRIVATE @@ -12,7 +12,7 @@ type Internal_Column - name: The column name. - sql_type: The SQL type of the column. - expression: An expression for applying to the column. 
- Value name:Text sql_type:SQL_Type expression:Expression + Value name:Text sql_type:SQL_Type expression:SQL_Expression ## PRIVATE diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/Order_Descriptor.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/Order_Descriptor.enso index e899c311cdd..270acaca89e 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/Order_Descriptor.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/Order_Descriptor.enso @@ -1,8 +1,8 @@ from Standard.Base import all -import project.Internal.IR.Expression.Expression +import project.Internal.IR.SQL_Expression.SQL_Expression import project.Internal.IR.Nulls_Order.Nulls_Order ## PRIVATE type Order_Descriptor - Value (expression : Expression) (direction : Sort_Direction) (nulls_order : Nothing | Nulls_Order = Nothing) (collation : Nothing | Text = Nothing) + Value (expression : SQL_Expression) (direction : Sort_Direction) (nulls_order : Nothing | Nulls_Order = Nothing) (collation : Nothing | Text = Nothing) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/Query.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/Query.enso index 543bc257310..df03c13936c 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/Query.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/Query.enso @@ -1,7 +1,7 @@ from Standard.Base import all import project.Internal.IR.Context.Context -import project.Internal.IR.Expression.Expression +import project.Internal.IR.SQL_Expression.SQL_Expression ## PRIVATE @@ -17,7 +17,7 @@ type Query is a pair whose first element is the name of the materialized column and the second element is the expression to compute. - context: The query context, see `Context` for more detail. - Select (expressions : Vector (Pair Text Expression)) (context : Context) + Select (expressions : Vector (Pair Text SQL_Expression)) (context : Context) ## PRIVATE diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/Expression.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/SQL_Expression.enso similarity index 94% rename from distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/Expression.enso rename to distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/SQL_Expression.enso index 4ebf104aa3e..5b328a8e5ef 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/Expression.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/SQL_Expression.enso @@ -7,7 +7,7 @@ import project.Data.SQL_Type.SQL_Type The internal representation of an SQL expression which can be a column reference, an interpolated constant or an operation that combines other expressions. -type Expression +type SQL_Expression ## PRIVATE The internal representation of an SQL expression that gets a value from a @@ -43,4 +43,4 @@ type Expression dialect. - expressions: a list of expressions which are arguments to the operation different operations support different amounts of arguments. 
- Operation (kind : Text) (expressions : Vector Expression) + Operation (kind : Text) (expressions : Vector SQL_Expression) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/JDBC_Connection.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/JDBC_Connection.enso index edfe446abc3..8b907b29e75 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/JDBC_Connection.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/JDBC_Connection.enso @@ -198,6 +198,9 @@ default_storage_type storage_type = case storage_type of Storage.Integer -> SQL_Type.integer Storage.Decimal -> SQL_Type.double Storage.Boolean -> SQL_Type.boolean + Storage.Date -> SQL_Type.date + Storage.Time_Of_Day -> SQL_Type.time_of_day + Storage.Date_Time -> SQL_Type.date_time ## Support for mixed type columns in Table upload is currently very limited, falling back to treating everything as text. Storage.Any -> SQL_Type.text diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso index 4ad0cb16c01..3a0259ef13b 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso @@ -7,7 +7,7 @@ import project.Data.SQL import project.Data.SQL_Statement.SQL_Statement import project.Data.SQL_Type.SQL_Type import project.Internal.Base_Generator -import project.Internal.IR.Expression.Expression +import project.Internal.IR.SQL_Expression.SQL_Expression import project.Internal.IR.Internal_Column.Internal_Column import project.Internal.IR.Order_Descriptor.Order_Descriptor import project.Internal.IR.Nulls_Order.Nulls_Order @@ -244,8 +244,8 @@ make_order_descriptor internal_column sort_direction text_ordering = False -> Error.throw (Unsupported_Database_Operation_Error_Data "Case insensitive ordering with custom locale is currently not supported. You may need to materialize the Table to perform this operation.") True -> - upper = Expression.Operation "UPPER" [internal_column.expression] - folded_expression = Expression.Operation "LOWER" [upper] + upper = SQL_Expression.Operation "UPPER" [internal_column.expression] + folded_expression = SQL_Expression.Operation "LOWER" [upper] Order_Descriptor.Value folded_expression sort_direction nulls_order=nulls collation=Nothing False -> Order_Descriptor.Value internal_column.expression sort_direction nulls_order=nulls collation=Nothing diff --git a/distribution/lib/Standard/Table/0.0.0-dev/THIRD-PARTY/NOTICE b/distribution/lib/Standard/Table/0.0.0-dev/THIRD-PARTY/NOTICE index 7d3f410db39..e14dd46a6bd 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/THIRD-PARTY/NOTICE +++ b/distribution/lib/Standard/Table/0.0.0-dev/THIRD-PARTY/NOTICE @@ -26,6 +26,11 @@ The license file can be found at `licenses/APACHE2.0`. Copyright notices related to this dependency can be found in the directory `commons-io.commons-io-2.11.0`. +'antlr4-runtime', licensed under the The BSD License, is distributed with the Table. +The license file can be found at `licenses/BSD-3-Clause`. +Copyright notices related to this dependency can be found in the directory `org.antlr.antlr4-runtime-4.10.1`. + + 'commons-collections4', licensed under the Apache License, Version 2.0, is distributed with the Table. The license information can be found along with the copyright notices. 
Copyright notices related to this dependency can be found in the directory `org.apache.commons.commons-collections4-4.4`. diff --git a/distribution/lib/Standard/Table/0.0.0-dev/THIRD-PARTY/org.antlr.antlr4-runtime-4.10.1/NOTICES b/distribution/lib/Standard/Table/0.0.0-dev/THIRD-PARTY/org.antlr.antlr4-runtime-4.10.1/NOTICES new file mode 100644 index 00000000000..960b2a8926c --- /dev/null +++ b/distribution/lib/Standard/Table/0.0.0-dev/THIRD-PARTY/org.antlr.antlr4-runtime-4.10.1/NOTICES @@ -0,0 +1 @@ +Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso index 3ded48eb913..7a6bc96b36d 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso @@ -400,6 +400,34 @@ type Column % : Column | Any -> Column % self other = run_vectorized_binary_op self '%' (%) other + ## ALIAS Power Columns + + Element-wise raising to the power. + + Arguments: + - other: The exponent to raise `self` by. If `other` is a column, the + power operation is performed pairwise between corresponding elements + of `self` and `other`. + + Returns a column containing the result of raising each element of `self` + by `other`. + + > Example + Squares the elements of one column. + + import Standard.Examples + + example_div = Examples.decimal_column ^ 2 + + > Example + Raises each value in a column by the value in another column. + + import Standard.Examples + + example_div = Examples.decimal_column ^ Examples.integer_column + ^ : Column | Any -> Column + ^ self other = run_vectorized_binary_op self '^' (^) other + ## ALIAS AND Columns Element-wise boolean conjunction. @@ -460,7 +488,7 @@ type Column || self other = run_vectorized_binary_op self "||" (||) other - ## ALIAS NOT Columns + ## ALIAS NOT Boolean negation of each element in self column. @@ -473,21 +501,91 @@ type Column not : Column not self = run_vectorized_unary_op self "not" .not - ## UNSTABLE + ## ALIAS IF + Replaces `True` values with `when_true` and `False` with `when_false`. Only meant for use with boolean columns. - TODO: Currently `when_true` and `when_false` need to be a single value. - In the future the API will also support row-based IIF if they are columns. + Arguments: + - when_true: value or column when `self` is `True`. + - when_false: value or column when `self` is `False`. + + > Example + If the value in a column is `True`, replace it with `1`, otherwise `0`. + + import Standard.Examples + + example_if = Examples.bool_column_1.iif 1 0 iif : Any -> Any -> Column iif self when_true when_false = case self.storage_type of Storage.Boolean -> s = self.java_column.getStorage ix = self.java_column.getIndex - rs = s.iif when_true when_false + + true_val = case when_true of + _ : Column -> when_true.java_column.getStorage + _ -> when_true + + false_val = case when_false of + _ : Column -> when_false.java_column.getStorage + _ -> when_false + + rs = s.iif true_val false_val Column.Column_Data (Java_Column.new "Result" ix rs) _ -> Error.throw (Illegal_Argument_Error "`iif` can only be used with boolean columns.") + ## Returns a column of first non-`Nothing` value on each row of `self` and + `values` list. + + Arguments: + - values: list of columns or values to coalesce with `self`. + + > Example + Get the first non-`Nothing` value in two columns. 
+ + import Standard.Examples + + example_coalesce = Examples.decimal_column.coalesce Examples.integer_column + coalesce : (Any | Vector Any) -> Column + coalesce self values = + fallback a b = a.if_nothing b + run_vectorized_many_op self "coalesce" fallback values + + ## Returns a column of minimum on each row of `self` and `values` list. + + Arguments: + - values: list of columns or values to minimum with `self`. + + > Example + Get the minimum value in two columns. + + import Standard.Examples + + example_min = Examples.decimal_column.min Examples.integer_column + min : (Any | Vector Any) -> Column + min self values = + fallback a b = if a.is_nothing then b else + if b.is_nothing then a else + if b < a then b else a + run_vectorized_many_op self "minimum" fallback values + + ## Returns a column of maximum on each row of `self` and `values` list. + + Arguments: + - values: list of columns or values to maximum with `self`. + + > Example + Get the maximum value in two columns. + + import Standard.Examples + + example_max = Examples.decimal_column.max Examples.integer_column + max : (Any | Vector Any) -> Column + max self values = + fallback a b = if a.is_nothing then b else + if b.is_nothing then a else + if b > a then b else a + run_vectorized_many_op self "maximum" fallback values ## Returns a column of booleans, with `True` items at the positions where this column contains a `Nothing`. @@ -696,7 +794,11 @@ type Column True -> fallback_fn _ _ = Panic.throw (Illegal_State_Error_Data "Impossible: This is a bug in the Standard.Table library.") - run_vectorized_binary_op self op_name fallback_fn vector skip_nulls=False new_name=result_name + true_vector = case vector of + _ : Array -> Vector.from_polyglot_array vector + _ : Vector.Vector -> vector + column : Column -> column.to_vector + run_vectorized_binary_op self op_name fallback_fn true_vector skip_nulls=False new_name=result_name False -> ## We have custom code for the non-vectorized case, because usually a vectorized binary op will apply the fallback @@ -705,6 +807,7 @@ type Column against the whole other column, instead of just the corresponding row - so we need to go around a bit. true_vector = case vector of + _ : Array -> Vector.from_polyglot_array vector _ : Vector.Vector -> vector ## This does no copying, as `Column.to_vector` just returns a view of the storage. @@ -881,6 +984,17 @@ type Column if storage.isNa index then Nothing else storage.getItem index + ## UNSTABLE + + Returns a column containing rows of this column. + + Arguments: + - max_rows: specifies a maximum amount of rows to fetch; if not set, all + available rows are fetched. + read : (Nothing | Integer) -> Column + read self max_rows=Nothing = + if max_rows.is_nothing then self else self.slice 0 max_rows + ## Returns a vector containing all the elements in this column. > Example @@ -996,98 +1110,6 @@ type Column data = ['data', self.to_vector.take (First max_data)] Json.from_pairs [size, name, data] . to_text - ## ALIAS Sum Columns - - Sums the values in this column. - - > Example - Sum the values in a column. - - import Standard.Examples - - example_sum = Examples.integer_column.sum - sum : Any - sum self = self.java_column.aggregate 'sum' (x-> Vector.from_polyglot_array x . reduce (+)) True - - ## ALIAS Max Columns - - Computes the maximum element of this column. - - > Example - Compute the maximum value of a column. 
- - import Standard.Examples - - example_max = Examples.integer_column.max - max : Any - max self = - self.java_column.aggregate 'max' (x-> Vector.from_polyglot_array x . reduce Math.max) True - - ## ALIAS Min Columns - - Computes the minimum element of this column. - - > Example - Compute the minimum value of a column. - - import Standard.Examples - - example_min = Examples.integer_column.min - min : Any - min self = - self.java_column.aggregate 'min' (x-> Vector.from_polyglot_array x . reduce Math.min) True - - ## ALIAS Mean Columns - - Computes the mean of non-missing elements of this column. - - > Example - Compute the mean value of a column. - - import Standard.Examples - - example_mean = Examples.integer_column.mean - mean : Any - mean self = - vec_mean v = if v.length == 0 then Nothing else - (Vector.from_polyglot_array v).reduce (+) / v.length - self.java_column.aggregate 'mean' vec_mean True - - ## Computes the variance of the sample represented by this column. - - Arguments: - - degrees_of_freedom_correction: a correction to account for the - missing degrees of freedom in the sample. The default value of `1` - computes a sample variance. Setting it to `0` will compute population - variance instead. - variance self degrees_of_freedom_correction=1 = - mean = self.mean - shifted = self - mean - sq = shifted * shifted - sq.sum / (self.length - degrees_of_freedom_correction) - - ## Computes the standard deviation of the sample represented by this column. - - Arguments: - - degrees_of_freedom_correction: a correction to account for the - missing degrees of freedom in the sample. The default value of `1` - computes a sample standard deviation. Setting it to `0` will compute - population standard deviation instead. - standard_deviation self degrees_of_freedom_correction=1 = - self.variance degrees_of_freedom_correction . sqrt - - ## Computes the coefficient of determination of a given prediction column. - - Arguments: - - predictions: the column predicting the values of this column. - r_squared self predictions = - prediction_diff = self - predictions - ss_res = prediction_diff*prediction_diff . sum - ss_tot_lin = self - self.mean - ss_tot = ss_tot_lin*ss_tot_lin . sum - 1 - ss_res / ss_tot - - ## UNSTABLE Sorts the column according to the specified rules. @@ -1181,22 +1203,6 @@ type Column first : Any ! Empty_Error first self = self.at 0 . catch Index_Out_Of_Bounds_Error_Data (_ -> Error.throw Empty_Error) - ## UNSTABLE - - Returns the first element in the column, if it exists. - - If the column is empty, this method will return a dataflow error - containing an `Empty_Error`. - - > Example - Get the first element of a column. - - import Standard.Examples - - example_head = Examples.integer_column.head - head : Any ! Empty_Error - head self = self.first - ## UNSTABLE Returns the last element in the column, if it exists. @@ -1243,6 +1249,11 @@ type Column duplicate_count : Column duplicate_count self = Column_Data self.java_column.duplicateCount + ## PRIVATE + Helper for the expression to tell it which functions needs a Vector. + var_args_functions : Vector + var_args_functions = ['is_in', 'coalesce', 'min', 'max'] + ## UNSTABLE An error for when the column contains no elements. @@ -1253,6 +1264,33 @@ type Empty_Error to_display_text : Text to_display_text self = "The column is empty." +## PRIVATE + + Folds the vectorized operation over the provided column and values. When more + than one value to is provided, the result is folded with subsequent values. 
+ + Arguments: + - column: The column to execute the operation over. + - name: The name of the vectorized operation. + - fallback_fn: A function used if the vectorized operation isn't available. + - operands: The vector of operands to apply to the function after `column`. + - skip_nulls: Specifies if nulls should be skipped. If set to `True`, a null + value results in null without passing it to the function. If set to + `False`, the null values are passed as any other value and can have custom + handling logic. + - new_name: The name of the column created as the result of this operation. +run_vectorized_many_op : Column -> Text -> (Any -> Any -> Any) -> Vector -> Boolean -> Text -> Column +run_vectorized_many_op column name fallback_fn operands skip_nulls=False new_name=(name + "_" + column.name) = + case operands of + _ : Vector.Vector -> + folded = operands.fold column.java_column.getStorage current-> operand-> + case operand of + _ : Column -> current.zip name fallback_fn operand.java_column.getStorage skip_nulls + _ -> current.bimap name fallback_fn operand skip_nulls + Column.Column_Data (Java_Column.new new_name column.java_column.getIndex folded) + _ : Array -> run_vectorized_many_op column name fallback_fn (Vector.from_polyglot_array operands) skip_nulls new_name + _ -> run_vectorized_many_op column name fallback_fn [operands] skip_nulls new_name + ## PRIVATE Executes a vectorized binary operation over the provided column. @@ -1267,7 +1305,7 @@ type Empty_Error `False`, the null values are passed as any other value and can have custom handling logic. - new_name: The name of the column created as the result of this operation. -run_vectorized_binary_op : Column -> Text -> (Any -> Any) -> Any -> Boolean -> Text -> Column +run_vectorized_binary_op : Column -> Text -> (Any -> Any -> Any) -> Any -> Boolean -> Text -> Column run_vectorized_binary_op column name fallback_fn operand skip_nulls=True new_name="Result" = case operand of Column.Column_Data col2 -> s1 = column.java_column.getStorage diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Expression.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Expression.enso new file mode 100644 index 00000000000..d42d7c7ee89 --- /dev/null +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Expression.enso @@ -0,0 +1,49 @@ +from Standard.Base import all + +polyglot java import org.enso.table.expressions.ExpressionVisitorImpl +polyglot java import java.lang.IllegalArgumentException +polyglot java import java.lang.UnsupportedOperationException + +type Expression + ## Evaluates an expression and returns the result + + Arguments: + - expression: the expression to evaluate + - get_column: a function that takes a column name and returns the + associated Column object. + - make_constant`: a function that takes an object and returns a + constant Column object. + - module_name: the name of the Column module that the expression is + being evaluated against. + - type_name: the name of the Column type that the expression is being + evaluated against. + - var_args_functions: a Vector of function names which take a single + Vector argument but which should be exposed with variable parameters. 
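`run_vectorized_many_op` folds the operation left-to-right over its operands, zipping element-wise when an operand is another column and broadcasting it as a constant otherwise. A hedged Java sketch of that dispatch on plain arrays (illustrative names, not the `Storage` API):

```java
import java.util.Arrays;
import java.util.function.BinaryOperator;

public class ManyOpFoldSketch {
    // Folds a binary fallback over a starting column and a list of operands:
    // an Integer[] operand is combined element-wise (like Storage.zip),
    // anything else is broadcast to every row (like Storage.bimap).
    static Integer[] fold(Integer[] column, Object[] operands, BinaryOperator<Integer> op) {
        Integer[] current = column.clone();
        for (Object operand : operands) {
            for (int i = 0; i < current.length; i++) {
                Integer rhs = operand instanceof Integer[] ? ((Integer[]) operand)[i] : (Integer) operand;
                current[i] = op.apply(current[i], rhs);
            }
        }
        return current;
    }

    public static void main(String[] args) {
        Integer[] column = {1, 5, 9};
        Integer[] other = {4, 4, 4};
        BinaryOperator<Integer> min = (a, b) -> a == null ? b : (b == null ? a : Math.min(a, b));
        // Minimum of the column, another column, and the constant 6, folded left to right.
        System.out.println(Arrays.toString(fold(column, new Object[] {other, 6}, min))); // [1, 4, 4]
    }
}
```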
+ evaluate : Text -> (Text -> Any) -> (Any -> Any) -> Text -> Text -> Vector Text -> Any + evaluate expression get_column make_constant module_name type_name var_args_functions = + handle_parse_error = Panic.catch_java ExpressionVisitorImpl.SyntaxErrorException handler=(cause-> Error.throw (Expression_Error.Syntax_Error cause.getMessage cause.getLine cause.getColumn)) + handle_unsupported = handle_java_error UnsupportedOperationException Expression_Error.Unsupported_Operation + handle_arguments = handle_java_error IllegalArgumentException Expression_Error.Argument_Mismatch + + handle_parse_error <| handle_unsupported <| handle_arguments <| + ExpressionVisitorImpl.evaluate expression get_column make_constant module_name type_name var_args_functions.to_array + +type Expression_Error + ## The expression supplied could not be parsed due to a syntax error. + Syntax_Error message:Text line:Integer column:Integer + + ## Expression error when a function could not be found on the target type. + Unsupported_Operation name:Text + + ## Expression error when the number of arguments for a function is incorrect. + Argument_Mismatch message:Text + + to_display_text : Text + to_display_text self = case self of + Expression_Error.Syntax_Error _ _ _ -> "Expression.Syntax_Error: " + self.message + " (line " + self.line.to_text + ", column " + self.column.to_text + ")." + Expression_Error.Unsupported_Operation _ -> "Expression.Unsupported: " + self.name + " is not a supported method." + Expression_Error.Argument_Mismatch _ -> "Expression.Argument_Mismatch: " + self.message + +## PRIVATE +handle_java_error java_type enso_constructor = + Panic.catch_java java_type handler=(cause-> Error.throw (enso_constructor cause.getMessage)) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index 1cd06f46245..e30813807eb 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -24,6 +24,8 @@ import project.Internal.Table_Helpers import project.Internal.Aggregate_Column_Helper import project.Internal.Parse_Values_Helper import project.Internal.Problem_Builder.Problem_Builder +import project.Data.Expression.Expression +import project.Data.Expression.Expression_Error from project.Data.Column import get_item_string from project.Data.Column_Type_Selection import Column_Type_Selection, Auto @@ -38,6 +40,7 @@ polyglot java import org.enso.table.data.table.Table as Java_Table polyglot java import org.enso.table.data.table.Column as Java_Column polyglot java import org.enso.table.operations.OrderBuilder polyglot java import org.enso.table.data.mask.OrderMask +polyglot java import java.util.UUID ## Represents a column-oriented table data structure. 
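`Expression.evaluate` above turns three Java exception types into the `Expression_Error` variants via `Panic.catch_java`. A rough Java-side sketch of the same translation; the result records and the nested `SyntaxErrorException` here are illustrative stand-ins (the real position-carrying exception is `ExpressionVisitorImpl.SyntaxErrorException`):

```java
import java.util.concurrent.Callable;

public class ExpressionErrorMappingSketch {
    // Illustrative stand-ins for the Enso-side Expression_Error variants.
    interface EvalResult {}
    record Ok(Object value) implements EvalResult {}
    record Syntax_Error(String message, int line, int column) implements EvalResult {}
    record Unsupported_Operation(String name) implements EvalResult {}
    record Argument_Mismatch(String message) implements EvalResult {}

    // Stand-in for the parser's position-carrying syntax failure.
    static class SyntaxErrorException extends RuntimeException {
        final int line;
        final int column;
        SyntaxErrorException(String message, int line, int column) {
            super(message);
            this.line = line;
            this.column = column;
        }
    }

    // Maps the three exception types that Expression.evaluate catches onto result values.
    static EvalResult evaluateSafely(Callable<Object> evaluation) {
        try {
            return new Ok(evaluation.call());
        } catch (SyntaxErrorException e) {
            return new Syntax_Error(e.getMessage(), e.line, e.column);
        } catch (UnsupportedOperationException e) {
            return new Unsupported_Operation(e.getMessage());
        } catch (IllegalArgumentException e) {
            return new Argument_Mismatch(e.getMessage());
        } catch (Exception e) {
            throw new IllegalStateException(e);
        }
    }

    public static void main(String[] args) {
        System.out.println(evaluateSafely(() -> {
            throw new UnsupportedOperationException("frobnicate");
        })); // Unsupported_Operation[name=frobnicate]
    }
}
```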
type Table @@ -865,6 +868,13 @@ type Table on_problems.handle_errors fallback=self.with_no_rows <| mask (make_filter_column column filter) _ : Function -> mask (column.map filter) + _ : Text -> + table_at = self.at column + if table_at.is_error.not then self.filter table_at filter on_problems else + expression = self.evaluate column + if expression.is_error.not then self.filter expression filter on_problems else + pick_error = expression.catch Expression_Error.Syntax_Error (_->table_at) + on_problems.handle_errors pick_error fallback=self _ -> case on_problems.handle_errors (self.at column) fallback=Nothing of Nothing -> self resolved_column -> self.filter resolved_column filter on_problems @@ -922,7 +932,10 @@ type Table Arguments: - name: The name of the column to set the value of. - - column: The new value for the column called `name`. + - column: The new value for the column either a `Column` or `Text` of an + expression. + - on_problems: Specifies how to handle if a problem occurs, raising as a + warning by default. If a column with the given name already exists, it will be replaced. Otherwise a new column is added. @@ -937,13 +950,24 @@ type Table table = Examples.inventory_table double_inventory = table.at "total_stock" * 2 table.set "total_stock" double_inventory - set : Text -> Column | Vector.Vector -> Table - set self name column = case column of - _ : Vector.Vector -> - self.set name (Column.from_vector name column) - Column.Column_Data _ -> + table.set "total_stock_expr" "2 * [total_stock]" + set : Text -> Column | Vector.Vector | Text -> Problem_Behavior -> Table + set self name column on_problems=Report_Warning = case column of + _ : Text -> + expression = self.evaluate column + if expression.is_error.not then self.set name expression on_problems else + on_problems.handle_errors expression fallback=self + _ : Vector.Vector -> self.set name (Column.from_vector name column) + _ : Column -> Table.Table_Data (self.java_table.addOrReplaceColumn (column.rename name . java_column)) + ## PRIVATE + evaluate : Text -> Column + evaluate self expression = + get_column name = self.at name + make_constant value = Column.from_vector (UUID.randomUUID.to_text) (Vector.new self.row_count _->value) + Expression.evaluate expression get_column make_constant "Standard.Table.Data.Column" "Column" Column.var_args_functions + ## Returns the vector of columns contained in this table. > Examples diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso index 60433768b31..fb05a306fe4 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso @@ -192,10 +192,13 @@ type Table_Column_Helper resolve_column_helper : (Integer | Text | Column) -> Problem_Builder -> a | Nothing resolve_column_helper self selector problem_builder = case selector of _ : Text -> - matched_columns = Matching.match_criteria_callback Text_Matcher.Case_Sensitive self.internal_columns [selector] reorder=True name_mapper=(_.name) problem_callback=problem_builder.report_missing_input_columns + matched_columns = self.internal_columns.filter column->(column.name==selector) if matched_columns.length == 1 then matched_columns.first else - if matched_columns.length == 0 then Nothing else - Panic.throw (Illegal_State_Error_Data "A single exact match should never match more than one column. 
Perhaps the table breaks the invariant of unique column names?") + if matched_columns.length != 0 then Panic.throw (Illegal_State_Error_Data "A single exact match should never match more than one column. Perhaps the table breaks the invariant of unique column names?") else + expression = (self.table.evaluate selector).catch Any _->Nothing + if expression != Nothing then expression else + problem_builder.report_missing_input_columns [selector] + Nothing _ : Integer -> case is_index_valid self.internal_columns.length selector of True -> self.internal_columns.at selector False -> diff --git a/distribution/lib/Standard/Test/0.0.0-dev/src/Bench.enso b/distribution/lib/Standard/Test/0.0.0-dev/src/Bench.enso index 6ba3d198010..79312f8db84 100644 --- a/distribution/lib/Standard/Test/0.0.0-dev/src/Bench.enso +++ b/distribution/lib/Standard/Test/0.0.0-dev/src/Bench.enso @@ -97,7 +97,7 @@ len_list list = Arguments: - act: The action to perform `count` number of times. -times : Integer-> List Any +times : Integer -> (Integer -> Any) -> List Any times count act = go = results -> number -> if number == 0 then results else @Tail_Call go (Cons (act number) results) number-1 diff --git a/project/plugins.sbt b/project/plugins.sbt index 68f47f9a850..6270b42c4fc 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -5,6 +5,7 @@ addSbtPlugin("org.scala-js" % "sbt-scalajs" % "1.10.1") addSbtPlugin("com.typesafe.sbt" % "sbt-license-report" % "1.2.0") addSbtPlugin("com.lightbend.sbt" % "sbt-java-formatter" % "0.7.0") addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.6") +addSbtPlugin("com.simplytyped" % "sbt-antlr4" % "0.8.3") libraryDependencies += "io.circe" %% "circe-yaml" % "0.14.1" libraryDependencies += "commons-io" % "commons-io" % "2.11.0" diff --git a/std-bits/table/src/main/antlr4/Expression.g4 b/std-bits/table/src/main/antlr4/Expression.g4 new file mode 100644 index 00000000000..825996d8845 --- /dev/null +++ b/std-bits/table/src/main/antlr4/Expression.g4 @@ -0,0 +1,126 @@ +grammar Expression; +prog: expr EOF ; + +expr: expr op=POWER expr # Power + | expr op=(MULTIPLY|DIVIDE|MODULO) expr # MultDivMod + | expr op=(ADD|MINUS) expr # AddSub + | expr op=(EQUALS|NOT_EQUALS|LESS_THAN_OR_EQUAL|GREATER_THAN_OR_EQUAL|LESS_THAN|GREATER_THAN) expr # Compare + | expr (IS_NULL|IS_EMPTY|IS_NOT_EMPTY|IS_NOT_NULL) # IsNull + | expr (LIKE|NOT_LIKE) expr # Like + | expr (IN|NOT_IN) '(' expr (',' expr)* ')' # In + | expr (NOT_BETWEEN | BETWEEN) expr AND expr # Between + | UNARY_NOT expr # UnaryNot + | expr op=(AND | '&&') expr # And + | expr op=(OR | '||') expr # Or + | IF expr THEN expr ELSE expr END? # If + | IDENTIFIER '(' (expr (',' expr)*)? ')' # Function // This allows for functions of 0 or more arguments within brackets (e.g. PI(), SIN(1), MOD(3,4) etc.) 
+ | '(' expr ')' # Paren + | COLUMN_NAME # Column + | MINUS expr # UnaryMinus + | value # Literal + ; + +POWER : '^'; +MULTIPLY : '*'; +DIVIDE : '/'; +MODULO : '%'; +ADD : '+'; +MINUS : '-'; +EQUALS : '==' | '='; +NOT_EQUALS : '!=' | '<>'; +LESS_THAN_OR_EQUAL : '<='; +GREATER_THAN_OR_EQUAL : '>='; +LESS_THAN : '<'; +GREATER_THAN : '>'; + +WHITESPACE : [ \t\r\n]+ -> skip; + +fragment A:[aA]; +fragment B:[bB]; +fragment C:[cC]; +fragment D:[dD]; +fragment E:[eE]; +fragment F:[fF]; +fragment G:[gG]; +fragment H:[hH]; +fragment I:[iI]; +fragment J:[jJ]; +fragment K:[kK]; +fragment L:[lL]; +fragment M:[mM]; +fragment N:[nN]; +fragment O:[oO]; +fragment P:[pP]; +fragment Q:[qQ]; +fragment R:[rR]; +fragment S:[sS]; +fragment T:[tT]; +fragment U:[uU]; +fragment V:[vV]; +fragment W:[wW]; +fragment X:[xX]; +fragment Y:[yY]; +fragment Z:[zZ]; +fragment LETTER : [A-Za-z]; +fragment DIGIT : [0-9]; +fragment HEX : [0-9a-fA-F]; +fragment IS : I S; +fragment EMPTY : E M P T Y; + +AND : A N D ; +OR : O R ; +NULL : N U L L; +NOTHING : N O T H I N G; +IS_NULL: IS ' ' (NOTHING | NULL); +IS_NOT_NULL : IS ' ' N O T ' ' (NOTHING | NULL); +IS_EMPTY: IS ' ' EMPTY; +IS_NOT_EMPTY : IS ' ' N O T ' ' EMPTY; +LIKE : L I K E; +NOT_LIKE : N O T ' ' LIKE; +IN : I N; +NOT_IN : N O T ' ' IN; +BETWEEN : B E T W E E N; +NOT_BETWEEN : N O T ' ' BETWEEN; +TRUE : T R U E; +FALSE : F A L S E; +IF : I F; +THEN : T H E N; +ELSE : E L S E; +UNARY_NOT : (N O T) | '!'; +END : E N D IF?; + +IDENTIFIER : LETTER (LETTER|DIGIT|'_')*; + +EXCEL_STRING : '"' ('""'|~'"')* '"'; + +PYTHON_STRING : '\'' (ESC|~['])* '\''; +fragment ESC : '\\' [abtnfrv"'\\] | '\\u' HEX HEX HEX HEX | '\\U' HEX HEX HEX HEX HEX HEX HEX HEX | '\\x' HEX HEX; + +fragment YEAR : DIGIT DIGIT DIGIT DIGIT; +fragment DATE_PART : '-' DIGIT DIGIT; +fragment HOUR : DIGIT DIGIT; +fragment TIME_PART : ':' DIGIT DIGIT; +fragment NANO_PART : '.' DIGIT DIGIT? DIGIT? DIGIT? DIGIT? DIGIT? DIGIT? DIGIT? DIGIT?; +fragment UTCOFFSET : ('Z' | ('+'|'-') HOUR TIME_PART?); +fragment TIMEZONE : '[' (~']')+ ']'; +fragment INTEGER : '0' | [1-9] (DIGIT | '_')* ; +fragment DECIMAL : '.' (DIGIT | '_')+; + +DATE : YEAR DATE_PART DATE_PART ; +TIME : HOUR TIME_PART (TIME_PART NANO_PART?)? ; +DATE_TIME : YEAR DATE_PART DATE_PART ('T' | ' ') HOUR TIME_PART (TIME_PART NANO_PART?)? UTCOFFSET? TIMEZONE? ; + +NUMBER : INTEGER DECIMAL? ; + +value + : (NULL | NOTHING) # nullOrNothing + | (TRUE | FALSE) # boolean + | '#' text=DATE '#' # date + | '#' text=TIME '#' # time + | '#' text=DATE_TIME '#' # datetime + | NUMBER # number + | EXCEL_STRING # excelString + | PYTHON_STRING # pythonString + ; + +COLUMN_NAME : '[' (']]'|~']')* ']'; diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/Aggregator.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/Aggregator.java deleted file mode 100644 index 86d817f27c2..00000000000 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/Aggregator.java +++ /dev/null @@ -1,28 +0,0 @@ -package org.enso.table.data.column.operation.aggregate; - -import org.enso.table.data.column.storage.Storage; - -import java.util.stream.IntStream; - -/** - * Represents a fold-like operation on a storage. An aggregator is usually created for a given - * storage, then {@link #nextGroup(IntStream)} is repeatedly called and the aggregator is - * responsible for collecting the results of such calls. After that, {@link #seal()} is called to - * obtain a storage containing all the results. 
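The grammar above is compiled by the sbt-antlr4 plugin into `ExpressionLexer` and `ExpressionParser`, the same generated classes that `ExpressionVisitorImpl.evaluate` wires up later in this diff. Assuming those classes and the ANTLR 4.10.1 runtime are on the classpath, a minimal sketch that parses one expression and prints its parse tree, which is handy for checking the precedence rules:

```java
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CommonTokenStream;
import org.enso.table.expressions.ExpressionLexer;
import org.enso.table.expressions.ExpressionParser;

public class ParseExpressionSketch {
    public static void main(String[] args) {
        // Same wiring as ExpressionVisitorImpl.evaluate: lexer -> token stream -> parser -> prog rule.
        var lexer = new ExpressionLexer(CharStreams.fromString("[total_stock] * 2 + 1"));
        var tokens = new CommonTokenStream(lexer);
        var parser = new ExpressionParser(tokens);
        var tree = parser.prog();
        // Prints the tree in LISP-style notation, e.g. showing that * binds tighter than +.
        System.out.println(tree.toStringTree(parser));
    }
}
```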
- */ -public abstract class Aggregator { - /** - * Requests the aggregator to append the result of aggregating the values at the specified - * positions. - * - * @param positions the positions to aggregate in this round. - */ - public abstract void nextGroup(IntStream positions); - - /** - * Returns the results of all previous {@link #nextGroup(IntStream)} calls. - * - * @return the storage containing all aggregation results. - */ - public abstract Storage seal(); -} diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/CountAggregator.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/CountAggregator.java deleted file mode 100644 index 391dbac5a23..00000000000 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/CountAggregator.java +++ /dev/null @@ -1,33 +0,0 @@ -package org.enso.table.data.column.operation.aggregate; - -import org.enso.table.data.column.storage.LongStorage; -import org.enso.table.data.column.storage.Storage; - -import java.util.stream.IntStream; - -/** Aggregates a storage by counting the non-missing values in each group. */ -public class CountAggregator extends Aggregator { - private final Storage storage; - private final long[] counts; - private int position = 0; - - /** - * @param storage the storage used as data source - * @param resultSize the exact number of times {@link Aggregator#nextGroup(IntStream)} will be - * called. - */ - public CountAggregator(Storage storage, int resultSize) { - this.storage = storage; - this.counts = new long[resultSize]; - } - - @Override - public void nextGroup(IntStream positions) { - counts[position++] = positions.filter(i -> !storage.isNa(i)).count(); - } - - @Override - public Storage seal() { - return new LongStorage(counts); - } -} diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/FunctionAggregator.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/FunctionAggregator.java deleted file mode 100644 index 411f0edaa66..00000000000 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/FunctionAggregator.java +++ /dev/null @@ -1,59 +0,0 @@ -package org.enso.table.data.column.operation.aggregate; - -import org.enso.base.polyglot.Polyglot_Utils; -import org.enso.table.data.column.builder.object.InferredBuilder; -import org.enso.table.data.column.storage.Storage; -import org.graalvm.polyglot.Value; - -import java.util.List; -import java.util.Objects; -import java.util.function.Function; -import java.util.stream.Collectors; -import java.util.stream.IntStream; -import java.util.stream.Stream; - -/** Aggregates the storage using a provided {@link Function}. 
*/ -public class FunctionAggregator extends Aggregator { - private final Function, Value> aggregateFunction; - private final boolean skipNa; - private final Storage storage; - private final InferredBuilder builder; - - /** - * @param aggregateFunction the function used to obtain aggregation of a group - * @param storage the storage serving as data source - * @param skipNa whether missing values should be passed to the function - * @param resultSize the number of times {@link Aggregator#nextGroup(IntStream)} will be called - */ - public FunctionAggregator( - Function, Value> aggregateFunction, - Storage storage, - boolean skipNa, - int resultSize) { - this.aggregateFunction = aggregateFunction; - this.storage = storage; - this.skipNa = skipNa; - this.builder = new InferredBuilder(resultSize); - } - - @Override - public void nextGroup(IntStream positions) { - List items = getItems(positions); - Value result = aggregateFunction.apply(items); - Object converted = Polyglot_Utils.convertPolyglotValue(result); - builder.appendNoGrow(converted); - } - - private List getItems(IntStream positions) { - Stream items = positions.mapToObj(storage::getItemBoxed); - if (skipNa) { - items = items.filter(Objects::nonNull); - } - return items.collect(Collectors.toList()); - } - - @Override - public Storage seal() { - return builder.seal(); - } -} diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/numeric/LongToLongAggregator.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/numeric/LongToLongAggregator.java deleted file mode 100644 index ea891917474..00000000000 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/numeric/LongToLongAggregator.java +++ /dev/null @@ -1,59 +0,0 @@ -package org.enso.table.data.column.operation.aggregate.numeric; - -import org.enso.table.data.column.operation.aggregate.Aggregator; -import org.enso.table.data.column.storage.LongStorage; -import org.enso.table.data.column.storage.Storage; - -import java.util.BitSet; -import java.util.stream.IntStream; -import java.util.stream.LongStream; - -/** An aggregator consuming a {@link LongStorage} and returning a {@link LongStorage} */ -public abstract class LongToLongAggregator extends Aggregator { - private final LongStorage storage; - private final long[] items; - private final BitSet missing; - private int position = 0; - - /** - * @param storage the data source - * @param resultSize the number of times {@link Aggregator#nextGroup(IntStream)} will be called - */ - public LongToLongAggregator(LongStorage storage, int resultSize) { - this.storage = storage; - this.items = new long[resultSize]; - this.missing = new BitSet(); - } - - /** Used by subclasses to return a missing value from a given group. */ - protected void submitMissing() { - missing.set(position++); - } - - /** - * Used by subclasses to return a value from a given group. - * - * @param value the return value of a group - */ - protected void submit(long value) { - items[position++] = value; - } - - /** - * Runs the aggregation on a particular set of values. 
- * - * @param items the values contained in the current group - */ - protected abstract void runGroup(LongStream items); - - @Override - public void nextGroup(IntStream positions) { - LongStream items = positions.filter(x -> !storage.isNa(x)).mapToLong(storage::getItem); - runGroup(items); - } - - @Override - public Storage seal() { - return new LongStorage(items, items.length, missing); - } -} diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/numeric/NumericAggregator.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/numeric/NumericAggregator.java deleted file mode 100644 index ed8bec7c017..00000000000 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/numeric/NumericAggregator.java +++ /dev/null @@ -1,78 +0,0 @@ -package org.enso.table.data.column.operation.aggregate.numeric; - -import org.enso.table.data.column.operation.aggregate.Aggregator; -import org.enso.table.data.column.storage.DoubleStorage; -import org.enso.table.data.column.storage.NumericStorage; -import org.enso.table.data.column.storage.Storage; - -import java.util.BitSet; -import java.util.OptionalDouble; -import java.util.stream.DoubleStream; -import java.util.stream.IntStream; - -/** - * An aggregator sourcing data from any {@link NumericStorage} and returning a {@link - * DoubleStorage}. - */ -public abstract class NumericAggregator extends Aggregator { - private final NumericStorage storage; - private final long[] data; - private final BitSet missing; - private int position = 0; - - /** - * @param storage the data source - * @param resultSize the number of times {@link Aggregator#nextGroup(IntStream)} will be called - */ - public NumericAggregator(NumericStorage storage, int resultSize) { - this.storage = storage; - this.data = new long[resultSize]; - this.missing = new BitSet(); - } - - /** - * Runs the aggregation on a particular set of values. - * - * @param elements the values contained in the current group - */ - protected abstract void runGroup(DoubleStream elements); - - /** - * Used by subclasses to return a value from a given group. - * - * @param value the return value of a group - */ - protected void submit(double value) { - data[position++] = Double.doubleToRawLongBits(value); - } - - /** - * Used by subclasses to return a value from a given group. - * - * @param value the return value of a group - */ - protected void submit(OptionalDouble value) { - if (value.isPresent()) { - submit(value.getAsDouble()); - } else { - submitMissing(); - } - } - - /** Used by subclasses to return a missing value from a given group. 
*/ - protected void submitMissing() { - missing.set(position++); - } - - @Override - public void nextGroup(IntStream positions) { - DoubleStream elements = - positions.filter(i -> !storage.isNa(i)).mapToDouble(storage::getItemDouble); - runGroup(elements); - } - - @Override - public Storage seal() { - return new DoubleStorage(data, data.length, missing); - } -} diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/LongNumericOp.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/LongNumericOp.java index b0657f4d036..a7222558413 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/LongNumericOp.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/LongNumericOp.java @@ -11,11 +11,11 @@ import java.util.BitSet; /** An operation expecting a numeric argument and returning a boolean. */ public abstract class LongNumericOp extends MapOperation { - private final boolean alwaysCast; + private final boolean alwaysCastToDouble; - public LongNumericOp(String name, boolean alwaysCast) { + public LongNumericOp(String name, boolean alwaysCastToDouble) { super(name); - this.alwaysCast = true; + this.alwaysCastToDouble = alwaysCastToDouble; } public LongNumericOp(String name) { @@ -28,8 +28,7 @@ public abstract class LongNumericOp extends MapOperation { @Override public NumericStorage runMap(LongStorage storage, Object arg) { - if (arg instanceof Long && !alwaysCast) { - long x = (Long) arg; + if (!alwaysCastToDouble && arg instanceof Long x) { long[] newVals = new long[storage.size()]; for (int i = 0; i < storage.size(); i++) { if (!storage.isNa(i)) { @@ -57,12 +56,16 @@ public abstract class LongNumericOp extends MapOperation { BitSet newMissing = new BitSet(); for (int i = 0; i < storage.size(); i++) { if (!storage.isNa(i) && i < v.size() && !v.isNa(i)) { - out[i] = doLong(storage.getItem(i), v.getItem(i)); + out[i] = alwaysCastToDouble + ? Double.doubleToRawLongBits(doDouble(storage.getItem(i), v.getItem(i))) + : doLong(storage.getItem(i), v.getItem(i)); } else { newMissing.set(i); } } - return new LongStorage(out, storage.size(), newMissing); + return alwaysCastToDouble + ? 
new DoubleStorage(out, storage.size(), newMissing) + : new LongStorage(out, storage.size(), newMissing); } else if (arg instanceof DoubleStorage v) { long[] out = new long[storage.size()]; BitSet newMissing = new BitSet(); diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java index e5b7891d053..82b9e11517f 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java @@ -2,6 +2,8 @@ package org.enso.table.data.column.storage; import java.util.BitSet; import java.util.List; +import java.util.function.IntFunction; + import org.enso.base.polyglot.Polyglot_Utils; import org.enso.table.data.column.builder.object.InferredBuilder; import org.enso.table.data.column.operation.map.MapOpStorage; @@ -168,21 +170,29 @@ public final class BoolStorage extends Storage { } public Storage iif(Value when_true, Value when_false) { - Object on_true = Polyglot_Utils.convertPolyglotValue(when_true); - Object on_false = Polyglot_Utils.convertPolyglotValue(when_false); + var on_true = makeRowProvider(when_true); + var on_false = makeRowProvider(when_false); InferredBuilder builder = new InferredBuilder(size); for (int i = 0; i < size; i++) { if (isMissing.get(i)) { builder.append(null); } else if (getItem(i)) { - builder.append(on_true); + builder.append(on_true.apply(i)); } else { - builder.append(on_false); + builder.append(on_false.apply(i)); } } return builder.seal(); } + private static IntFunction makeRowProvider(Value value) { + if (value.isHostObject() && value.asHostObject() instanceof Storage s) { + return i->(Object)s.getItemBoxed(i); + } + var converted = Polyglot_Utils.convertPolyglotValue(value); + return i->converted; + } + private static MapOpStorage buildOps() { MapOpStorage ops = new MapOpStorage<>(); ops.add( diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java index 4553371e7e3..56f61b96309 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java @@ -1,12 +1,9 @@ package org.enso.table.data.column.storage; import java.util.BitSet; -import java.util.HashSet; import java.util.List; -import org.enso.base.polyglot.NumericConverter; import org.enso.table.data.column.builder.object.NumericBuilder; import org.enso.table.data.column.operation.map.MapOpStorage; -import org.enso.table.data.column.operation.map.SpecializedIsInOp; import org.enso.table.data.column.operation.map.UnaryMapOperation; import org.enso.table.data.column.operation.map.numeric.DoubleBooleanOp; import org.enso.table.data.column.operation.map.numeric.DoubleIsInOp; @@ -209,6 +206,13 @@ public final class DoubleStorage extends NumericStorage { return a % b; } }) + .add( + new DoubleNumericOp(Maps.POWER) { + @Override + protected double doDouble(double a, double b) { + return Math.pow(a, b); + } + }) .add( new DoubleBooleanOp(Maps.LT) { @Override diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java index 2215061ef5c..3d056d9f9d6 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java +++ 
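`makeRowProvider` above lets `iif` treat a column-valued branch as a per-row lookup and any other value as a broadcast constant. A standalone sketch of the same selection loop using plain arrays instead of `Storage` (names are illustrative):

```java
import java.util.Arrays;
import java.util.function.IntFunction;

public class IifSketch {
    // A branch is either a per-row lookup (a column) or a constant broadcast to every row.
    static IntFunction<Object> rowProvider(Object branch) {
        if (branch instanceof Object[]) {
            Object[] column = (Object[]) branch;
            return i -> column[i];
        }
        return i -> branch;
    }

    public static void main(String[] args) {
        Boolean[] condition = {true, false, null, true};
        IntFunction<Object> onTrue = rowProvider(new Object[] {"a", "b", "c", "d"}); // column branch
        IntFunction<Object> onFalse = rowProvider("fallback");                       // constant branch

        Object[] result = new Object[condition.length];
        for (int i = 0; i < condition.length; i++) {
            // A missing condition yields a missing result, matching BoolStorage.iif.
            result[i] = condition[i] == null ? null : (condition[i] ? onTrue.apply(i) : onFalse.apply(i));
        }
        System.out.println(Arrays.toString(result)); // [a, fallback, null, d]
    }
}
```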
b/std-bits/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java @@ -1,16 +1,9 @@ package org.enso.table.data.column.storage; import java.util.BitSet; -import java.util.HashSet; import java.util.List; -import java.util.OptionalLong; -import java.util.stream.LongStream; -import org.enso.base.polyglot.NumericConverter; import org.enso.table.data.column.builder.object.NumericBuilder; -import org.enso.table.data.column.operation.aggregate.Aggregator; -import org.enso.table.data.column.operation.aggregate.numeric.LongToLongAggregator; import org.enso.table.data.column.operation.map.MapOpStorage; -import org.enso.table.data.column.operation.map.SpecializedIsInOp; import org.enso.table.data.column.operation.map.UnaryMapOperation; import org.enso.table.data.column.operation.map.numeric.LongBooleanOp; import org.enso.table.data.column.operation.map.numeric.LongIsInOp; @@ -43,17 +36,13 @@ public final class LongStorage extends NumericStorage { this(data, data.length, new BitSet()); } - /** - * @inheritDoc - */ + /** @inheritDoc */ @Override public int size() { return size; } - /** - * @inheritDoc - */ + /** @inheritDoc */ @Override public int countMissing() { return isMissing.cardinality(); @@ -77,17 +66,13 @@ public final class LongStorage extends NumericStorage { return isMissing.get(idx) ? null : data[idx]; } - /** - * @inheritDoc - */ + /** @inheritDoc */ @Override public int getType() { return Type.LONG; } - /** - * @inheritDoc - */ + /** @inheritDoc */ @Override public boolean isNa(long idx) { return isMissing.get((int) idx); @@ -108,46 +93,6 @@ public final class LongStorage extends NumericStorage { return ops.runZip(name, this, argument); } - @Override - protected Aggregator getVectorizedAggregator(String name, int resultSize) { - return switch (name) { - case Aggregators.SUM -> new LongToLongAggregator(this, resultSize) { - @Override - protected void runGroup(LongStream items) { - long[] elements = items.toArray(); - if (elements.length == 0) { - submitMissing(); - } else { - submit(LongStream.of(elements).sum()); - } - } - }; - case Aggregators.MAX -> new LongToLongAggregator(this, resultSize) { - @Override - protected void runGroup(LongStream items) { - OptionalLong r = items.max(); - if (r.isPresent()) { - submit(r.getAsLong()); - } else { - submitMissing(); - } - } - }; - case Aggregators.MIN -> new LongToLongAggregator(this, resultSize) { - @Override - protected void runGroup(LongStream items) { - OptionalLong r = items.min(); - if (r.isPresent()) { - submit(r.getAsLong()); - } else { - submitMissing(); - } - } - }; - default -> super.getVectorizedAggregator(name, resultSize); - }; - } - private Storage fillMissingDouble(double arg) { final var builder = NumericBuilder.createDoubleBuilder(size()); long rawArg = Double.doubleToRawLongBits(arg); @@ -291,6 +236,19 @@ public final class LongStorage extends NumericStorage { return in % arg; } }) + .add( + new LongNumericOp(Maps.POWER, true) { + @Override + public double doDouble(long in, double arg) { + return Math.pow(in, arg); + } + + @Override + public long doLong(long in, long arg) { + throw new IllegalStateException( + "Internal error: Power operation should cast to double."); + } + }) .add( new LongNumericOp(Maps.DIV, true) { @Override @@ -300,7 +258,7 @@ public final class LongStorage extends NumericStorage { @Override public long doLong(long in, long arg) { - return in / arg; + throw new UnsupportedOperationException("Divide operation should cast to double."); } }) .add( diff --git 
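The `POWER` and `DIV` changes above lean on the fact that `DoubleStorage` keeps its doubles as raw IEEE-754 bit patterns in a `long[]`, which is why `LongNumericOp` can fill a `long[]` via `Double.doubleToRawLongBits` and still hand the buffer to a `DoubleStorage`. A tiny JDK-only demonstration of that round trip (standard `java.lang.Double` methods, nothing project-specific):

```java
public class RawBitsSketch {
    public static void main(String[] args) {
        double value = Math.pow(3, 0.5);                  // 3 ^ 0.5, as the new POWER op computes it
        long rawBits = Double.doubleToRawLongBits(value); // packed into a long[] slot, DoubleStorage-style
        double back = Double.longBitsToDouble(rawBits);   // unpacked on read
        System.out.println(value == back);                // true: the bit pattern is preserved exactly
    }
}
```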
a/std-bits/table/src/main/java/org/enso/table/data/column/storage/NumericStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/NumericStorage.java index d577fc41f08..50ebd4bc3e2 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/NumericStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/NumericStorage.java @@ -1,9 +1,5 @@ package org.enso.table.data.column.storage; -import java.util.stream.DoubleStream; -import org.enso.table.data.column.operation.aggregate.Aggregator; -import org.enso.table.data.column.operation.aggregate.numeric.NumericAggregator; - /** A storage containing items representable as a {@code double}. */ public abstract class NumericStorage extends Storage { /** @@ -14,45 +10,4 @@ public abstract class NumericStorage extends Storage { * @return the value associated with {@code idx} */ public abstract double getItemDouble(int idx); - - @Override - protected Aggregator getVectorizedAggregator(String name, int resultSize) { - switch (name) { - case Aggregators.MAX: - return new NumericAggregator(this, resultSize) { - @Override - protected void runGroup(DoubleStream elements) { - submit(elements.max()); - } - }; - case Aggregators.MIN: - return new NumericAggregator(this, resultSize) { - @Override - protected void runGroup(DoubleStream elements) { - submit(elements.min()); - } - }; - case Aggregators.SUM: - return new NumericAggregator(this, resultSize) { - @Override - protected void runGroup(DoubleStream elements) { - double[] its = elements.toArray(); - if (its.length == 0) { - submitMissing(); - } else { - submit(DoubleStream.of(its).sum()); - } - } - }; - case Aggregators.MEAN: - return new NumericAggregator(this, resultSize) { - @Override - protected void runGroup(DoubleStream elements) { - submit(elements.average()); - } - }; - default: - return super.getVectorizedAggregator(name, resultSize); - } - } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java index 545b1e3a302..3c5433397b7 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java @@ -9,9 +9,6 @@ import org.enso.base.polyglot.Polyglot_Utils; import org.enso.table.data.column.builder.object.Builder; import org.enso.table.data.column.builder.object.InferredBuilder; import org.enso.table.data.column.builder.object.ObjectBuilder; -import org.enso.table.data.column.operation.aggregate.Aggregator; -import org.enso.table.data.column.operation.aggregate.CountAggregator; -import org.enso.table.data.column.operation.aggregate.FunctionAggregator; import org.enso.table.data.mask.OrderMask; import org.enso.table.data.mask.SliceRange; import org.graalvm.polyglot.Value; @@ -73,6 +70,7 @@ public abstract class Storage { public static final String SUB = "-"; public static final String DIV = "/"; public static final String MOD = "%"; + public static final String POWER = "^"; public static final String NOT = "not"; public static final String AND = "&&"; public static final String OR = "||"; @@ -86,14 +84,6 @@ public abstract class Storage { public static final String IS_IN = "is_in"; } - public static final class Aggregators { - public static final String SUM = "sum"; - public static final String MEAN = "mean"; - public static final String MAX = "max"; - public static final String MIN = "min"; - public static final String 
COUNT = "count"; - } - /** * Specifies if the given operation has a vectorized implementation available for this storage. */ @@ -137,36 +127,6 @@ public abstract class Storage { return builder.seal(); } - protected Aggregator getVectorizedAggregator(String name, int resultSize) { - if (name.equals(Aggregators.COUNT)) { - return new CountAggregator(this, resultSize); - } - return null; - } - - /** - * Returns an aggregator created based on the provided parameters. - * - * @param name name of a vectorized operation that can be used if possible. If null is passed, - * this parameter is unused. - * @param fallback the function to use if a vectorized operation is not available. - * @param skipNa whether missing values should be passed to the {@code fallback} function. - * @param resultSize the number of times the {@link - * Aggregator#nextGroup(java.util.stream.IntStream)} method will be called. - * @return an aggregator satisfying the above properties. - */ - public final Aggregator getAggregator( - String name, Function, Value> fallback, boolean skipNa, int resultSize) { - Aggregator result = null; - if (name != null) { - result = getVectorizedAggregator(name, resultSize); - } - if (result == null) { - result = new FunctionAggregator(fallback, this, skipNa, resultSize); - } - return result; - } - /** * Runs a function on each non-missing element in this storage and gathers the results. * diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/Column.java b/std-bits/table/src/main/java/org/enso/table/data/table/Column.java index b3254ae0303..1f6967bee90 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/table/Column.java +++ b/std-bits/table/src/main/java/org/enso/table/data/table/Column.java @@ -2,7 +2,6 @@ package org.enso.table.data.table; import org.enso.base.polyglot.Polyglot_Utils; import org.enso.table.data.column.builder.object.InferredBuilder; -import org.enso.table.data.column.operation.aggregate.Aggregator; import org.enso.table.data.column.storage.BoolStorage; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.index.DefaultIndex; @@ -15,8 +14,6 @@ import org.graalvm.polyglot.Value; import java.util.BitSet; import java.util.List; -import java.util.function.Function; -import java.util.stream.IntStream; /** A representation of a column. Consists of a column name and the underlying storage. */ public class Column { @@ -89,11 +86,11 @@ public class Column { * @return the result of masking this column with the provided column */ public Column mask(Column maskCol) { - if (!(maskCol.getStorage() instanceof BoolStorage storage)) { + if (!(maskCol.getStorage() instanceof BoolStorage boolStorage)) { throw new UnexpectedColumnTypeException("Boolean"); } - var mask = BoolStorage.toMask(storage); + var mask = BoolStorage.toMask(boolStorage); var localStorageMask = new BitSet(); localStorageMask.set(0, getStorage().size()); mask.and(localStorageMask); @@ -156,25 +153,6 @@ public class Column { return index; } - /** - * Aggregates the values in this column, using a given aggregation operation. - * - * @param aggName name of a vectorized operation that can be used if possible. If null is passed, - * this parameter is unused. - * @param aggregatorFunction the function to use if a vectorized operation is not available. - * @param skipNa whether missing values should be passed to the {@code fallback} function. - * @return a column indexed by the unique index of this aggregate, storing results of applying the - * specified operation. 
- */ - public Object aggregate( - String aggName, Function, Value> aggregatorFunction, boolean skipNa) { - Aggregator aggregator = storage.getAggregator(aggName, aggregatorFunction, skipNa, 1); - - IntStream ixes = IntStream.range(0, storage.size()); - aggregator.nextGroup(ixes); - return aggregator.seal().getItemBoxed(0); - } - /** * @param mask the reordering to apply * @return a new column, resulting from reordering this column according to {@code mask}. diff --git a/std-bits/table/src/main/java/org/enso/table/expressions/ExpressionVisitorImpl.java b/std-bits/table/src/main/java/org/enso/table/expressions/ExpressionVisitorImpl.java new file mode 100644 index 00000000000..331360c5a9d --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/expressions/ExpressionVisitorImpl.java @@ -0,0 +1,373 @@ +package org.enso.table.expressions; + +import org.antlr.v4.runtime.BaseErrorListener; +import org.antlr.v4.runtime.CharStreams; +import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.RecognitionException; +import org.antlr.v4.runtime.Recognizer; + +import org.graalvm.polyglot.Context; +import org.graalvm.polyglot.PolyglotException; +import org.graalvm.polyglot.Value; + +import java.time.LocalDate; +import java.time.LocalTime; +import java.time.LocalDateTime; +import java.time.ZonedDateTime; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; +import java.util.function.Function; +import java.util.regex.Pattern; + +public class ExpressionVisitorImpl extends ExpressionBaseVisitor { + private static class ThrowOnErrorListener extends BaseErrorListener { + public static final ThrowOnErrorListener INSTANCE = new ThrowOnErrorListener(); + + @Override + public void syntaxError( + Recognizer recognizer, + Object offendingSymbol, + int line, + int charPositionInLine, + String msg, + RecognitionException e) + throws SyntaxErrorException { + throw new SyntaxErrorException(msg, line, charPositionInLine); + } + } + + public static class SyntaxErrorException extends RuntimeException { + private final int line; + private final int column; + + public SyntaxErrorException(String message, int line, int column) { + super(message); + this.line = line; + this.column = column; + } + + public int getLine() { + return line; + } + + public int getColumn() { + return column; + } + } + + public static Value evaluate( + String expression, + Function getColumn, + Function makeConstantColumn, + String moduleName, + String typeName, + String[] variableArgumentFunctions) + throws UnsupportedOperationException, IllegalArgumentException { + var lexer = new ExpressionLexer(CharStreams.fromString(expression)); + lexer.removeErrorListeners(); + lexer.addErrorListener(ThrowOnErrorListener.INSTANCE); + + var tokens = new CommonTokenStream(lexer); + var parser = new ExpressionParser(tokens); + parser.removeErrorListeners(); + parser.addErrorListener(ThrowOnErrorListener.INSTANCE); + + var visitor = + new ExpressionVisitorImpl( + getColumn, makeConstantColumn, moduleName, typeName, variableArgumentFunctions); + + var expr = parser.prog(); + return visitor.visit(expr); + } + + private final Function getColumn; + private final Function makeConstantColumn; + private final Function getMethod; + private final Set variableArgumentFunctions; + + private ExpressionVisitorImpl( + Function getColumn, + Function makeConstantColumn, + String moduleName, + String typeName, + 
String[] variableArgumentFunctions) { + this.getColumn = getColumn; + this.makeConstantColumn = makeConstantColumn; + + final Value module = + Context.getCurrent().getBindings("enso").invokeMember("get_module", moduleName); + final Value type = module.invokeMember("get_type", typeName); + this.getMethod = name -> module.invokeMember("get_method", type, name); + + this.variableArgumentFunctions = new HashSet<>(Arrays.asList(variableArgumentFunctions)); + } + + private Value wrapAsColumn(Value value) { + if (value.isNull()) { + return makeConstantColumn.apply(value); + } + + var metaObject = value.getMetaObject(); + return metaObject != null && metaObject.asHostObject() instanceof Class + ? makeConstantColumn.apply(value) + : value; + } + + private Value executeMethod(String name, Value... args) { + Value method = getMethod.apply(name); + if (!method.canExecute()) { + throw new UnsupportedOperationException(name); + } + + Object[] objects; + if (this.variableArgumentFunctions.contains(name)) { + objects = new Object[2]; + objects[0] = args[0]; + objects[1] = Arrays.copyOfRange(args, 1, args.length, Object[].class); + } else { + objects = Arrays.copyOf(args, args.length, Object[].class); + } + objects[0] = wrapAsColumn(args[0]); + + try { + var result = method.execute(objects); + if (result.canExecute()) { + throw new IllegalArgumentException("Insufficient arguments for method " + name + "."); + } + return result; + } catch (PolyglotException e) { + if (e.getMessage().startsWith("Type error: expected a function")) { + throw new IllegalArgumentException("Too many arguments for method " + name + "."); + } + throw e; + } + } + + @Override + public Value visitProg(ExpressionParser.ProgContext ctx) { + Value base = visit(ctx.expr()); + return wrapAsColumn(base); + } + + @Override + public Value visitColumn(ExpressionParser.ColumnContext ctx) { + var text = ctx.getText(); + return getColumn.apply(text.substring(1, text.length() - 1).replace("]]", "]")); + } + + @Override + public Value visitPower(ExpressionParser.PowerContext ctx) { + return executeMethod("^", visit(ctx.expr(0)), visit(ctx.expr(1))); + } + + @Override + public Value visitMultDivMod(ExpressionParser.MultDivModContext ctx) { + return executeMethod(ctx.op.getText(), visit(ctx.expr(0)), visit(ctx.expr(1))); + } + + @Override + public Value visitCompare(ExpressionParser.CompareContext ctx) { + var op = ctx.op.getText(); + if (op.equals("=")) { + op = "=="; + } + if (op.equals("<>")) { + op = "!="; + } + + return executeMethod(op, visit(ctx.expr(0)), visit(ctx.expr(1))); + } + + @Override + public Value visitLike(ExpressionParser.LikeContext ctx) { + var condition = executeMethod("like", visit(ctx.expr(0)), visit(ctx.expr(1))); + return ctx.NOT_LIKE() != null ? executeMethod("not", condition) : condition; + } + + @Override + public Value visitIsNull(ExpressionParser.IsNullContext ctx) { + var op = ctx.IS_NULL() != null || ctx.IS_NOT_NULL() != null ? "is_missing" : "is_empty"; + var condition = executeMethod(op, visit(ctx.expr())); + return ctx.IS_NOT_NULL() != null || ctx.IS_NOT_EMPTY() != null + ? 
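For functions listed in `var_args_functions` (`is_in`, `coalesce`, `min`, `max`), `executeMethod` above passes the first argument as `self` and bundles the remaining parsed arguments into a single array, so an Enso method that takes one Vector parameter can be called with a variable number of expression arguments. A standalone sketch of that regrouping using plain `Object` values instead of polyglot `Value`s (names are illustrative):

```java
import java.util.Arrays;
import java.util.Set;

public class VarArgsRegroupSketch {
    private static final Set<String> VAR_ARGS_FUNCTIONS = Set.of("is_in", "coalesce", "min", "max");

    // Regroups [self, a, b, c] into [self, [a, b, c]] for variable-argument functions,
    // and leaves the argument list untouched for everything else.
    static Object[] regroup(String name, Object[] args) {
        if (VAR_ARGS_FUNCTIONS.contains(name)) {
            return new Object[] {args[0], Arrays.copyOfRange(args, 1, args.length)};
        }
        return args;
    }

    public static void main(String[] args) {
        Object[] regrouped = regroup("min", new Object[] {"[A]", 2, 3});
        System.out.println(regrouped.length);                         // 2: self plus one Vector-style argument
        System.out.println(Arrays.toString((Object[]) regrouped[1])); // [2, 3]
    }
}
```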
executeMethod("not", condition) + : condition; + } + + @Override + public Value visitIf(ExpressionParser.IfContext ctx) { + return executeMethod("iif", visit(ctx.expr(0)), visit(ctx.expr(1)), visit(ctx.expr(2))); + } + + @Override + public Value visitAddSub(ExpressionParser.AddSubContext ctx) { + return executeMethod(ctx.op.getText(), visit(ctx.expr(0)), visit(ctx.expr(1))); + } + + @Override + public Value visitAnd(ExpressionParser.AndContext ctx) { + return executeMethod("&&", visit(ctx.expr(0)), visit(ctx.expr(1))); + } + + @Override + public Value visitOr(ExpressionParser.OrContext ctx) { + return executeMethod("||", visit(ctx.expr(0)), visit(ctx.expr(1))); + } + + @Override + public Value visitUnaryNot(ExpressionParser.UnaryNotContext ctx) { + return executeMethod("not", visit(ctx.expr())); + } + + @Override + public Value visitUnaryMinus(ExpressionParser.UnaryMinusContext ctx) { + return executeMethod("*", visit(ctx.expr()), Value.asValue(-1)); + } + + @Override + public Value visitNullOrNothing(ExpressionParser.NullOrNothingContext ctx) { + return Value.asValue(null); + } + + @Override + public Value visitBoolean(ExpressionParser.BooleanContext ctx) { + return Value.asValue(ctx.TRUE() != null); + } + + @Override + public Value visitNumber(ExpressionParser.NumberContext ctx) { + var text = ctx.getText().replace("_", ""); + if (text.contains(".")) { + return Value.asValue(Double.parseDouble(text)); + } else { + return Value.asValue(Long.parseLong(text)); + } + } + + @Override + public Value visitExcelString(ExpressionParser.ExcelStringContext ctx) { + var text = ctx.getText(); + return Value.asValue(text.substring(1, text.length() - 1).replace("\"\"", "\"")); + } + + private static final Pattern pythonRegex = Pattern.compile("(\\\\[abtnfrv\"'\\\\])|(\\\\(x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{4}))|\\\\|([^\\\\]+)"); + + private static String unescapePython(String text) { + var matcher = pythonRegex.matcher(text); + var builder = new StringBuilder(text.length()); + while (matcher.find()) { + if (matcher.group(1) != null) { + builder.append(switch (matcher.group(1).charAt(1)) { + case 'a' -> (char) 0x07; + case 'f' -> (char) 0x0c; + case 'b' -> '\b'; + case 't' -> '\t'; + case 'r' -> '\r'; + case 'n' -> '\n'; + case 'v' -> (char) 0x0b; + case '\\' -> '\\'; + case '\'' -> '\''; + case '"' -> '"'; + default -> throw new IllegalArgumentException("Unknown Python escape sequence."); + }); + } else if (matcher.group(2) != null) { + builder.append((char) Integer.parseInt(matcher.group(2).substring(2), 16)); + } else { + builder.append(matcher.group(0)); + } + } + return builder.toString(); + } + + @Override + public Value visitPythonString(ExpressionParser.PythonStringContext ctx) { + var text = ctx.getText(); + return Value.asValue(unescapePython(text.substring(1, text.length() - 1))); + } + + @Override + public Value visitDate(ExpressionParser.DateContext ctx) { + var text = ctx.text.getText(); + try { + return Value.asValue(LocalDate.parse(ctx.text.getText())); + } catch (DateTimeParseException e) { + throw new SyntaxErrorException( + "Invalid Date format: " + text, + ctx.getStart().getLine(), + ctx.getStart().getCharPositionInLine()); + } + } + + @Override + public Value visitTime(ExpressionParser.TimeContext ctx) { + var text = ctx.text.getText(); + try { + return Value.asValue(LocalTime.parse(ctx.text.getText())); + } catch (DateTimeParseException e) { + throw new SyntaxErrorException( + "Invalid Time format: " + text, + ctx.getStart().getLine(), + 
ctx.getStart().getCharPositionInLine()); + } + } + + @Override + public Value visitDatetime(ExpressionParser.DatetimeContext ctx) { + var text = ctx.text.getText().replace(' ', 'T'); + var timezone = text.contains("[") ? text.substring(text.indexOf('[')) : ""; + text = text.substring(0, text.length() - timezone.length()); + + var zoneId = + timezone.equals("") + ? ZoneId.systemDefault() + : ZoneId.of(timezone.substring(1, timezone.length() - 1)); + + try { + var zonedDateTime = + ZonedDateTime.parse(text, DateTimeFormatter.ISO_OFFSET_DATE_TIME.withZone(zoneId)); + return Value.asValue(zonedDateTime); + } catch (DateTimeParseException ignored) { + } + + try { + var localDateTime = LocalDateTime.parse(text); + return Value.asValue(localDateTime.atZone(zoneId)); + } catch (DateTimeParseException e) { + throw new SyntaxErrorException( + "Invalid Date_Time format: " + text, + ctx.getStart().getLine(), + ctx.getStart().getCharPositionInLine()); + } + } + + @Override + public Value visitParen(ExpressionParser.ParenContext ctx) { + return visit(ctx.expr()); + } + + @Override + public Value visitBetween(ExpressionParser.BetweenContext ctx) { + var self = visit(ctx.expr(0)); + var lower = visit(ctx.expr(1)); + var upper = visit(ctx.expr(2)); + var condition = executeMethod("between", self, lower, upper); + return ctx.NOT_BETWEEN() != null ? executeMethod("not", condition) : condition; + } + + @Override + public Value visitIn(ExpressionParser.InContext ctx) { + var args = ctx.expr().stream().map(this::visit).toArray(Value[]::new); + var condition = executeMethod("is_in", args); + return ctx.NOT_IN() != null ? executeMethod("not", condition) : condition; + } + + @Override + public Value visitFunction(ExpressionParser.FunctionContext ctx) { + var name = ctx.IDENTIFIER().getText().toLowerCase(); + var args = ctx.expr().stream().map(this::visit).toArray(Value[]::new); + return executeMethod(name, args); + } +} diff --git a/test/Table_Tests/src/Column_Spec.enso b/test/Table_Tests/src/Column_Spec.enso index 16cb2dab8ae..ce27c36c333 100644 --- a/test/Table_Tests/src/Column_Spec.enso +++ b/test/Table_Tests/src/Column_Spec.enso @@ -53,11 +53,9 @@ spec = Test.group "Columns" <| test_column.take (Last 7) . to_vector . should_equal expected_2.to_vector test_column.take (Last 0) . to_vector . should_equal expected_3.to_vector - Test.specify "should be able to get the first / head element" <| + Test.specify "should be able to get the first element" <| test_column.first . should_equal 1 - test_column.head . should_equal 1 empty_column.first.should_fail_with Empty_Error - empty_column.head.should_fail_with Empty_Error Test.specify "should be able to get the last element" <| test_column.last . should_equal 6 @@ -85,25 +83,12 @@ spec = Test.group "Columns" <| col = Examples.decimal_column.set_index Examples.integer_column col.index.to_vector . should_equal Examples.integer_column.to_vector - Test.specify "should allow computing variance and standard deviation" <| - const = Column.from_vector 'const' [1, 1, 1, 1, 1] - const.variance . should_equal 0 - const.standard_deviation.should_equal 0 - - rand = Column.from_vector 'random' [10.0, 4.2, 6.8, 6.2, 7.2] - rand.variance . should_equal 4.372 - rand.variance degrees_of_freedom_correction=0 . should_equal 3.4976 - rand.standard_deviation . should_equal 2.090932806 epsilon=(10 ^ -6) - rand.standard_deviation degrees_of_freedom_correction=0 . 
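`visitDatetime` above accepts the `#...#` literals from the grammar by splitting off an optional `[Zone]` suffix, first attempting an ISO offset parse and then falling back to a local date-time interpreted in that zone. A minimal standalone sketch of the same two-stage strategy using only `java.time`; the converted-to-UTC result matches the expectation in `Expression_Spec` below:

```java
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;

public class DateTimeLiteralSketch {
    static ZonedDateTime parseLiteral(String literal) {
        String text = literal.replace(' ', 'T');
        String timezone = text.contains("[") ? text.substring(text.indexOf('[')) : "";
        text = text.substring(0, text.length() - timezone.length());

        ZoneId zoneId = timezone.isEmpty()
            ? ZoneId.systemDefault()
            : ZoneId.of(timezone.substring(1, timezone.length() - 1));

        try {
            // First attempt: the literal carries its own offset, e.g. "+02:30" or "Z".
            return ZonedDateTime.parse(text, DateTimeFormatter.ISO_OFFSET_DATE_TIME.withZone(zoneId));
        } catch (DateTimeParseException ignored) {
            // Fallback: no offset present, so interpret the local date-time in the requested zone.
            return LocalDateTime.parse(text).atZone(zoneId);
        }
    }

    public static void main(String[] args) {
        System.out.println(parseLiteral("2020-12-23 12:34:56+02:30[UTC]"));  // instant converted to UTC: 10:04:56
        System.out.println(parseLiteral("2020-12-23T12:34[Europe/Warsaw]")); // offset-free fallback path
    }
}
```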
should_equal 1.870187156 epsilon=(10 ^ -6) - - Test.specify "should allow computing the R² score of a prediction" <| - sample = Column.from_vector 'sample' [1,2,3,4,5] - mean_pred = Column.from_vector 'mean' [3,3,3,3,3] - perfect_pred = Column.from_vector 'perfect' [1,2,3,4,5] - bad_pred = Column.from_vector 'bad' [5,4,3,2,1] - - sample.r_squared mean_pred . should_equal 0 - sample.r_squared perfect_pred . should_equal 1 - sample.r_squared bad_pred . should_equal -3 + Test.specify "should result in correct Storage if operation allows it" <| + another = Column.from_vector "Test" [10, 20, 30, 40, 50, 60] + (test_column + 1).storage_type . should_equal Storage.Integer + (test_column - 1).storage_type . should_equal Storage.Integer + (test_column * 2).storage_type . should_equal Storage.Integer + (test_column * 1.5).storage_type . should_equal Storage.Decimal + (test_column + another).storage_type . should_equal Storage.Integer main = Test_Suite.run_main spec diff --git a/test/Table_Tests/src/Database/Codegen_Spec.enso b/test/Table_Tests/src/Database/Codegen_Spec.enso index 8bf16013897..2b8f242e47b 100644 --- a/test/Table_Tests/src/Database/Codegen_Spec.enso +++ b/test/Table_Tests/src/Database/Codegen_Spec.enso @@ -79,7 +79,7 @@ spec = Test.specify "should support simple text operations" <| b = t1.at "B" add = b + "SUFFIX" - add.to_sql.prepare . should_equal ['SELECT ("T1"."B" + ?) AS "B" FROM "T1" AS "T1"', [["SUFFIX", str]]] + add.to_sql.prepare . should_equal ['SELECT ("T1"."B" || ?) AS "B" FROM "T1" AS "T1"', [["SUFFIX", str]]] ends = b.ends_with "suf" starts = b.starts_with "pref" diff --git a/test/Table_Tests/src/Database/Common_Spec.enso b/test/Table_Tests/src/Database/Common_Spec.enso index 7a4465b1071..bf4013ddcb2 100644 --- a/test/Table_Tests/src/Database/Common_Spec.enso +++ b/test/Table_Tests/src/Database/Common_Spec.enso @@ -12,8 +12,6 @@ from Standard.Test import Test, Problems import project.Database.Helpers.Name_Generator spec prefix connection pending=Nothing = - eps=0.000001 - tables_to_clean = Vector.new_builder upload prefix data = name = Name_Generator.random_name prefix @@ -185,16 +183,6 @@ spec prefix connection pending=Nothing = col.count . should_equal 3 col.count_missing . 
should_equal 2 - Test.group prefix+"Column-wide statistics" pending=pending <| - Test.specify 'should allow computing basic column-wide stats' <| - t7 = upload "T7" <| - Table.new [['price', [0.4, 3.5, Nothing, 6.7, Nothing, 97, Nothing]]] - price = t7.at 'price' - price.sum.should_equal 107.6 epsilon=eps - price.min.should_equal 0.4 epsilon=eps - price.max.should_equal 97 epsilon=eps - price.mean.should_equal 26.9 epsilon=eps - Test.group prefix+"Sorting" pending=pending <| df = upload "clothes" <| Table.new [["id", [1,2,3,4,5,6]], ["name", ["shoes","trousers","dress","skirt","blouse","t-shirt"]], ["quantity", [20,10,20,10,30,30]], ["rating", [3.0,Nothing,7.3,3.0,2.2,Nothing]], ["price", [37.2,42.1,64.1,87.4,13.5,64.2]]] diff --git a/test/Table_Tests/src/Expression_Spec.enso b/test/Table_Tests/src/Expression_Spec.enso new file mode 100644 index 00000000000..156096963ed --- /dev/null +++ b/test/Table_Tests/src/Expression_Spec.enso @@ -0,0 +1,290 @@ +from Standard.Base import all +from Standard.Base.Error.Problem_Behavior import Report_Error + +from Standard.Table import Table, Column, Sort_Column, Column_Selector, Sort_Column_Selector, Aggregate_Column +import Standard.Table.Data.Expression.Expression_Error + +import Standard.Visualization + +from Standard.Test import Test, Test_Suite, Problems + +import project.Common_Table_Spec +from project.Util import all + +spec detailed=False = + # Create Test Table + column_a = ["A", [1, 2, 3, 4, 5]] + column_b = ["B", [1.0, 1.5, 2.5, 4, 6]] + column_c = ["C", ["Hello", "World", "Hello World!", "", Nothing]] + column_odd = ["Bad] Name", [True, False, True, False, True]] + test_table = Table.new [column_a, column_b, column_c, column_odd] + + epsilon=0.0000000001 + + tester expression value = + new_table = test_table.set "NEW_COL" expression + new_table.column_count . should_equal (test_table.column_count + 1) + new_column = new_table.at -1 + new_column.name . should_equal "NEW_COL" + expected = case value of + _ : Vector.Vector -> value + _ -> Vector.new test_table.row_count _->value + + values = new_column.to_vector + values . 
+            e = expected.at i
+            match = case e of
+                _ : Number -> e.equals v epsilon
+                _ -> e == v
+            if match.not then values.should_equal expected
+
+    specify_test label action expression_test=tester =
+        case detailed of
+            True ->
+                specify_tester expression value =
+                    Test.specify (label + ": " + expression) <|
+                        expression_test expression value
+                action specify_tester
+            False ->
+                Test.specify label (action expression_test)
+
+    Test.group "Expression Integer literals" <|
+        specify_test "should be able to add an integer column" expression_test->
+            expression_test "1" 1
+            expression_test "-3" -3
+            expression_test "1_000" 1000
+
+    Test.group "Expression Decimal literals" <|
+        specify_test "should be able to add a decimal column" expression_test->
+            expression_test "1.23" 1.23
+            expression_test "-3.1415" -3.1415
+            expression_test "1_000.456" 1000.456
+
+    Test.group "Expression Boolean literals" <|
+        specify_test "should be able to add a boolean column" expression_test->
+            expression_test "True" True
+            expression_test "true" True
+            expression_test "TRUE" True
+            expression_test "tRuE" True
+            expression_test "False" False
+            expression_test "false" False
+            expression_test "FALSE" False
+            expression_test "FaLsE" False
+
+    Test.group "Expression Text literals" <|
+        specify_test "should be able to add a text column" expression_test->
+            expression_test "'Hello World'" 'Hello World'
+            expression_test "'Hello \'World\''" "Hello 'World'"
+            expression_test '"Hello World"' 'Hello World'
+            expression_test '"Hello ""World"""' 'Hello "World"'
+            expression_test '"Hello \\""World"""' 'Hello \\"World"'
+            expression_test "'Alpha\r\n\gBeta'" 'Alpha\r\n\\gBeta'
+
+    Test.group "Expression Column references" <|
+        specify_test "should be able to get a Column" expression_test->
+            expression_test "[A]" (column_a.at 1)
+            expression_test "[Bad]] Name]" (column_odd.at 1)
+
+    Test.group "Expression Nothing literals" <|
+        specify_test "should be able to add a Nothing column" expression_test->
+            expression_test "null" Nothing
+            expression_test "nUlL" Nothing
+            expression_test "Nothing" Nothing
+            expression_test "NOTHING" Nothing
+
+    Test.group "Expression Date and Time literals" <|
+        specify_test "should be able to add a date or time column" expression_test->
+            expression_test "#2020-12-23#" (Date.new 2020 12 23)
+            expression_test "#12:34#" (Time_Of_Day.new 12 34)
+            expression_test "#12:34:56#" (Time_Of_Day.new 12 34 56)
+            expression_test "#12:34:56.789#" (Time_Of_Day.new 12 34 56 789000000)
+            expression_test "#12:34:56.789000123#" (Time_Of_Day.new 12 34 56 789000123)
+            expression_test "#2020-12-23 12:34#" (Date_Time.new 2020 12 23 12 34)
+            expression_test "#2020-12-23 12:34:56#" (Date_Time.new 2020 12 23 12 34 56)
+            expression_test "#2020-12-23 12:34:56Z[UTC]#" (Date_Time.new 2020 12 23 12 34 56 zone=Time_Zone.utc)
+            expression_test "#2020-12-23 12:34:56+02:30[UTC]#" (Date_Time.new 2020 12 23 10 04 56 zone=Time_Zone.utc)
+            expression_test "#2020-12-23 12:34:56.157+01[UTC]#" (Date_Time.new 2020 12 23 11 34 56 157000000 zone=Time_Zone.utc)
+            expression_test "#2020-12-23T12:34[Europe/Warsaw]#" (Date_Time.new 2020 12 23 12 34 zone=Time_Zone.parse("Europe/Warsaw"))
+
+    Test.group "Expression Arithmetic" <|
+        specify_test "should be able to do basic arithmetic" expression_test->
+            expression_test "1+1" 2
+            expression_test "23-15" 8
+            expression_test "2.5*4.2" 10.5
+            expression_test "1_000.456/2" 500.228
+            expression_test "2^4" 16
+            expression_test "11%3" 2
+
+        specify_test "should be able to do basic arithmetic with order" expression_test->
order" expression_test-> + expression_test "1+1*2+2" 5 + expression_test "23-15/3+6" 24 + expression_test "52.92/4.2^2" 3 + expression_test "(1+1)*2+2" 6 + + specify_test "should be able to do basic arithmetic with whitespace" expression_test-> + expression_test "1 + 1" 2 + expression_test " 23 -15 " 8 + expression_test "2.5* 4.2" 10.5 + expression_test "1_000.456/ 2" 500.228 + expression_test " 2 ^ 4 " 16 + expression_test " 11 % 3 " 2 + expression_test "1+1 * 2" 3 + expression_test "1 + 1*2" 3 + + Test.group "Column Arithmetic" <| + specify_test "should be able to perform arithmetic on columns" expression_test-> + expression_test "[A] + 2" [3, 4, 5, 6, 7] + expression_test "[B] - 2" [-1, -0.5, 0.5, 2, 4] + expression_test "[A] * 4" [4, 8, 12, 16, 20] + expression_test "[B] / 2" [0.5, 0.75, 1.25, 2, 3] + expression_test "[A] + [B]" [2, 3.5, 5.5, 8, 11] + expression_test "[A] - [B]" [0, 0.5, 0.5, 0, -1] + expression_test "[A] * [B]" [1, 3, 7.5, 16, 30] + expression_test "[B] / [A]" [1, 0.75, 0.8333333333333334, 1, 1.2] + expression_test "[A] ^ [B]" [1, 2.8284271247461903, 15.588457268119896, 256, 15625] + expression_test "[A] % [B]" [0, 0.5, 0.5, 0, 5] + expression_test "[A] + [B] + [A]" [3, 5.5, 8.5, 12, 16] + expression_test "[A] - [B] - [A]" [-1, -1.5, -2.5, -4, -6] + expression_test "[A] * [B] * [A]" [1, 6, 22.5, 64, 150] + expression_test "[A] / [B] / [A]" [1.0, 0.6666666666666667, 0.39999999999999999, 0.25, 0.16666666666666667] + expression_test "[A] ^ [B] * [A]" [1, 5.65685424949238, 46.7653718043597, 1024, 78125] + expression_test "[A] % [B] % [A]" [0, 0.5, 0.5, 0, 0] + + specify_test "should be able to perform arithmetic on columns with order" expression_test-> + expression_test "([A] + [B]) * 3" [6, 10.5, 16.5, 24, 33] + expression_test "[A] * (4 + [B])" [5, 11, 19.5, 32, 50] + expression_test "[A] * [B] + [A]" [2, 5, 10.5, 20, 35] + expression_test "[A] + [B] * [B]" [2, 4.25, 9.25, 20, 41] + expression_test "([A] + [B]) / [A]" [2, 1.75, 1.83333333333333, 2, 2.2] + expression_test "[A] / [B] + 2" [3, 3.33333333333333, 3.2, 3, 2.83333333333333] + expression_test "([A] + [B]) % 4" [2, 3.5, 1.5, 0, 3] + expression_test "[A] % [B] + 2" [2, 2.5, 2.5, 2, 7] + expression_test "([A] - [B]) ^ [A]" [0, 0.25, 0.125, 0, -1] + expression_test "[A] ^ ([B] - [A])" [1, 0.707106781186547, 0.577350269189626, 1, 5] + + Test.group "Comparison Operators" <| + specify_test "should be able to compare equality" expression_test-> + expression_test "2 = 1 + 1" True + expression_test "2 == 1 + 1" True + expression_test "[A] = 2" [False, True, False, False, False] + expression_test "[A] == 2" [False, True, False, False, False] + expression_test "3 != 1 + 1" True + expression_test "3 <> 1 + 1" True + expression_test "[A] != 2" [True, False, True, True, True] + expression_test "[A] <> 2" [True, False, True, True, True] + + specify_test "should be able to compare ordering" expression_test-> + expression_test "1 > 2" False + expression_test "1 < 2" True + expression_test "[A] > 2" [False, False, True, True, True] + expression_test "[A] >= 2" [False, True, True, True, True] + expression_test "[A] < 2" [True, False, False, False, False] + expression_test "[A] <= 2" [True, True, False, False, False] + + specify_test "should be able to use between" expression_test-> + expression_test "1 + 1 BETWEEN 1 AND 3" True + expression_test "1 + 1 between 2 AND 3" True + expression_test "1 + 1 bETWEEN 1 AND 2" True + expression_test "[A] between 2 AND 3" [False, True, True, False, False] + expression_test "1 + 1 NOT BETWEEN 1 
AND 3" False + expression_test "[A] not between 2 AND 3" [True, False, False, True, True] + + specify_test "should be able to use in" expression_test-> + expression_test "1 + 1 IN (2, 4, 6)" True + expression_test "[A] IN (2, 4, 6)" [False, True, False, True, False] + expression_test "1 + 1 NOT IN (2, 4, 6)" False + expression_test "[A] NOT IN (2, 4, 6)" [True, False, True, False, True] + expression_test "[A] IN (3)" [False, False, True, False, False] + expression_test "[A] NOT IN (3)" [True, True, False, True, True] + + specify_test "should be able to check null" expression_test-> + expression_test "1 IS NULL" False + expression_test "1 IS NoTHing" False + expression_test "Nothing IS NULL" True + expression_test "1 IS NOT NULL" True + expression_test "Nothing IS NOT NULL" False + expression_test "[A] IS NULL" [False, False, False, False, False] + expression_test "[C] IS NULL" [False, False, False, False, True] + expression_test "[A] IS NOT NULL" [True, True, True, True, True] + expression_test "[C] IS NOT NULL" [True, True, True, True, False] + + specify_test "should be able to check empty" expression_test-> + expression_test "'Hello World' IS EMPTY" False + expression_test "'' IS EMPTY" True + expression_test "Nothing IS EMPTY" True + expression_test "'Hello World' IS NOT EMPTY" True + expression_test "'' IS NOT EMPTY" False + expression_test "Nothing IS NOT EMPTY" False + + Test.group "Text Operators" <| + specify_test "should be able to concatenate text" expression_test-> + expression_test "'Hello ' + 'World'" "Hello World" + expression_test "[C] + ' World'" ["Hello World", "World World", "Hello World! World", " World", Nothing] + expression_test "'Hello ' + [C]" ["Hello Hello", "Hello World", "Hello Hello World!", "Hello ", Nothing] + expression_test "[C] + [C]" ["HelloHello", "WorldWorld", "Hello World!Hello World!", "", Nothing] + + specify_test "should be able to use like" expression_test-> + expression_test "'Hello World' LIKE 'Hello%'" True + expression_test "'Hello' LIKE 'H_llo'" True + expression_test "'Hello' LIKE 'H_l%'" True + expression_test "'Hello' LIKE 'H___o'" True + expression_test "'World' LIKE 'H___o'" False + expression_test "'Hello World' NOT LIKE 'Hello%'" False + expression_test "[C] LIKE 'Hello%'" [True, False, True, False, Nothing] + expression_test "[C] NOT LIKE 'Hello%'" [False, True, False, True, Nothing] + + Test.group "Boolean Operators" <| + specify_test "should be able to AND booleans" expression_test-> + expression_test "True && TRUE" True + expression_test "True AND False" False + expression_test "True && [Bad]] Name]" [True, False, True, False, True] + expression_test "False AND [Bad]] Name]" False + + specify_test "should be able to OR booleans" expression_test-> + expression_test "True || TRUE" True + expression_test "True OR False" True + expression_test "False OR False" False + expression_test "True OR [Bad]] Name]" True + expression_test "False || [Bad]] Name]" [True, False, True, False, True] + + specify_test "should be able to NOT booleans" expression_test-> + expression_test "!TRUE" False + expression_test "Not False" True + expression_test "NOT [Bad]] Name]" [False, True, False, True, False] + + specify_test "should be able to use IF" expression_test-> + expression_test "IF True THEN 1 ELSE 0" 1 + expression_test "IF False THEN 'A' ELSE 'B' END" 'B' + expression_test "IF [Bad]] Name] THEN [A] ELSE [B] ENDIF" [1, 1.5, 3, 4, 5] + + Test.group "Function invocation" <| + specify_test "should be able to call a function with arguments" 
+            expression_test "Not(True)" False
+            expression_test "not(False)" True
+            expression_test "iif(True, 1, 3)" 1
+            expression_test "iif([Bad]] Name], 2, 3)" [2, 3, 2, 3, 2]
+
+        specify_test "should be able to call a variable args function" expression_test->
+            expression_test "min(10, 3, 8)" 3
+            expression_test "max([A], [B], 3)" [3, 3, 3, 4, 6]
+
+    Test.group "Errors should be handled" <|
+        error_tester expression fail_type =
+            test_table.set "NEW_COL" expression on_problems=Problem_Behavior.Report_Error . should_fail_with fail_type
+            test_table.set "NEW_COL" expression . column_count . should_equal test_table.column_count
+
+        specify_test "should fail with Syntax_Error if badly formed" expression_test=error_tester expression_test->
+            expression_test "IIF [A] THEN 1 ELSE 2" Expression_Error.Syntax_Error
+            expression_test "A + B" Expression_Error.Syntax_Error
+            expression_test "#2022-31-21#" Expression_Error.Syntax_Error
+
+        specify_test "should fail with Unsupported_Operation if the function is unknown" expression_test=error_tester expression_test->
+            expression_test "unknown([C])" Expression_Error.Unsupported_Operation
+
+        specify_test "should fail with Argument_Mismatch if not sufficient arguments" expression_test=error_tester expression_test->
+            expression_test "starts_with([C])" Expression_Error.Argument_Mismatch
+
+        specify_test "should fail with Argument_Mismatch if too many arguments" expression_test=error_tester expression_test->
+            expression_test "starts_with([C], 'Hello', 'World')" Expression_Error.Argument_Mismatch
+
+main = Test_Suite.run_main (spec True)
diff --git a/test/Table_Tests/src/Main.enso b/test/Table_Tests/src/Main.enso
index d22c805425a..a8e0449740e 100644
--- a/test/Table_Tests/src/Main.enso
+++ b/test/Table_Tests/src/Main.enso
@@ -5,8 +5,10 @@ from Standard.Test import Test_Suite
 import project.In_Memory_Tests
 import project.Database.Main as Database_Tests
 import project.Data_Formatter_Spec
+import project.Expression_Spec

 main = Test_Suite.run_main <|
     In_Memory_Tests.in_memory_spec
-    Database_Tests.databases_spec
     Data_Formatter_Spec.spec
+    Expression_Spec.spec
+    Database_Tests.databases_spec
diff --git a/test/Table_Tests/src/Table_Spec.enso b/test/Table_Tests/src/Table_Spec.enso
index c569005b512..9422181059f 100644
--- a/test/Table_Tests/src/Table_Spec.enso
+++ b/test/Table_Tests/src/Table_Spec.enso
@@ -425,14 +425,6 @@ spec =
         i.at "Items Count" . to_vector . should_equal [3, 2, 4]
         i.at "Storage Type" . to_vector . should_equal [Storage.Text, Storage.Integer, Storage.Any]

-    Test.group "Column-wide statistics" <|
-        Test.specify 'should allow computing basic column-wide stats' <|
-            price = Column.from_vector 'price' [0.4, 3.5, Nothing, 6.7, Nothing, 97, Nothing]
-            price.sum.should_equal 107.6
-            price.min.should_equal 0.4
-            price.max.should_equal 97
-            price.mean.should_equal 26.9
-
     Test.group "Sorting Tables" <|
         df = (enso_project.data / "clothes.csv").read
diff --git a/tools/legal-review/Table/org.antlr.antlr4-runtime-4.10.1/copyright-ignore b/tools/legal-review/Table/org.antlr.antlr4-runtime-4.10.1/copyright-ignore
new file mode 100644
index 00000000000..36df61a1478
--- /dev/null
+++ b/tools/legal-review/Table/org.antlr.antlr4-runtime-4.10.1/copyright-ignore
@@ -0,0 +1 @@
+~ Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
diff --git a/tools/legal-review/Table/org.antlr.antlr4-runtime-4.10.1/copyright-keep b/tools/legal-review/Table/org.antlr.antlr4-runtime-4.10.1/copyright-keep
new file mode 100644
index 00000000000..960b2a8926c
--- /dev/null
+++ b/tools/legal-review/Table/org.antlr.antlr4-runtime-4.10.1/copyright-keep
@@ -0,0 +1 @@
+Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
diff --git a/tools/legal-review/Table/report-state b/tools/legal-review/Table/report-state
index f7615b77778..565a0f0f3b2 100644
--- a/tools/legal-review/Table/report-state
+++ b/tools/legal-review/Table/report-state
@@ -1,3 +1,3 @@
-3D20F317407799FC2002CA1A005A2F5CDBFE3A082AD7BA59D08F04270EF9B88C
-0DF140BB506529B02B8A79B1E32040D7B4515E690EB2C8F32B7F74DD0E821719
+840031EDBA6D7166EE1BABF8D1AB65F7219F5258683A2D487D12D3D4B8387BD7
+4BC5787A7330388C3B8BF8C5955FEFB57E57CB47DFAA243180AF0DA066E3D0D6
 0
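Illustrative usage of the expression syntax exercised by Expression_Spec above (a minimal sketch based on the Table.set calls in the tests; the table, column names, and derived-column names here are hypothetical and not part of this change set):

    from Standard.Table import Table

    example_derived_columns =
        # A hypothetical in-memory table with two numeric columns.
        table = Table.new [["A", [1, 2, 3]], ["B", [10, 20, 30]]]
        # Derive a column from an expression Text, mirroring `test_table.set "NEW_COL" expression` in the spec.
        with_total = table.set "Total" "[A] + [B]"
        # Column references, comparisons and functions such as iif can be combined inside one expression.
        with_total.set "Size" "iif([B] > 15, 'big', 'small')"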