Aggregates in the Database library - MVP (#3353)

Implements the infrastructure for new aggregations in the Database library. It ships with only some basic aggregations and limited error handling; more aggregations and problem handling will be added in subsequent PRs.

# Important Notes
This introduces basic aggregations using our existing codegen and sets up our testing infrastructure so that the same aggregate tests used for the in-memory backend can also be run against the database backends.

Many aggregations are not yet implemented - they will be added in subsequent tasks.

There are some TODOs left - they will be addressed in the next tasks.
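
For context, the new aggregations are driven by the shared `Aggregate_Column` descriptors; a minimal usage sketch based on the tests in this PR (the column names are illustrative, and `Group_By` is shown relying on its default new name):

```
from Standard.Table.Data.Aggregate_Column import all

## Group by one column and compute a few aggregates over the others.
example_aggregate table =
    table.aggregate [Group_By "Flag", Count Nothing, Sum "Value", Average "Value"]
```

The same call is meant to work for in-memory tables and, through the new codegen, for database-backed tables.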
Radosław Waśko 2022-03-28 17:51:37 +02:00 committed by GitHub
parent bec7a58a28
commit 20be5516a5
33 changed files with 1178 additions and 519 deletions

View File

@ -278,7 +278,6 @@ jobs:
run: |
$ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Tests
$ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Table_Tests
$ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Database_Tests
$ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Geo_Tests
$ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Visualization_Tests
$ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Image_Tests
@ -304,7 +303,6 @@ jobs:
run: |
$ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Tests
$ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Table_Tests
$ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Database_Tests
$ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Geo_Tests
$ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Visualization_Tests
$ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Image_Tests
@ -315,7 +313,6 @@ jobs:
run: |
$ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Tests
$ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Table_Tests
$ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Database_Tests
$ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Geo_Tests
$ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Visualization_Tests
$ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Image_Tests
@ -341,7 +338,6 @@ jobs:
run: |
$ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Tests
$ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Table_Tests
$ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Database_Tests
$ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Geo_Tests
$ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Visualization_Tests
$ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Image_Tests

View File

@ -5,8 +5,9 @@ import Standard.Database.Data.Internal.IR
import Standard.Database.Data.Sql
import Standard.Database.Data.Table as Database_Table
import Standard.Table.Data.Table as Materialized_Table
import Standard.Table.Data.Storage
import Standard.Table.Internal.Java_Exports
import Standard.Database.Data.Internal.Base_Generator
from Standard.Database.Data.Sql import Sql_Type
polyglot java import java.lang.UnsupportedOperationException
@ -26,7 +27,8 @@ type Connection
A Database connection using a JDBC driver.
Arguments:
- java_connection: the resource managing the underlying JDBC connection.
- connection_resource: the resource managing the underlying JDBC
connection.
- dialect: the dialect associated with the database we are connected to.
Allows accessing tables from a database.
@ -40,7 +42,7 @@ type Connection
Arguments:
- name: name of the table to access
access_table : Text -> Database_Table
access_table name = here.wrap_sql_errors <|
access_table name = here.handle_sql_errors <|
columns = this.fetch_columns name
Database_Table.make_table this name columns
@ -62,7 +64,7 @@ type Connection
- expected_types: an optional array of expected types of each column;
meant only for internal use.
execute_query : Text | Sql.Statement -> Vector Sql.Sql_Type -> Materialized_Table
execute_query query expected_types=Nothing = here.wrap_sql_errors <|
execute_query query expected_types=Nothing = here.handle_sql_errors <|
Resource.bracket (this.prepare_statement query) .close stmt->
rs = stmt.executeQuery
metadata = rs.getMetaData
@ -94,8 +96,9 @@ type Connection
- query: either raw SQL code as Text or an instance of Sql.Statement
representing the query to execute.
execute_update : Text | Sql.Statement -> Integer
execute_update query = here.wrap_sql_errors <|
execute_update query = here.handle_sql_errors <|
Resource.bracket (this.prepare_statement query) .close stmt->
## FIXME USE CATCH HERE!
result = Panic.recover Any stmt.executeLargeUpdate
result.catch err-> case err of
Polyglot_Error exc ->
@ -115,15 +118,9 @@ type Connection
prepare_statement query =
go template holes=[] = Managed_Resource.with this.connection_resource java_connection->
stmt = java_connection.prepareStatement template
setup_error = Panic.recover Any <|
holes.map_with_index ix-> obj->
position = ix + 1
case obj.first of
Nothing -> stmt.setNull position obj.second.typeid
_ -> stmt.setObject position obj.first
setup_error.catch error->
Panic.catch Any (here.set_statement_values stmt holes) caught_panic->
stmt.close
Panic.throw error
Panic.throw caught_panic
stmt
case query of
Text -> go query []
@ -154,6 +151,52 @@ type Connection
[name, Sql_Type typeid typename]
Vector.new ncols resolve_column
## PRIVATE
UNSTABLE
This is a prototype function used in our test suites. It may change.
It creates a new table in the database with the given name (will fail if
the table already exists), inserts the contents of the provided
in-memory table and returns a handle to the newly created table.
upload_table : Text -> Materialized_Table -> Integer -> Database_Table
upload_table name table batch_size=1000 = Panic.recover Illegal_State_Error <| here.handle_sql_errors <|
column_types = table.columns.map col-> here.default_storage_type col.storage_type
column_names = table.columns.map .name
col_makers = column_names.zip column_types name-> typ->
Base_Generator.wrap_in_quotes name ++ Sql.code " " ++ Sql.code typ.name
create_sql = (Sql.code "CREATE TABLE " ++ Base_Generator.wrap_in_quotes name ++ Sql.code " (" ++ (Sql.join ", " col_makers) ++ Sql.code ")").build
Panic.rethrow <| this.execute_update create_sql
db_table = Panic.rethrow <| this.access_table name
pairs = db_table.internal_columns.map col->
[col.name, IR.Constant col.sql_type Nothing]
db_types = pairs.map p-> p.second.sql_type
insert_query = this.dialect.generate_sql <| IR.Insert name pairs
insert_template = insert_query.prepare.first
Managed_Resource.with this.connection_resource java_connection->
default_autocommit = java_connection.getAutoCommit
java_connection.setAutoCommit False
Resource.bracket Nothing (_ -> java_connection.setAutoCommit default_autocommit) _->
Resource.bracket (java_connection.prepareStatement insert_template) .close stmt->
num_rows = table.row_count
columns = table.columns
check_rows updates_array expected_size =
updates = Vector.Vector updates_array
if updates.length != expected_size then Panic.throw <| Illegal_State_Error "The batch update unexpectedly affected "+updates.length.to_text+" rows instead of "+expected_size.to_text+"." else
updates.each affected_rows->
if affected_rows != 1 then
Panic.throw <| Illegal_State_Error "A single update within the batch unexpectedly affected "+affected_rows.to_text+" rows."
0.up_to num_rows . each row_id->
values = columns.map col-> col.at row_id
holes = values.zip db_types
here.set_statement_values stmt holes
stmt.addBatch
if (row_id+1 % batch_size) == 0 then check_rows stmt.executeBatch batch_size
if (num_rows % batch_size) != 0 then check_rows stmt.executeBatch (num_rows % batch_size)
java_connection.commit
db_table
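
A hedged usage sketch for the `upload_table` prototype above (the table name is illustrative; the default batch size of 1000 can be overridden):

```
## Uploads an in-memory table and returns a Database_Table handle to it.
example_upload connection in_memory_table =
    connection.upload_table "My_Data" in_memory_table batch_size=500
```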
## PRIVATE
Creates a builder for a column based on a provided SQL type, trying to infer
@ -265,7 +308,7 @@ Unsupported_Dialect.to_display_text =
- url: The URL to connect to.
- properties: A vector of properties for the connection.
create_jdbc_connection : Text -> Vector -> Connection
create_jdbc_connection url properties = here.wrap_sql_errors <|
create_jdbc_connection url properties = here.handle_sql_errors <|
java_props = Properties.new
properties.each pair->
java_props.setProperty pair.first pair.second
@ -300,13 +343,17 @@ type Sql_Error
Arguments:
- java_exception: The underlying exception.
type Sql_Error java_exception
- related_query (optional): A string representation of a query that this
error is related to.
type Sql_Error java_exception related_query=Nothing
## UNSTABLE
Convert the SQL error to a textual representation.
to_text : Text
to_text = "There was an SQL error: " + this.java_exception.getMessage.to_text + "."
to_text =
query = if this.related_query.is_nothing.not then " [Query was: " + this.related_query + "]" else ""
"There was an SQL error: " + this.java_exception.getMessage.to_text + "." + query
## UNSTABLE
@ -322,13 +369,17 @@ type Sql_Timeout_Error
Arguments:
- java_exception: The underlying exception.
type Sql_Timeout_Error java_exception
- related_query (optional): A string representation of a query that this
error is related to.
type Sql_Timeout_Error java_exception related_query=Nothing
## UNSTABLE
Convert the timeout error to a textual representation.
to_text : Text
to_text = "The SQL connection timed out: " + this.java_exception.getMessage + "."
to_text =
query = if this.related_query.is_nothing.not then " [Query was: " + this.related_query + "]" else ""
"The SQL connection timed out: " + this.java_exception.getMessage + "." + query
## UNSTABLE
@ -339,16 +390,50 @@ type Sql_Timeout_Error
## PRIVATE
Executes `action` and returns its result, catching any panics and if they are
coming from JDBC, wraps them with our own error types.
coming from JDBC, wraps them with our own error types and returns as regular
data-flow errors.
Arguments:
- action: The computation to execute. This computation may throw SQL errors.
wrap_sql_errors : Any -> Any ! Error
wrap_sql_errors ~action =
result = Panic.recover Any action
result.catch err-> case err of
Polyglot_Error exc ->
transformed = if Java.is_instance exc SQLTimeoutException then Sql_Timeout_Error exc else
if Java.is_instance exc SQLException then Sql_Error exc else err
Error.throw transformed
_ -> Error.throw err
handle_sql_errors : Any -> Any ! (Sql_Error | Sql_Timeout_Error)
handle_sql_errors ~action =
Panic.recover [Sql_Error, Sql_Timeout_Error] <|
here.wrap_sql_errors action
## PRIVATE
Executes `action` and returns its result, converting any SQL exceptions into
Enso panics.
Arguments:
- action: The computation to execute. This computation may throw SQL errors.
- related_query: A related query that is currently being run, to enrich the
error metadata.
wrap_sql_errors : Any -> (Text | Nothing) -> Any ! (Sql_Error | Sql_Timeout_Error)
wrap_sql_errors ~action related_query=Nothing =
Panic.catch SQLException action caught_panic->
exc = caught_panic.payload.cause
case Java.is_instance exc SQLTimeoutException of
True -> Panic.throw (Sql_Timeout_Error exc related_query)
False -> Panic.throw (Sql_Error exc related_query)
## PRIVATE
Returns the default database type corresponding to an in-memory storage
type.
default_storage_type : Storage.Type -> Sql_Type
default_storage_type storage_type = case storage_type of
Storage.Text -> Sql_Type.text
Storage.Integer -> Sql_Type.integer
Storage.Decimal -> Sql_Type.double
Storage.Boolean -> Sql_Type.boolean
Storage.Any -> Sql_Type.blob
## PRIVATE
Sets values inside of a prepared statement.
set_statement_values : PreparedStatement -> Vector (Pair Any Sql_Type) -> Nothing
set_statement_values stmt holes =
holes.map_with_index ix-> obj->
position = ix + 1
case obj.first of
Nothing -> stmt.setNull position obj.second.typeid
_ -> stmt.setObject position obj.first

View File

@ -510,7 +510,7 @@ type Column
as_internal : IR.Internal_Column
as_internal = IR.Internal_Column this.name this.sql_type this.expression
type Aggregate_Column
type Aggregate_Column_Builder
## UNSTABLE
@ -524,10 +524,10 @@ type Aggregate_Column
- context: The SQL context in which the column exists.
Allows performing aggregation operations on the contained values.
# type Aggregate_Column (name : Text) (connection : Connection)
# type Aggregate_Column_Builder (name : Text) (connection : Connection)
# (sql_type : Sql_Type) (expression : IR.Expression)
# (context : IR.Context)
type Aggregate_Column name connection sql_type expression context
type Aggregate_Column_Builder name connection sql_type expression context
## UNSTABLE

View File

@ -1,7 +1,11 @@
from Standard.Base import all
import Standard.Base.Error.Extensions as Errors
import Standard.Table.Data.Aggregate_Column
import Standard.Database.Data.Sql
import Standard.Database.Data.Internal.Base_Generator
import Standard.Database.Data.Dialect.Postgres
import Standard.Database.Data.Dialect.Redshift
import Standard.Database.Data.Dialect.Sqlite as Sqlite_Module
## PRIVATE
@ -10,103 +14,39 @@ import Standard.Database.Data.Internal.Base_Generator
It encapsulates dialect-specific code generation details allowing us to
support differing SQL dialects.
type Dialect
## PRIVATE
This is a fake constructor to make the compiler accept this type
definition. It can and should be removed once interface definitions are
allowed.
type Dialect
## PRIVATE
Name of the dialect.
name : Text
name = Errors.unimplemented "This is an interface only."
## PRIVATE
A function which generates SQL code from the internal representation
according to the specific dialect.
generate_sql : Query -> Sql.Statement
generate_sql = Errors.unimplemented "This is an interface only."
Represents a specific SQL dialect.
## PRIVATE
Deduces the result type for an aggregation operation.
Arguments:
- name: name of the dialect.
- generate_sql: a function which generates SQL code from the internal
representation according to the specific dialect.
It encapsulates dialect-specific code generation details allowing us to
support differing SQL dialects.
# type Dialect (name : Text) (generate_sql : Query -> Sql.Statement)
type Dialect name generate_sql
## PRIVATE
The dialect of PostgreSQL databases.
postgresql : Dialect
postgresql =
starts_with arguments =
case arguments.length == 2 of
True ->
str = arguments.at 0
sub = arguments.at 1
res = str ++ (Sql.code " LIKE CONCAT(") ++ sub ++ (Sql.code ", '%')")
res.paren
False ->
Error.throw ("Invalid amount of arguments for operation starts_with")
ends_with arguments =
case arguments.length == 2 of
True ->
str = arguments.at 0
sub = arguments.at 1
res = str ++ (Sql.code " LIKE CONCAT('%', ") ++ sub ++ (Sql.code ")")
res.paren
False ->
Error.throw ("Invalid amount of arguments for operation ends_with")
contains arguments =
case arguments.length == 2 of
True ->
str = arguments.at 0
sub = arguments.at 1
res = str ++ (Sql.code " LIKE CONCAT('%', ") ++ sub ++ (Sql.code ", '%')")
res.paren
False ->
Error.throw ("Invalid amount of arguments for operation contains")
my_mappings = [["starts_with", starts_with], ["contains", contains], ["ends_with", ends_with]]
dialect = Base_Generator.base_dialect . extend_with my_mappings
Dialect "postgresql" (query -> Base_Generator.generate_query dialect query . build)
## PRIVATE
The dialect of SQLite databases.
sqlite : Dialect
sqlite =
starts_with arguments =
case arguments.length == 2 of
True ->
str = arguments.at 0
sub = arguments.at 1
res = str ++ (Sql.code " LIKE (") ++ sub ++ (Sql.code " || '%')")
res.paren
False ->
Error.throw ("Invalid amount of arguments for operation starts_with")
ends_with arguments =
case arguments.length == 2 of
True ->
str = arguments.at 0
sub = arguments.at 1
res = str ++ (Sql.code " LIKE ('%' || ") ++ sub ++ (Sql.code ")")
res.paren
False ->
Error.throw ("Invalid amount of arguments for operation ends_with")
contains arguments =
case arguments.length == 2 of
True ->
str = arguments.at 0
sub = arguments.at 1
res = str ++ (Sql.code " LIKE ('%' || ") ++ sub ++ (Sql.code " || '%')")
res.paren
False ->
Error.throw ("Invalid amount of arguments for operation contains")
my_mappings = [["starts_with", starts_with], ["contains", contains], ["ends_with", ends_with]]
dialect = Base_Generator.base_dialect . extend_with my_mappings
Dialect "sqlite" (query -> Base_Generator.generate_query dialect query . build)
## PRIVATE
The dialect for Redshift connections.
redshift : Dialect
redshift = case here.postgresql of
Dialect _ builder -> Dialect "redshift" builder
The provided aggregate is assumed to contain only already resolved columns.
You may need to transform it with `resolve_columns` first.
resolve_target_sql_type : Aggregate_Column -> Sql_Type
resolve_target_sql_type = Errors.unimplemented "This is an interface only."
## PRIVATE
A vector of SQL dialects supported by the Database library.
supported_dialects : Vector Dialect
supported_dialects = [here.postgresql, here.sqlite, here.redshift]
supported_dialects = [Postgres.postgresql, Sqlite_Module.sqlite, Redshift.redshift]
## PRIVATE
The dialect of SQLite databases.
sqlite : Dialect
sqlite = Sqlite_Module.sqlite
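
Each dialect module now exposes a value satisfying this informal interface; a minimal sketch of a caller, assuming `query` is an already-built IR query:

```
## Picks a dialect and generates a SQL statement from the IR.
example_generate query =
    dialect = Dialect.sqlite
    statement = dialect.generate_sql query
    [dialect.name, statement]
```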

View File

@ -0,0 +1,124 @@
from Standard.Base import all
from Standard.Table.Data.Aggregate_Column import all
from Standard.Database.Data.Sql import Sql_Type
import Standard.Database.Data.Dialect
import Standard.Database.Data.Internal.Base_Generator
## PRIVATE
The dialect of PostgreSQL databases.
postgresql : Dialect
postgresql =
Postgresql_Dialect here.make_internal_generator_dialect
## PRIVATE
The dialect of PostgreSQL databases.
type Postgresql_Dialect
## PRIVATE
The dialect of PostgreSQL databases.
type Postgresql_Dialect internal_generator_dialect
## PRIVATE
Name of the dialect.
name : Text
name = "postgresql"
## PRIVATE
A function which generates SQL code from the internal representation
according to the specific dialect.
generate_sql : Query -> Sql.Statement
generate_sql query =
Base_Generator.generate_query this.internal_generator_dialect query . build
## PRIVATE
Deduces the result type for an aggregation operation.
The provided aggregate is assumed to contain only already resolved columns.
You may need to transform it with `resolve_columns` first.
resolve_target_sql_type : Aggregate_Column -> Sql_Type
resolve_target_sql_type aggregate = here.resolve_target_sql_type aggregate
## PRIVATE
make_internal_generator_dialect =
starts_with arguments =
case arguments.length == 2 of
True ->
str = arguments.at 0
sub = arguments.at 1
res = str ++ (Sql.code " LIKE CONCAT(") ++ sub ++ (Sql.code ", '%')")
res.paren
False ->
Error.throw ("Invalid amount of arguments for operation starts_with")
ends_with arguments =
case arguments.length == 2 of
True ->
str = arguments.at 0
sub = arguments.at 1
res = str ++ (Sql.code " LIKE CONCAT('%', ") ++ sub ++ (Sql.code ")")
res.paren
False ->
Error.throw ("Invalid amount of arguments for operation ends_with")
contains arguments =
case arguments.length == 2 of
True ->
str = arguments.at 0
sub = arguments.at 1
res = str ++ (Sql.code " LIKE CONCAT('%', ") ++ sub ++ (Sql.code ", '%')")
res.paren
False ->
Error.throw ("Invalid amount of arguments for operation contains")
text = [["starts_with", starts_with], ["contains", contains], ["ends_with", ends_with]]
counts = [here.agg_count_is_null, here.agg_count_empty, here.agg_count_not_empty, here.agg_count_distinct_including_nulls]
stats = [here.agg_median]
my_mappings = text + counts + stats
Base_Generator.base_dialect . extend_with my_mappings
## PRIVATE
The provided aggregate is assumed to contain only already resolved columns.
You may need to transform it with `resolve_columns` first.
resolve_target_sql_type aggregate = case aggregate of
Group_By c _ -> c.sql_type
Count _ -> Sql_Type.bigint
Count_Distinct _ _ _ -> Sql_Type.bigint
Count_Not_Nothing _ _ -> Sql_Type.bigint
Count_Nothing _ _ -> Sql_Type.bigint
Count_Not_Empty _ _ -> Sql_Type.bigint
Count_Empty _ _ -> Sql_Type.bigint
Percentile _ _ _ -> Sql_Type.double
Mode c _ -> c.sql_type
First c _ _ _ -> c.sql_type
Last c _ _ _ -> c.sql_type
Maximum c _ -> c.sql_type
Minimum c _ -> c.sql_type
Shortest c _ -> c.sql_type
Longest c _ -> c.sql_type
Standard_Deviation _ _ _ -> Sql_Type.double
Concatenate _ _ _ _ _ _ -> Sql_Type.text
## TODO [RW] revise these
Sum _ _ -> Sql_Type.numeric # TODO can also be bigint, real, double
Average _ _ -> Sql_Type.numeric # TODO can be double sometimes
Median _ _ -> Sql_Type.numeric # TODO can be double sometimes
## PRIVATE
agg_count_is_null = Base_Generator.lift_unary_op "COUNT_IS_NULL" arg->
Sql.code "COUNT(CASE WHEN " ++ arg.paren ++ Sql.code " IS NULL THEN 1 END)"
## PRIVATE
agg_count_empty = Base_Generator.lift_unary_op "COUNT_EMPTY" arg->
Sql.code "COUNT(CASE WHEN (" ++ arg.paren ++ Sql.code " IS NULL) OR (" ++ arg.paren ++ Sql.code " = '') THEN 1 END)"
## PRIVATE
agg_count_not_empty = Base_Generator.lift_unary_op "COUNT_NOT_EMPTY" arg->
Sql.code "COUNT(CASE WHEN (" ++ arg.paren ++ Sql.code " IS NOT NULL) AND (" ++ arg.paren ++ Sql.code " != '') THEN 1 END)"
## PRIVATE
agg_count_distinct_including_nulls = Base_Generator.lift_unary_op "COUNT_DISTINCT_INCLUDE_NULL" arg->
Sql.code "(COUNT(DISTINCT " ++ arg.paren ++ Sql.code ") + CASE WHEN COUNT(CASE WHEN " ++ arg.paren ++ Sql.code " IS NULL THEN 1 END) > 0 THEN 1 ELSE 0 END)"
## PRIVATE
agg_median = Base_Generator.lift_unary_op "MEDIAN" arg->
Sql.code "percentile_cont(0.5) WITHIN GROUP (ORDER BY " ++ arg ++ Sql.code ")"

View File

@ -0,0 +1,44 @@
from Standard.Base import all
import Standard.Table.Data.Aggregate_Column
import Standard.Database.Data.Sql
import Standard.Database.Data.Dialect
import Standard.Database.Data.Dialect.Postgres
import Standard.Database.Data.Internal.Base_Generator
## PRIVATE
The dialect for Redshift connections.
redshift : Dialect
redshift =
Redshift_Dialect Postgres.make_internal_generator_dialect
## PRIVATE
The dialect for Redshift connections.
type Redshift_Dialect
## PRIVATE
The dialect for Redshift connections.
type Redshift_Dialect internal_generator_dialect
## PRIVATE
Name of the dialect.
name : Text
name = "redshift"
## PRIVATE
A function which generates SQL code from the internal representation
according to the specific dialect.
generate_sql : Query -> Sql.Statement
generate_sql query =
Base_Generator.generate_query this.internal_generator_dialect query . build
## PRIVATE
Deduces the result type for an aggregation operation.
The provided aggregate is assumed to contain only already resolved columns.
You may need to transform it with `resolve_columns` first.
resolve_target_sql_type : Aggregate_Column -> Sql_Type
resolve_target_sql_type aggregate =
Postgres.resolve_target_sql_type aggregate

View File

@ -0,0 +1,118 @@
from Standard.Base import all
from Standard.Table.Data.Aggregate_Column import all
from Standard.Database.Data.Sql import Sql_Type
import Standard.Database.Data.Dialect
import Standard.Database.Data.Internal.Base_Generator
## PRIVATE
The dialect of SQLite databases.
sqlite : Dialect
sqlite =
Sqlite_Dialect here.make_internal_generator_dialect
## PRIVATE
The dialect of SQLite databases.
type Sqlite_Dialect
## PRIVATE
The dialect of SQLite databases.
type Sqlite_Dialect internal_generator_dialect
## PRIVATE
Name of the dialect.
name : Text
name = "sqlite"
## PRIVATE
A function which generates SQL code from the internal representation
according to the specific dialect.
generate_sql : Query -> Sql.Statement
generate_sql query =
Base_Generator.generate_query this.internal_generator_dialect query . build
## PRIVATE
Deduces the result type for an aggregation operation.
The provided aggregate is assumed to contain only already resolved columns.
You may need to transform it with `resolve_columns` first.
resolve_target_sql_type : Aggregate_Column -> Sql_Type
resolve_target_sql_type aggregate = here.resolve_target_sql_type aggregate
## PRIVATE
make_internal_generator_dialect =
starts_with arguments =
case arguments.length == 2 of
True ->
str = arguments.at 0
sub = arguments.at 1
res = str ++ (Sql.code " LIKE (") ++ sub ++ (Sql.code " || '%')")
res.paren
False ->
Error.throw ("Invalid amount of arguments for operation starts_with")
ends_with arguments =
case arguments.length == 2 of
True ->
str = arguments.at 0
sub = arguments.at 1
res = str ++ (Sql.code " LIKE ('%' || ") ++ sub ++ (Sql.code ")")
res.paren
False ->
Error.throw ("Invalid amount of arguments for operation ends_with")
contains arguments =
case arguments.length == 2 of
True ->
str = arguments.at 0
sub = arguments.at 1
res = str ++ (Sql.code " LIKE ('%' || ") ++ sub ++ (Sql.code " || '%')")
res.paren
False ->
Error.throw ("Invalid amount of arguments for operation contains")
text = [["starts_with", starts_with], ["contains", contains], ["ends_with", ends_with]]
counts = [here.agg_count_is_null, here.agg_count_empty, here.agg_count_not_empty, here.agg_count_distinct_including_nulls]
my_mappings = text + counts
Base_Generator.base_dialect . extend_with my_mappings
## PRIVATE
The provided aggregate is assumed to contain only already resolved columns.
You may need to transform it with `resolve_columns` first.
resolve_target_sql_type aggregate = case aggregate of
Group_By c _ -> c.sql_type
Count _ -> Sql_Type.integer
Count_Distinct _ _ _ -> Sql_Type.integer
Count_Not_Nothing _ _ -> Sql_Type.integer
Count_Nothing _ _ -> Sql_Type.integer
Count_Not_Empty _ _ -> Sql_Type.integer
Count_Empty _ _ -> Sql_Type.integer
Percentile _ _ _ -> Sql_Type.real
Mode c _ -> c.sql_type
First c _ _ _ -> c.sql_type
Last c _ _ _ -> c.sql_type
Maximum c _ -> c.sql_type
Minimum c _ -> c.sql_type
Shortest c _ -> c.sql_type
Longest c _ -> c.sql_type
Standard_Deviation _ _ _ -> Sql_Type.real
Concatenate _ _ _ _ _ _ -> Sql_Type.text
## TODO revise these
Sum c _ -> c.sql_type
Average _ _ -> Sql_Type.real
Median _ _ -> Sql_Type.real
## PRIVATE
agg_count_is_null = Base_Generator.lift_unary_op "COUNT_IS_NULL" arg->
Sql.code "COALESCE(SUM(" ++ arg.paren ++ Sql.code " IS NULL), 0)"
## PRIVATE
agg_count_empty = Base_Generator.lift_unary_op "COUNT_EMPTY" arg->
Sql.code "COALESCE(SUM((" ++ arg.paren ++ Sql.code " IS NULL) OR (" ++ arg.paren ++ Sql.code " == '')), 0)"
## PRIVATE
agg_count_not_empty = Base_Generator.lift_unary_op "COUNT_NOT_EMPTY" arg->
Sql.code "COALESCE(SUM((" ++ arg.paren ++ Sql.code " IS NOT NULL) AND (" ++ arg.paren ++ Sql.code " != '')), 0)"
## PRIVATE
agg_count_distinct_including_nulls = Base_Generator.lift_unary_op "COUNT_DISTINCT_INCLUDE_NULL" arg->
Sql.code "(COUNT(DISTINCT " ++ arg.paren ++ Sql.code ") + CASE WHEN SUM(" ++ arg.paren ++ Sql.code " IS NULL) > 0 THEN 1 ELSE 0 END)"

View File

@ -0,0 +1,47 @@
from Standard.Base import all
from Standard.Table.Data.Aggregate_Column import all
import Standard.Database.Data.Internal.IR
from Standard.Database.Data.Sql import Sql_Type
make_aggregate_column : Table -> Aggregate_Column -> IR.Internal_Column
make_aggregate_column table aggregate =
new_name = aggregate.column_name table
sql_type = table.connection.dialect.resolve_target_sql_type aggregate
expression = here.make_expression aggregate
IR.Internal_Column new_name sql_type expression
make_expression : Aggregate_Column -> IR.Expression
make_expression aggregate = case aggregate of
Group_By c _ -> c.expression
Count _ -> IR.Operation "COUNT_ROWS" []
Count_Distinct columns _ ignore_nothing -> case columns.length > 1 of
True ->
## TODO
Error.throw (Illegal_State_Error "Multi column distinct is not implemented yet.")
False ->
column = columns.first
case ignore_nothing of
True -> IR.Operation "COUNT_DISTINCT" [column.expression]
False -> IR.Operation "COUNT_DISTINCT_INCLUDE_NULL" [column.expression]
Count_Not_Nothing c _ -> IR.Operation "COUNT" [c.expression]
Count_Nothing c _ -> IR.Operation "COUNT_IS_NULL" [c.expression]
Count_Not_Empty c _ -> IR.Operation "COUNT_NOT_EMPTY" [c.expression]
Count_Empty c _ -> IR.Operation "COUNT_EMPTY" [c.expression]
Percentile p c _ -> IR.Operation "PERCENTILE" [IR.Constant Sql_Type.double p, c.expression]
Mode c _ -> IR.Operation "MODE" [c.expression]
First _ _ _ _ -> Error.throw (Illegal_State_Error "Not implemented yet.")
Last _ _ _ _ -> Error.throw (Illegal_State_Error "Not implemented yet.")
Maximum c _ -> IR.Operation "MAX" [c.expression]
Minimum c _ -> IR.Operation "MIN" [c.expression]
Shortest c _ -> IR.Operation "MIN" [c.expression]
Longest c _ -> IR.Operation "MAX" [c.expression]
Standard_Deviation c _ population ->
## TODO!
_ = population
IR.Operation "STD_DEV" [c.expression]
Concatenate c _ _ _ _ _ ->
IR.Operation "CONCAT" [c.expression]
Sum c _ -> IR.Operation "SUM" [c.expression]
Average c _ -> IR.Operation "AVG" [c.expression]
Median c _ -> IR.Operation "MEDIAN" [c.expression]
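
A hedged sketch of the lowering performed above, as if written in this module (assuming `col` is an already resolved column, e.g. obtained via `resolve_columns`):

```
## Lowers a single aggregate to its IR expression.
example_lowering col =
    expr = here.make_expression (Count_Not_Nothing col Nothing)
    # expr is now: IR.Operation "COUNT" [col.expression]
    expr
```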

View File

@ -67,6 +67,22 @@ make_unary_op name =
False ->
Error.throw ("Invalid amount of arguments for operation " + name)
## PRIVATE
A helper function to create a unary operation from a function.
Arguments:
- name: Name of the operation, used for error reporting.
- function: A function taking exactly one argument: the generated SQL code
for the argument of the operation, and returning the generated SQL code for
the whole operation.
lift_unary_op : Text -> (Sql.Builder -> Sql.Builder) -> [Text, (Vector Sql.Builder -> Sql.Builder)]
lift_unary_op name function =
generator = arguments -> case arguments.length == 1 of
False -> Error.throw ("Invalid amount of arguments for operation " + name + ".")
True -> function (arguments.at 0)
[name, generator]
## PRIVATE
A helper function to create a unary operator which is added to the right of
@ -128,17 +144,22 @@ wrap_in_quotes identifier =
It is a base to help creating concrete dialects. It can be extended or
completely overridden.
base_dialect =
bin = here.make_binary_op
unary = here.make_unary_op
fun = here.make_function
arith = [["+", bin "+"], ["-", bin "-"], ["*", bin "*"], ["/", bin "/"]]
logic = [["AND", bin "AND"], ["OR", bin "OR"], ["NOT", unary "NOT"]]
compare = [["=", bin "="], ["!=", bin "!="], ["<", bin "<"], [">", bin ">"], ["<=", bin "<="], [">=", bin ">="]]
agg = [["COUNT", fun "COUNT"], ["MAX", fun "MAX"], ["MIN", fun "MIN"], ["AVG", fun "AVG"], ["SUM", fun "SUM"], ["COUNT_ROWS", here.make_constant "COUNT(*)"]]
nulls = [["ISNULL", here.make_right_unary_op "IS NULL"], ["FILLNULL", fun "COALESCE"]]
base_map = Map.from_vector (arith + logic + compare + agg + nulls)
bin = name -> [name, here.make_binary_op name]
unary = name -> [name, here.make_unary_op name]
fun = name -> [name, here.make_function name]
arith = [bin "+", bin "-", bin "*", bin "/"]
logic = [bin "AND", bin "OR", unary "NOT"]
compare = [bin "=", bin "!=", bin "<", bin ">", bin "<=", bin ">="]
agg = [fun "MAX", fun "MIN", fun "AVG", fun "SUM"]
counts = [fun "COUNT", ["COUNT_ROWS", here.make_constant "COUNT(*)"], here.count_distinct]
nulls = [["ISNULL", here.make_right_unary_op "IS NULL"], ["FILLNULL", here.make_function "COALESCE"]]
base_map = Map.from_vector (arith + logic + compare + agg + nulls + counts)
Internal_Dialect base_map here.wrap_in_quotes
## PRIVATE
count_distinct = here.lift_unary_op "COUNT_DISTINCT" arg->
Sql.code "COUNT(DISTINCT " ++ arg.paren ++ Sql.code ")"
## PRIVATE
Builds code for an expression.
@ -153,7 +174,7 @@ generate_expression dialect expr = case expr of
dialect.wrap_identifier origin ++ dot ++ dialect.wrap_identifier name
IR.Constant sql_type value -> Sql.interpolation sql_type value
IR.Operation kind arguments ->
op = dialect.operation_map.get_or_else kind (Error.throw <| Illegal_State_Error "Operation "+op+" is not supported.")
op = dialect.operation_map.get_or_else kind (Error.throw <| Illegal_State_Error "Operation "+kind+" is not supported.")
parsed_args = arguments.map (here.generate_expression dialect)
op parsed_args
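
`lift_unary_op` returns a `[name, generator]` pair, so dialect modules can append custom operations to the base dialect; a minimal sketch (the `NEGATE` operation is a hypothetical example, not part of this PR):

```
## A hypothetical unary operation wrapping its argument in a SQL negation.
negate = Base_Generator.lift_unary_op "NEGATE" arg->
    Sql.code "(-" ++ arg.paren ++ Sql.code ")"

extended_dialect = Base_Generator.base_dialect . extend_with [negate]
```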

View File

@ -67,10 +67,22 @@ type Sql_Type
integer : Sql_Type
integer = Sql_Type Types.INTEGER "INTEGER"
## The SQL representation of the `BIGINT` type.
bigint : Sql_Type
bigint = Sql_Type Types.BIGINT "BIGINT"
## The SQL type representing decimal numbers.
decimal : Sql_Type
decimal = Sql_Type Types.DECIMAL "DECIMAL"
## The SQL type representing decimal numbers.
real : Sql_Type
real = Sql_Type Types.REAL "REAL"
## The SQL type representing double-precision floating-point numbers.
double : Sql_Type
double = Sql_Type Types.DOUBLE "DOUBLE PRECISION"
## The SQL type representing a general numeric type.
numeric : Sql_Type
numeric = Sql_Type Types.NUMERIC "NUMERIC"
@ -79,6 +91,17 @@ type Sql_Type
varchar : Sql_Type
varchar = Sql_Type Types.VARCHAR "VARCHAR"
## UNSTABLE
The SQL type representing one of the supported textual types.
It seems that JDBC treats the `TEXT` and `VARCHAR` types as interchangeable.
text : Sql_Type
text = Sql_Type Types.VARCHAR "VARCHAR"
## The SQL type representing a binary object.
blob : Sql_Type
blob = Sql_Type Types.BLOB "BLOB"
## PRIVATE
Returns True if this type represents an integer.
@ -87,7 +110,7 @@ type Sql_Type
non-standard ones.
is_definitely_integer : Boolean
is_definitely_integer =
[Types.INTEGER, Types.SMALLINT, Types.TINYINT].contains this.typeid
[Types.INTEGER, Types.BIGINT, Types.SMALLINT, Types.TINYINT].contains this.typeid
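
Two quick consequences of the additions above, for illustration:

```
example_checks =
    check_1 = Sql_Type.bigint . is_definitely_integer           # True: BIGINT is now recognized
    check_2 = Sql_Type.text.typeid == Sql_Type.varchar.typeid   # True: both map to Types.VARCHAR
    [check_1, check_2]
```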
## PRIVATE

View File

@ -1,6 +1,7 @@
from Standard.Base import all
import Standard.Database.Data.Internal.Helpers
import Standard.Database.Data.Internal.Aggregate_Helper
import Standard.Database.Data.Internal.IR
import Standard.Database.Data.Sql
import Standard.Table.Data.Column as Materialized_Column
@ -8,7 +9,8 @@ import Standard.Table.Data.Table as Materialized_Table
import Standard.Table.Internal.Java_Exports
import Standard.Table.Internal.Table_Helpers
from Standard.Database.Data.Column as Column_Module import Column, Aggregate_Column
import Standard.Table.Data.Aggregate_Column
from Standard.Database.Data.Column as Column_Module import Column, Aggregate_Column_Builder
from Standard.Database.Data.Internal.IR import Internal_Column
from Standard.Table.Data.Table import No_Such_Column_Error
from Standard.Table.Data.Order_Rule as Order_Rule_Module import Order_Rule
@ -29,7 +31,7 @@ type Table
Arguments:
- name: The name of the table.
- connection: The connection with whicg the table is associated.
- connection: The connection with which the table is associated.
- internal_columns: The internal representation of the table columns.
- context: The context associated with this table.
# type Table (name : Text) (connection : Connection)
@ -458,7 +460,7 @@ type Table
rules specified in the `by` argument will default to this setting,
unless specified in the rule.
- missing_last: Specifies the default placement of missing values when
compared to non-missing ones. This setting may be overriden by the
compared to non-missing ones. This setting may be overridden by the
particular rules of the `by` argument. Note that this argument is
independent from `order`, i.e. missing values will always be sorted
according to this rule, ignoring the ascending / descending setting.
@ -639,6 +641,23 @@ type Table
new_ctx = this.context.set_groups exprs . set_index cols
Aggregate_Table this.name this.connection this.internal_columns new_ctx
## Prototype Group By function
aggregate : [Aggregate_Column] -> Problem_Behavior -> Table
aggregate columns (on_problems=Report_Warning) =
## TODO handle errors here and turn them into warnings where applicable
_ = on_problems
resolved_aggregates = columns.map (_.resolve_columns this)
# TODO handling duplicate names etc. is to be done as part of https://www.pivotaltracker.com/story/show/181420794
# Grouping Key
is_a_key c = case c of
Aggregate_Column.Group_By _ _ -> True
_ -> False
key_columns = resolved_aggregates.filter is_a_key . map .column
key_expressions = key_columns.map .expression
new_ctx = this.context.set_groups key_expressions
new_columns = resolved_aggregates.map (Aggregate_Helper.make_aggregate_column this)
this.updated_context_and_columns new_ctx new_columns
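
For context, a hedged sketch of what this prototype produces for a database table (the generated SQL is indicative only; exact quoting and aliasing depend on the dialect):

```
## Groups by "Flag" and counts rows per group; roughly:
   SELECT "Flag", COUNT(*) FROM "table" GROUP BY "Flag"
example db_table =
    db_table.aggregate [Group_By "Flag", Count Nothing]
```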
## UNSTABLE
Returns a new Table without rows that contained missing values in any of
@ -762,8 +781,8 @@ type Table
Arguments:
- columns: The columns with which to update this table.
updated_columns : Vector Colums -> Table
updated_columns columns = Table this.name this.connection columns this.context
updated_columns : Vector Internal_Column -> Table
updated_columns internal_columns = Table this.name this.connection internal_columns this.context
## PRIVATE
@ -774,6 +793,16 @@ type Table
updated_context : Context -> Table
updated_context ctx = Table this.name this.connection this.internal_columns ctx
## PRIVATE
Returns a copy of this table with updated context and columns.
Arguments:
- ctx: The new context for this table.
- internal_columns: The new columns to include in the table.
updated_context_and_columns : Context -> Vector Internal_Column -> Table
updated_context_and_columns ctx internal_columns = Table this.name this.connection internal_columns ctx
## PRIVATE
Returns a vector that contains first the internal representations of all
@ -817,7 +846,7 @@ type Aggregate_Table
Arguments:
- name: The name of the table.
- connection: The connection with whicg the table is associated.
- connection: The connection with which the table is associated.
- internal_columns: The internal representation of the table columns.
- context: The context associated with this table.
# type Aggregate_Table (name : Text) (connection : Connection)
@ -857,9 +886,9 @@ type Aggregate_Table
Arguments:
- internal: The internal column to make into an aggregate column.
make_column : Internal_Column -> Aggregate_Column
make_column : Internal_Column -> Aggregate_Column_Builder
make_column internal =
Aggregate_Column internal.name this.connection internal.sql_type internal.expression this.context
Aggregate_Column_Builder internal.name this.connection internal.sql_type internal.expression this.context
## PRIVATE

View File

@ -1,6 +1,9 @@
from Standard.Base import all
from Standard.Table.Data.Column as Column_Module import Column
from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column_Selector, By_Name, By_Index, By_Column
import Standard.Table.Internal.Table_Helpers
import Standard.Base.Error.Problem_Behavior
## Defines an Aggregate Column
type Aggregate_Column
@ -20,7 +23,7 @@ type Aggregate_Column
- columns: either a single or set of columns (specified by name, index or Column object) to count across.
- name: name of new column.
- ignore_nothing: if True, rows where all the values are Nothing are not included in the count.
type Count_Distinct (columns:Column|Text|Integer|[(Column|Text|Integer)]) (new_name:Text|Nothing=Nothing) (ignore_nothing:Boolean=False)
type Count_Distinct (columns:Column|Text|Integer|Column_Selector) (new_name:Text|Nothing=Nothing) (ignore_nothing:Boolean=False)
## ALIAS Count_Not_Null
@ -192,10 +195,65 @@ type Aggregate_Column
Given a column reference resolve to the underlying column
resolve_column : Table->(Column|Text|Integer)->Column
resolve_column table column =
## TODO this should be able to handle problems too!
case column of
Column _ -> table.at (column.name)
Text -> table.at column
Integer -> table.columns.at column
## A wildcard makes this work both with In-Memory and Database table columns.
_ -> table.at (column.name)
## PRIVATE
Returns a copy of this aggregate where all column descriptors (names,
indices or column references potentially from a different table) are
replaced with column references from the provided table.
This preprocessing step is required by some helper functions, to avoid having
to pass the table reference and resolve the column descriptors all the
time.
If some columns cannot be resolved, a dataflow error will be returned.
Higher-level methods can then handle this error by turning it into a
warning and ignoring the column.
resolve_columns : Table -> Aggregate_Column
resolve_columns table =
resolve : (Integer|Text|Column) -> Column
resolve c = this.resolve_column table c
resolve_selector_to_vector : Column_Selector -> [Column]
resolve_selector_to_vector selector =
Table_Helpers.select_columns_helper table.columns selector reorder=False on_problems=Problem_Behavior.Report_Error
resolve_selector_or_nothing selector = case selector of
Nothing -> Nothing
_ -> resolve_selector_to_vector selector
case this of
Group_By c new_name -> Group_By (resolve c) new_name
Count new_name -> Count new_name
Count_Distinct c new_name ignore_nothing ->
new_c = case c of
## TODO once we have sum type pattern matching this could be replaced with a single branch
By_Name _ _ -> resolve_selector_to_vector c
By_Index _ -> resolve_selector_to_vector c
By_Column _ -> resolve_selector_to_vector c
## TODO this is a temporary fix, remove it
Vector.Vector _ -> c.map resolve
_ -> [resolve c]
Count_Distinct new_c new_name ignore_nothing
Count_Not_Nothing c new_name -> Count_Not_Nothing (resolve c) new_name
Count_Nothing c new_name -> Count_Nothing (resolve c) new_name
Count_Not_Empty c new_name -> Count_Not_Empty (resolve c) new_name
Count_Empty c new_name -> Count_Empty (resolve c) new_name
Sum c new_name -> Sum (resolve c) new_name
Average c new_name -> Average (resolve c) new_name
Median c new_name -> Median (resolve c) new_name
Percentile p c new_name -> Percentile p (resolve c) new_name
Mode c new_name -> Mode (resolve c) new_name
Standard_Deviation c new_name population -> Standard_Deviation (resolve c) new_name population
Concatenate c new_name separator prefix suffix quote_char -> Concatenate (resolve c) new_name separator prefix suffix quote_char
First c new_name ignore_nothing order_by -> First (resolve c) new_name ignore_nothing (resolve_selector_or_nothing order_by)
Last c new_name ignore_nothing order_by -> Last (resolve c) new_name ignore_nothing (resolve_selector_or_nothing order_by)
Maximum c new_name -> Maximum (resolve c) new_name
Minimum c new_name -> Minimum (resolve c) new_name
Shortest c new_name -> Shortest (resolve c) new_name
Longest c new_name -> Longest (resolve c) new_name
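
A hedged usage sketch for `resolve_columns` (the table and column names are illustrative):

```
## Replaces the Text descriptor "Value" with the actual column reference
   from `table`; helpers like `make_aggregate_column` expect this form.
example_resolve table =
    (Sum "Value" Nothing).resolve_columns table
```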
## Occurs when a column cannot be aggregated.

View File

@ -59,6 +59,7 @@ validate aggregate_columns table =
pass_1 = valid_aggregate_columns.map c->(if c.new_name.is_nothing then Nothing else unique.make_unique c.new_name)
valid_columns = pass_1.map_with_index i->c->
Pair (if c.is_nothing then unique.make_unique (valid_aggregate_columns.at i . column_name table) else c) (valid_aggregate_columns.at i)
## TODO resolve the columns
# Build Problems Output
missing_problems = (if missing_names.is_empty then [] else [Missing_Input_Columns missing_names.to_vector])

View File

@ -51,7 +51,7 @@ prepare_visualization x max_rows = Helpers.recover_errors <| case x of
here.prepare_visualization ungrouped.to_table max_rows
Database_Table.Aggregate_Table _ _ _ _ ->
here.prepare_visualization x.ungrouped max_rows
Database_Column.Aggregate_Column _ _ _ _ _ ->
Database_Column.Aggregate_Column_Builder _ _ _ _ _ ->
here.prepare_visualization x.ungrouped.to_table max_rows
# TODO [RW] Should we truncate Vectors?

View File

@ -498,11 +498,11 @@ built runner executable as explained above):
enso --run test/Tests # for the Base library
enso --run test/Geo_Tests
enso --run test/Table_Tests
enso --run test/Database_Tests
```
The Database tests will by default only test the SQLite backend; to test other
backends see [`test/Database_Tests/README.md`](../test/Database_Tests/README.md)
backends, see
[`test/Table_Tests/src/Database/README.md`](../test/Table_Tests/src/Database/README.md)
for information on how to configure them.
The Base tests rely in a few places on the system language. On Linux you can set

View File

@ -1964,8 +1964,14 @@ type System
- command: The name of the system process.
- arguments: An array of arguments to the system process.
- input: The input to pass to the process via standard input.
create_process : Text -> Array -> Text -> System_Process_Result
create_process command arguments input =
- redirect_in: Specifies if the standard input of the program should be
redirected to the started process.
- redirect_out: Specifies if the standard output of the started process
should be redirected to the program's standard output.
- redirect_err: Specifies if the standard error output of the started
process should be redirected to the program's standard error output.
create_process : Text -> Array -> Text -> Boolean -> Boolean -> Boolean -> System_Process_Result
create_process command arguments input redirect_in redirect_out redirect_err =
@Builtin_Method "System.create_process"
## Exits the Enso program, returning the provided code to the parent

View File

@ -1,6 +0,0 @@
name: Database_Tests
version: 0.0.1
enso-version: default
license: MIT
author: enso-dev@enso.org
maintainer: enso-dev@enso.org

View File

@ -1,13 +0,0 @@
from Standard.Base import all
import Standard.Test
import project.Codegen_Spec
import project.Sqlite_Spec
import project.Postgresql_Spec
import project.Redshift_Spec
main = Test.Suite.run_main <|
Codegen_Spec.spec
Sqlite_Spec.spec
Postgresql_Spec.spec
Redshift_Spec.spec

View File

@ -1,46 +0,0 @@
from Standard.Base import all
import Standard.Base.System.Environment
from Standard.Database import all
from Standard.Database.Connection.Connection import Sql_Error
import Standard.Test
import project.Common_Spec
postgres_specific_spec connection pending =
Test.group "[PostgreSQL] Info" pending=pending <|
connection.execute_update 'CREATE TABLE "Tinfo" ("strs" VARCHAR, "ints" INTEGER, "bools" BOOLEAN, "reals" REAL)'
t = connection.access_table "Tinfo"
t.insert ["a", Nothing, False, 1.2]
t.insert ["abc", Nothing, Nothing, 1.3]
t.insert ["def", 42, True, 1.4]
Test.specify "should return Table information" <|
i = t.info
i.index . to_vector . should_equal ["strs", "ints", "bools", "reals"]
i.at "Items Count" . to_vector . should_equal [3, 1, 2, 3]
i.at "SQL Type" . to_vector . should_equal ["varchar", "int4", "bool", "float4"]
Test.specify "should infer standard types correctly" <|
t.at "strs" . sql_type . is_definitely_text . should_be_true
t.at "ints" . sql_type . is_definitely_integer . should_be_true
t.at "bools" . sql_type . is_definitely_boolean . should_be_true
t.at "reals" . sql_type . is_definitely_double . should_be_true
connection.execute_update 'DROP TABLE "Tinfo"'
spec =
db_name = Environment.get "ENSO_DATABASE_TEST_DB_NAME"
db_host = Environment.get "ENSO_DATABASE_TEST_HOST"
db_user = Environment.get "ENSO_DATABASE_TEST_DB_USER"
db_password = Environment.get "ENSO_DATABASE_TEST_DB_PASSWORD"
go connection pending=Nothing =
Common_Spec.spec "[PostgreSQL] " connection pending
here.postgres_specific_spec connection pending
case db_name.is_nothing of
True ->
message = "PostgreSQL test database is not configured. See README.md for instructions."
connection = Error.throw message
go connection pending=message
False ->
url = case db_host.is_nothing of
True -> "postgresql:" + db_name
False -> "postgresql://" + db_host + "/" + db_name
connection = Database.connect url user=db_user password=db_password
go connection

View File

@ -8,26 +8,47 @@ from Standard.Table.Error as Error_Module import Missing_Input_Columns, Column_I
import Standard.Test
import Standard.Test.Problems
type Test_Selection problem_handling=True advanced_stats=True text=True first_last=True std_dev=True multi_distinct=True
all_tests = Test_Selection True True True True
spec =
file_contents = (Enso_Project.data / "data.csv") . read
table = Table.from_csv file_contents
empty_table = Table.new <| table.columns.map c->[c.name, []]
materialize = x->x
here.aggregate_spec "[In-Memory] " table empty_table materialize
## Runs the common aggregate tests.
Arguments:
- prefix: A name to prepend to test groups to identify the tested backend.
- table: A table using the tested backend containing data from
`data/data.csv`.
- empty_table: An empty table using the tested backend.
- materialize: A helper function which materializes a table from the tested
backend as an in-memory table. Used to easily inspect results of a
particular query/operation.
- test_selection: A selection of which suites should be run. Can be used to
skip checks for backends which do not support particular features.
- pending: An optional mark to disable all test groups. Can be used to
indicate that some tests are disabled due to missing test setup.
aggregate_spec prefix table empty_table materialize test_selection=here.all_tests pending=Nothing =
find_row key table (columns=Nothing) =
table_columns = if columns.is_nothing then table.columns else columns.map x->(table.columns.at x)
0.up_to table.row_count . find i->
0.up_to key.length . all j-> (table_columns.at j . at i)==(key.at j)
Test.group "Table.aggregate should summarize whole table " <|
Test.group prefix+"Table.aggregate should summarize whole table" pending=pending <|
Test.specify "should be able to count" <|
grouped = table.aggregate [Count Nothing]
grouped = materialize <| table.aggregate [Count Nothing]
grouped.row_count . should_equal 1
grouped.columns.length . should_equal 1
grouped.columns.at 0 . name . should_equal "Count"
grouped.columns.at 0 . at 0 . should_equal 2500
Test.specify "should be able to count missing values" <|
grouped = table.aggregate [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
grouped = materialize <| table.aggregate [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
grouped.row_count . should_equal 1
grouped.columns.length . should_equal 4
grouped.columns.at 0 . name . should_equal "Count Nothing Hexadecimal"
@ -40,22 +61,28 @@ spec =
grouped.columns.at 3 . at 0 . should_equal 2251
Test.specify "should be able to count distinct values" <|
grouped = table.aggregate [Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag", Count_Distinct ["Index", "Flag"]]
grouped = materialize <| table.aggregate [Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag"]
grouped.row_count . should_equal 1
grouped.columns.length . should_equal 4
grouped.columns.length . should_equal 3
grouped.columns.at 0 . name . should_equal "Count Distinct Code"
grouped.columns.at 0 . at 0 . should_equal 2333
grouped.columns.at 1 . name . should_equal "Count Distinct Index"
grouped.columns.at 1 . at 0 . should_equal 10
grouped.columns.at 2 . name . should_equal "Count Distinct Flag"
grouped.columns.at 2 . at 0 . should_equal 2
grouped.columns.at 3 . name . should_equal "Count Distinct Index Flag"
grouped.columns.at 3 . at 0 . should_equal 20
Test.specify "should be able to sum, average and standard deviation of values" <|
grouped = table.aggregate [Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)]
Test.specify "should be able to count distinct values over multiple columns" (pending=if test_selection.multi_distinct.not then "Not supported by "+prefix) <|
## TODO [RW] add Count_Distinct with overridden ignore_nothing! also need to modify data.csv to include some nulls on index and flag
grouped = materialize <| table.aggregate [Count_Distinct ["Index", "Flag"]]
grouped.row_count . should_equal 1
grouped.columns.length . should_equal 8
grouped.columns.length . should_equal 1
grouped.columns.at 0 . name . should_equal "Count Distinct Index Flag"
grouped.columns.at 0 . at 0 . should_equal 20
Test.specify "should be able to compute sum and average of values" <|
grouped = materialize <| table.aggregate [Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing"]
grouped.row_count . should_equal 1
grouped.columns.length . should_equal 4
grouped.columns.at 0 . name . should_equal "Sum Value"
grouped.columns.at 0 . at 0 . should_equal -932.411550 epsilon=0.000001
grouped.columns.at 1 . name . should_equal "Sum ValueWithNothing"
@ -64,17 +91,22 @@ spec =
grouped.columns.at 2 . at 0 . should_equal -0.372965 epsilon=0.000001
grouped.columns.at 3 . name . should_equal "Average ValueWithNothing"
grouped.columns.at 3 . at 0 . should_equal 1.228650 epsilon=0.000001
grouped.columns.at 4 . name . should_equal "Standard Deviation Value"
grouped.columns.at 4 . at 0 . should_equal 56.708660 epsilon=0.000001
grouped.columns.at 5 . name . should_equal "Standard Deviation ValueWithNothing"
grouped.columns.at 5 . at 0 . should_equal 58.588610 epsilon=0.000001
grouped.columns.at 6 . name . should_equal "Standard Deviation Value_1"
grouped.columns.at 6 . at 0 . should_equal 56.697317 epsilon=0.000001
grouped.columns.at 7 . name . should_equal "Standard Deviation ValueWithNothing_1"
grouped.columns.at 7 . at 0 . should_equal 58.575554 epsilon=0.000001
Test.specify "should be able to create median, mode and percentile values" <|
grouped = table.aggregate [Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"]
Test.specify "should be able to compute standard deviation of values" (pending=if test_selection.std_dev.not then "Not supported by "+prefix) <|
grouped = materialize <| table.aggregate [Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)]
grouped.row_count . should_equal 1
grouped.columns.length . should_equal 4
grouped.columns.at 0 . name . should_equal "Standard Deviation Value"
grouped.columns.at 0 . at 0 . should_equal 56.708660 epsilon=0.000001
grouped.columns.at 1 . name . should_equal "Standard Deviation ValueWithNothing"
grouped.columns.at 1 . at 0 . should_equal 58.588610 epsilon=0.000001
grouped.columns.at 2 . name . should_equal "Standard Deviation Value_1"
grouped.columns.at 2 . at 0 . should_equal 56.697317 epsilon=0.000001
grouped.columns.at 3 . name . should_equal "Standard Deviation ValueWithNothing_1"
grouped.columns.at 3 . at 0 . should_equal 58.575554 epsilon=0.000001
Test.specify "should be able to create median, mode and percentile values" (pending=if test_selection.advanced_stats.not then "Not supported by "+prefix) <|
grouped = materialize <| table.aggregate [Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"]
grouped.row_count . should_equal 1
grouped.columns.length . should_equal 6
grouped.columns.at 0 . name . should_equal "Median Index"
@ -90,8 +122,8 @@ spec =
grouped.columns.at 5 . name . should_equal "40%-ile ValueWithNothing"
grouped.columns.at 5 . at 0 . should_equal -17.960000 epsilon=0.000001
Test.specify "should be able to get first and last values" <|
grouped = table.aggregate [First "Index", Last "Value"]
Test.specify "should be able to get first and last values" (pending=if test_selection.first_last.not then "Not supported by "+prefix) <|
grouped = materialize <| table.aggregate [First "Index", Last "Value"]
grouped.row_count . should_equal 1
grouped.columns.length . should_equal 2
grouped.columns.at 0 . name . should_equal "First Index"
@ -100,7 +132,7 @@ spec =
grouped.columns.at 1 . at 0 . should_equal 70.99931 epsilon=0.000001
Test.specify "should be able to get minimum and maximum values" <|
grouped = table.aggregate [Minimum "Value", Maximum "Value", Minimum "ValueWithNothing", Maximum "ValueWithNothing"]
grouped = materialize <| table.aggregate [Minimum "Value", Maximum "Value", Minimum "ValueWithNothing", Maximum "ValueWithNothing"]
grouped.row_count . should_equal 1
grouped.columns.length . should_equal 4
grouped.columns.at 0 . name . should_equal "Minimum Value"
@ -112,8 +144,8 @@ spec =
grouped.columns.at 3 . name . should_equal "Maximum ValueWithNothing"
grouped.columns.at 3 . at 0 . should_equal 99.95 epsilon=0.000001
Test.specify "should be able to get shortest, longest and concatenated values" <|
grouped = table.aggregate [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
Test.specify "should be able to get shortest, longest and concatenated values" (pending=if test_selection.text.not then "Not supported by "+prefix) <|
grouped = materialize <| table.aggregate [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
grouped.row_count . should_equal 1
grouped.columns.length . should_equal 3
grouped.columns.at 0 . name . should_equal "Shortest TextWithNothing"
@ -123,16 +155,16 @@ spec =
grouped.columns.at 2 . name . should_equal "Concatenate Code"
grouped.columns.at 2 . at 0 . length . should_equal 7500
Test.group "Table.aggregate should summarize empty table " <|
Test.group prefix+"Table.aggregate should summarize empty table" pending=pending <|
Test.specify "should be able to count" <|
grouped = empty_table.aggregate [Count Nothing]
grouped = materialize <| empty_table.aggregate [Count Nothing]
grouped.row_count . should_equal 1
grouped.columns.length . should_equal 1
grouped.columns.at 0 . name . should_equal "Count"
grouped.columns.at 0 . at 0 . should_equal 0
Test.specify "should be able to count missing values" <|
grouped = empty_table.aggregate [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
grouped = materialize <| empty_table.aggregate [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
grouped.row_count . should_equal 1
grouped.columns.length . should_equal 4
grouped.columns.at 0 . name . should_equal "Count Nothing Hexadecimal"
@ -145,27 +177,32 @@ spec =
grouped.columns.at 3 . at 0 . should_equal 0
Test.specify "should be able to count distinct values" <|
grouped = empty_table.aggregate [Count_Distinct "Code"]
grouped = materialize <| empty_table.aggregate [Count_Distinct "Code"]
grouped.row_count . should_equal 1
grouped.columns.length . should_equal 1
grouped.columns.at 0 . name . should_equal "Count Distinct Code"
grouped.columns.at 0 . at 0 . should_equal 0
Test.specify "should be able to sum, average and standard deviation of values" <|
grouped = empty_table.aggregate [Sum "Value", Average "ValueWithNothing", Standard_Deviation "Value", (Standard_Deviation "ValueWithNothing" population=True)]
Test.specify "should be able to compute sum and average of values" <|
grouped = materialize <| empty_table.aggregate [Sum "Value", Average "ValueWithNothing"]
grouped.row_count . should_equal 1
grouped.columns.length . should_equal 4
grouped.columns.length . should_equal 2
grouped.columns.at 0 . name . should_equal "Sum Value"
grouped.columns.at 0 . at 0 . should_equal Nothing
grouped.columns.at 1 . name . should_equal "Average ValueWithNothing"
grouped.columns.at 1 . at 0 . should_equal Nothing
grouped.columns.at 2 . name . should_equal "Standard Deviation Value"
grouped.columns.at 2 . at 0 . should_equal Nothing
grouped.columns.at 3 . name . should_equal "Standard Deviation ValueWithNothing"
grouped.columns.at 3 . at 0 . should_equal Nothing
Test.specify "should be able to create median, mode and percentile values" <|
grouped = empty_table.aggregate [Median "Index", Mode "Index", Percentile 0.25 "Value"]
Test.specify "should be able to compute standard deviation of values" (pending=if test_selection.std_dev.not then "Not supported by "+prefix) <|
grouped = materialize <| empty_table.aggregate [Standard_Deviation "Value", (Standard_Deviation "ValueWithNothing" population=True)]
grouped.row_count . should_equal 1
grouped.columns.length . should_equal 2
grouped.columns.at 0 . name . should_equal "Standard Deviation Value"
grouped.columns.at 0 . at 0 . should_equal Nothing
grouped.columns.at 1 . name . should_equal "Standard Deviation ValueWithNothing"
grouped.columns.at 1 . at 0 . should_equal Nothing
Test.specify "should be able to create median, mode and percentile values" (pending=if test_selection.advanced_stats.not then "Not supported by "+prefix) <|
grouped = materialize <| empty_table.aggregate [Median "Index", Mode "Index", Percentile 0.25 "Value"]
grouped.row_count . should_equal 1
grouped.columns.length . should_equal 3
grouped.columns.at 0 . name . should_equal "Median Index"
@ -175,8 +212,8 @@ spec =
grouped.columns.at 2 . name . should_equal "25%-ile Value"
grouped.columns.at 2 . at 0 . should_equal Nothing
Test.specify "should be able to get first and last values" <|
grouped = empty_table.aggregate [First "Index", Last "Value"]
Test.specify "should be able to get first and last values" (pending=if test_selection.first_last.not then "Not supported by "+prefix) <|
grouped = materialize <| empty_table.aggregate [First "Index", Last "Value"]
grouped.row_count . should_equal 1
grouped.columns.length . should_equal 2
grouped.columns.at 0 . name . should_equal "First Index"
@ -185,7 +222,7 @@ spec =
grouped.columns.at 1 . at 0 . should_equal Nothing
Test.specify "should be able to get minimum and maximum values" <|
grouped = empty_table.aggregate [Minimum "Value", Maximum "ValueWithNothing"]
grouped = materialize <| empty_table.aggregate [Minimum "Value", Maximum "ValueWithNothing"]
grouped.row_count . should_equal 1
grouped.columns.length . should_equal 2
grouped.columns.at 0 . name . should_equal "Minimum Value"
@ -193,8 +230,8 @@ spec =
grouped.columns.at 1 . name . should_equal "Maximum ValueWithNothing"
grouped.columns.at 1 . at 0 . should_equal Nothing
Test.specify "should be able to get shortest, longest and concatenated values" <|
grouped = empty_table.aggregate [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
Test.specify "should be able to get shortest, longest and concatenated values" (pending=if test_selection.text.not then "Not supported by "+prefix) <|
grouped = materialize <| empty_table.aggregate [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
grouped.row_count . should_equal 1
grouped.columns.length . should_equal 3
grouped.columns.at 0 . name . should_equal "Shortest TextWithNothing"
@ -204,16 +241,16 @@ spec =
grouped.columns.at 2 . name . should_equal "Concatenate Code"
grouped.columns.at 2 . at 0 . should_equal Nothing
Test.group "Table.aggregate should not summarize empty table when grouped " <|
Test.group prefix+"Table.aggregate should not summarize empty table when grouped" pending=pending <|
Test.specify "should be able to count" <|
grouped = empty_table.aggregate [Group_By 0, Count Nothing]
grouped = materialize <| empty_table.aggregate [Group_By 0, Count Nothing]
grouped.row_count . should_equal 0
grouped.columns.length . should_equal 2
grouped.columns.at 0 . name . should_equal "Code"
grouped.columns.at 1 . name . should_equal "Count"
Test.specify "should be able to count missing values" <|
grouped = empty_table.aggregate [Group_By 0, Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
grouped = materialize <| empty_table.aggregate [Group_By 0, Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
grouped.row_count . should_equal 0
grouped.columns.length . should_equal 5
grouped.columns.at 0 . name . should_equal "Code"
@ -223,24 +260,30 @@ spec =
grouped.columns.at 4 . name . should_equal "Count Not Empty TextWithNothing"
Test.specify "should be able to count distinct values" <|
grouped = empty_table.aggregate [Group_By 0, Count_Distinct "Code"]
grouped = materialize <| empty_table.aggregate [Group_By 0, Count_Distinct "Code"]
grouped.row_count . should_equal 0
grouped.columns.length . should_equal 2
grouped.columns.at 0 . name . should_equal "Code"
grouped.columns.at 1 . name . should_equal "Count Distinct Code"
Test.specify "should be able to sum, average and standard deviation of values" <|
grouped = empty_table.aggregate [Group_By 0, Sum "Value", Average "ValueWithNothing", Standard_Deviation "Value", (Standard_Deviation "ValueWithNothing" population=True)]
Test.specify "should be able to compute sum and average of values" <|
grouped = materialize <| empty_table.aggregate [Group_By 0, Sum "Value", Average "ValueWithNothing"]
grouped.row_count . should_equal 0
grouped.columns.length . should_equal 5
grouped.columns.length . should_equal 3
grouped.columns.at 0 . name . should_equal "Code"
grouped.columns.at 1 . name . should_equal "Sum Value"
grouped.columns.at 2 . name . should_equal "Average ValueWithNothing"
grouped.columns.at 3 . name . should_equal "Standard Deviation Value"
grouped.columns.at 4 . name . should_equal "Standard Deviation ValueWithNothing"
Test.specify "should be able to create median values" <|
grouped = empty_table.aggregate [Group_By 0, Median "Index", Mode "Index", Percentile 0.25 "Value"]
Test.specify "should be able to compute standard deviation of values" (pending=if test_selection.std_dev.not then "Not supported by "+prefix) <|
grouped = materialize <| empty_table.aggregate [Group_By 0, Standard_Deviation "Value", (Standard_Deviation "ValueWithNothing" population=True)]
grouped.row_count . should_equal 0
grouped.columns.length . should_equal 3
grouped.columns.at 0 . name . should_equal "Code"
grouped.columns.at 1 . name . should_equal "Standard Deviation Value"
grouped.columns.at 2 . name . should_equal "Standard Deviation ValueWithNothing"
Test.specify "should be able to create median values" (pending=if test_selection.advanced_stats.not then "Not supported by "+prefix) <|
grouped = materialize <| empty_table.aggregate [Group_By 0, Median "Index", Mode "Index", Percentile 0.25 "Value"]
grouped.row_count . should_equal 0
grouped.columns.length . should_equal 4
grouped.columns.at 0 . name . should_equal "Code"
@ -248,8 +291,8 @@ spec =
grouped.columns.at 2 . name . should_equal "Mode Index"
grouped.columns.at 3 . name . should_equal "25%-ile Value"
Test.specify "should be able to get first and last values" <|
grouped = empty_table.aggregate [Group_By 0, First "Index", Last "Value"]
Test.specify "should be able to get first and last values" (pending=if test_selection.first_last.not then "Not supported by "+prefix) <|
grouped = materialize <| empty_table.aggregate [Group_By 0, First "Index", Last "Value"]
grouped.row_count . should_equal 0
grouped.columns.length . should_equal 3
grouped.columns.at 0 . name . should_equal "Code"
@ -257,15 +300,15 @@ spec =
grouped.columns.at 2 . name . should_equal "Last Value"
Test.specify "should be able to get minimum and maximum values" <|
grouped = empty_table.aggregate [Group_By 0, Minimum "Value", Maximum "ValueWithNothing"]
grouped = materialize <| empty_table.aggregate [Group_By 0, Minimum "Value", Maximum "ValueWithNothing"]
grouped.row_count . should_equal 0
grouped.columns.length . should_equal 3
grouped.columns.at 0 . name . should_equal "Code"
grouped.columns.at 1 . name . should_equal "Minimum Value"
grouped.columns.at 2 . name . should_equal "Maximum ValueWithNothing"
Test.specify "should be able to get shortest, longest and concatenated values" <|
grouped = empty_table.aggregate [Group_By 0, Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
Test.specify "should be able to get shortest, longest and concatenated values" (pending=if test_selection.text.not then "Not supported by "+prefix) <|
grouped = materialize <| empty_table.aggregate [Group_By 0, Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
grouped.row_count . should_equal 0
grouped.columns.length . should_equal 4
grouped.columns.at 0 . name . should_equal "Code"
@ -273,9 +316,9 @@ spec =
grouped.columns.at 2 . name . should_equal "Longest TextWithNothing"
grouped.columns.at 3 . name . should_equal "Concatenate Code"
Test.group "Table.aggregate should be able to group on single field " <|
Test.group prefix+"Table.aggregate should be able to group on single field" pending=pending <|
Test.specify "should be able to count" <|
grouped = table.aggregate [Group_By "Index", Count Nothing]
grouped = materialize <| table.aggregate [Group_By "Index", Count Nothing]
grouped.row_count . should_equal 10
grouped.columns.length . should_equal 2
grouped.columns.at 0 . name . should_equal "Index"
@ -285,7 +328,7 @@ spec =
grouped.columns.at 1 . at idx . should_equal 261
Test.specify "should be able to count missing values" <|
grouped = table.aggregate [Group_By "Index", Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
grouped = materialize <| table.aggregate [Group_By "Index", Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
grouped.row_count . should_equal 10
grouped.columns.length . should_equal 5
grouped.columns.at 0 . name . should_equal "Index"
@ -301,9 +344,9 @@ spec =
grouped.columns.at 4 . at idx . should_equal 230
Test.specify "should be able to count distinct values" <|
grouped = table.aggregate [Group_By "Index", Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag", Count_Distinct ["Index", "Flag"]]
grouped = materialize <| table.aggregate [Group_By "Index", Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag"]
grouped.row_count . should_equal 10
grouped.columns.length . should_equal 5
grouped.columns.length . should_equal 4
grouped.columns.at 0 . name . should_equal "Index"
idx = find_row [6] grouped
idx.is_nothing . should_be_false
@ -313,13 +356,22 @@ spec =
grouped.columns.at 2 . at idx . should_equal 1
grouped.columns.at 3 . name . should_equal "Count Distinct Flag"
grouped.columns.at 3 . at idx . should_equal 2
grouped.columns.at 4 . name . should_equal "Count Distinct Index Flag"
grouped.columns.at 4 . at idx . should_equal 2
Test.specify "should be able to sum, average and standard deviation of values" <|
grouped = table.aggregate [Group_By "Index", Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)]
Test.specify "should be able to count distinct values over multiple columns" (pending=if test_selection.multi_distinct.not then "Not supported by "+prefix) <|
## TODO: this should probably use different columns for the multi-column distinct case, and should also check the `ignore_nothing` behaviour
grouped = materialize <| table.aggregate [Group_By "Index", Count_Distinct ["Index", "Flag"]]
grouped.row_count . should_equal 10
grouped.columns.length . should_equal 9
grouped.columns.length . should_equal 2
grouped.columns.at 0 . name . should_equal "Index"
idx = find_row [6] grouped
idx.is_nothing . should_be_false
grouped.columns.at 1 . name . should_equal "Count Distinct Index Flag"
grouped.columns.at 1 . at idx . should_equal 2
Test.specify "should be able to compute sum and average of values" <|
grouped = materialize <| table.aggregate [Group_By "Index", Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing"]
grouped.row_count . should_equal 10
grouped.columns.length . should_equal 5
grouped.columns.at 0 . name . should_equal "Index"
idx = find_row [6] grouped
idx.is_nothing . should_be_false
@ -331,17 +383,25 @@ spec =
grouped.columns.at 3 . at idx . should_equal -1.715890 epsilon=0.000001
grouped.columns.at 4 . name . should_equal "Average ValueWithNothing"
grouped.columns.at 4 . at idx . should_equal 0.646213 epsilon=0.000001
grouped.columns.at 5 . name . should_equal "Standard Deviation Value"
grouped.columns.at 5 . at idx . should_equal 60.272158 epsilon=0.000001
grouped.columns.at 6 . name . should_equal "Standard Deviation ValueWithNothing"
grouped.columns.at 6 . at idx . should_equal 56.798691 epsilon=0.000001
grouped.columns.at 7 . name . should_equal "Standard Deviation Value_1"
grouped.columns.at 7 . at idx . should_equal 60.156583 epsilon=0.000001
grouped.columns.at 8 . name . should_equal "Standard Deviation ValueWithNothing_1"
grouped.columns.at 8 . at idx . should_equal 56.677714 epsilon=0.000001
Test.specify "should be able to create median values" <|
grouped = table.aggregate [Group_By "Index", Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"]
Test.specify "should be able to compute standard deviation of values" (pending=if test_selection.std_dev.not then "Not supported by "+prefix) <|
grouped = materialize <| table.aggregate [Group_By "Index", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)]
grouped.row_count . should_equal 10
grouped.columns.length . should_equal 5
grouped.columns.at 0 . name . should_equal "Index"
idx = find_row [6] grouped
idx.is_nothing . should_be_false
grouped.columns.at 1 . name . should_equal "Standard Deviation Value"
grouped.columns.at 1 . at idx . should_equal 60.272158 epsilon=0.000001
grouped.columns.at 2 . name . should_equal "Standard Deviation ValueWithNothing"
grouped.columns.at 2 . at idx . should_equal 56.798691 epsilon=0.000001
grouped.columns.at 3 . name . should_equal "Standard Deviation Value_1"
grouped.columns.at 3 . at idx . should_equal 60.156583 epsilon=0.000001
grouped.columns.at 4 . name . should_equal "Standard Deviation ValueWithNothing_1"
grouped.columns.at 4 . at idx . should_equal 56.677714 epsilon=0.000001
Test.specify "should be able to create median values" (pending=if test_selection.advanced_stats.not then "Not supported by "+prefix) <|
grouped = materialize <| table.aggregate [Group_By "Index", Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"]
grouped.row_count . should_equal 10
grouped.columns.length . should_equal 7
grouped.columns.at 0 . name . should_equal "Index"
@ -360,8 +420,8 @@ spec =
grouped.columns.at 6 . name . should_equal "40%-ile ValueWithNothing"
grouped.columns.at 6 . at idx . should_equal -18.802000 epsilon=0.000001
Test.specify "should be able to get first and last values" <|
grouped = table.aggregate [Group_By "Index", First "TextWithNothing", Last "Value"]
Test.specify "should be able to get first and last values" (pending=if test_selection.first_last.not then "Not supported by "+prefix) <|
grouped = materialize <| table.aggregate [Group_By "Index", First "TextWithNothing", Last "Value"]
grouped.row_count . should_equal 10
grouped.columns.length . should_equal 3
grouped.columns.at 0 . name . should_equal "Index"
@ -373,7 +433,7 @@ spec =
grouped.columns.at 2 . at idx . should_equal 56.15916 epsilon=0.000001
Test.specify "should be able to get minimum and maximum values" <|
grouped = table.aggregate [Group_By "Index", Minimum "Value", Maximum "Value", Minimum "ValueWithNothing", Maximum "ValueWithNothing"]
grouped = materialize <| table.aggregate [Group_By "Index", Minimum "Value", Maximum "Value", Minimum "ValueWithNothing", Maximum "ValueWithNothing"]
grouped.row_count . should_equal 10
grouped.columns.length . should_equal 5
grouped.columns.at 0 . name . should_equal "Index"
@ -388,8 +448,8 @@ spec =
grouped.columns.at 4 . name . should_equal "Maximum ValueWithNothing"
grouped.columns.at 4 . at idx . should_equal 99.79 epsilon=0.000001
Test.specify "should be able to get shortest, longest and concatenated values" <|
grouped = table.aggregate [Group_By "Index", Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
Test.specify "should be able to get shortest, longest and concatenated values" (pending=if test_selection.text.not then "Not supported by "+prefix) <|
grouped = materialize <| table.aggregate [Group_By "Index", Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
grouped.row_count . should_equal 10
grouped.columns.length . should_equal 4
grouped.columns.at 0 . name . should_equal "Index"
@ -402,9 +462,9 @@ spec =
grouped.columns.at 3 . name . should_equal "Concatenate Code"
grouped.columns.at 3 . at idx . length . should_equal 783
Test.group "Table.aggregate should be able to group on multiple fields not in left columns" <|
Test.group prefix+"Table.aggregate should be able to group on multiple fields not in left columns" pending=pending <|
Test.specify "should be able to count" <|
grouped = table.aggregate [Group_By "Flag", Count Nothing, Group_By "Index"]
grouped = materialize <| table.aggregate [Group_By "Flag", Count Nothing, Group_By "Index"]
grouped.row_count . should_equal 20
grouped.columns.length . should_equal 3
grouped.columns.at 0 . name . should_equal "Flag"
@ -415,7 +475,7 @@ spec =
grouped.columns.at 1 . at idx . should_equal 127
Test.specify "should be able to count missing values" <|
grouped = table.aggregate [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Group_By "Index", Count_Empty "TextWithNothing", Group_By "Flag", Count_Not_Empty "TextWithNothing"]
grouped = materialize <| table.aggregate [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Group_By "Index", Count_Empty "TextWithNothing", Group_By "Flag", Count_Not_Empty "TextWithNothing"]
grouped.row_count . should_equal 20
grouped.columns.length . should_equal 6
grouped.columns.at 4 . name . should_equal "Flag"
@ -432,12 +492,12 @@ spec =
grouped.columns.at 5 . at idx . should_equal 115
Test.specify "should be able to count distinct values" <|
grouped = table.aggregate [Group_By "Index", Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag", Count_Distinct ["Index", "Flag"], Group_By "Flag"]
grouped = materialize <| table.aggregate [Group_By "Index", Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag", Group_By "Flag"]
grouped.row_count . should_equal 20
grouped.columns.length . should_equal 6
grouped.columns.length . should_equal 5
grouped.columns.at 0 . name . should_equal "Index"
grouped.columns.at 5 . name . should_equal "Flag"
idx = find_row ["False", 6] grouped [5, 0]
grouped.columns.at 4 . name . should_equal "Flag"
idx = find_row ["False", 6] grouped [4, 0]
idx.is_nothing . should_be_false
grouped.columns.at 1 . name . should_equal "Count Distinct Code"
grouped.columns.at 1 . at idx . should_equal 127
@ -445,36 +505,55 @@ spec =
grouped.columns.at 2 . at idx . should_equal 1
grouped.columns.at 3 . name . should_equal "Count Distinct Flag"
grouped.columns.at 3 . at idx . should_equal 1
grouped.columns.at 4 . name . should_equal "Count Distinct Index Flag"
grouped.columns.at 4 . at idx . should_equal 1
Test.specify "should be able to sum, average and standard deviation of values" <|
grouped = table.aggregate [Group_By "Index", Group_By "Flag", Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)]
Test.specify "should be able to count distinct values over multiple columns" (pending=if test_selection.multi_distinct.not then "Not supported by "+prefix) <|
## TODO: this should probably use different columns for the multi-column distinct case, and should also check the `ignore_nothing` behaviour
grouped = materialize <| table.aggregate [Group_By "Index", Count_Distinct ["Index", "Flag"], Group_By "Flag"]
grouped.row_count . should_equal 20
grouped.columns.length . should_equal 10
grouped.columns.length . should_equal 3
grouped.columns.at 0 . name . should_equal "Index"
grouped.columns.at 2 . name . should_equal "Flag"
idx = find_row ["False", 6] grouped [2, 0]
idx.is_nothing . should_be_false
grouped.columns.at 1 . name . should_equal "Count Distinct Index Flag"
grouped.columns.at 1 . at idx . should_equal 1
Test.specify "should be able to compute sum and average of values" <|
grouped = materialize <| table.aggregate [Group_By "Index", Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing", Group_By "Flag"]
grouped.row_count . should_equal 20
grouped.columns.length . should_equal 6
grouped.columns.at 0 . name . should_equal "Index"
grouped.columns.at 5 . name . should_equal "Flag"
idx = find_row ["False", 6] grouped [5, 0]
idx.is_nothing . should_be_false
grouped.columns.at 1 . name . should_equal "Sum Value"
grouped.columns.at 1 . at idx . should_equal -103.050170 epsilon=0.000001
grouped.columns.at 2 . name . should_equal "Sum ValueWithNothing"
grouped.columns.at 2 . at idx . should_equal 533.57 epsilon=0.000001
grouped.columns.at 3 . name . should_equal "Average Value"
grouped.columns.at 3 . at idx . should_equal -0.811419 epsilon=0.000001
grouped.columns.at 4 . name . should_equal "Average ValueWithNothing"
grouped.columns.at 4 . at idx . should_equal 4.721858 epsilon=0.000001
Test.specify "should be able to compute standard deviation of values" (pending=if test_selection.std_dev.not then "Not supported by "+prefix) <|
grouped = materialize <| table.aggregate [Group_By "Index", Group_By "Flag", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)]
grouped.row_count . should_equal 20
grouped.columns.length . should_equal 6
grouped.columns.at 0 . name . should_equal "Index"
grouped.columns.at 1 . name . should_equal "Flag"
idx = find_row ["False", 6] grouped [1, 0]
idx.is_nothing . should_be_false
grouped.columns.at 2 . name . should_equal "Sum Value"
grouped.columns.at 2 . at idx . should_equal -103.050170 epsilon=0.000001
grouped.columns.at 3 . name . should_equal "Sum ValueWithNothing"
grouped.columns.at 3 . at idx . should_equal 533.57 epsilon=0.000001
grouped.columns.at 4 . name . should_equal "Average Value"
grouped.columns.at 4 . at idx . should_equal -0.811419 epsilon=0.000001
grouped.columns.at 5 . name . should_equal "Average ValueWithNothing"
grouped.columns.at 5 . at idx . should_equal 4.721858 epsilon=0.000001
grouped.columns.at 6 . name . should_equal "Standard Deviation Value"
grouped.columns.at 6 . at idx . should_equal 58.979275 epsilon=0.000001
grouped.columns.at 7 . name . should_equal "Standard Deviation ValueWithNothing"
grouped.columns.at 7 . at idx . should_equal 57.561756 epsilon=0.000001
grouped.columns.at 8 . name . should_equal "Standard Deviation Value_1"
grouped.columns.at 8 . at idx . should_equal 58.746614 epsilon=0.000001
grouped.columns.at 9 . name . should_equal "Standard Deviation ValueWithNothing_1"
grouped.columns.at 9 . at idx . should_equal 57.306492 epsilon=0.000001
grouped.columns.at 2 . name . should_equal "Standard Deviation Value"
grouped.columns.at 2 . at idx . should_equal 58.979275 epsilon=0.000001
grouped.columns.at 3 . name . should_equal "Standard Deviation ValueWithNothing"
grouped.columns.at 3 . at idx . should_equal 57.561756 epsilon=0.000001
grouped.columns.at 4 . name . should_equal "Standard Deviation Value_1"
grouped.columns.at 4 . at idx . should_equal 58.746614 epsilon=0.000001
grouped.columns.at 5 . name . should_equal "Standard Deviation ValueWithNothing_1"
grouped.columns.at 5 . at idx . should_equal 57.306492 epsilon=0.000001
Test.specify "should be able to create median values" <|
grouped = table.aggregate [Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Group_By "Index", Group_By "Flag", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"]
Test.specify "should be able to create median values" (pending=if test_selection.advanced_stats.not then "Not supported by "+prefix) <|
grouped = materialize <| table.aggregate [Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Group_By "Index", Group_By "Flag", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"]
grouped.row_count . should_equal 20
grouped.columns.length . should_equal 8
grouped.columns.at 5 . name . should_equal "Flag"
@ -494,8 +573,8 @@ spec =
grouped.columns.at 7 . name . should_equal "40%-ile ValueWithNothing"
grouped.columns.at 7 . at idx . should_equal -17.174000 epsilon=0.000001
Test.specify "should be able to get first and last values" <|
grouped = table.aggregate [Group_By "Flag", First "TextWithNothing", Last "Value", Group_By "Index"]
Test.specify "should be able to get first and last values" (pending=if test_selection.first_last.not then "Not supported by "+prefix) <|
grouped = materialize <| table.aggregate [Group_By "Flag", First "TextWithNothing", Last "Value", Group_By "Index"]
grouped.row_count . should_equal 20
grouped.columns.length . should_equal 4
grouped.columns.at 0 . name . should_equal "Flag"
@ -508,7 +587,7 @@ spec =
grouped.columns.at 2 . at idx . should_equal 56.15916 epsilon=0.000001
Test.specify "should be able to get minimum and maximum values" <|
grouped = table.aggregate [Group_By "Index", Minimum "Value", Maximum "Value", Group_By "Flag", Minimum "ValueWithNothing", Maximum "ValueWithNothing"]
grouped = materialize <| table.aggregate [Group_By "Index", Minimum "Value", Maximum "Value", Group_By "Flag", Minimum "ValueWithNothing", Maximum "ValueWithNothing"]
grouped.row_count . should_equal 20
grouped.columns.length . should_equal 6
grouped.columns.at 3 . name . should_equal "Flag"
@ -524,8 +603,8 @@ spec =
grouped.columns.at 5 . name . should_equal "Maximum ValueWithNothing"
grouped.columns.at 5 . at idx . should_equal 97.17 epsilon=0.000001
Test.specify "should be able to get shortest, longest and concatenated values" <|
grouped = table.aggregate [Group_By "Index", Group_By "Flag", Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
Test.specify "should be able to get shortest, longest and concatenated values" (pending=if test_selection.text.not then "Not supported by "+prefix) <|
grouped = materialize <| table.aggregate [Group_By "Index", Group_By "Flag", Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
grouped.row_count . should_equal 20
grouped.columns.length . should_equal 5
grouped.columns.at 0 . name . should_equal "Index"
@ -539,7 +618,10 @@ spec =
grouped.columns.at 4 . name . should_equal "Concatenate Code"
grouped.columns.at 4 . at idx . length . should_equal 381
Test.group "Table.aggregate should raise warnings when there are issues" <|
problem_pending = case pending.is_nothing of
False -> pending
True -> if test_selection.problem_handling.not then "Not supported by "+prefix
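The `problem_pending` helper above layers the suite-wide `pending` reason over a per-backend feature flag. A minimal sketch of the same gating pattern, generalized over any flag (the `resolve_pending`, `supported` and `reason` names are illustrative, not part of the source):

    # Nothing means "run the test"; any Text is treated as the reason to skip it.
    resolve_pending suite_pending supported reason =
        case suite_pending.is_nothing of
            False -> suite_pending
            True -> if supported.not then reason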
Test.group prefix+"Table.aggregate should raise warnings when there are issues" pending=problem_pending <|
table =
col1 = ["Index", [1, 2, 3]]
col2 = ["Value", [1, 2, 3]]

View File

@ -23,8 +23,8 @@ from Standard.Table.Data.Position as Position_Module import all
column elements.
TODO [RW] the Any in return type of the builder should ideally be replaced with the Table interface, once that is supported.
spec : Text -> (Vector -> Any) -> Boolean -> Nothing
spec prefix table_builder supports_case_sensitive_columns =
spec : Text -> (Vector -> Any) -> Boolean -> Text -> Nothing
spec prefix table_builder supports_case_sensitive_columns pending=Nothing =
table =
col1 = ["foo", Integer, [1,2,3]]
col2 = ["bar", Integer, [4,5,6]]
@ -38,7 +38,7 @@ spec prefix table_builder supports_case_sensitive_columns =
expect_column_names names table =
table.columns . map .name . should_equal names frames_to_skip=2
Test.group prefix+"Table.select_columns" <|
Test.group prefix+"Table.select_columns" pending=pending <|
Test.specify "should work as shown in the doc examples" <|
expect_column_names ["foo", "bar"] <| table.select_columns (By_Name.new ["bar", "foo"])
expect_column_names ["bar", "Baz", "foo_1", "foo_2"] <| table.select_columns (By_Name ["foo.+", "b.*"] (Regex_Matcher.new case_sensitive=Case_Insensitive.new))
@ -152,7 +152,7 @@ spec prefix table_builder supports_case_sensitive_columns =
tester_2 = expect_column_names ["foo"]
Problems.test_problem_handling action_2 problems_2 tester_2
Test.group prefix+"Table.remove_columns" <|
Test.group prefix+"Table.remove_columns" pending=pending <|
Test.specify "should work as shown in the doc examples" <|
expect_column_names ["Baz", "foo_1", "foo_2", "ab.+123", "abcd123"] <| table.remove_columns (By_Name.new ["bar", "foo"])
expect_column_names ["foo", "ab.+123", "abcd123"] <| table.remove_columns (By_Name ["foo.+", "b.*"] (Regex_Matcher.new case_sensitive=Case_Insensitive.new))
@ -261,7 +261,7 @@ spec prefix table_builder supports_case_sensitive_columns =
tester_2 = expect_column_names ["bar", "Baz", "foo_1", "foo_2", "ab.+123", "abcd123"]
Problems.test_problem_handling action_2 problems_2 tester_2
Test.group prefix+"Table.reorder_columns" <|
Test.group prefix+"Table.reorder_columns" pending=pending <|
Test.specify "should work as shown in the doc examples" <|
expect_column_names ["bar", "Baz", "foo_1", "foo_2", "ab.+123", "abcd123", "foo"] <| table.reorder_columns (By_Name.new ["foo"]) position=After_Other_Columns
expect_column_names ["foo_1", "foo_2", "bar", "Baz", "foo", "ab.+123", "abcd123"] <| table.reorder_columns (By_Name ["foo.+", "b.*"] (Regex_Matcher.new case_sensitive=Case_Insensitive.new))
@ -357,7 +357,7 @@ spec prefix table_builder supports_case_sensitive_columns =
tester = expect_column_names ["bar", "Baz", "foo_1", "foo_2", "ab.+123", "abcd123", "foo"]
Problems.test_problem_handling action problems tester
Test.group prefix+"Table.sort_columns" <|
Test.group prefix+"Table.sort_columns" pending=pending <|
table =
col1 = ["foo_21", Integer, [1,2,3]]
col2 = ["foo_100", Integer, [4,5,6]]
@ -385,7 +385,7 @@ spec prefix table_builder supports_case_sensitive_columns =
Test.specify "should correctly handle various combinations of options" <|
expect_column_names ["foo_100", "foo_21", "foo_3", "Foo_2", "foo_1", "foo_001", "bar"] <| table.sort_columns (Sort_Method natural_order=True case_sensitive=Case_Insensitive.new order=Sort_Order.Descending)
Test.group prefix+"Table.rename_columns" <|
Test.group prefix+"Table.rename_columns" pending=pending <|
table =
col1 = ["alpha", Integer, [1,2,3]]
col2 = ["beta", Integer, [4,5,6]]

View File

@ -1,10 +1,11 @@
from Standard.Base import all
import project.Helpers.Fake_Test_Connection
import project.Database.Helpers.Fake_Test_Connection
import Standard.Database.Data.Dialect
import Standard.Database.Data.Table as Table_Module
import Standard.Test
from Standard.Table.Data.Aggregate_Column import all
from Standard.Database import all
from Standard.Database.Data.Sql import Sql_Type
from Standard.Table import No_Such_Column_Error, Order_Rule
@ -142,22 +143,6 @@ spec =
t2 = t1.drop_missing_rows
t2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE (NOT ("T1"."A" IS NULL)) AND (NOT ("T1"."B" IS NULL)) AND (NOT ("T1"."C" IS NULL))', []]
Test.group "[Codegen] Aggregation" pending="Codegen test for aggregations is disabled until 1643 is resolved. The semantics is still being tested by tests that run on actual databases." <|
agg = t1.group by='A'
Test.specify "should allow counting group sizes" <|
agg.count.to_sql.prepare . should_equal ['SELECT COUNT(*) AS "count" FROM "T1" AS "T1" GROUP BY "T1"."A"', []]
Test.specify "should allow aggregating columns with basic arithmetic aggregators" <|
c1 = agg.at 'B' . mean
c1.to_sql.prepare . should_equal ['SELECT AVG("T1"."B") AS "B_mean" FROM "T1" AS "T1" GROUP BY "T1"."A"', []]
c2 = agg.at 'B' . min
c2.to_sql.prepare . should_equal ['SELECT MIN("T1"."B") AS "B_min" FROM "T1" AS "T1" GROUP BY "T1"."A"', []]
Test.specify "should allow grouping by multiple columns" <|
agg = t1.group by=['A','B']
agg.count.to_sql.prepare . should_equal ['SELECT COUNT(*) AS "count" FROM "T1" AS "T1" GROUP BY "T1"."A", "T1"."B"', []]
Test.group "[Codegen] Sorting" <|
Test.specify "should allow sorting by a single column name" <|
r1 = t1.sort by="A" . at "B"
@ -196,3 +181,14 @@ spec =
used_names = ["A", "A_1"]
preferred_names = ["A", "A", "B"]
Table_Module.fresh_names used_names preferred_names . should_equal ["A_2", "A_3", "B"]
Test.group "[Codegen] Aggregation" <|
Test.specify "should allow to count rows" <|
code = t1.aggregate [Group_By "A" "A grp", Count "counter"] . to_sql . prepare
code . should_equal ['SELECT "T1"."A" AS "A grp", COUNT(*) AS "counter" FROM "T1" AS "T1" GROUP BY "T1"."A"', []]
Test.specify "should allow to group by multiple fields" <|
code = t1.aggregate [Sum "A" "sum_a", Group_By "C" Nothing, Group_By "B" "B grp"] . to_sql . prepare
code . should_equal ['SELECT SUM("T1"."A") AS "sum_a", "T1"."C" AS "C", "T1"."B" AS "B grp" FROM "T1" AS "T1" GROUP BY "T1"."C", "T1"."B"', []]
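A hypothetical further case in the same style, mixing grouping with an arithmetic aggregate; the expected SQL simply follows the quoting and aliasing pattern asserted above, and the real output may vary by dialect:

    Test.specify "should allow to mix grouping with arithmetic aggregates" <|
        code = t1.aggregate [Group_By "A" Nothing, Sum "B" "total_b"] . to_sql . prepare
        code . should_equal ['SELECT "T1"."A" AS "A", SUM("T1"."B") AS "total_b" FROM "T1" AS "T1" GROUP BY "T1"."A"', []]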
main = Test.Suite.run_main here.spec

View File

@ -1,22 +1,15 @@
from Standard.Base import all
from Standard.Database import all
import Standard.Table.Data.Table as Materialized_Table
import Standard.Test
from Standard.Table.Data.Aggregate_Column import all
spec prefix connection pending=Nothing =
make_table name column_names column_typenames = Panic.recover Any <|
quote x = '"' + x + '"'
# TODO this is a hack with no sanitization, just for testing; it should be removed when proper create table is supported by the library
cols = column_names.zip column_typenames name-> typ->
quote name + " " + typ
sql = "CREATE TABLE " + quote name + " (" + (cols.join ", ") + ")"
Panic.rethrow <| connection.execute_update sql
Panic.rethrow <| connection.access_table name
clean_table name = Panic.recover Any <|
sql = 'DROP TABLE "' + name + '"'
Panic.rethrow <| connection.execute_update sql
t1 = make_table "T1" ["a", "b", "c"] ["INT", "INT", "INT"]
t1.insert [1, 2, 3]
t1.insert [4, 5, 6]
t1 = connection.upload_table "T1" (Materialized_Table.new [["a", [1, 4]], ["b", [2, 5]], ["c", [3, 6]]])
Test.group prefix+"Basic Table Access" pending=pending <|
Test.specify "should allow to materialize tables and columns into local memory" <|
df = t1.to_dataframe
@ -52,19 +45,14 @@ spec prefix connection pending=Nothing =
empty.to_dataframe.columns.length . should_equal 0
empty.to_dataframe.row_count . should_equal empty.row_count
Test.specify "should handle bigger result sets" <|
table = make_table "Big" ["a", "b", "c"] ["INT", "REAL", "VARCHAR"]
n = 1000
0.up_to n . each ix->
table.insert [ix, ix * 3.1415926, ix.to_text]
original = Materialized_Table.new [["a", Vector.new n ix->ix], ["b", Vector.new n ix-> ix * 3.1415926], ["c", Vector.new n ix-> ix.to_text]]
table = connection.upload_table "Big" original
materialized = table.to_dataframe
materialized.row_count . should_equal n
Test.group prefix+"Mapping Operations" pending=pending <|
t2 = make_table "T2" ["x", "y", "b"] ["INT", "INT", "BOOLEAN"]
t2.insert [1, 2, False]
t2.insert [4, 3, False]
t2.insert [5, 5, True]
t2.insert [Nothing, Nothing, Nothing]
t2 = connection.upload_table "T2" <| Materialized_Table.new [["x", [1, 4, 5, Nothing]], ["y", [2, 3, 5, Nothing]], ["b", [False, False, True, Nothing]]]
x = t2.at "x"
y = t2.at "y"
b = t2.at "b"
@ -96,11 +84,7 @@ spec prefix connection pending=Nothing =
x.is_missing.to_vector . should_equal [False, False, False, True]
(x == Nothing).to_vector . should_equal [Nothing, Nothing, Nothing, Nothing]
t3 = make_table "T3" ["s1", "s2"] ["VARCHAR", "VARCHAR"]
t3.insert ["foobar", "foo"]
t3.insert ["bar", "ar" ]
t3.insert ["baz", "a" ]
t3.insert [Nothing, Nothing]
t3 = connection.upload_table "T3" <| Materialized_Table.new [["s1", ["foobar", "bar", "baz", Nothing]], ["s2", ["foo", "ar", "a", Nothing]]]
s1 = t3.at "s1"
s2 = t3.at "s2"
Test.specify "should handle Text operations" <|
@ -128,19 +112,9 @@ spec prefix connection pending=Nothing =
t2.at "c" . to_vector . should_equal [3]
Test.group prefix+"Joining Tables" pending=pending <|
a = make_table "TA" ["x", "y"] ["INTEGER", "VARCHAR"]
a.insert [0, "foo"]
a.insert [1, "bar"]
a.insert [7, "baz"]
a.insert [3, "spam"]
a.insert [6, "eggs"]
b = make_table "TB" ["w", "z"] ["INTEGER", "VARCHAR"]
b.insert [6, "foo"]
b.insert [3, "foo"]
b.insert [5, "bar"]
b.insert [5, "spam"]
b.insert [3, "bar"]
b.insert [3, "eggs"]
a = connection.upload_table "TA" <| Materialized_Table.new [["x", [0, 1, 7, 3, 6]], ["y", ["foo", "bar", "baz", "spam", "eggs"]]]
b = connection.upload_table "TB" <| Materialized_Table.new [["w", [6, 3, 5, 5, 3, 3]], ["z", ["foo", "foo", "bar", "spam", "bar", "eggs"]]]
## The tests below use `sort`, because the SQL backend is not guaranteed
to return the rows in any particular order. This is the `sort` from
the Dataframes library, so it is independent of the library under test.
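A minimal illustration of that determinization, using the `a` table uploaded above (the expected vectors follow from its data; `to_dataframe` materializes the result before the in-memory `sort` runs):

    df = a.to_dataframe . sort by="y"
    df.at "y" . to_vector . should_equal ["bar", "baz", "eggs", "foo", "spam"]
    df.at "x" . to_vector . should_equal [1, 7, 6, 0, 3]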
@ -174,15 +148,9 @@ spec prefix connection pending=Nothing =
r_2.columns.map .name . should_equal ['y_old', 'y_new']
Test.specify "should correctly handle multi-joins" <|
ta = make_table "M_TA" ["id", "name"] ["INTEGER", "VARCHAR"]
tb = make_table "M_TB" ["id", "name"] ["INTEGER", "VARCHAR"]
tc = make_table "M_TC" ["id_a", "id_b"] ["INTEGER", "INTEGER"]
ta.insert [0, "Foo"]
ta.insert [1, "Hmm"]
tb.insert [2, "Bar"]
tb.insert [0, "Hmm"]
tc.insert [0, 2]
tc.insert [1, 0]
ta = connection.upload_table "M_TA" <| Materialized_Table.new [["id", [0, 1]], ["name", ["Foo", "Hmm"]]]
tb = connection.upload_table "M_TB" <| Materialized_Table.new [["id", [2, 0]], ["name", ["Bar", "Hmm"]]]
tc = connection.upload_table "M_TC" <| Materialized_Table.new [["id_a", [0, 1]], ["id_b", [2, 0]]]
ta_2 = ta.set_index "id"
tb_2 = tb.set_index "id"
res = (tc.join ta_2 on="id_a") . join tb_2 on="id_b" left_suffix="_a" right_suffix="_b"
@ -192,12 +160,8 @@ spec prefix connection pending=Nothing =
df . at "name_b" . to_vector . should_equal ["Bar", "Hmm"]
Test.group prefix+"Missing Values" pending=pending <|
t4 = make_table "T4" ["a", "b", "c"] ["INT", "BOOLEAN", "VARCHAR"]
t4.insert [0, True, ""]
t4.insert [1, Nothing, "foo"]
t4.insert [Nothing, True, "bar"]
t4.insert [42, False, Nothing]
t4.insert [Nothing, Nothing, Nothing]
t4 = connection.upload_table "T4" <|
Materialized_Table.new [["a", [0, 1, Nothing, 42, Nothing]], ["b", [True, Nothing, True, False, Nothing]], ["c", ["", "foo", "bar", Nothing, Nothing]]]
Test.specify "fill_missing should replace nulls" <|
t4.at 'a' . fill_missing 10 . to_vector . should_equal [0, 1, 10, 42, 10]
t4.at 'b' . fill_missing False . to_vector . should_equal [True, False, True, False, False]
@ -221,10 +185,8 @@ spec prefix connection pending=Nothing =
d.at 'c' . to_vector . should_equal [""]
Test.specify "drop_missing_columns should drop columns that contain at least one missing row in a Table" <|
t5 = make_table "T5" ["a", "b"] ["INT", "BOOLEAN", "VARCHAR"]
t5.insert [1, True, "foo"]
t5.insert [2, False, Nothing]
t5.insert [3, Nothing, "aaa"]
t5 = connection.upload_table "T5" <|
Materialized_Table.new [["a", [1, 2, 3]], ["b", [True, False, Nothing]], ["c", ["foo", Nothing, "aaa"]]]
r = t5.drop_missing_columns
r.columns.map .name . should_equal ["a"]
@ -234,15 +196,9 @@ spec prefix connection pending=Nothing =
empty.columns.length . should_equal 0
empty.to_dataframe.columns.length . should_equal 0
Test.group prefix+"Aggregation" pending=pending <|
t = make_table "T6" ['name', 'price', 'quantity'] ['VARCHAR', 'DOUBLE PRECISION', 'INTEGER']
t.insert ["foo", 0.4, 10]
t.insert ["bar", 3.5, 20]
t.insert ["foo", Nothing, 30]
t.insert ["baz", 6.7, 40]
t.insert ["foo", Nothing, 50]
t.insert ["bar", 97, 60]
t.insert ["quux", Nothing, 70]
Test.group prefix+"Old Aggregation" pending=pending <|
t = connection.upload_table "T6" <|
Materialized_Table.new [["name", ["foo", "bar", "foo", "baz", "foo", "bar", "quux"]], ["price", [0.4, 3.5, Nothing, 6.7, Nothing, 97, Nothing]], ["quantity", [10, 20, 30, 40, 50, 60, 70]]]
agg = t.group by='name'
## A helper which makes sure that the groups are ordered according to the index, using the Table library
determinize col =
@ -277,24 +233,17 @@ spec prefix connection pending=Nothing =
Test.group prefix+"Column-wide statistics" pending=pending <|
Test.specify 'should allow computing basic column-wide stats' <|
t7 = make_table "T7" ['price'] ['DOUBLE PRECISION']
t7 = connection.upload_table "T7" <|
Materialized_Table.new [['price', [0.4, 3.5, Nothing, 6.7, Nothing, 97, Nothing]]]
price = t7.at 'price'
[0.4, 3.5, Nothing, 6.7, Nothing, 97, Nothing] . each x->
t7.insert [x]
price.sum.should_equal 107.6
price.min.should_equal 0.4
price.max.should_equal 97
price.mean.should_equal 26.9
Test.group prefix+"Sorting" pending=pending <|
df = make_table "clothes" ['id', 'name', 'quantity', 'rating', 'price'] ['INTEGER', 'VARCHAR', 'INTEGER', 'DOUBLE PRECISION', 'DOUBLE PRECISION']
df.insert [1,'shoes',20,3.0,37.2]
df.insert [2,'trousers',10,Nothing,42.1]
df.insert [3,'dress',20,7.3,64.1]
df.insert [4,'skirt',10,3.0,87.4]
df.insert [5,'blouse',30,2.2,13.5]
df.insert [6,'t-shirt',30,Nothing,64.2]
df = connection.upload_table "clothes" <|
Materialized_Table.new [["id", [1,2,3,4,5,6]], ["name", ["shoes","trousers","dress","skirt","blouse","t-shirt"]], ["quantity", [20,10,20,10,30,30]], ["rating", [3.0,Nothing,7.3,3.0,2.2,Nothing]], ["price", [37.2,42.1,64.1,87.4,13.5,64.2]]]
Test.specify "should allow sorting by a single column name" <|
r_1 = df.sort by="quantity"
@ -331,18 +280,13 @@ spec prefix connection pending=Nothing =
r.should_fail_with No_Such_Column_Error
Test.specify 'should correctly reorder all kinds of columns and leave the original columns untouched' <|
df = make_table "T8" ['ord', 'ints', 'reals', 'bools', 'texts'] ['INTEGER', 'INTEGER', 'DOUBLE PRECISION', 'BOOLEAN', 'VARCHAR']
r = df.sort by='ord'
df.insert [0, 1, 1.3, False, "foo"]
df.insert [3, 2, 4.6, False, "foo"]
df.insert [2, 3, 3.2, True, "bar"]
df.insert [4, 4, 5.2, True, "baz"]
df.insert [1, 5, 1.6, False, "spam"]
ints = [1, 2, 3, 4, 5]
reals = [1.3, 4.6, 3.2, 5.2, 1.6]
bools = [False, False, True, True, False]
texts = ["foo", "foo", "bar", "baz", "spam"]
df = connection.upload_table "T8" <|
Materialized_Table.new [["ord", [0,3,2,4,1]], ["ints", ints], ["reals", reals], ["bools", bools], ["texts", texts]]
r = df.sort by='ord'
r.at 'ints' . to_vector . should_equal [1, 5, 3, 2, 4]
df.at 'ints' . to_vector . should_equal ints
@ -369,10 +313,8 @@ spec prefix connection pending=Nothing =
r_3.to_vector.should_equal [Nothing, Nothing, 7.3, 3.0, 3.0, 2.2]
Test.group prefix+"Index" pending=pending <|
t0 = make_table "Tix" ['ix', 'c1'] ['INTEGER', 'INTEGER']
t0.insert [1, 4]
t0.insert [2, 5]
t0.insert [3, 6]
t0 = connection.upload_table "Tix" <|
Materialized_Table.new [["ix", [1,2,3]], ["c1", [4,5,6]]]
t = t0.set_index 'ix'
Test.specify "should be accessible by `at` like other columns" <|
t.at 'ix' . to_vector . should_equal t.index.to_vector
@ -386,5 +328,88 @@ spec prefix connection pending=Nothing =
df_col.to_vector . should_equal vec
df_col.index.to_vector . should_equal [1, 2, 3]
tables = ["T1", "T2", "T3", "T4", "T5", "T6", "T7", "T8", "TA", "TB", "Big", "clothes", "M_TA", "M_TB", "M_TC", "Tix"]
Test.group prefix+"Aggregation" pending=pending <|
builders = [Vector.new_builder,Vector.new_builder,Vector.new_builder]
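# `insert` appends one row: `zip` pairs each builder with the matching element of `v` and applies `.append` to each pair.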
insert v =
builders.zip v .append
insert ["foo", 0.4, 50]
insert ["foo", 0.2, 10]
insert ["foo", 0.4, 30]
insert ["bar", 3.5, 20]
insert ["foo", Nothing, 20]
insert ["baz", 6.7, 40]
insert ["foo", Nothing, 10]
insert ["bar", 97, 60]
insert ["quux", Nothing, 70]
insert ["zzzz", Nothing, Nothing]
insert ["zzzz", 1, 1]
insert ["zzzz", 0, 0]
insert ["zzzz", 0, 1]
insert ["zzzz", 1, 0]
insert ["zzzz", 0, 0]
insert ["zzzz", Nothing, Nothing]
t = connection.upload_table "T9" <|
Materialized_Table.new [["name", builders.at 0 . to_vector], ["price", builders.at 1 . to_vector], ["quantity", builders.at 2 . to_vector]]
## A helper which makes sure that the groups in a materialized
(InMemory) table are ordered according to a specified column or list
of columns.
determinize_by order_column table =
table.sort by=order_column
Test.specify "should allow counting group sizes and elements" <|
aggregates = [Count Nothing, Count_Not_Nothing "price", Count_Nothing "price"]
t1 = determinize_by "name" (t.aggregate ([Group_By "name"] + aggregates) . to_dataframe)
t1.at "name" . to_vector . should_equal ["bar", "baz", "foo", "quux", "zzzz"]
t1.at "Count" . to_vector . should_equal [2, 1, 5, 1, 7]
t1.at "Count Not Nothing price" . to_vector . should_equal [2, 1, 3, 0, 5]
t1.at "Count Nothing price" . to_vector . should_equal [0, 0, 2, 1, 2]
t2 = t.aggregate aggregates . to_dataframe
t2.at "Count" . to_vector . should_equal [16]
t2.at "Count Not Nothing price" . to_vector . should_equal [11]
t2.at "Count Nothing price" . to_vector . should_equal [5]
Test.specify "should allow to count distinct values" <|
aggregates = [Count_Distinct "quantity", Count_Distinct "price" (ignore_nothing=True), Count_Distinct "price" (ignore_nothing=False)]
t1 = determinize_by "name" (t.aggregate [Group_By "name"]+aggregates . to_dataframe)
t1.at "name" . to_vector . should_equal ["bar", "baz", "foo", "quux", "zzzz"]
# t1.at "Count Distinct quantity" . to_vector . should_equal [2, 1, 3, 0]
# TODO
t2 = t.aggregate aggregates . to_dataframe
t2 . at "Count Distinct quantity" . to_vector . should_equal [10]
t2 . at "Count Distinct price" . to_vector . should_equal [7]
#t2 . at "Count Distinct price 2" . to_vector . should_equal [8]
Test.specify "should allow to count distinct values over multiple fields" pending="TODO" <|
aggregates = [Count_Distinct ["price", "quantity"]]
t1 = determinize_by "name" (t.aggregate [Group_By "name"]+aggregates . to_dataframe)
t1.at "name" . to_vector . should_equal ["bar", "baz", "foo", "quux", "zzzz"]
# t1.at "Count Distinct quantity" . to_vector . should_equal [2, 1, 3, 0]
# TODO
t2 = t.aggregate aggregates . to_dataframe
t2 . at "Count Distinct price quantity" . to_vector . should_equal [13]
Test.specify "should allow simple arithmetic aggregations" <|
aggregates = [Sum "price" Nothing, Sum "quantity" Nothing, Average "price" Nothing]
## TODO can check the datatypes
t1 = determinize_by "name" (t.aggregate ([Group_By "name" Nothing] + aggregates) . to_dataframe)
t1.at "name" . to_vector . should_equal ["bar", "baz", "foo", "quux", "zzzz"]
t1.at "Sum price" . to_vector . should_equal [100.5, 6.7, 1, Nothing, 2]
t1.at "Sum quantity" . to_vector . should_equal [80, 40, 120, 70, 2]
t1.at "Average price" . to_vector . should_equal [50.25, 6.7, (1/3), Nothing, (2/5)]
t2 = t.aggregate aggregates . to_dataframe
t2.at "Sum price" . to_vector . should_equal [110.2]
t2.at "Sum quantity" . to_vector . should_equal [312]
t2.at "Average price" . to_vector . should_equal [(110.2 / 11)]
tables = ["T1", "T2", "T3", "T4", "T5", "T6", "T7", "T8", "TA", "TB", "T9", "Big", "clothes", "M_TA", "M_TB", "M_TC", "Tix"]
tables.each clean_table

View File

@ -0,0 +1,16 @@
from Standard.Base import all
import Standard.Test
import project.Database.Codegen_Spec
import project.Database.Sqlite_Spec
import project.Database.Postgresql_Spec
import project.Database.Redshift_Spec
databases_spec =
Codegen_Spec.spec
Sqlite_Spec.spec
Postgresql_Spec.spec
Redshift_Spec.spec
main = Test.Suite.run_main here.databases_spec

View File

@ -0,0 +1,82 @@
from Standard.Base import all
import Standard.Base.System.Environment
from Standard.Database import all
from Standard.Database.Connection.Connection import Sql_Error
import Standard.Test
import Standard.Table as Materialized_Table
import project.Database.Common_Spec
import project.Common_Table_Spec
import project.Aggregate_Spec
postgres_specific_spec connection pending =
Test.group "[PostgreSQL] Info" pending=pending <|
connection.execute_update 'CREATE TABLE "Tinfo" ("strs" VARCHAR, "ints" INTEGER, "bools" BOOLEAN, "reals" REAL, "doubles" DOUBLE PRECISION)'
t = connection.access_table "Tinfo"
t.insert ["a", Nothing, False, 1.2, 0.000000000001]
t.insert ["abc", Nothing, Nothing, 1.3, Nothing]
t.insert ["def", 42, True, 1.4, 10]
Test.specify "should return Table information" <|
i = t.info
i.index . to_vector . should_equal ["strs", "ints", "bools", "reals", "doubles"]
i.at "Items Count" . to_vector . should_equal [3, 1, 2, 3, 2]
i.at "SQL Type" . to_vector . should_equal ["varchar", "int4", "bool", "float4", "float8"]
Test.specify "should infer standard types correctly" <|
t.at "strs" . sql_type . is_definitely_text . should_be_true
t.at "ints" . sql_type . is_definitely_integer . should_be_true
t.at "bools" . sql_type . is_definitely_boolean . should_be_true
t.at "reals" . sql_type . is_definitely_double . should_be_true
connection.execute_update 'DROP TABLE "Tinfo"'
run_tests connection pending=Nothing =
prefix = "[PostgreSQL] "
name_counter = Ref.new 0
tables = Vector.new_builder
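# Each column description received from Common_Table_Spec is a [name, type, values] triple; only the name and the values are used when building the in-memory table to upload.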
table_builder columns =
ix = Ref.get name_counter
Ref.put name_counter ix+1
name = "table_"+ix.to_text
in_mem_table = Materialized_Table.new <| columns.map description-> [description.at 0, description.at 2]
table = connection.upload_table name in_mem_table
tables.append name
table
clean_tables table_names =
table_names.each name->
sql = 'DROP TABLE "' + name + '"'
Panic.rethrow <| connection.execute_update sql
Common_Spec.spec prefix connection pending=pending
here.postgres_specific_spec connection pending=pending
Common_Table_Spec.spec prefix table_builder supports_case_sensitive_columns=True pending=pending
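# All feature flags are off for now: aggregates not yet implemented for this backend are reported as pending ("Not supported by ...") rather than failing.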
selection = Aggregate_Spec.Test_Selection problem_handling=False advanced_stats=False text=False first_last=False std_dev=False multi_distinct=False
agg_in_memory_table = (Enso_Project.data / "data.csv") . read_csv
agg_table = connection.upload_table "AggT1" agg_in_memory_table
tables.append agg_table.name
empty_agg_table = connection.upload_table "AggT2" (agg_in_memory_table.take_start 0)
tables.append empty_agg_table.name
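# `materialize` is the function `.to_dataframe`: the shared aggregate tests run against the database table and call it to pull results into memory before asserting on values.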
materialize = .to_dataframe
Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table materialize selection pending=pending
clean_tables tables.to_vector
spec =
db_name = Environment.get "ENSO_DATABASE_TEST_DB_NAME"
db_host = Environment.get "ENSO_DATABASE_TEST_HOST"
db_user = Environment.get "ENSO_DATABASE_TEST_DB_USER"
db_password = Environment.get "ENSO_DATABASE_TEST_DB_PASSWORD"
case db_name.is_nothing of
True ->
message = "PostgreSQL test database is not configured. See README.md for instructions."
connection = Error.throw message
here.run_tests connection pending=message
False ->
url = case db_host.is_nothing of
True -> "postgresql:" + db_name
False -> "postgresql://" + db_host + "/" + db_name
connection = Database.connect url user=db_user password=db_password
here.run_tests connection
main = Test.Suite.run_main here.spec

View File

@ -4,7 +4,10 @@ import Standard.Base.System.Environment
from Standard.Database import all
from Standard.Database.Connection.Connection import Sql_Error
import Standard.Test
import project.Common_Spec
import Standard.Table as Materialized_Table
import project.Database.Common_Spec
import project.Common_Table_Spec
import project.Aggregate_Spec
redshift_specific_spec connection pending =
Test.group "[Redshift] Info" pending=pending <|
@ -25,6 +28,39 @@ redshift_specific_spec connection pending =
t.at "reals" . sql_type . is_definitely_double . should_be_true
connection.execute_update 'DROP TABLE "Tinfo"'
run_tests connection pending=Nothing =
prefix = "[Redshift] "
name_counter = Ref.new 0
tables = Vector.new_builder
table_builder columns =
ix = Ref.get name_counter
Ref.put name_counter ix+1
name = "table_"+ix.to_text
in_mem_table = Materialized_Table.new <| columns.map description-> [description.at 0, description.at 2]
table = connection.upload_table name in_mem_table
tables.append name
table
clean_tables table_names =
table_names.each name->
sql = 'DROP TABLE "' + name + '"'
Panic.rethrow <| connection.execute_update sql
Common_Spec.spec prefix connection pending=pending
here.redshift_specific_spec connection pending=pending
Common_Table_Spec.spec prefix table_builder supports_case_sensitive_columns=True pending=pending
selection = Aggregate_Spec.Test_Selection problem_handling=False advanced_stats=False text=False first_last=False std_dev=False multi_distinct=False
agg_in_memory_table = (Enso_Project.data / "data.csv") . read_csv
agg_table = connection.upload_table "AggT1" agg_in_memory_table
tables.append agg_table.name
empty_agg_table = connection.upload_table "AggT2" (agg_in_memory_table.take_start 0)
tables.append empty_agg_table.name
materialize = .to_dataframe
Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table materialize selection pending=pending
clean_tables tables.to_vector
spec =
credentials = Enso_Project.data / 'redshift_credentials.json'
case credentials.exists of
@ -36,13 +72,10 @@ spec =
user = creds.get 'db_user'
props = [access_key, secret_key]
connection = Database.connect uri user=user custom_properties=props
Common_Spec.spec "[Redshift] " connection Nothing
here.redshift_specific_spec connection Nothing
here.run_tests connection
False ->
msg = """
Redshift connection is not set up. Please create a JSON file containing
the credentials in `data/redshift_credentials.json`
connection = Error.throw msg
Common_Spec.spec "[Redshift] " connection msg
here.redshift_specific_spec connection msg
here.run_tests connection pending=msg

View File

@ -3,7 +3,10 @@ from Standard.Base import all
from Standard.Database import all
from Standard.Database.Connection.Connection import Sql_Error
import Standard.Test
import project.Common_Spec
import Standard.Table as Materialized_Table
import project.Database.Common_Spec
import project.Common_Table_Spec
import project.Aggregate_Spec
sqlite_specific_spec connection =
Test.group "[SQLite] Error Handling" <|
@ -31,6 +34,11 @@ sqlite_specific_spec connection =
t.at "ints" . sql_type . is_definitely_integer . should_be_true
t.at "bools" . sql_type . is_definitely_boolean . should_be_true
t.at "reals" . sql_type . is_definitely_double . should_be_true
t.at "ints" . sql_type . is_definitely_text . should_be_false
t.at "strs" . sql_type . is_definitely_integer . should_be_false
t.at "reals" . sql_type . is_definitely_boolean . should_be_false
t.at "bools" . sql_type . is_definitely_double . should_be_false
connection.execute_update 'DROP TABLE "Tinfo"'
spec =
@ -38,7 +46,29 @@ spec =
file = Enso_Project.data / "sqlite_test.db"
file.delete_if_exists
connection = Database.open_sqlite_file file
Common_Spec.spec "[SQLite] " connection
prefix = "[SQLite] "
name_counter = Ref.new 0
table_builder columns =
ix = Ref.get name_counter
Ref.put name_counter ix+1
name = "table_"+ix.to_text
in_mem_table = Materialized_Table.new <| columns.map description-> [description.at 0, description.at 2]
connection.upload_table name in_mem_table
Common_Spec.spec prefix connection
here.sqlite_specific_spec connection
Common_Table_Spec.spec prefix table_builder supports_case_sensitive_columns=False
selection = Aggregate_Spec.Test_Selection problem_handling=False advanced_stats=False text=False first_last=False std_dev=False multi_distinct=False
agg_in_memory_table = (Enso_Project.data / "data.csv") . read_csv
agg_table = connection.upload_table "AggT1" agg_in_memory_table
empty_agg_table = connection.upload_table "AggT2" (agg_in_memory_table.take_start 0)
materialize = .to_dataframe
Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table materialize selection
connection.close
file.delete
main = Test.Suite.run_main here.spec

View File

@ -1,42 +0,0 @@
from Standard.Base import all
from Standard.Database import all
import Standard.Test
import project.Common_Table_Spec
sqlite_spec =
Enso_Project.data.create_directory
file = Enso_Project.data / "sqlite_test.db"
file.delete_if_exists
connection = Database.open_sqlite_file file
name_counter = Ref.new 0
table_builder columns =
ix = Ref.get name_counter
Ref.put name_counter ix+1
name = "table_"+ix.to_text
quote x = '"' + x + '"'
# TODO this is a hack with no sanitization, just for testing; it should be removed when proper create table is supported by the library
column_definitions = columns.map col->
name = col.first
typ = case col.second of
Integer -> "INT"
_ -> Panic.throw "The provided type "+col.second+" is not currently supported by the test suite. It may need to be extended."
quote name + " " + typ
sql = "CREATE TABLE " + quote name + " (" + (column_definitions.join ", ") + ")"
Panic.rethrow <| connection.execute_update sql
table = Panic.rethrow <| connection.access_table name
row_number = columns.first.at 2 . length
0.up_to row_number . each ix->
row = columns.map col-> col.at 2 . at ix
table.insert row
table
Common_Table_Spec.spec "[SQLite] " table_builder supports_case_sensitive_columns=False
connection.close
file.delete
main = Test.Suite.run_main here.sqlite_spec

View File

@ -0,0 +1,24 @@
from Standard.Base import all
import Standard.Test
import project.Model_Spec
import project.Column_Spec
import project.Csv_Spec
import project.Json_Spec
import project.Table_Spec
import project.Spreadsheet_Spec
import project.Aggregate_Column_Spec
import project.Aggregate_Spec
in_memory_spec =
Column_Spec.spec
Csv_Spec.spec
Json_Spec.spec
Spreadsheet_Spec.spec
Table_Spec.spec
Model_Spec.spec
Aggregate_Column_Spec.spec
Aggregate_Spec.spec
main = Test.Suite.run_main here.in_memory_spec

View File

@ -2,23 +2,9 @@ from Standard.Base import all
import Standard.Test
import project.Database_Spec
import project.Model_Spec
import project.Column_Spec
import project.Csv_Spec
import project.Json_Spec
import project.Table_Spec
import project.Spreadsheet_Spec
import project.Aggregate_Column_Spec
import project.Aggregate_Spec
import project.In_Memory_Tests
import project.Database.Main as Database_Tests
main = Test.Suite.run_main <|
Column_Spec.spec
Csv_Spec.spec
Json_Spec.spec
Spreadsheet_Spec.spec
Table_Spec.spec
Database_Spec.sqlite_spec
Model_Spec.spec
Aggregate_Column_Spec.spec
Aggregate_Spec.spec
In_Memory_Tests.in_memory_spec
Database_Tests.databases_spec

View File

@ -638,7 +638,7 @@ spec =
table_builder columns =
Table.new <| columns.map description-> [description.at 0, description.at 2]
Common_Table_Spec.spec "" table_builder supports_case_sensitive_columns=True
Common_Table_Spec.spec "[In-Memory] " table_builder supports_case_sensitive_columns=True
Test.group "Use First Row As Names" <|
expect_column_names names table =