mirror of
https://github.com/enso-org/enso.git
synced 2024-12-23 10:42:05 +03:00
Aggregates in the Database library - MVP (#3353)
Implements infrastructure for new aggregations in the Database. It comes with only some basic aggregations and limited error handling. More aggregations and problem handling will be added in subsequent PRs. # Important Notes This introduces basic aggregations using our existing codegen and sets up our testing infrastructure so that the database backends can use the same aggregate tests as the in-memory backend. Many aggregations are not yet implemented - they will be added in subsequent tasks. There are some TODOs left - they will be addressed in the next tasks.
This commit is contained in:
parent
bec7a58a28
commit
20be5516a5
4
.github/workflows/scala.yml
vendored
4
.github/workflows/scala.yml
vendored
@ -278,7 +278,6 @@ jobs:
|
||||
run: |
|
||||
$ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Tests
|
||||
$ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Table_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Database_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Geo_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Visualization_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Image_Tests
|
||||
@ -304,7 +303,6 @@ jobs:
|
||||
run: |
|
||||
$ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Tests
|
||||
$ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Table_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Database_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Geo_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Visualization_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Image_Tests
|
||||
@ -315,7 +313,6 @@ jobs:
|
||||
run: |
|
||||
$ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Tests
|
||||
$ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Table_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Database_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Geo_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Visualization_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Image_Tests
|
||||
@ -341,7 +338,6 @@ jobs:
|
||||
run: |
|
||||
$ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Tests
|
||||
$ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Table_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Database_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Geo_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Visualization_Tests
|
||||
$ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Image_Tests
|
||||
|
@ -5,8 +5,9 @@ import Standard.Database.Data.Internal.IR
|
||||
import Standard.Database.Data.Sql
|
||||
import Standard.Database.Data.Table as Database_Table
|
||||
import Standard.Table.Data.Table as Materialized_Table
|
||||
import Standard.Table.Data.Storage
|
||||
import Standard.Table.Internal.Java_Exports
|
||||
|
||||
import Standard.Database.Data.Internal.Base_Generator
|
||||
from Standard.Database.Data.Sql import Sql_Type
|
||||
|
||||
polyglot java import java.lang.UnsupportedOperationException
|
||||
@ -26,7 +27,8 @@ type Connection
|
||||
A Database connection using a JDBC driver.
|
||||
|
||||
Arguments:
|
||||
- java_connection: the resource managing the underlying JDBC connection.
|
||||
- connection_resource: the resource managing the underlying JDBC
|
||||
connection.
|
||||
- dialect: the dialect associated with the database we are connected to.
|
||||
|
||||
Allows accessing tables from a database.
|
||||
@ -40,7 +42,7 @@ type Connection
|
||||
Arguments:
|
||||
- name: name of the table to access
|
||||
access_table : Text -> Database_Table
|
||||
access_table name = here.wrap_sql_errors <|
|
||||
access_table name = here.handle_sql_errors <|
|
||||
columns = this.fetch_columns name
|
||||
Database_Table.make_table this name columns
|
||||
|
||||
@ -62,7 +64,7 @@ type Connection
|
||||
- expected_types: an optional array of expected types of each column;
|
||||
meant only for internal use.
|
||||
execute_query : Text | Sql.Statement -> Vector Sql.Sql_Type -> Materialized_Table =
|
||||
execute_query query expected_types=Nothing = here.wrap_sql_errors <|
|
||||
execute_query query expected_types=Nothing = here.handle_sql_errors <|
|
||||
Resource.bracket (this.prepare_statement query) .close stmt->
|
||||
rs = stmt.executeQuery
|
||||
metadata = rs.getMetaData
|
||||
@ -94,8 +96,9 @@ type Connection
|
||||
- query: either raw SQL code as Text or an instance of Sql.Statement
|
||||
representing the query to execute.
|
||||
execute_update : Text | Sql.Statement -> Integer
|
||||
execute_update query = here.wrap_sql_errors <|
|
||||
execute_update query = here.handle_sql_errors <|
|
||||
Resource.bracket (this.prepare_statement query) .close stmt->
|
||||
## FIXME USE CATCH HERE!
|
||||
result = Panic.recover Any stmt.executeLargeUpdate
|
||||
result.catch err-> case err of
|
||||
Polyglot_Error exc ->
|
||||
@ -115,15 +118,9 @@ type Connection
|
||||
prepare_statement query =
|
||||
go template holes=[] = Managed_Resource.with this.connection_resource java_connection->
|
||||
stmt = java_connection.prepareStatement template
|
||||
setup_error = Panic.recover Any <|
|
||||
holes.map_with_index ix-> obj->
|
||||
position = ix + 1
|
||||
case obj.first of
|
||||
Nothing -> stmt.setNull position obj.second.typeid
|
||||
_ -> stmt.setObject position obj.first
|
||||
setup_error.catch error->
|
||||
Panic.catch Any (here.set_statement_values stmt holes) caught_panic->
|
||||
stmt.close
|
||||
Panic.throw error
|
||||
Panic.throw caught_panic
|
||||
stmt
|
||||
case query of
|
||||
Text -> go query []
|
||||
@ -154,6 +151,52 @@ type Connection
|
||||
[name, Sql_Type typeid typename]
|
||||
Vector.new ncols resolve_column
|
||||
|
||||
## PRIVATE
|
||||
UNSTABLE
|
||||
This is a prototype function used in our test suites. It may change.
|
||||
|
||||
It creates a new table in the database with the given name (will fail if
|
||||
the table already existed), inserts the contents of the provided
|
||||
in-memory table and returns a handle to the newly created table.
|
||||
upload_table : Text -> Materialized_Table -> Integer -> Database_Table
|
||||
upload_table name table batch_size=1000 = Panic.recover Illegal_State_Error <| here.handle_sql_errors <|
|
||||
column_types = table.columns.map col-> here.default_storage_type col.storage_type
|
||||
column_names = table.columns.map .name
|
||||
col_makers = column_names.zip column_types name-> typ->
|
||||
Base_Generator.wrap_in_quotes name ++ Sql.code " " ++ Sql.code typ.name
|
||||
create_sql = (Sql.code "CREATE TABLE " ++ Base_Generator.wrap_in_quotes name ++ Sql.code " (" ++ (Sql.join ", " col_makers) ++ Sql.code ")").build
|
||||
Panic.rethrow <| this.execute_update create_sql
|
||||
db_table = Panic.rethrow <| this.access_table name
|
||||
|
||||
pairs = db_table.internal_columns.map col->
|
||||
[col.name, IR.Constant col.sql_type Nothing]
|
||||
db_types = pairs.map p-> p.second.sql_type
|
||||
insert_query = this.dialect.generate_sql <| IR.Insert name pairs
|
||||
insert_template = insert_query.prepare.first
|
||||
Managed_Resource.with this.connection_resource java_connection->
|
||||
default_autocommit = java_connection.getAutoCommit
|
||||
java_connection.setAutoCommit False
|
||||
Resource.bracket Nothing (_ -> java_connection.setAutoCommit default_autocommit) _->
|
||||
Resource.bracket (java_connection.prepareStatement insert_template) .close stmt->
|
||||
num_rows = table.row_count
|
||||
columns = table.columns
|
||||
check_rows updates_array expected_size =
|
||||
updates = Vector.Vector updates_array
|
||||
if updates.length != expected_size then Panic.throw <| Illegal_State_Error "The batch update unexpectedly affected "+updates.length.to_text+" rows instead of "+expected_size.to_text+"." else
|
||||
updates.each affected_rows->
|
||||
if affected_rows != 1 then
|
||||
Panic.throw <| Illegal_State_Error "A single update within the batch unexpectedly affected "+affected_rows.to_text+" rows."
|
||||
0.up_to num_rows . each row_id->
|
||||
values = columns.map col-> col.at row_id
|
||||
holes = values.zip db_types
|
||||
here.set_statement_values stmt holes
|
||||
stmt.addBatch
|
||||
if (row_id+1 % batch_size) == 0 then check_rows stmt.executeBatch batch_size
|
||||
if (num_rows % batch_size) != 0 then check_rows stmt.executeBatch (num_rows % batch_size)
|
||||
java_connection.commit
|
||||
db_table
|
||||
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Creates a builder for a column based on a provided SQL type, trying to infer
|
||||
@ -265,7 +308,7 @@ Unsupported_Dialect.to_display_text =
|
||||
- url: The URL to connect to.
|
||||
- properties: A vector of properties for the connection.
|
||||
create_jdbc_connection : Text -> Vector -> Connection
|
||||
create_jdbc_connection url properties = here.wrap_sql_errors <|
|
||||
create_jdbc_connection url properties = here.handle_sql_errors <|
|
||||
java_props = Properties.new
|
||||
properties.each pair->
|
||||
java_props.setProperty pair.first pair.second
|
||||
@ -300,13 +343,17 @@ type Sql_Error
|
||||
|
||||
Arguments:
|
||||
- java_exception: The underlying exception.
|
||||
type Sql_Error java_exception
|
||||
- related_query (optional): A string representation of a query that this
|
||||
error is related to.
|
||||
type Sql_Error java_exception related_query=Nothing
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Convert the SQL error to a textual representation.
|
||||
to_text : Text
|
||||
to_text = "There was an SQL error: " + this.java_exception.getMessage.to_text + "."
|
||||
to_text =
|
||||
# Append the related query only when one was recorded; the suffix must be built from `this.related_query` (the local `query` is the value being defined and cannot refer to itself).
query = if this.related_query.is_nothing.not then " [Query was: " + this.related_query + "]" else ""
|
||||
"There was an SQL error: " + this.java_exception.getMessage.to_text + "." + query
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
@ -322,13 +369,17 @@ type Sql_Timeout_Error
|
||||
|
||||
Arguments:
|
||||
- java_exception: The underlying exception.
|
||||
type Sql_Timeout_Error java_exception
|
||||
- related_query (optional): A string representation of a query that this
|
||||
error is related to.
|
||||
type Sql_Timeout_Error java_exception related_query=Nothing
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Convert the timeout error to a textual representation.
|
||||
to_text : Text
|
||||
to_text = "The SQL connection timed out: " + this.java_exception.getMessage + "."
|
||||
to_text =
|
||||
# Append the related query only when one was recorded; the suffix must be built from `this.related_query` (the local `query` is the value being defined and cannot refer to itself).
query = if this.related_query.is_nothing.not then " [Query was: " + this.related_query + "]" else ""
|
||||
"The SQL connection timed out: " + this.java_exception.getMessage + "." + query
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
@ -339,16 +390,50 @@ type Sql_Timeout_Error
|
||||
## PRIVATE
|
||||
|
||||
Executes `action` and returns its result, catching any panics and if they are
|
||||
coming from JDBC, wraps them with our own error types.
|
||||
coming from JDBC, wraps them with our own error types and returns as regular
|
||||
data-flow errors.
|
||||
|
||||
Arguments:
|
||||
- action: The computation to execute. This computation may throw SQL errors.
|
||||
wrap_sql_errors : Any -> Any ! Error
|
||||
wrap_sql_errors ~action =
|
||||
result = Panic.recover Any action
|
||||
result.catch err-> case err of
|
||||
Polyglot_Error exc ->
|
||||
transformed = if Java.is_instance exc SQLTimeoutException then Sql_Timeout_Error exc else
|
||||
if Java.is_instance exc SQLException then Sql_Error exc else err
|
||||
Error.throw transformed
|
||||
_ -> Error.throw err
|
||||
handle_sql_errors : Any -> Any ! (Sql_Error | Sql_Timeout_Error)
|
||||
handle_sql_errors ~action =
|
||||
# Runs `action` through `wrap_sql_errors` and passes only the `Sql_Error` and `Sql_Timeout_Error` panic types to `Panic.recover`.
Panic.recover [Sql_Error, Sql_Timeout_Error] <|
|
||||
here.wrap_sql_errors action
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Executes `action` and returns its result, converting any SQL exceptions into
|
||||
Enso panics.
|
||||
|
||||
Arguments:
|
||||
- action: The computation to execute. This computation may throw SQL errors.
|
||||
- related_query: A related query that is currently being run, to enrich the
|
||||
error metadata.
|
||||
wrap_sql_errors : Any -> (Text | Nothing) -> Any ! (Sql_Error | Sql_Timeout_Error)
|
||||
wrap_sql_errors ~action related_query=Nothing =
|
||||
# Intercepts panics matching the Java `SQLException` type and rethrows them as library panics carrying `related_query`.
Panic.catch SQLException action caught_panic->
|
||||
# The underlying Java exception is read from the `cause` of the panic payload.
exc = caught_panic.payload.cause
|
||||
# Timeouts are checked first so they get the dedicated `Sql_Timeout_Error`; everything else becomes `Sql_Error`.
case Java.is_instance exc SQLTimeoutException of
|
||||
True -> Panic.throw (Sql_Timeout_Error exc related_query)
|
||||
False -> Panic.throw (Sql_Error exc related_query)
|
||||
|
||||
## PRIVATE
|
||||
Returns the default database type corresponding to an in-memory storage
|
||||
type.
|
||||
default_storage_type : Storage.Type -> Sql_Type
|
||||
# Maps each in-memory storage type to the SQL type used when creating a database column for it.
# NOTE(review): there is no fallback branch; only the five storage types listed here are handled.
default_storage_type storage_type = case storage_type of
|
||||
Storage.Text -> Sql_Type.text
|
||||
Storage.Integer -> Sql_Type.integer
|
||||
Storage.Decimal -> Sql_Type.double
|
||||
Storage.Boolean -> Sql_Type.boolean
|
||||
Storage.Any -> Sql_Type.blob
|
||||
|
||||
## PRIVATE
|
||||
Sets values inside of a prepared statement.
|
||||
set_statement_values : PreparedStatement -> Vector (Pair Any Sql_Type) -> Nothing
|
||||
# Binds every hole to `stmt`; each hole is read through `.first` (the value) and `.second` (its SQL type).
set_statement_values stmt holes =
|
||||
holes.map_with_index ix-> obj->
|
||||
# JDBC parameter positions start at 1, so the index is shifted by one.
position = ix + 1
|
||||
case obj.first of
|
||||
# A missing value is bound with `setNull`, using the type id of the hole's SQL type.
Nothing -> stmt.setNull position obj.second.typeid
|
||||
_ -> stmt.setObject position obj.first
|
||||
|
@ -510,7 +510,7 @@ type Column
|
||||
as_internal : IR.Internal_Column
|
||||
as_internal = IR.Internal_Column this.name this.sql_type this.expression
|
||||
|
||||
type Aggregate_Column
|
||||
type Aggregate_Column_Builder
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
@ -524,10 +524,10 @@ type Aggregate_Column
|
||||
- context: The SQL context in which the column exists.
|
||||
|
||||
Allows performing aggregation operations on the contained values.
|
||||
# type Aggregate_Column (name : Text) (connection : Connection)
|
||||
# type Aggregate_Column_Builder (name : Text) (connection : Connection)
|
||||
# (sql_type : Sql_Type) (expression : IR.Expression)
|
||||
# (context : IR.Context)
|
||||
type Aggregate_Column name connection sql_type expression context
|
||||
type Aggregate_Column_Builder name connection sql_type expression context
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
|
@ -1,7 +1,11 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import Standard.Base.Error.Extensions as Errors
|
||||
import Standard.Table.Data.Aggregate_Column
|
||||
import Standard.Database.Data.Sql
|
||||
import Standard.Database.Data.Internal.Base_Generator
|
||||
import Standard.Database.Data.Dialect.Postgres
|
||||
import Standard.Database.Data.Dialect.Redshift
|
||||
import Standard.Database.Data.Dialect.Sqlite as Sqlite_Module
|
||||
|
||||
## PRIVATE
|
||||
|
||||
@ -10,103 +14,39 @@ import Standard.Database.Data.Internal.Base_Generator
|
||||
It encapsulates dialect-specific code generation details allowing us to
|
||||
support differing SQL dialects.
|
||||
type Dialect
|
||||
## PRIVATE
|
||||
This is a fake constructor to make the compiler accept this type
|
||||
definition. It can and should be removed once interface definitions are
|
||||
allowed.
|
||||
type Dialect
|
||||
## PRIVATE
|
||||
Name of the dialect.
|
||||
name : Text
|
||||
name = Errors.unimplemented "This is an interface only."
|
||||
|
||||
|
||||
## PRIVATE
|
||||
A function which generates SQL code from the internal representation
|
||||
according to the specific dialect.
|
||||
generate_sql : Query -> Sql.Statement
|
||||
generate_sql = Errors.unimplemented "This is an interface only."
|
||||
|
||||
Represents a specific SQL dialect.
|
||||
## PRIVATE
|
||||
Deduces the result type for an aggregation operation.
|
||||
|
||||
Arguments:
|
||||
- name: name of the dialect.
|
||||
- generate_sql: a function which generates SQL code from the internal
|
||||
representation according to the specific dialect.
|
||||
|
||||
It encapsulates dialect-specific code generation details allowing us to
|
||||
support differing SQL dialects.
|
||||
# type Dialect (name : Text) (generate_sql : Query -> Sql.Statement)
|
||||
type Dialect name generate_sql
|
||||
|
||||
## PRIVATE
|
||||
|
||||
The dialect of PostgreSQL databases.
|
||||
postgresql : Dialect
|
||||
postgresql =
|
||||
starts_with arguments =
|
||||
case arguments.length == 2 of
|
||||
True ->
|
||||
str = arguments.at 0
|
||||
sub = arguments.at 1
|
||||
res = str ++ (Sql.code " LIKE CONCAT(") ++ sub ++ (Sql.code ", '%')")
|
||||
res.paren
|
||||
False ->
|
||||
Error.throw ("Invalid amount of arguments for operation starts_with")
|
||||
ends_with arguments =
|
||||
case arguments.length == 2 of
|
||||
True ->
|
||||
str = arguments.at 0
|
||||
sub = arguments.at 1
|
||||
res = str ++ (Sql.code " LIKE CONCAT('%', ") ++ sub ++ (Sql.code ")")
|
||||
res.paren
|
||||
False ->
|
||||
Error.throw ("Invalid amount of arguments for operation ends_with")
|
||||
contains arguments =
|
||||
case arguments.length == 2 of
|
||||
True ->
|
||||
str = arguments.at 0
|
||||
sub = arguments.at 1
|
||||
res = str ++ (Sql.code " LIKE CONCAT('%', ") ++ sub ++ (Sql.code ", '%')")
|
||||
res.paren
|
||||
False ->
|
||||
Error.throw ("Invalid amount of arguments for operation contains")
|
||||
my_mappings = [["starts_with", starts_with], ["contains", contains], ["ends_with", ends_with]]
|
||||
dialect = Base_Generator.base_dialect . extend_with my_mappings
|
||||
Dialect "postgresql" (query -> Base_Generator.generate_query dialect query . build)
|
||||
|
||||
## PRIVATE
|
||||
|
||||
The dialect of SQLite databases.
|
||||
sqlite : Dialect
|
||||
sqlite =
|
||||
starts_with arguments =
|
||||
case arguments.length == 2 of
|
||||
True ->
|
||||
str = arguments.at 0
|
||||
sub = arguments.at 1
|
||||
res = str ++ (Sql.code " LIKE (") ++ sub ++ (Sql.code " || '%')")
|
||||
res.paren
|
||||
False ->
|
||||
Error.throw ("Invalid amount of arguments for operation starts_with")
|
||||
ends_with arguments =
|
||||
case arguments.length == 2 of
|
||||
True ->
|
||||
str = arguments.at 0
|
||||
sub = arguments.at 1
|
||||
res = str ++ (Sql.code " LIKE ('%' || ") ++ sub ++ (Sql.code ")")
|
||||
res.paren
|
||||
False ->
|
||||
Error.throw ("Invalid amount of arguments for operation ends_with")
|
||||
contains arguments =
|
||||
case arguments.length == 2 of
|
||||
True ->
|
||||
str = arguments.at 0
|
||||
sub = arguments.at 1
|
||||
res = str ++ (Sql.code " LIKE ('%' || ") ++ sub ++ (Sql.code " || '%')")
|
||||
res.paren
|
||||
False ->
|
||||
Error.throw ("Invalid amount of arguments for operation contains")
|
||||
my_mappings = [["starts_with", starts_with], ["contains", contains], ["ends_with", ends_with]]
|
||||
dialect = Base_Generator.base_dialect . extend_with my_mappings
|
||||
Dialect "sqlite" (query -> Base_Generator.generate_query dialect query . build)
|
||||
|
||||
## PRIVATE
|
||||
|
||||
The dialect for Redshift connections.
|
||||
redshift : Dialect
|
||||
redshift = case here.postgresql of
|
||||
Dialect _ builder -> Dialect "redshift" builder
|
||||
The provided aggregate is assumed to contain only already resolved columns.
|
||||
You may need to transform it with `resolve_columns` first.
|
||||
resolve_target_sql_type : Aggregate_Column -> Sql_Type
|
||||
resolve_target_sql_type = Errors.unimplemented "This is an interface only."
|
||||
|
||||
## PRIVATE
|
||||
|
||||
A vector of SQL dialects supported by the Database library.
|
||||
supported_dialects : Vector Dialect
|
||||
supported_dialects = [here.postgresql, here.sqlite, here.redshift]
|
||||
supported_dialects = [Postgres.postgresql, Sqlite_Module.sqlite, Redshift.redshift]
|
||||
|
||||
## PRIVATE
|
||||
|
||||
The dialect of SQLite databases.
|
||||
sqlite : Dialect
|
||||
sqlite = Sqlite_Module.sqlite
|
||||
|
@ -0,0 +1,124 @@
|
||||
from Standard.Base import all
|
||||
|
||||
from Standard.Table.Data.Aggregate_Column import all
|
||||
from Standard.Database.Data.Sql import Sql_Type
|
||||
import Standard.Database.Data.Dialect
|
||||
import Standard.Database.Data.Internal.Base_Generator
|
||||
|
||||
## PRIVATE
|
||||
|
||||
The dialect of PostgreSQL databases.
|
||||
postgresql : Dialect
|
||||
postgresql =
|
||||
Postgresql_Dialect here.make_internal_generator_dialect
|
||||
|
||||
|
||||
## PRIVATE
|
||||
|
||||
The dialect of PostgreSQL databases.
|
||||
type Postgresql_Dialect
|
||||
## PRIVATE
|
||||
|
||||
The dialect of PostgreSQL databases.
|
||||
type Postgresql_Dialect internal_generator_dialect
|
||||
|
||||
## PRIVATE
|
||||
Name of the dialect.
|
||||
name : Text
|
||||
name = "postgresql"
|
||||
|
||||
## PRIVATE
|
||||
A function which generates SQL code from the internal representation
|
||||
according to the specific dialect.
|
||||
generate_sql : Query -> Sql.Statement
|
||||
generate_sql query =
|
||||
Base_Generator.generate_query this.internal_generator_dialect query . build
|
||||
|
||||
## PRIVATE
|
||||
Deduces the result type for an aggregation operation.
|
||||
|
||||
The provided aggregate is assumed to contain only already resolved columns.
|
||||
You may need to transform it with `resolve_columns` first.
|
||||
resolve_target_sql_type : Aggregate_Column -> Sql_Type
|
||||
resolve_target_sql_type aggregate = here.resolve_target_sql_type aggregate
|
||||
|
||||
## PRIVATE
|
||||
make_internal_generator_dialect =
|
||||
starts_with arguments =
|
||||
case arguments.length == 2 of
|
||||
True ->
|
||||
str = arguments.at 0
|
||||
sub = arguments.at 1
|
||||
res = str ++ (Sql.code " LIKE CONCAT(") ++ sub ++ (Sql.code ", '%')")
|
||||
res.paren
|
||||
False ->
|
||||
Error.throw ("Invalid amount of arguments for operation starts_with")
|
||||
ends_with arguments =
|
||||
case arguments.length == 2 of
|
||||
True ->
|
||||
str = arguments.at 0
|
||||
sub = arguments.at 1
|
||||
res = str ++ (Sql.code " LIKE CONCAT('%', ") ++ sub ++ (Sql.code ")")
|
||||
res.paren
|
||||
False ->
|
||||
Error.throw ("Invalid amount of arguments for operation ends_with")
|
||||
contains arguments =
|
||||
case arguments.length == 2 of
|
||||
True ->
|
||||
str = arguments.at 0
|
||||
sub = arguments.at 1
|
||||
res = str ++ (Sql.code " LIKE CONCAT('%', ") ++ sub ++ (Sql.code ", '%')")
|
||||
res.paren
|
||||
False ->
|
||||
Error.throw ("Invalid amount of arguments for operation contains")
|
||||
text = [["starts_with", starts_with], ["contains", contains], ["ends_with", ends_with]]
|
||||
counts = [here.agg_count_is_null, here.agg_count_empty, here.agg_count_not_empty, here.agg_count_distinct_including_nulls]
|
||||
stats = [here.agg_median]
|
||||
my_mappings = text + counts + stats
|
||||
Base_Generator.base_dialect . extend_with my_mappings
|
||||
|
||||
## PRIVATE
|
||||
The provided aggregate is assumed to contain only already resolved columns.
|
||||
You may need to transform it with `resolve_columns` first.
|
||||
resolve_target_sql_type aggregate = case aggregate of
|
||||
Group_By c _ -> c.sql_type
|
||||
Count _ -> Sql_Type.bigint
|
||||
Count_Distinct _ _ _ -> Sql_Type.bigint
|
||||
Count_Not_Nothing _ _ -> Sql_Type.bigint
|
||||
Count_Nothing _ _ -> Sql_Type.bigint
|
||||
Count_Not_Empty _ _ -> Sql_Type.bigint
|
||||
Count_Empty _ _ -> Sql_Type.bigint
|
||||
Percentile _ _ _ -> Sql_Type.double
|
||||
Mode c _ -> c.sql_type
|
||||
First c _ _ _ -> c.sql_type
|
||||
Last c _ _ _ -> c.sql_type
|
||||
Maximum c _ -> c.sql_type
|
||||
Minimum c _ -> c.sql_type
|
||||
Shortest c _ -> c.sql_type
|
||||
Longest c _ -> c.sql_type
|
||||
Standard_Deviation _ _ _ -> Sql_Type.double
|
||||
Concatenate _ _ _ _ _ _ -> Sql_Type.text
|
||||
## TODO [RW] revise these
|
||||
Sum _ _ -> Sql_Type.numeric # TODO can also be bigint, real, double
|
||||
Average _ _ -> Sql_Type.numeric # TODO can be double sometimes
|
||||
Median _ _ -> Sql_Type.numeric # TODO can be double sometimes
|
||||
|
||||
## PRIVATE
|
||||
agg_count_is_null = Base_Generator.lift_unary_op "COUNT_IS_NULL" arg->
|
||||
# The CASE yields 1 only where the argument IS NULL (and NULL otherwise), so COUNT ends up counting the NULL rows.
Sql.code "COUNT(CASE WHEN " ++ arg.paren ++ Sql.code " IS NULL THEN 1 END)"
|
||||
|
||||
## PRIVATE
|
||||
agg_count_empty = Base_Generator.lift_unary_op "COUNT_EMPTY" arg->
|
||||
# Counts rows where the argument is NULL or equal to the empty string ''.
Sql.code "COUNT(CASE WHEN (" ++ arg.paren ++ Sql.code " IS NULL) OR (" ++ arg.paren ++ Sql.code " = '') THEN 1 END)"
|
||||
|
||||
## PRIVATE
|
||||
agg_count_not_empty = Base_Generator.lift_unary_op "COUNT_NOT_EMPTY" arg->
|
||||
# Counts rows where the argument is neither NULL nor the empty string ''.
Sql.code "COUNT(CASE WHEN (" ++ arg.paren ++ Sql.code " IS NOT NULL) AND (" ++ arg.paren ++ Sql.code " != '') THEN 1 END)"
|
||||
|
||||
## PRIVATE
|
||||
agg_count_distinct_including_nulls = Base_Generator.lift_unary_op "COUNT_DISTINCT_INCLUDE_NULL" arg->
|
||||
# COUNT(DISTINCT ...) ignores NULLs, so 1 is added when at least one NULL row exists, treating NULL as one extra distinct value.
Sql.code "(COUNT(DISTINCT " ++ arg.paren ++ Sql.code ") + CASE WHEN COUNT(CASE WHEN " ++ arg.paren ++ Sql.code " IS NULL THEN 1 END) > 0 THEN 1 ELSE 0 END)"
|
||||
|
||||
## PRIVATE
|
||||
agg_median = Base_Generator.lift_unary_op "MEDIAN" arg->
|
||||
# The median is expressed as the 0.5 continuous percentile, using the ordered-set aggregate `percentile_cont`.
Sql.code "percentile_cont(0.5) WITHIN GROUP (ORDER BY " ++ arg ++ Sql.code ")"
|
@ -0,0 +1,44 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import Standard.Table.Data.Aggregate_Column
|
||||
import Standard.Database.Data.Sql
|
||||
import Standard.Database.Data.Dialect
|
||||
import Standard.Database.Data.Dialect.Postgres
|
||||
import Standard.Database.Data.Internal.Base_Generator
|
||||
|
||||
## PRIVATE
|
||||
|
||||
The dialect for Redshift connections.
|
||||
redshift : Dialect
|
||||
redshift =
|
||||
Redshift_Dialect Postgres.make_internal_generator_dialect
|
||||
|
||||
## PRIVATE
|
||||
|
||||
The dialect for Redshift connections.
|
||||
type Redshift_Dialect
|
||||
## PRIVATE
|
||||
|
||||
The dialect for Redshift connections.
|
||||
type Redshift_Dialect internal_generator_dialect
|
||||
|
||||
## PRIVATE
|
||||
Name of the dialect.
|
||||
name : Text
|
||||
name = "redshift"
|
||||
|
||||
## PRIVATE
|
||||
A function which generates SQL code from the internal representation
|
||||
according to the specific dialect.
|
||||
generate_sql : Query -> Sql.Statement
|
||||
generate_sql query =
|
||||
Base_Generator.generate_query this.internal_generator_dialect query . build
|
||||
|
||||
## PRIVATE
|
||||
Deduces the result type for an aggregation operation.
|
||||
|
||||
The provided aggregate is assumed to contain only already resolved columns.
|
||||
You may need to transform it with `resolve_columns` first.
|
||||
resolve_target_sql_type : Aggregate_Column -> Sql_Type
|
||||
resolve_target_sql_type aggregate =
|
||||
Postgres.resolve_target_sql_type aggregate
|
@ -0,0 +1,118 @@
|
||||
from Standard.Base import all
|
||||
|
||||
from Standard.Table.Data.Aggregate_Column import all
|
||||
from Standard.Database.Data.Sql import Sql_Type
|
||||
import Standard.Database.Data.Dialect
|
||||
import Standard.Database.Data.Internal.Base_Generator
|
||||
|
||||
## PRIVATE
|
||||
|
||||
The dialect of SQLite databases.
|
||||
sqlite : Dialect
|
||||
sqlite =
|
||||
Sqlite_Dialect here.make_internal_generator_dialect
|
||||
|
||||
## PRIVATE
|
||||
|
||||
The dialect of SQLite databases.
|
||||
type Sqlite_Dialect
|
||||
## PRIVATE
|
||||
|
||||
The dialect of SQLite databases.
|
||||
type Sqlite_Dialect internal_generator_dialect
|
||||
|
||||
## PRIVATE
|
||||
Name of the dialect.
|
||||
name : Text
|
||||
name = "sqlite"
|
||||
|
||||
## PRIVATE
|
||||
A function which generates SQL code from the internal representation
|
||||
according to the specific dialect.
|
||||
generate_sql : Query -> Sql.Statement
|
||||
generate_sql query =
|
||||
Base_Generator.generate_query this.internal_generator_dialect query . build
|
||||
|
||||
## PRIVATE
|
||||
Deduces the result type for an aggregation operation.
|
||||
|
||||
The provided aggregate is assumed to contain only already resolved columns.
|
||||
You may need to transform it with `resolve_columns` first.
|
||||
resolve_target_sql_type : Aggregate_Column -> Sql_Type
|
||||
resolve_target_sql_type aggregate = here.resolve_target_sql_type aggregate
|
||||
|
||||
## PRIVATE
|
||||
make_internal_generator_dialect =
|
||||
starts_with arguments =
|
||||
case arguments.length == 2 of
|
||||
True ->
|
||||
str = arguments.at 0
|
||||
sub = arguments.at 1
|
||||
res = str ++ (Sql.code " LIKE (") ++ sub ++ (Sql.code " || '%')")
|
||||
res.paren
|
||||
False ->
|
||||
Error.throw ("Invalid amount of arguments for operation starts_with")
|
||||
ends_with arguments =
|
||||
case arguments.length == 2 of
|
||||
True ->
|
||||
str = arguments.at 0
|
||||
sub = arguments.at 1
|
||||
res = str ++ (Sql.code " LIKE ('%' || ") ++ sub ++ (Sql.code ")")
|
||||
res.paren
|
||||
False ->
|
||||
Error.throw ("Invalid amount of arguments for operation ends_with")
|
||||
contains arguments =
|
||||
case arguments.length == 2 of
|
||||
True ->
|
||||
str = arguments.at 0
|
||||
sub = arguments.at 1
|
||||
res = str ++ (Sql.code " LIKE ('%' || ") ++ sub ++ (Sql.code " || '%')")
|
||||
res.paren
|
||||
False ->
|
||||
Error.throw ("Invalid amount of arguments for operation contains")
|
||||
text = [["starts_with", starts_with], ["contains", contains], ["ends_with", ends_with]]
|
||||
counts = [here.agg_count_is_null, here.agg_count_empty, here.agg_count_not_empty, here.agg_count_distinct_including_nulls]
|
||||
my_mappings = text + counts
|
||||
Base_Generator.base_dialect . extend_with my_mappings
|
||||
|
||||
## PRIVATE
|
||||
The provided aggregate is assumed to contain only already resolved columns.
|
||||
You may need to transform it with `resolve_columns` first.
|
||||
resolve_target_sql_type aggregate = case aggregate of
|
||||
Group_By c _ -> c.sql_type
|
||||
Count _ -> Sql_Type.integer
|
||||
Count_Distinct _ _ _ -> Sql_Type.integer
|
||||
Count_Not_Nothing _ _ -> Sql_Type.integer
|
||||
Count_Nothing _ _ -> Sql_Type.integer
|
||||
Count_Not_Empty _ _ -> Sql_Type.integer
|
||||
Count_Empty _ _ -> Sql_Type.integer
|
||||
Percentile _ _ _ -> Sql_Type.real
|
||||
Mode c _ -> c.sql_type
|
||||
First c _ _ _ -> c.sql_type
|
||||
Last c _ _ _ -> c.sql_type
|
||||
Maximum c _ -> c.sql_type
|
||||
Minimum c _ -> c.sql_type
|
||||
Shortest c _ -> c.sql_type
|
||||
Longest c _ -> c.sql_type
|
||||
Standard_Deviation _ _ _ -> Sql_Type.real
|
||||
Concatenate _ _ _ _ _ _ -> Sql_Type.text
|
||||
## TODO revise these
|
||||
Sum c _ -> c.sql_type
|
||||
Average _ _ -> Sql_Type.real
|
||||
Median _ _ -> Sql_Type.real
|
||||
|
||||
## PRIVATE
|
||||
agg_count_is_null = Base_Generator.lift_unary_op "COUNT_IS_NULL" arg->
|
||||
# Sums the result of the `IS NULL` test over the rows; COALESCE supplies 0 when the SUM itself is NULL.
Sql.code "COALESCE(SUM(" ++ arg.paren ++ Sql.code " IS NULL), 0)"
|
||||
|
||||
## PRIVATE
|
||||
agg_count_empty = Base_Generator.lift_unary_op "COUNT_EMPTY" arg->
|
||||
# Sums the "is NULL or equals ''" test over the rows; COALESCE supplies 0 when the SUM itself is NULL.
Sql.code "COALESCE(SUM((" ++ arg.paren ++ Sql.code " IS NULL) OR (" ++ arg.paren ++ Sql.code " == '')), 0)"
|
||||
|
||||
## PRIVATE
|
||||
agg_count_not_empty = Base_Generator.lift_unary_op "COUNT_NOT_EMPTY" arg->
|
||||
# Sums the "is not NULL and differs from ''" test over the rows; COALESCE supplies 0 when the SUM itself is NULL.
Sql.code "COALESCE(SUM((" ++ arg.paren ++ Sql.code " IS NOT NULL) AND (" ++ arg.paren ++ Sql.code " != '')), 0)"
|
||||
|
||||
## PRIVATE
|
||||
agg_count_distinct_including_nulls = Base_Generator.lift_unary_op "COUNT_DISTINCT_INCLUDE_NULL" arg->
|
||||
# COUNT(DISTINCT ...) ignores NULLs, so 1 is added when the summed `IS NULL` test is positive, treating NULL as one extra distinct value.
Sql.code "(COUNT(DISTINCT " ++ arg.paren ++ Sql.code ") + CASE WHEN SUM(" ++ arg.paren ++ Sql.code " IS NULL) > 0 THEN 1 ELSE 0 END)"
|
@ -0,0 +1,47 @@
|
||||
from Standard.Base import all
|
||||
|
||||
from Standard.Table.Data.Aggregate_Column import all
|
||||
import Standard.Database.Data.Internal.IR
|
||||
from Standard.Database.Data.Sql import Sql_Type
|
||||
|
||||
## Builds the internal column that represents the given aggregate.

   Arguments:
   - table: The table being aggregated; used to derive the column name and to
     reach the dialect of its connection, which decides the result SQL type.
   - aggregate: The aggregate description to convert.
make_aggregate_column : Table -> Aggregate_Column -> IR.Internal_Column
make_aggregate_column table aggregate =
    column_name = aggregate.column_name table
    dialect = table.connection.dialect
    target_type = dialect.resolve_target_sql_type aggregate
    aggregate_expression = here.make_expression aggregate
    IR.Internal_Column column_name target_type aggregate_expression
|
||||
|
||||
## Translates an aggregate into the IR expression that computes it.

   Each aggregate is mapped to a named `IR.Operation` applied to the
   expression of its column. Aggregates that are not implemented yet result
   in an `Illegal_State_Error`.
make_expression : Aggregate_Column -> IR.Expression
make_expression aggregate = case aggregate of
    Group_By column _ -> column.expression
    Count _ -> IR.Operation "COUNT_ROWS" []
    Count_Not_Nothing column _ -> IR.Operation "COUNT" [column.expression]
    Count_Nothing column _ -> IR.Operation "COUNT_IS_NULL" [column.expression]
    Count_Not_Empty column _ -> IR.Operation "COUNT_NOT_EMPTY" [column.expression]
    Count_Empty column _ -> IR.Operation "COUNT_EMPTY" [column.expression]
    Count_Distinct columns _ ignore_nothing -> case columns.length > 1 of
        False ->
            operation_name = case ignore_nothing of
                True -> "COUNT_DISTINCT"
                False -> "COUNT_DISTINCT_INCLUDE_NULL"
            IR.Operation operation_name [columns.first.expression]
        True ->
            ## TODO
            Error.throw (Illegal_State_Error "Multi column distinct is not implemented yet.")
    Sum column _ -> IR.Operation "SUM" [column.expression]
    Average column _ -> IR.Operation "AVG" [column.expression]
    Median column _ -> IR.Operation "MEDIAN" [column.expression]
    Mode column _ -> IR.Operation "MODE" [column.expression]
    Percentile percentile column _ ->
        IR.Operation "PERCENTILE" [IR.Constant Sql_Type.double percentile, column.expression]
    Standard_Deviation column _ population ->
        ## TODO!
        _ = population
        IR.Operation "STD_DEV" [column.expression]
    Maximum column _ -> IR.Operation "MAX" [column.expression]
    Minimum column _ -> IR.Operation "MIN" [column.expression]
    Longest column _ -> IR.Operation "MAX" [column.expression]
    Shortest column _ -> IR.Operation "MIN" [column.expression]
    Concatenate column _ _ _ _ _ -> IR.Operation "CONCAT" [column.expression]
    First _ _ _ _ -> Error.throw (Illegal_State_Error "Not implemented yet.")
    Last _ _ _ _ -> Error.throw (Illegal_State_Error "Not implemented yet.")
|
@ -67,6 +67,22 @@ make_unary_op name =
|
||||
False ->
|
||||
Error.throw ("Invalid amount of arguments for operation " + name)
|
||||
|
||||
## PRIVATE
|
||||
|
||||
A helper function to create a unary operation from a function.
|
||||
|
||||
Arguments:
|
||||
- name: Name of the operation, used for error reporting.
|
||||
- function: A function taking exactly one argument: the generated SQL code
|
||||
for the argument of the operation, and returning the generated SQL code for
|
||||
the whole operation.
|
||||
lift_unary_op : Text -> (Sql.Builder -> Sql.Builder) -> [Text, (Vector Sql.Builder -> Sql.Builder)]
lift_unary_op name function =
    # The wrapper accepts the argument vector and rejects anything but a single argument.
    apply_to_single_argument = args -> case args.length == 1 of
        True -> function (args.at 0)
        False -> Error.throw ("Invalid amount of arguments for operation " + name + ".")
    [name, apply_to_single_argument]
|
||||
|
||||
## PRIVATE
|
||||
|
||||
A helper function to create a unary operator which is added to the right of
|
||||
@ -128,17 +144,22 @@ wrap_in_quotes identifier =
|
||||
It is a base to help creating concrete dialects. It can be extended or
|
||||
completely overridden.
|
||||
base_dialect =
|
||||
bin = here.make_binary_op
|
||||
unary = here.make_unary_op
|
||||
fun = here.make_function
|
||||
arith = [["+", bin "+"], ["-", bin "-"], ["*", bin "*"], ["/", bin "/"]]
|
||||
logic = [["AND", bin "AND"], ["OR", bin "OR"], ["NOT", unary "NOT"]]
|
||||
compare = [["=", bin "="], ["!=", bin "!="], ["<", bin "<"], [">", bin ">"], ["<=", bin "<="], [">=", bin ">="]]
|
||||
agg = [["COUNT", fun "COUNT"], ["MAX", fun "MAX"], ["MIN", fun "MIN"], ["AVG", fun "AVG"], ["SUM", fun "SUM"], ["COUNT_ROWS", here.make_constant "COUNT(*)"]]
|
||||
nulls = [["ISNULL", here.make_right_unary_op "IS NULL"], ["FILLNULL", fun "COALESCE"]]
|
||||
base_map = Map.from_vector (arith + logic + compare + agg + nulls)
|
||||
bin = name -> [name, here.make_binary_op name]
|
||||
unary = name -> [name, here.make_unary_op name]
|
||||
fun = name -> [name, here.make_function name]
|
||||
arith = [bin "+", bin "-", bin "*", bin "/"]
|
||||
logic = [bin "AND", bin "OR", unary "NOT"]
|
||||
compare = [bin "=", bin "!=", bin "<", bin ">", bin "<=", bin ">="]
|
||||
agg = [fun "MAX", fun "MIN", fun "AVG", fun "SUM"]
|
||||
counts = [fun "COUNT", ["COUNT_ROWS", here.make_constant "COUNT(*)"], here.count_distinct]
|
||||
nulls = [["ISNULL", here.make_right_unary_op "IS NULL"], ["FILLNULL", here.make_function "COALESCE"]]
|
||||
base_map = Map.from_vector (arith + logic + compare + agg + nulls + counts)
|
||||
Internal_Dialect base_map here.wrap_in_quotes
|
||||
|
||||
## PRIVATE

   Registers the `COUNT_DISTINCT` operation, which wraps its single argument
   in `COUNT(DISTINCT ...)`.
count_distinct = here.lift_unary_op "COUNT_DISTINCT" operand->
    wrapped = operand.paren
    Sql.code "COUNT(DISTINCT " ++ wrapped ++ Sql.code ")"
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Builds code for an expression.
|
||||
@ -153,7 +174,7 @@ generate_expression dialect expr = case expr of
|
||||
dialect.wrap_identifier origin ++ dot ++ dialect.wrap_identifier name
|
||||
IR.Constant sql_type value -> Sql.interpolation sql_type value
|
||||
IR.Operation kind arguments ->
|
||||
op = dialect.operation_map.get_or_else kind (Error.throw <| Illegal_State_Error "Operation "+op+" is not supported.")
|
||||
op = dialect.operation_map.get_or_else kind (Error.throw <| Illegal_State_Error "Operation "+kind+" is not supported.")
|
||||
parsed_args = arguments.map (here.generate_expression dialect)
|
||||
op parsed_args
|
||||
|
||||
|
@ -67,10 +67,22 @@ type Sql_Type
|
||||
integer : Sql_Type
|
||||
integer = Sql_Type Types.INTEGER "INTEGER"
|
||||
|
||||
## The SQL representation of the `BIGINT` type.
|
||||
bigint : Sql_Type
|
||||
bigint = Sql_Type Types.BIGINT "BIGINT"
|
||||
|
||||
## The SQL type representing decimal numbers.
|
||||
decimal : Sql_Type
|
||||
decimal = Sql_Type Types.DECIMAL "DECIMAL"
|
||||
|
||||
## The SQL type representing floating-point numbers (SQL `REAL`).
|
||||
real : Sql_Type
|
||||
real = Sql_Type Types.REAL "REAL"
|
||||
|
||||
## The SQL type representing double-precision floating-point numbers.
|
||||
double : Sql_Type
|
||||
double = Sql_Type Types.DOUBLE "DOUBLE PRECISION"
|
||||
|
||||
## The SQL type representing a general numeric type.
|
||||
numeric : Sql_Type
|
||||
numeric = Sql_Type Types.NUMERIC "NUMERIC"
|
||||
@ -79,6 +91,17 @@ type Sql_Type
|
||||
varchar : Sql_Type
|
||||
varchar = Sql_Type Types.VARCHAR "VARCHAR"
|
||||
|
||||
## UNSTABLE
|
||||
The SQL type representing one of the supported textual types.
|
||||
|
||||
It seems that JDBC treats the `TEXT` and `VARCHAR` types as interchangeable.
|
||||
text : Sql_Type
|
||||
text = Sql_Type Types.VARCHAR "VARCHAR"
|
||||
|
||||
## The SQL type representing a binary object.
|
||||
blob : Sql_Type
|
||||
blob = Sql_Type Types.BLOB "BLOB"
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Returns True if this type represents an integer.
|
||||
@ -87,7 +110,7 @@ type Sql_Type
|
||||
non-standard ones.
|
||||
is_definitely_integer : Boolean
|
||||
is_definitely_integer =
|
||||
[Types.INTEGER, Types.SMALLINT, Types.TINYINT].contains this.typeid
|
||||
[Types.INTEGER, Types.BIGINT, Types.SMALLINT, Types.TINYINT].contains this.typeid
|
||||
|
||||
## PRIVATE
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import Standard.Database.Data.Internal.Helpers
|
||||
import Standard.Database.Data.Internal.Aggregate_Helper
|
||||
import Standard.Database.Data.Internal.IR
|
||||
import Standard.Database.Data.Sql
|
||||
import Standard.Table.Data.Column as Materialized_Column
|
||||
@ -8,7 +9,8 @@ import Standard.Table.Data.Table as Materialized_Table
|
||||
import Standard.Table.Internal.Java_Exports
|
||||
import Standard.Table.Internal.Table_Helpers
|
||||
|
||||
from Standard.Database.Data.Column as Column_Module import Column, Aggregate_Column
|
||||
import Standard.Table.Data.Aggregate_Column
|
||||
from Standard.Database.Data.Column as Column_Module import Column, Aggregate_Column_Builder
|
||||
from Standard.Database.Data.Internal.IR import Internal_Column
|
||||
from Standard.Table.Data.Table import No_Such_Column_Error
|
||||
from Standard.Table.Data.Order_Rule as Order_Rule_Module import Order_Rule
|
||||
@ -29,7 +31,7 @@ type Table
|
||||
|
||||
Arguments:
|
||||
- name: The name of the table.
|
||||
- connection: The connection with whicg the table is associated.
|
||||
- connection: The connection with which the table is associated.
|
||||
- internal_columns: The internal representation of the table columns.
|
||||
- context: The context associated with this table.
|
||||
# type Table (name : Text) (connection : Connection)
|
||||
@ -458,7 +460,7 @@ type Table
|
||||
rules specified in the `by` argument will default to this setting,
|
||||
unless specified in the rule.
|
||||
- missing_last: Specifies the default placement of missing values when
|
||||
compared to non-missing ones. This setting may be overriden by the
|
||||
compared to non-missing ones. This setting may be overridden by the
|
||||
particular rules of the `by` argument. Note that this argument is
|
||||
independent from `order`, i.e. missing values will always be sorted
|
||||
according to this rule, ignoring the ascending / descending setting.
|
||||
@ -639,6 +641,23 @@ type Table
|
||||
new_ctx = this.context.set_groups exprs . set_index cols
|
||||
Aggregate_Table this.name this.connection this.internal_columns new_ctx
|
||||
|
||||
## Prototype Group By function

   Resolves the requested aggregates against this table, groups by the
   expressions of all `Group_By` aggregates and returns a table whose columns
   are the computed aggregates.

   Arguments:
   - columns: The aggregates to compute.
   - on_problems: Currently ignored; problem handling is not implemented yet.
aggregate : [Aggregate_Column] -> Problem_Behavior -> Table
aggregate columns (on_problems=Report_Warning) =
    ## TODO handle errors here and turn them into warnings where applicable
    _ = on_problems
    resolved = columns.map (aggregate_column-> aggregate_column.resolve_columns this)
    # TODO handling duplicate names etc. is to be done as part of https://www.pivotaltracker.com/story/show/181420794
    # Grouping Key
    is_group_by aggregate_column = case aggregate_column of
        Aggregate_Column.Group_By _ _ -> True
        _ -> False
    grouping_expressions = resolved.filter is_group_by . map (key-> key.column.expression)
    grouped_context = this.context.set_groups grouping_expressions
    aggregated_columns = resolved.map (Aggregate_Helper.make_aggregate_column this)
    this.updated_context_and_columns grouped_context aggregated_columns
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Returns a new Table without rows that contained missing values in any of
|
||||
@ -762,8 +781,8 @@ type Table
|
||||
|
||||
Arguments:
|
||||
- columns: The columns with which to update this table.
|
||||
updated_columns : Vector Colums -> Table
|
||||
updated_columns columns = Table this.name this.connection columns this.context
|
||||
updated_columns : Vector Internal_Column -> Table
|
||||
updated_columns internal_columns = Table this.name this.connection internal_columns this.context
|
||||
|
||||
## PRIVATE
|
||||
|
||||
@ -774,6 +793,16 @@ type Table
|
||||
updated_context : Context -> Table
|
||||
updated_context ctx = Table this.name this.connection this.internal_columns ctx
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Returns a copy of this table with updated context and columns.
|
||||
|
||||
Arguments:
|
||||
- ctx: The new context for this table.
|
||||
- internal_columns: The new columns to include in the table.
|
||||
updated_context_and_columns : Context -> Vector Internal_Column -> Table
|
||||
updated_context_and_columns ctx internal_columns = Table this.name this.connection internal_columns ctx
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Returns a vector that contains first the internal representations of all
|
||||
@ -817,7 +846,7 @@ type Aggregate_Table
|
||||
|
||||
Arguments:
|
||||
- name: The name of the table.
|
||||
- connection: The connection with whicg the table is associated.
|
||||
- connection: The connection with which the table is associated.
|
||||
- internal_columns: The internal representation of the table columns.
|
||||
- context: The context associated with this table.
|
||||
# type Aggregate_Table (name : Text) (connection : Connection)
|
||||
@ -857,9 +886,9 @@ type Aggregate_Table
|
||||
|
||||
Arguments:
|
||||
- internal: The internal column to make into an aggregate column.
|
||||
make_column : Internal_Column -> Aggregate_Column
|
||||
make_column : Internal_Column -> Aggregate_Column_Builder
|
||||
make_column internal =
|
||||
Aggregate_Column internal.name this.connection internal.sql_type internal.expression this.context
|
||||
Aggregate_Column_Builder internal.name this.connection internal.sql_type internal.expression this.context
|
||||
|
||||
## PRIVATE
|
||||
|
||||
|
@ -1,6 +1,9 @@
|
||||
from Standard.Base import all
|
||||
|
||||
from Standard.Table.Data.Column as Column_Module import Column
|
||||
from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column_Selector, By_Name, By_Index, By_Column
|
||||
import Standard.Table.Internal.Table_Helpers
|
||||
import Standard.Base.Error.Problem_Behavior
|
||||
|
||||
## Defines an Aggregate Column
|
||||
type Aggregate_Column
|
||||
@ -20,7 +23,7 @@ type Aggregate_Column
|
||||
- columns: either a single or set of columns (specified by name, index or Column object) to count across.
|
||||
- name: name of new column.
|
||||
- ignore_nothing: if set, an entry whose values are all Nothing won't be included.
|
||||
type Count_Distinct (columns:Column|Text|Integer|[(Column|Text|Integer)]) (new_name:Text|Nothing=Nothing) (ignore_nothing:Boolean=False)
|
||||
type Count_Distinct (columns:Column|Text|Integer|Column_Selector) (new_name:Text|Nothing=Nothing) (ignore_nothing:Boolean=False)
|
||||
|
||||
## ALIAS Count_Not_Null
|
||||
|
||||
@ -192,10 +195,65 @@ type Aggregate_Column
|
||||
Given a column reference resolve to the underlying column
|
||||
resolve_column : Table->(Column|Text|Integer)->Column
|
||||
resolve_column table column =
|
||||
## TODO this should be able to handle problems too!
|
||||
case column of
|
||||
Column _ -> table.at (column.name)
|
||||
Text -> table.at column
|
||||
Integer -> table.columns.at column
|
||||
## A wildcard makes this work both with In-Memory and Database table columns.
|
||||
_ -> table.at (column.name)
|
||||
|
||||
## PRIVATE
|
||||
Returns a copy of this aggregate where all column descriptors (names,
|
||||
indices or column references potentially from a different table) are
|
||||
replaced with column references from the provided table.
|
||||
|
||||
This preprocessing step is required by some helper functions, to avoid having
|
||||
to pass the table reference and resolve the column descriptors all the
|
||||
time.
|
||||
|
||||
If some columns cannot be resolved, a dataflow error will be returned.
|
||||
Higher-level methods can then handle this error by turning it into a
|
||||
warning and ignoring the column.
|
||||
resolve_columns : Table -> Aggregate_Column
resolve_columns table =
    # Resolves a single column descriptor against `table`.
    resolve_one : (Integer|Text|Column) -> Column
    resolve_one descriptor = this.resolve_column table descriptor
    # Resolves a selector to the columns of `table` it selects, reporting problems as errors.
    resolve_many : Column_Selector -> [Column]
    resolve_many selector =
        Table_Helpers.select_columns_helper table.columns selector reorder=False on_problems=Problem_Behavior.Report_Error
    # Like `resolve_many`, but passes `Nothing` through unchanged.
    resolve_many_or_nothing selector = case selector of
        Nothing -> Nothing
        _ -> resolve_many selector
    case this of
        Count out_name -> Count out_name
        Group_By column out_name -> Group_By (resolve_one column) out_name
        Count_Not_Nothing column out_name -> Count_Not_Nothing (resolve_one column) out_name
        Count_Nothing column out_name -> Count_Nothing (resolve_one column) out_name
        Count_Not_Empty column out_name -> Count_Not_Empty (resolve_one column) out_name
        Count_Empty column out_name -> Count_Empty (resolve_one column) out_name
        Count_Distinct selection out_name ignore_nothing ->
            resolved_selection = case selection of
                ## TODO once we have sum type pattern matching this could be replaced with a single branch
                By_Name _ _ -> resolve_many selection
                By_Index _ -> resolve_many selection
                By_Column _ -> resolve_many selection
                ## TODO this is a temporary fix, remove it
                Vector.Vector _ -> selection.map resolve_one
                _ -> [resolve_one selection]
            Count_Distinct resolved_selection out_name ignore_nothing
        Sum column out_name -> Sum (resolve_one column) out_name
        Average column out_name -> Average (resolve_one column) out_name
        Median column out_name -> Median (resolve_one column) out_name
        Mode column out_name -> Mode (resolve_one column) out_name
        Percentile percentile column out_name -> Percentile percentile (resolve_one column) out_name
        Standard_Deviation column out_name population -> Standard_Deviation (resolve_one column) out_name population
        Maximum column out_name -> Maximum (resolve_one column) out_name
        Minimum column out_name -> Minimum (resolve_one column) out_name
        Shortest column out_name -> Shortest (resolve_one column) out_name
        Longest column out_name -> Longest (resolve_one column) out_name
        Concatenate column out_name separator prefix suffix quote_char ->
            Concatenate (resolve_one column) out_name separator prefix suffix quote_char
        First column out_name ignore_nothing order_by ->
            First (resolve_one column) out_name ignore_nothing (resolve_many_or_nothing order_by)
        Last column out_name ignore_nothing order_by ->
            Last (resolve_one column) out_name ignore_nothing (resolve_many_or_nothing order_by)
|
||||
|
||||
|
||||
## Occurs when a column cannot be aggregated
|
||||
|
@ -59,6 +59,7 @@ validate aggregate_columns table =
|
||||
pass_1 = valid_aggregate_columns.map c->(if c.new_name.is_nothing then Nothing else unique.make_unique c.new_name)
|
||||
valid_columns = pass_1.map_with_index i->c->
|
||||
Pair (if c.is_nothing then unique.make_unique (valid_aggregate_columns.at i . column_name table) else c) (valid_aggregate_columns.at i)
|
||||
## TODO resolve the columns
|
||||
|
||||
# Build Problems Output
|
||||
missing_problems = (if missing_names.is_empty then [] else [Missing_Input_Columns missing_names.to_vector])
|
||||
|
@ -51,7 +51,7 @@ prepare_visualization x max_rows = Helpers.recover_errors <| case x of
|
||||
here.prepare_visualization ungrouped.to_table max_rows
|
||||
Database_Table.Aggregate_Table _ _ _ _ ->
|
||||
here.prepare_visualization x.ungrouped max_rows
|
||||
Database_Column.Aggregate_Column _ _ _ _ _ ->
|
||||
Database_Column.Aggregate_Column_Builder _ _ _ _ _ ->
|
||||
here.prepare_visualization x.ungrouped.to_table max_rows
|
||||
|
||||
# TODO [RW] Should we truncate Vectors?
|
||||
|
@ -498,11 +498,11 @@ built runner executable as explained above):
|
||||
enso --run test/Tests # for the Base library
|
||||
enso --run test/Geo_Tests
|
||||
enso --run test/Table_Tests
|
||||
enso --run test/Database_Tests
|
||||
```
|
||||
|
||||
The Database tests will by default only test the SQLite backend, to test other
|
||||
backends see [`test/Database_Tests/README.md`](../test/Database_Tests/README.md)
|
||||
backends see
|
||||
[`test/Table_Tests/src/Database/README.md`](../test/Table_Tests/src/Database/README.md)
|
||||
for information on how to configure them.
|
||||
|
||||
The Base tests rely in a few places on the system language. On Linux you can set
|
||||
|
@ -1964,8 +1964,14 @@ type System
|
||||
- command: The name of the system process.
|
||||
- arguments: An array of arguments to the system process.
|
||||
- input: The input to pass to the process via standard input.
|
||||
create_process : Text -> Array -> Text -> System_Process_Result
|
||||
create_process command arguments input =
|
||||
- redirect_in: Specifies if the standard input of the program should be
|
||||
redirected to the started process.
|
||||
- redirect_out: Specifies if the standard output of the started process
|
||||
should be redirected to the program's standard output.
|
||||
- redirect_err: Specifies if the standard error output of the started
|
||||
process should be redirected to the program's standard error output.
|
||||
create_process : Text -> Array -> Text -> Boolean -> Boolean -> Boolean -> System_Process_Result
|
||||
create_process command arguments input redirect_in redirect_out redirect_err =
|
||||
@Builtin_Method "System.create_process"
|
||||
|
||||
## Exits the Enso program, returning the provided code to the parent
|
||||
|
@ -1,6 +0,0 @@
|
||||
name: Database_Tests
|
||||
version: 0.0.1
|
||||
enso-version: default
|
||||
license: MIT
|
||||
author: enso-dev@enso.org
|
||||
maintainer: enso-dev@enso.org
|
@ -1,13 +0,0 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import Standard.Test
|
||||
import project.Codegen_Spec
|
||||
import project.Sqlite_Spec
|
||||
import project.Postgresql_Spec
|
||||
import project.Redshift_Spec
|
||||
|
||||
main = Test.Suite.run_main <|
|
||||
Codegen_Spec.spec
|
||||
Sqlite_Spec.spec
|
||||
Postgresql_Spec.spec
|
||||
Redshift_Spec.spec
|
@ -1,46 +0,0 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.System.Environment
|
||||
|
||||
from Standard.Database import all
|
||||
from Standard.Database.Connection.Connection import Sql_Error
|
||||
import Standard.Test
|
||||
import project.Common_Spec
|
||||
|
||||
postgres_specific_spec connection pending =
|
||||
Test.group "[PostgreSQL] Info" pending=pending <|
|
||||
connection.execute_update 'CREATE TABLE "Tinfo" ("strs" VARCHAR, "ints" INTEGER, "bools" BOOLEAN, "reals" REAL)'
|
||||
t = connection.access_table "Tinfo"
|
||||
t.insert ["a", Nothing, False, 1.2]
|
||||
t.insert ["abc", Nothing, Nothing, 1.3]
|
||||
t.insert ["def", 42, True, 1.4]
|
||||
Test.specify "should return Table information" <|
|
||||
i = t.info
|
||||
i.index . to_vector . should_equal ["strs", "ints", "bools", "reals"]
|
||||
i.at "Items Count" . to_vector . should_equal [3, 1, 2, 3]
|
||||
i.at "SQL Type" . to_vector . should_equal ["varchar", "int4", "bool", "float4"]
|
||||
Test.specify "should infer standard types correctly" <|
|
||||
t.at "strs" . sql_type . is_definitely_text . should_be_true
|
||||
t.at "ints" . sql_type . is_definitely_integer . should_be_true
|
||||
t.at "bools" . sql_type . is_definitely_boolean . should_be_true
|
||||
t.at "reals" . sql_type . is_definitely_double . should_be_true
|
||||
connection.execute_update 'DROP TABLE "Tinfo"'
|
||||
|
||||
spec =
|
||||
db_name = Environment.get "ENSO_DATABASE_TEST_DB_NAME"
|
||||
db_host = Environment.get "ENSO_DATABASE_TEST_HOST"
|
||||
db_user = Environment.get "ENSO_DATABASE_TEST_DB_USER"
|
||||
db_password = Environment.get "ENSO_DATABASE_TEST_DB_PASSWORD"
|
||||
go connection pending=Nothing =
|
||||
Common_Spec.spec "[PostgreSQL] " connection pending
|
||||
here.postgres_specific_spec connection pending
|
||||
case db_name.is_nothing of
|
||||
True ->
|
||||
message = "PostgreSQL test database is not configured. See README.md for instructions."
|
||||
connection = Error.throw message
|
||||
go connection pending=message
|
||||
False ->
|
||||
url = case db_host.is_nothing of
|
||||
True -> "postgresql:" + db_name
|
||||
False -> "postgresql://" + db_host + "/" + db_name
|
||||
connection = Database.connect url user=db_user password=db_password
|
||||
go connection
|
@ -8,26 +8,47 @@ from Standard.Table.Error as Error_Module import Missing_Input_Columns, Column_I
|
||||
import Standard.Test
|
||||
import Standard.Test.Problems
|
||||
|
||||
type Test_Selection problem_handling=True advanced_stats=True text=True first_last=True std_dev=True multi_distinct=True
|
||||
|
||||
all_tests = Test_Selection True True True True
|
||||
|
||||
## Runs the common aggregate tests against the in-memory backend, using the
   table read from `data.csv` and an empty table with the same column names.
spec =
    csv_text = (Enso_Project.data / "data.csv") . read
    data_table = Table.from_csv csv_text
    empty_columns = data_table.columns.map (column-> [column.name, []])
    no_rows_table = Table.new empty_columns
    # In-memory tables need no materialization, so the helper is the identity.
    identity = value->value
    here.aggregate_spec "[In-Memory] " data_table no_rows_table identity
|
||||
|
||||
## Runs the common aggregate tests.
|
||||
|
||||
Arguments:
|
||||
- prefix: A name to prepend to test groups to identify the tested backend.
|
||||
- table: A table using the tested backend containing data from
|
||||
`data/data.csv`.
|
||||
- empty_table: An empty table using the tested backend.
|
||||
- materialize: A helper function which materializes a table from the tested
|
||||
backend as an in-memory table. Used to easily inspect results of a
|
||||
particular query/operation.
|
||||
- test_selection: A selection of which suites should be run. Can be used to
|
||||
skip checks for backends which do not support particular features.
|
||||
- pending: An optional mark to disable all test groups. Can be used to
|
||||
indicate that some tests are disabled due to missing test setup.
|
||||
aggregate_spec prefix table empty_table materialize test_selection=here.all_tests pending=Nothing =
|
||||
find_row key table (columns=Nothing) =
|
||||
table_columns = if columns.is_nothing then table.columns else columns.map x->(table.columns.at x)
|
||||
0.up_to table.row_count . find i->
|
||||
0.up_to key.length . all j-> (table_columns.at j . at i)==(key.at j)
|
||||
|
||||
Test.group "Table.aggregate should summarize whole table " <|
|
||||
Test.group prefix+"Table.aggregate should summarize whole table" pending=pending <|
|
||||
Test.specify "should be able to count" <|
|
||||
grouped = table.aggregate [Count Nothing]
|
||||
grouped = materialize <| table.aggregate [Count Nothing]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 1
|
||||
grouped.columns.at 0 . name . should_equal "Count"
|
||||
grouped.columns.at 0 . at 0 . should_equal 2500
|
||||
|
||||
Test.specify "should be able to count missing values" <|
|
||||
grouped = table.aggregate [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
|
||||
grouped = materialize <| table.aggregate [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 4
|
||||
grouped.columns.at 0 . name . should_equal "Count Nothing Hexadecimal"
|
||||
@ -40,22 +61,28 @@ spec =
|
||||
grouped.columns.at 3 . at 0 . should_equal 2251
|
||||
|
||||
Test.specify "should be able to count distinct values" <|
|
||||
grouped = table.aggregate [Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag", Count_Distinct ["Index", "Flag"]]
|
||||
grouped = materialize <| table.aggregate [Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag"]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 4
|
||||
grouped.columns.length . should_equal 3
|
||||
grouped.columns.at 0 . name . should_equal "Count Distinct Code"
|
||||
grouped.columns.at 0 . at 0 . should_equal 2333
|
||||
grouped.columns.at 1 . name . should_equal "Count Distinct Index"
|
||||
grouped.columns.at 1 . at 0 . should_equal 10
|
||||
grouped.columns.at 2 . name . should_equal "Count Distinct Flag"
|
||||
grouped.columns.at 2 . at 0 . should_equal 2
|
||||
grouped.columns.at 3 . name . should_equal "Count Distinct Index Flag"
|
||||
grouped.columns.at 3 . at 0 . should_equal 20
|
||||
|
||||
Test.specify "should be able to sum, average and standard deviation of values" <|
|
||||
grouped = table.aggregate [Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)]
|
||||
Test.specify "should be able to count distinct values over multiple columns" (pending=if test_selection.multi_distinct.not then "Not supported by "+prefix) <|
|
||||
## TODO [RW] add Count_Distinct with overridden ignore_nothing! also need to modify data.csv to include some nulls on index and flag
|
||||
grouped = materialize <| table.aggregate [Count_Distinct ["Index", "Flag"]]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 8
|
||||
grouped.columns.length . should_equal 1
|
||||
grouped.columns.at 0 . name . should_equal "Count Distinct Index Flag"
|
||||
grouped.columns.at 0 . at 0 . should_equal 20
|
||||
|
||||
Test.specify "should be able to compute sum and average of values" <|
|
||||
grouped = materialize <| table.aggregate [Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing"]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 4
|
||||
grouped.columns.at 0 . name . should_equal "Sum Value"
|
||||
grouped.columns.at 0 . at 0 . should_equal -932.411550 epsilon=0.000001
|
||||
grouped.columns.at 1 . name . should_equal "Sum ValueWithNothing"
|
||||
@ -64,17 +91,22 @@ spec =
|
||||
grouped.columns.at 2 . at 0 . should_equal -0.372965 epsilon=0.000001
|
||||
grouped.columns.at 3 . name . should_equal "Average ValueWithNothing"
|
||||
grouped.columns.at 3 . at 0 . should_equal 1.228650 epsilon=0.000001
|
||||
grouped.columns.at 4 . name . should_equal "Standard Deviation Value"
|
||||
grouped.columns.at 4 . at 0 . should_equal 56.708660 epsilon=0.000001
|
||||
grouped.columns.at 5 . name . should_equal "Standard Deviation ValueWithNothing"
|
||||
grouped.columns.at 5 . at 0 . should_equal 58.588610 epsilon=0.000001
|
||||
grouped.columns.at 6 . name . should_equal "Standard Deviation Value_1"
|
||||
grouped.columns.at 6 . at 0 . should_equal 56.697317 epsilon=0.000001
|
||||
grouped.columns.at 7 . name . should_equal "Standard Deviation ValueWithNothing_1"
|
||||
grouped.columns.at 7 . at 0 . should_equal 58.575554 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to create median, mode and percentile values" <|
|
||||
grouped = table.aggregate [Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"]
|
||||
Test.specify "should be able to compute standard deviation of values" (pending=if test_selection.std_dev.not then "Not supported by "+prefix) <|
|
||||
grouped = materialize <| table.aggregate [Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 4
|
||||
grouped.columns.at 0 . name . should_equal "Standard Deviation Value"
|
||||
grouped.columns.at 0 . at 0 . should_equal 56.708660 epsilon=0.000001
|
||||
grouped.columns.at 1 . name . should_equal "Standard Deviation ValueWithNothing"
|
||||
grouped.columns.at 1 . at 0 . should_equal 58.588610 epsilon=0.000001
|
||||
grouped.columns.at 2 . name . should_equal "Standard Deviation Value_1"
|
||||
grouped.columns.at 2 . at 0 . should_equal 56.697317 epsilon=0.000001
|
||||
grouped.columns.at 3 . name . should_equal "Standard Deviation ValueWithNothing_1"
|
||||
grouped.columns.at 3 . at 0 . should_equal 58.575554 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to create median, mode and percentile values" (pending=if test_selection.advanced_stats.not then "Not supported by "+prefix) <|
|
||||
grouped = materialize <| table.aggregate [Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 6
|
||||
grouped.columns.at 0 . name . should_equal "Median Index"
|
||||
@ -90,8 +122,8 @@ spec =
|
||||
grouped.columns.at 5 . name . should_equal "40%-ile ValueWithNothing"
|
||||
grouped.columns.at 5 . at 0 . should_equal -17.960000 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to get first and last values" <|
|
||||
grouped = table.aggregate [First "Index", Last "Value"]
|
||||
Test.specify "should be able to get first and last values" (pending=if test_selection.first_last.not then "Not supported by "+prefix) <|
|
||||
grouped = materialize <| table.aggregate [First "Index", Last "Value"]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 2
|
||||
grouped.columns.at 0 . name . should_equal "First Index"
|
||||
@@ -100,7 +132,7 @@ spec =
|
||||
grouped.columns.at 1 . at 0 . should_equal 70.99931 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to get minimum and maximum values" <|
|
||||
grouped = table.aggregate [Minimum "Value", Maximum "Value", Minimum "ValueWithNothing", Maximum "ValueWithNothing"]
|
||||
grouped = materialize <| table.aggregate [Minimum "Value", Maximum "Value", Minimum "ValueWithNothing", Maximum "ValueWithNothing"]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 4
|
||||
grouped.columns.at 0 . name . should_equal "Minimum Value"
|
||||
@@ -112,8 +144,8 @@ spec =
|
||||
grouped.columns.at 3 . name . should_equal "Maximum ValueWithNothing"
|
||||
grouped.columns.at 3 . at 0 . should_equal 99.95 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to get shortest, longest and concatenated values" <|
|
||||
grouped = table.aggregate [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
|
||||
Test.specify "should be able to get shortest, longest and concatenated values" (pending=if test_selection.text.not then "Not supported by "+prefix) <|
|
||||
grouped = materialize <| table.aggregate [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 3
|
||||
grouped.columns.at 0 . name . should_equal "Shortest TextWithNothing"
|
||||
@@ -123,16 +155,16 @@ spec =
|
||||
grouped.columns.at 2 . name . should_equal "Concatenate Code"
|
||||
grouped.columns.at 2 . at 0 . length . should_equal 7500
|
||||
|
||||
Test.group "Table.aggregate should summarize empty table " <|
|
||||
Test.group prefix+"Table.aggregate should summarize empty table" pending=pending <|
|
||||
Test.specify "should be able to count" <|
|
||||
grouped = empty_table.aggregate [Count Nothing]
|
||||
grouped = materialize <| empty_table.aggregate [Count Nothing]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 1
|
||||
grouped.columns.at 0 . name . should_equal "Count"
|
||||
grouped.columns.at 0 . at 0 . should_equal 0
|
||||
|
||||
Test.specify "should be able to count missing values" <|
|
||||
grouped = empty_table.aggregate [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
|
||||
grouped = materialize <| empty_table.aggregate [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 4
|
||||
grouped.columns.at 0 . name . should_equal "Count Nothing Hexadecimal"
|
||||
@@ -145,27 +177,32 @@ spec =
|
||||
grouped.columns.at 3 . at 0 . should_equal 0
|
||||
|
||||
Test.specify "should be able to count distinct values" <|
|
||||
grouped = empty_table.aggregate [Count_Distinct "Code"]
|
||||
grouped = materialize <| empty_table.aggregate [Count_Distinct "Code"]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 1
|
||||
grouped.columns.at 0 . name . should_equal "Count Distinct Code"
|
||||
grouped.columns.at 0 . at 0 . should_equal 0
|
||||
|
||||
Test.specify "should be able to sum, average and standard deviation of values" <|
|
||||
grouped = empty_table.aggregate [Sum "Value", Average "ValueWithNothing", Standard_Deviation "Value", (Standard_Deviation "ValueWithNothing" population=True)]
|
||||
Test.specify "should be able to compute sum and average of values" <|
|
||||
grouped = materialize <| empty_table.aggregate [Sum "Value", Average "ValueWithNothing"]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 4
|
||||
grouped.columns.length . should_equal 2
|
||||
grouped.columns.at 0 . name . should_equal "Sum Value"
|
||||
grouped.columns.at 0 . at 0 . should_equal Nothing
|
||||
grouped.columns.at 1 . name . should_equal "Average ValueWithNothing"
|
||||
grouped.columns.at 1 . at 0 . should_equal Nothing
|
||||
grouped.columns.at 2 . name . should_equal "Standard Deviation Value"
|
||||
grouped.columns.at 2 . at 0 . should_equal Nothing
|
||||
grouped.columns.at 3 . name . should_equal "Standard Deviation ValueWithNothing"
|
||||
grouped.columns.at 3 . at 0 . should_equal Nothing
|
||||
|
||||
Test.specify "should be able to create median, mode and percentile values" <|
|
||||
grouped = empty_table.aggregate [Median "Index", Mode "Index", Percentile 0.25 "Value"]
|
||||
Test.specify "should be able to compute standard deviation of values" (pending=if test_selection.std_dev.not then "Not supported by "+prefix) <|
|
||||
grouped = materialize <| empty_table.aggregate [Standard_Deviation "Value", (Standard_Deviation "ValueWithNothing" population=True)]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 2
|
||||
grouped.columns.at 0 . name . should_equal "Standard Deviation Value"
|
||||
grouped.columns.at 0 . at 0 . should_equal Nothing
|
||||
grouped.columns.at 1 . name . should_equal "Standard Deviation ValueWithNothing"
|
||||
grouped.columns.at 1 . at 0 . should_equal Nothing
|
||||
|
||||
Test.specify "should be able to create median, mode and percentile values" (pending=if test_selection.advanced_stats.not then "Not supported by "+prefix) <|
|
||||
grouped = materialize <| empty_table.aggregate [Median "Index", Mode "Index", Percentile 0.25 "Value"]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 3
|
||||
grouped.columns.at 0 . name . should_equal "Median Index"
|
||||
@@ -175,8 +212,8 @@ spec =
|
||||
grouped.columns.at 2 . name . should_equal "25%-ile Value"
|
||||
grouped.columns.at 2 . at 0 . should_equal Nothing
|
||||
|
||||
Test.specify "should be able to get first and last values" <|
|
||||
grouped = empty_table.aggregate [First "Index", Last "Value"]
|
||||
Test.specify "should be able to get first and last values" (pending=if test_selection.first_last.not then "Not supported by "+prefix) <|
|
||||
grouped = materialize <| empty_table.aggregate [First "Index", Last "Value"]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 2
|
||||
grouped.columns.at 0 . name . should_equal "First Index"
|
||||
@@ -185,7 +222,7 @@ spec =
|
||||
grouped.columns.at 1 . at 0 . should_equal Nothing
|
||||
|
||||
Test.specify "should be able to get minimum and maximum values" <|
|
||||
grouped = empty_table.aggregate [Minimum "Value", Maximum "ValueWithNothing"]
|
||||
grouped = materialize <| empty_table.aggregate [Minimum "Value", Maximum "ValueWithNothing"]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 2
|
||||
grouped.columns.at 0 . name . should_equal "Minimum Value"
|
||||
@@ -193,8 +230,8 @@ spec =
|
||||
grouped.columns.at 1 . name . should_equal "Maximum ValueWithNothing"
|
||||
grouped.columns.at 1 . at 0 . should_equal Nothing
|
||||
|
||||
Test.specify "should be able to get shortest, longest and concatenated values" <|
|
||||
grouped = empty_table.aggregate [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
|
||||
Test.specify "should be able to get shortest, longest and concatenated values" (pending=if test_selection.text.not then "Not supported by "+prefix) <|
|
||||
grouped = materialize <| empty_table.aggregate [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 3
|
||||
grouped.columns.at 0 . name . should_equal "Shortest TextWithNothing"
|
||||
@@ -204,16 +241,16 @@ spec =
|
||||
grouped.columns.at 2 . name . should_equal "Concatenate Code"
|
||||
grouped.columns.at 2 . at 0 . should_equal Nothing
|
||||
|
||||
Test.group "Table.aggregate should not summarize empty table when grouped " <|
|
||||
Test.group prefix+"Table.aggregate should not summarize empty table when grouped" pending=pending <|
|
||||
Test.specify "should be able to count" <|
|
||||
grouped = empty_table.aggregate [Group_By 0, Count Nothing]
|
||||
grouped = materialize <| empty_table.aggregate [Group_By 0, Count Nothing]
|
||||
grouped.row_count . should_equal 0
|
||||
grouped.columns.length . should_equal 2
|
||||
grouped.columns.at 0 . name . should_equal "Code"
|
||||
grouped.columns.at 1 . name . should_equal "Count"
|
||||
|
||||
Test.specify "should be able to count missing values" <|
|
||||
grouped = empty_table.aggregate [Group_By 0, Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
|
||||
grouped = materialize <| empty_table.aggregate [Group_By 0, Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
|
||||
grouped.row_count . should_equal 0
|
||||
grouped.columns.length . should_equal 5
|
||||
grouped.columns.at 0 . name . should_equal "Code"
|
||||
@@ -223,24 +260,30 @@ spec =
|
||||
grouped.columns.at 4 . name . should_equal "Count Not Empty TextWithNothing"
|
||||
|
||||
Test.specify "should be able to count distinct values" <|
|
||||
grouped = empty_table.aggregate [Group_By 0, Count_Distinct "Code"]
|
||||
grouped = materialize <| empty_table.aggregate [Group_By 0, Count_Distinct "Code"]
|
||||
grouped.row_count . should_equal 0
|
||||
grouped.columns.length . should_equal 2
|
||||
grouped.columns.at 0 . name . should_equal "Code"
|
||||
grouped.columns.at 1 . name . should_equal "Count Distinct Code"
|
||||
|
||||
Test.specify "should be able to sum, average and standard deviation of values" <|
|
||||
grouped = empty_table.aggregate [Group_By 0, Sum "Value", Average "ValueWithNothing", Standard_Deviation "Value", (Standard_Deviation "ValueWithNothing" population=True)]
|
||||
Test.specify "should be able to compute sum and average of values" <|
|
||||
grouped = materialize <| empty_table.aggregate [Group_By 0, Sum "Value", Average "ValueWithNothing"]
|
||||
grouped.row_count . should_equal 0
|
||||
grouped.columns.length . should_equal 5
|
||||
grouped.columns.length . should_equal 3
|
||||
grouped.columns.at 0 . name . should_equal "Code"
|
||||
grouped.columns.at 1 . name . should_equal "Sum Value"
|
||||
grouped.columns.at 2 . name . should_equal "Average ValueWithNothing"
|
||||
grouped.columns.at 3 . name . should_equal "Standard Deviation Value"
|
||||
grouped.columns.at 4 . name . should_equal "Standard Deviation ValueWithNothing"
|
||||
|
||||
Test.specify "should be able to create median values" <|
|
||||
grouped = empty_table.aggregate [Group_By 0, Median "Index", Mode "Index", Percentile 0.25 "Value"]
|
||||
Test.specify "should be able to compute standard deviation of values" (pending=if test_selection.std_dev.not then "Not supported by "+prefix) <|
|
||||
grouped = materialize <| empty_table.aggregate [Group_By 0, Standard_Deviation "Value", (Standard_Deviation "ValueWithNothing" population=True)]
|
||||
grouped.row_count . should_equal 0
|
||||
grouped.columns.length . should_equal 3
|
||||
grouped.columns.at 0 . name . should_equal "Code"
|
||||
grouped.columns.at 1 . name . should_equal "Standard Deviation Value"
|
||||
grouped.columns.at 2 . name . should_equal "Standard Deviation ValueWithNothing"
|
||||
|
||||
Test.specify "should be able to create median values" (pending=if test_selection.advanced_stats.not then "Not supported by "+prefix) <|
|
||||
grouped = materialize <| empty_table.aggregate [Group_By 0, Median "Index", Mode "Index", Percentile 0.25 "Value"]
|
||||
grouped.row_count . should_equal 0
|
||||
grouped.columns.length . should_equal 4
|
||||
grouped.columns.at 0 . name . should_equal "Code"
|
||||
@@ -248,8 +291,8 @@ spec =
|
||||
grouped.columns.at 2 . name . should_equal "Mode Index"
|
||||
grouped.columns.at 3 . name . should_equal "25%-ile Value"
|
||||
|
||||
Test.specify "should be able to get first and last values" <|
|
||||
grouped = empty_table.aggregate [Group_By 0, First "Index", Last "Value"]
|
||||
Test.specify "should be able to get first and last values" (pending=if test_selection.first_last.not then "Not supported by "+prefix) <|
|
||||
grouped = materialize <| empty_table.aggregate [Group_By 0, First "Index", Last "Value"]
|
||||
grouped.row_count . should_equal 0
|
||||
grouped.columns.length . should_equal 3
|
||||
grouped.columns.at 0 . name . should_equal "Code"
|
||||
@@ -257,15 +300,15 @@ spec =
|
||||
grouped.columns.at 2 . name . should_equal "Last Value"
|
||||
|
||||
Test.specify "should be able to get minimum and maximum values" <|
|
||||
grouped = empty_table.aggregate [Group_By 0, Minimum "Value", Maximum "ValueWithNothing"]
|
||||
grouped = materialize <| empty_table.aggregate [Group_By 0, Minimum "Value", Maximum "ValueWithNothing"]
|
||||
grouped.row_count . should_equal 0
|
||||
grouped.columns.length . should_equal 3
|
||||
grouped.columns.at 0 . name . should_equal "Code"
|
||||
grouped.columns.at 1 . name . should_equal "Minimum Value"
|
||||
grouped.columns.at 2 . name . should_equal "Maximum ValueWithNothing"
|
||||
|
||||
Test.specify "should be able to get shortest, longest and concatenated values" <|
|
||||
grouped = empty_table.aggregate [Group_By 0, Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
|
||||
Test.specify "should be able to get shortest, longest and concatenated values" (pending=if test_selection.text.not then "Not supported by "+prefix) <|
|
||||
grouped = materialize <| empty_table.aggregate [Group_By 0, Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
|
||||
grouped.row_count . should_equal 0
|
||||
grouped.columns.length . should_equal 4
|
||||
grouped.columns.at 0 . name . should_equal "Code"
|
||||
@@ -273,9 +316,9 @@ spec =
|
||||
grouped.columns.at 2 . name . should_equal "Longest TextWithNothing"
|
||||
grouped.columns.at 3 . name . should_equal "Concatenate Code"
|
||||
|
||||
Test.group "Table.aggregate should be able to group on single field " <|
|
||||
Test.group prefix+"Table.aggregate should be able to group on single field" pending=pending <|
|
||||
Test.specify "should be able to count" <|
|
||||
grouped = table.aggregate [Group_By "Index", Count Nothing]
|
||||
grouped = materialize <| table.aggregate [Group_By "Index", Count Nothing]
|
||||
grouped.row_count . should_equal 10
|
||||
grouped.columns.length . should_equal 2
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
@@ -285,7 +328,7 @@ spec =
|
||||
grouped.columns.at 1 . at idx . should_equal 261
|
||||
|
||||
Test.specify "should be able to count missing values" <|
|
||||
grouped = table.aggregate [Group_By "Index", Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
|
||||
grouped = materialize <| table.aggregate [Group_By "Index", Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
|
||||
grouped.row_count . should_equal 10
|
||||
grouped.columns.length . should_equal 5
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
@@ -301,9 +344,9 @@ spec =
|
||||
grouped.columns.at 4 . at idx . should_equal 230
|
||||
|
||||
Test.specify "should be able to count distinct values" <|
|
||||
grouped = table.aggregate [Group_By "Index", Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag", Count_Distinct ["Index", "Flag"]]
|
||||
grouped = materialize <| table.aggregate [Group_By "Index", Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag"]
|
||||
grouped.row_count . should_equal 10
|
||||
grouped.columns.length . should_equal 5
|
||||
grouped.columns.length . should_equal 4
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
idx = find_row [6] grouped
|
||||
idx.is_nothing . should_be_false
|
||||
@@ -313,13 +356,22 @@ spec =
|
||||
grouped.columns.at 2 . at idx . should_equal 1
|
||||
grouped.columns.at 3 . name . should_equal "Count Distinct Flag"
|
||||
grouped.columns.at 3 . at idx . should_equal 2
|
||||
grouped.columns.at 4 . name . should_equal "Count Distinct Index Flag"
|
||||
grouped.columns.at 4 . at idx . should_equal 2
|
||||
|
||||
Test.specify "should be able to sum, average and standard deviation of values" <|
|
||||
grouped = table.aggregate [Group_By "Index", Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)]
|
||||
Test.specify "should be able to count distinct values over multiple columns" (pending=if test_selection.multi_distinct.not then "Not supported by "+prefix) <|
|
||||
## TODO probably should use different cols for multi-distinct and also should check ignore_nothing
|
||||
grouped = materialize <| table.aggregate [Group_By "Index", Count_Distinct ["Index", "Flag"]]
|
||||
grouped.row_count . should_equal 10
|
||||
grouped.columns.length . should_equal 9
|
||||
grouped.columns.length . should_equal 2
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
idx = find_row [6] grouped
|
||||
idx.is_nothing . should_be_false
|
||||
grouped.columns.at 1 . name . should_equal "Count Distinct Index Flag"
|
||||
grouped.columns.at 1 . at idx . should_equal 2
|
||||
|
||||
Test.specify "should be able to compute sum and average of values" <|
|
||||
grouped = materialize <| table.aggregate [Group_By "Index", Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing"]
|
||||
grouped.row_count . should_equal 10
|
||||
grouped.columns.length . should_equal 5
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
idx = find_row [6] grouped
|
||||
idx.is_nothing . should_be_false
|
||||
@@ -331,17 +383,25 @@ spec =
|
||||
grouped.columns.at 3 . at idx . should_equal -1.715890 epsilon=0.000001
|
||||
grouped.columns.at 4 . name . should_equal "Average ValueWithNothing"
|
||||
grouped.columns.at 4 . at idx . should_equal 0.646213 epsilon=0.000001
|
||||
grouped.columns.at 5 . name . should_equal "Standard Deviation Value"
|
||||
grouped.columns.at 5 . at idx . should_equal 60.272158 epsilon=0.000001
|
||||
grouped.columns.at 6 . name . should_equal "Standard Deviation ValueWithNothing"
|
||||
grouped.columns.at 6 . at idx . should_equal 56.798691 epsilon=0.000001
|
||||
grouped.columns.at 7 . name . should_equal "Standard Deviation Value_1"
|
||||
grouped.columns.at 7 . at idx . should_equal 60.156583 epsilon=0.000001
|
||||
grouped.columns.at 8 . name . should_equal "Standard Deviation ValueWithNothing_1"
|
||||
grouped.columns.at 8 . at idx . should_equal 56.677714 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to create median values" <|
|
||||
grouped = table.aggregate [Group_By "Index", Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"]
|
||||
Test.specify "should be able to compute standard deviation of values" (pending=if test_selection.std_dev.not then "Not supported by "+prefix) <|
|
||||
grouped = materialize <| table.aggregate [Group_By "Index", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)]
|
||||
grouped.row_count . should_equal 10
|
||||
grouped.columns.length . should_equal 5
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
idx = find_row [6] grouped
|
||||
idx.is_nothing . should_be_false
|
||||
grouped.columns.at 1 . name . should_equal "Standard Deviation Value"
|
||||
grouped.columns.at 1 . at idx . should_equal 60.272158 epsilon=0.000001
|
||||
grouped.columns.at 2 . name . should_equal "Standard Deviation ValueWithNothing"
|
||||
grouped.columns.at 2 . at idx . should_equal 56.798691 epsilon=0.000001
|
||||
grouped.columns.at 3 . name . should_equal "Standard Deviation Value_1"
|
||||
grouped.columns.at 3 . at idx . should_equal 60.156583 epsilon=0.000001
|
||||
grouped.columns.at 4 . name . should_equal "Standard Deviation ValueWithNothing_1"
|
||||
grouped.columns.at 4 . at idx . should_equal 56.677714 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to create median values" (pending=if test_selection.advanced_stats.not then "Not supported by "+prefix) <|
|
||||
grouped = materialize <| table.aggregate [Group_By "Index", Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"]
|
||||
grouped.row_count . should_equal 10
|
||||
grouped.columns.length . should_equal 7
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
@@ -360,8 +420,8 @@ spec =
|
||||
grouped.columns.at 6 . name . should_equal "40%-ile ValueWithNothing"
|
||||
grouped.columns.at 6 . at idx . should_equal -18.802000 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to get first and last values" <|
|
||||
grouped = table.aggregate [Group_By "Index", First "TextWithNothing", Last "Value"]
|
||||
Test.specify "should be able to get first and last values" (pending=if test_selection.first_last.not then "Not supported by "+prefix) <|
|
||||
grouped = materialize <| table.aggregate [Group_By "Index", First "TextWithNothing", Last "Value"]
|
||||
grouped.row_count . should_equal 10
|
||||
grouped.columns.length . should_equal 3
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
@@ -373,7 +433,7 @@ spec =
|
||||
grouped.columns.at 2 . at idx . should_equal 56.15916 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to get minimum and maximum values" <|
|
||||
grouped = table.aggregate [Group_By "Index", Minimum "Value", Maximum "Value", Minimum "ValueWithNothing", Maximum "ValueWithNothing"]
|
||||
grouped = materialize <| table.aggregate [Group_By "Index", Minimum "Value", Maximum "Value", Minimum "ValueWithNothing", Maximum "ValueWithNothing"]
|
||||
grouped.row_count . should_equal 10
|
||||
grouped.columns.length . should_equal 5
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
@@ -388,8 +448,8 @@ spec =
|
||||
grouped.columns.at 4 . name . should_equal "Maximum ValueWithNothing"
|
||||
grouped.columns.at 4 . at idx . should_equal 99.79 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to get shortest, longest and concatenated values" <|
|
||||
grouped = table.aggregate [Group_By "Index", Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
|
||||
Test.specify "should be able to get shortest, longest and concatenated values" (pending=if test_selection.text.not then "Not supported by "+prefix) <|
|
||||
grouped = materialize <| table.aggregate [Group_By "Index", Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
|
||||
grouped.row_count . should_equal 10
|
||||
grouped.columns.length . should_equal 4
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
@@ -402,9 +462,9 @@ spec =
|
||||
grouped.columns.at 3 . name . should_equal "Concatenate Code"
|
||||
grouped.columns.at 3 . at idx . length . should_equal 783
|
||||
|
||||
Test.group "Table.aggregate should be able to group on multiple fields not in left columns" <|
|
||||
Test.group prefix+"Table.aggregate should be able to group on multiple fields not in left columns" pending=pending <|
|
||||
Test.specify "should be able to count" <|
|
||||
grouped = table.aggregate [Group_By "Flag", Count Nothing, Group_By "Index"]
|
||||
grouped = materialize <| table.aggregate [Group_By "Flag", Count Nothing, Group_By "Index"]
|
||||
grouped.row_count . should_equal 20
|
||||
grouped.columns.length . should_equal 3
|
||||
grouped.columns.at 0 . name . should_equal "Flag"
|
||||
@@ -415,7 +475,7 @@ spec =
|
||||
grouped.columns.at 1 . at idx . should_equal 127
|
||||
|
||||
Test.specify "should be able to count missing values" <|
|
||||
grouped = table.aggregate [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Group_By "Index", Count_Empty "TextWithNothing", Group_By "Flag", Count_Not_Empty "TextWithNothing"]
|
||||
grouped = materialize <| table.aggregate [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Group_By "Index", Count_Empty "TextWithNothing", Group_By "Flag", Count_Not_Empty "TextWithNothing"]
|
||||
grouped.row_count . should_equal 20
|
||||
grouped.columns.length . should_equal 6
|
||||
grouped.columns.at 4 . name . should_equal "Flag"
|
||||
@@ -432,12 +492,12 @@ spec =
|
||||
grouped.columns.at 5 . at idx . should_equal 115
|
||||
|
||||
Test.specify "should be able to count distinct values" <|
|
||||
grouped = table.aggregate [Group_By "Index", Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag", Count_Distinct ["Index", "Flag"], Group_By "Flag"]
|
||||
grouped = materialize <| table.aggregate [Group_By "Index", Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag", Group_By "Flag"]
|
||||
grouped.row_count . should_equal 20
|
||||
grouped.columns.length . should_equal 6
|
||||
grouped.columns.length . should_equal 5
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
grouped.columns.at 5 . name . should_equal "Flag"
|
||||
idx = find_row ["False", 6] grouped [5, 0]
|
||||
grouped.columns.at 4 . name . should_equal "Flag"
|
||||
idx = find_row ["False", 6] grouped [4, 0]
|
||||
idx.is_nothing . should_be_false
|
||||
grouped.columns.at 1 . name . should_equal "Count Distinct Code"
|
||||
grouped.columns.at 1 . at idx . should_equal 127
|
||||
@@ -445,36 +505,55 @@ spec =
|
||||
grouped.columns.at 2 . at idx . should_equal 1
|
||||
grouped.columns.at 3 . name . should_equal "Count Distinct Flag"
|
||||
grouped.columns.at 3 . at idx . should_equal 1
|
||||
grouped.columns.at 4 . name . should_equal "Count Distinct Index Flag"
|
||||
grouped.columns.at 4 . at idx . should_equal 1
|
||||
|
||||
Test.specify "should be able to sum, average and standard deviation of values" <|
|
||||
grouped = table.aggregate [Group_By "Index", Group_By "Flag", Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)]
|
||||
Test.specify "should be able to count distinct values over multiple columns" (pending=if test_selection.multi_distinct.not then "Not supported by "+prefix) <|
|
||||
## TODO probably should use different cols for multi-distinct and also should check ignore_nothing
|
||||
grouped = materialize <| table.aggregate [Group_By "Index", Count_Distinct ["Index", "Flag"], Group_By "Flag"]
|
||||
grouped.row_count . should_equal 20
|
||||
grouped.columns.length . should_equal 10
|
||||
grouped.columns.length . should_equal 3
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
grouped.columns.at 2 . name . should_equal "Flag"
|
||||
idx = find_row ["False", 6] grouped [2, 0]
|
||||
idx.is_nothing . should_be_false
|
||||
grouped.columns.at 1 . name . should_equal "Count Distinct Index Flag"
|
||||
grouped.columns.at 1 . at idx . should_equal 1
|
||||
|
||||
Test.specify "should be able to compute sum and average of values" <|
|
||||
grouped = materialize <| table.aggregate [Group_By "Index", Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing", Group_By "Flag"]
|
||||
grouped.row_count . should_equal 20
|
||||
grouped.columns.length . should_equal 6
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
grouped.columns.at 5 . name . should_equal "Flag"
|
||||
idx = find_row ["False", 6] grouped [5, 0]
|
||||
idx.is_nothing . should_be_false
|
||||
grouped.columns.at 1 . name . should_equal "Sum Value"
|
||||
grouped.columns.at 1 . at idx . should_equal -103.050170 epsilon=0.000001
|
||||
grouped.columns.at 2 . name . should_equal "Sum ValueWithNothing"
|
||||
grouped.columns.at 2 . at idx . should_equal 533.57 epsilon=0.000001
|
||||
grouped.columns.at 3 . name . should_equal "Average Value"
|
||||
grouped.columns.at 3 . at idx . should_equal -0.811419 epsilon=0.000001
|
||||
grouped.columns.at 4 . name . should_equal "Average ValueWithNothing"
|
||||
grouped.columns.at 4 . at idx . should_equal 4.721858 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to compute standard deviation of values" (pending=if test_selection.std_dev.not then "Not supported by "+prefix) <|
|
||||
grouped = materialize <| table.aggregate [Group_By "Index", Group_By "Flag", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)]
|
||||
grouped.row_count . should_equal 20
|
||||
grouped.columns.length . should_equal 6
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
grouped.columns.at 1 . name . should_equal "Flag"
|
||||
idx = find_row ["False", 6] grouped [1, 0]
|
||||
idx.is_nothing . should_be_false
|
||||
grouped.columns.at 2 . name . should_equal "Sum Value"
|
||||
grouped.columns.at 2 . at idx . should_equal -103.050170 epsilon=0.000001
|
||||
grouped.columns.at 3 . name . should_equal "Sum ValueWithNothing"
|
||||
grouped.columns.at 3 . at idx . should_equal 533.57 epsilon=0.000001
|
||||
grouped.columns.at 4 . name . should_equal "Average Value"
|
||||
grouped.columns.at 4 . at idx . should_equal -0.811419 epsilon=0.000001
|
||||
grouped.columns.at 5 . name . should_equal "Average ValueWithNothing"
|
||||
grouped.columns.at 5 . at idx . should_equal 4.721858 epsilon=0.000001
|
||||
grouped.columns.at 6 . name . should_equal "Standard Deviation Value"
|
||||
grouped.columns.at 6 . at idx . should_equal 58.979275 epsilon=0.000001
|
||||
grouped.columns.at 7 . name . should_equal "Standard Deviation ValueWithNothing"
|
||||
grouped.columns.at 7 . at idx . should_equal 57.561756 epsilon=0.000001
|
||||
grouped.columns.at 8 . name . should_equal "Standard Deviation Value_1"
|
||||
grouped.columns.at 8 . at idx . should_equal 58.746614 epsilon=0.000001
|
||||
grouped.columns.at 9 . name . should_equal "Standard Deviation ValueWithNothing_1"
|
||||
grouped.columns.at 9 . at idx . should_equal 57.306492 epsilon=0.000001
|
||||
grouped.columns.at 2 . name . should_equal "Standard Deviation Value"
|
||||
grouped.columns.at 2 . at idx . should_equal 58.979275 epsilon=0.000001
|
||||
grouped.columns.at 3 . name . should_equal "Standard Deviation ValueWithNothing"
|
||||
grouped.columns.at 3 . at idx . should_equal 57.561756 epsilon=0.000001
|
||||
grouped.columns.at 4 . name . should_equal "Standard Deviation Value_1"
|
||||
grouped.columns.at 4 . at idx . should_equal 58.746614 epsilon=0.000001
|
||||
grouped.columns.at 5 . name . should_equal "Standard Deviation ValueWithNothing_1"
|
||||
grouped.columns.at 5 . at idx . should_equal 57.306492 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to create median values" <|
|
||||
grouped = table.aggregate [Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Group_By "Index", Group_By "Flag", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"]
|
||||
Test.specify "should be able to create median values" (pending=if test_selection.advanced_stats.not then "Not supported by "+prefix) <|
|
||||
grouped = materialize <| table.aggregate [Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Group_By "Index", Group_By "Flag", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"]
|
||||
grouped.row_count . should_equal 20
|
||||
grouped.columns.length . should_equal 8
|
||||
grouped.columns.at 5 . name . should_equal "Flag"
|
||||
@ -494,8 +573,8 @@ spec =
|
||||
grouped.columns.at 7 . name . should_equal "40%-ile ValueWithNothing"
|
||||
grouped.columns.at 7 . at idx . should_equal -17.174000 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to get first and last values" <|
|
||||
grouped = table.aggregate [Group_By "Flag", First "TextWithNothing", Last "Value", Group_By "Index"]
|
||||
Test.specify "should be able to get first and last values" (pending=if test_selection.first_last.not then "Not supported by "+prefix) <|
|
||||
grouped = materialize <| table.aggregate [Group_By "Flag", First "TextWithNothing", Last "Value", Group_By "Index"]
|
||||
grouped.row_count . should_equal 20
|
||||
grouped.columns.length . should_equal 4
|
||||
grouped.columns.at 0 . name . should_equal "Flag"
|
||||
@ -508,7 +587,7 @@ spec =
|
||||
grouped.columns.at 2 . at idx . should_equal 56.15916 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to get minimum and maximum values" <|
|
||||
grouped = table.aggregate [Group_By "Index", Minimum "Value", Maximum "Value", Group_By "Flag", Minimum "ValueWithNothing", Maximum "ValueWithNothing"]
|
||||
grouped = materialize <| table.aggregate [Group_By "Index", Minimum "Value", Maximum "Value", Group_By "Flag", Minimum "ValueWithNothing", Maximum "ValueWithNothing"]
|
||||
grouped.row_count . should_equal 20
|
||||
grouped.columns.length . should_equal 6
|
||||
grouped.columns.at 3 . name . should_equal "Flag"
|
||||
@ -524,8 +603,8 @@ spec =
|
||||
grouped.columns.at 5 . name . should_equal "Maximum ValueWithNothing"
|
||||
grouped.columns.at 5 . at idx . should_equal 97.17 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to get shortest, longest and concatenated values" <|
|
||||
grouped = table.aggregate [Group_By "Index", Group_By "Flag", Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
|
||||
Test.specify "should be able to get shortest, longest and concatenated values" (pending=if test_selection.text.not then "Not supported by "+prefix) <|
|
||||
grouped = materialize <| table.aggregate [Group_By "Index", Group_By "Flag", Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
|
||||
grouped.row_count . should_equal 20
|
||||
grouped.columns.length . should_equal 5
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
@ -539,7 +618,10 @@ spec =
|
||||
grouped.columns.at 4 . name . should_equal "Concatenate Code"
|
||||
grouped.columns.at 4 . at idx . length . should_equal 381
|
||||
|
||||
Test.group "Table.aggregate should raise warnings when there are issues" <|
|
||||
problem_pending = case pending.is_nothing of
|
||||
False -> pending
|
||||
True -> if test_selection.advanced_stats.not then "Not supported by "+prefix
|
||||
Test.group prefix+"Table.aggregate should raise warnings when there are issues" pending=problem_pending <|
|
||||
table =
|
||||
col1 = ["Index", [1, 2, 3]]
|
||||
col2 = ["Value", [1, 2, 3]]
|
||||
|
@ -23,8 +23,8 @@ from Standard.Table.Data.Position as Position_Module import all
|
||||
column elements.
|
||||
|
||||
TODO [RW] the Any in return type of the builder should ideally be replaced with the Table interface, once that is supported.
|
||||
spec : Text -> (Vector -> Any) -> Boolean -> Nothing
|
||||
spec prefix table_builder supports_case_sensitive_columns =
|
||||
spec : Text -> (Vector -> Any) -> Boolean -> Text -> Nothing
|
||||
spec prefix table_builder supports_case_sensitive_columns pending=Nothing =
|
||||
table =
|
||||
col1 = ["foo", Integer, [1,2,3]]
|
||||
col2 = ["bar", Integer, [4,5,6]]
|
||||
@ -38,7 +38,7 @@ spec prefix table_builder supports_case_sensitive_columns =
|
||||
expect_column_names names table =
|
||||
table.columns . map .name . should_equal names frames_to_skip=2
|
||||
|
||||
Test.group prefix+"Table.select_columns" <|
|
||||
Test.group prefix+"Table.select_columns" pending=pending <|
|
||||
Test.specify "should work as shown in the doc examples" <|
|
||||
expect_column_names ["foo", "bar"] <| table.select_columns (By_Name.new ["bar", "foo"])
|
||||
expect_column_names ["bar", "Baz", "foo_1", "foo_2"] <| table.select_columns (By_Name ["foo.+", "b.*"] (Regex_Matcher.new case_sensitive=Case_Insensitive.new))
|
||||
@ -152,7 +152,7 @@ spec prefix table_builder supports_case_sensitive_columns =
|
||||
tester_2 = expect_column_names ["foo"]
|
||||
Problems.test_problem_handling action_2 problems_2 tester_2
|
||||
|
||||
Test.group prefix+"Table.remove_columns" <|
|
||||
Test.group prefix+"Table.remove_columns" pending=pending <|
|
||||
Test.specify "should work as shown in the doc examples" <|
|
||||
expect_column_names ["Baz", "foo_1", "foo_2", "ab.+123", "abcd123"] <| table.remove_columns (By_Name.new ["bar", "foo"])
|
||||
expect_column_names ["foo", "ab.+123", "abcd123"] <| table.remove_columns (By_Name ["foo.+", "b.*"] (Regex_Matcher.new case_sensitive=Case_Insensitive.new))
|
||||
@ -261,7 +261,7 @@ spec prefix table_builder supports_case_sensitive_columns =
|
||||
tester_2 = expect_column_names ["bar", "Baz", "foo_1", "foo_2", "ab.+123", "abcd123"]
|
||||
Problems.test_problem_handling action_2 problems_2 tester_2
|
||||
|
||||
Test.group prefix+"Table.reorder_columns" <|
|
||||
Test.group prefix+"Table.reorder_columns" pending=pending <|
|
||||
Test.specify "should work as shown in the doc examples" <|
|
||||
expect_column_names ["bar", "Baz", "foo_1", "foo_2", "ab.+123", "abcd123", "foo"] <| table.reorder_columns (By_Name.new ["foo"]) position=After_Other_Columns
|
||||
expect_column_names ["foo_1", "foo_2", "bar", "Baz", "foo", "ab.+123", "abcd123"] <| table.reorder_columns (By_Name ["foo.+", "b.*"] (Regex_Matcher.new case_sensitive=Case_Insensitive.new))
|
||||
@ -357,7 +357,7 @@ spec prefix table_builder supports_case_sensitive_columns =
|
||||
tester = expect_column_names ["bar", "Baz", "foo_1", "foo_2", "ab.+123", "abcd123", "foo"]
|
||||
Problems.test_problem_handling action problems tester
|
||||
|
||||
Test.group prefix+"Table.sort_columns" <|
|
||||
Test.group prefix+"Table.sort_columns" pending=pending <|
|
||||
table =
|
||||
col1 = ["foo_21", Integer, [1,2,3]]
|
||||
col2 = ["foo_100", Integer, [4,5,6]]
|
||||
@ -385,7 +385,7 @@ spec prefix table_builder supports_case_sensitive_columns =
|
||||
Test.specify "should correctly handle various combinations of options" <|
|
||||
expect_column_names ["foo_100", "foo_21", "foo_3", "Foo_2", "foo_1", "foo_001", "bar"] <| table.sort_columns (Sort_Method natural_order=True case_sensitive=Case_Insensitive.new order=Sort_Order.Descending)
|
||||
|
||||
Test.group prefix+"Table.rename_columns" <|
|
||||
Test.group prefix+"Table.rename_columns" pending=pending <|
|
||||
table =
|
||||
col1 = ["alpha", Integer, [1,2,3]]
|
||||
col2 = ["beta", Integer, [4,5,6]]
|
||||
|
@ -1,10 +1,11 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import project.Helpers.Fake_Test_Connection
|
||||
import project.Database.Helpers.Fake_Test_Connection
|
||||
import Standard.Database.Data.Dialect
|
||||
import Standard.Database.Data.Table as Table_Module
|
||||
import Standard.Test
|
||||
|
||||
from Standard.Table.Data.Aggregate_Column import all
|
||||
from Standard.Database import all
|
||||
from Standard.Database.Data.Sql import Sql_Type
|
||||
from Standard.Table import No_Such_Column_Error, Order_Rule
|
||||
@ -142,22 +143,6 @@ spec =
|
||||
t2 = t1.drop_missing_rows
|
||||
t2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE (NOT ("T1"."A" IS NULL)) AND (NOT ("T1"."B" IS NULL)) AND (NOT ("T1"."C" IS NULL))', []]
|
||||
|
||||
Test.group "[Codegen] Aggregation" pending="Codegen test for aggregations is disabled until 1643 is resolved. The semantics is still being tested by tests that run on actual databases." <|
|
||||
agg = t1.group by='A'
|
||||
|
||||
Test.specify "should allow counting group sizes" <|
|
||||
agg.count.to_sql.prepare . should_equal ['SELECT COUNT(*) AS "count" FROM "T1" AS "T1" GROUP BY "T1"."A"', []]
|
||||
|
||||
Test.specify "should allow aggregating columns with basic arithmetic aggregators" <|
|
||||
c1 = agg.at 'B' . mean
|
||||
c1.to_sql.prepare . should_equal ['SELECT AVG("T1"."B") AS "B_mean" FROM "T1" AS "T1" GROUP BY "T1"."A"', []]
|
||||
c2 = agg.at 'B' . min
|
||||
c2.to_sql.prepare . should_equal ['SELECT MIN("T1"."B") AS "B_min" FROM "T1" AS "T1" GROUP BY "T1"."A"', []]
|
||||
|
||||
Test.specify "should allow grouping by multiple columns" <|
|
||||
agg = t1.group by=['A','B']
|
||||
agg.count.to_sql.prepare . should_equal ['SELECT COUNT(*) AS "count" FROM "T1" AS "T1" GROUP BY "T1"."A", "T1"."B"', []]
|
||||
|
||||
Test.group "[Codegen] Sorting" <|
|
||||
Test.specify "should allow sorting by a single column name" <|
|
||||
r1 = t1.sort by="A" . at "B"
|
||||
@ -196,3 +181,14 @@ spec =
|
||||
used_names = ["A", "A_1"]
|
||||
preferred_names = ["A", "A", "B"]
|
||||
Table_Module.fresh_names used_names preferred_names . should_equal ["A_2", "A_3", "B"]
|
||||
|
||||
Test.group "[Codegen] Aggregation" <|
|
||||
Test.specify "should allow to count rows" <|
|
||||
code = t1.aggregate [Group_By "A" "A grp", Count "counter"] . to_sql . prepare
|
||||
code . should_equal ['SELECT "T1"."A" AS "A grp", COUNT(*) AS "counter" FROM "T1" AS "T1" GROUP BY "T1"."A"', []]
|
||||
|
||||
Test.specify "should allow to group by multiple fields" <|
|
||||
code = t1.aggregate [Sum "A" "sum_a", Group_By "C" Nothing, Group_By "B" "B grp"] . to_sql . prepare
|
||||
code . should_equal ['SELECT SUM("T1"."A") AS "sum_a", "T1"."C" AS "C", "T1"."B" AS "B grp" FROM "T1" AS "T1" GROUP BY "T1"."C", "T1"."B"', []]
|
||||
|
||||
# Entry point: hands this module's `spec` to `Test.Suite.run_main`.
main = Test.Suite.run_main here.spec
|
@ -1,22 +1,15 @@
|
||||
from Standard.Base import all
|
||||
from Standard.Database import all
|
||||
import Standard.Table.Data.Table as Materialized_Table
|
||||
import Standard.Test
|
||||
|
||||
from Standard.Table.Data.Aggregate_Column import all
|
||||
|
||||
spec prefix connection pending=Nothing =
|
||||
make_table name column_names column_typenames = Panic.recover Any <|
|
||||
quote x = '"' + x + '"'
|
||||
# TODO this is a hack with no sanitization, just for testing; it should be removed when proper create table is supported by the library
|
||||
cols = column_names.zip column_typenames name-> typ->
|
||||
quote name + " " + typ
|
||||
sql = "CREATE TABLE " + quote name + " (" + (cols.join ", ") + ")"
|
||||
Panic.rethrow <| connection.execute_update sql
|
||||
Panic.rethrow <| connection.access_table name
|
||||
clean_table name = Panic.recover Any <|
|
||||
sql = 'DROP TABLE "' + name + '"'
|
||||
Panic.rethrow <| connection.execute_update sql
|
||||
t1 = make_table "T1" ["a", "b", "c"] ["INT", "INT", "INT"]
|
||||
t1.insert [1, 2, 3]
|
||||
t1.insert [4, 5, 6]
|
||||
t1 = connection.upload_table "T1" (Materialized_Table.new [["a", [1, 4]], ["b", [2, 5]], ["c", [3, 6]]])
|
||||
Test.group prefix+"Basic Table Access" pending=pending <|
|
||||
Test.specify "should allow to materialize tables and columns into local memory" <|
|
||||
df = t1.to_dataframe
|
||||
@ -52,19 +45,14 @@ spec prefix connection pending=Nothing =
|
||||
empty.to_dataframe.columns.length . should_equal 0
|
||||
empty.to_dataframe.row_count . should_equal empty.row_count
|
||||
Test.specify "should handle bigger result sets" <|
|
||||
table = make_table "Big" ["a", "b", "c"] ["INT", "REAL", "VARCHAR"]
|
||||
n = 1000
|
||||
0.up_to n . each ix->
|
||||
table.insert [ix, ix * 3.1415926, ix.to_text]
|
||||
original = Materialized_Table.new [["a", Vector.new n ix->ix], ["b", Vector.new n ix-> ix * 3.1415926], ["c", Vector.new n ix-> ix.to_text]]
|
||||
table = connection.upload_table "Big" original
|
||||
materialized = table.to_dataframe
|
||||
materialized.row_count . should_equal n
|
||||
|
||||
Test.group prefix+"Mapping Operations" pending=pending <|
|
||||
t2 = make_table "T2" ["x", "y", "b"] ["INT", "INT", "BOOLEAN"]
|
||||
t2.insert [1, 2, False]
|
||||
t2.insert [4, 3, False]
|
||||
t2.insert [5, 5, True]
|
||||
t2.insert [Nothing, Nothing, Nothing]
|
||||
t2 = connection.upload_table "T2" <| Materialized_Table.new [["x", [1, 4, 5, Nothing]], ["y", [2, 3, 5, Nothing]], ["b", [False, False, True, Nothing]]]
|
||||
x = t2.at "x"
|
||||
y = t2.at "y"
|
||||
b = t2.at "b"
|
||||
@ -96,11 +84,7 @@ spec prefix connection pending=Nothing =
|
||||
x.is_missing.to_vector . should_equal [False, False, False, True]
|
||||
(x == Nothing).to_vector . should_equal [Nothing, Nothing, Nothing, Nothing]
|
||||
|
||||
t3 = make_table "T3" ["s1", "s2"] ["VARCHAR", "VARCHAR"]
|
||||
t3.insert ["foobar", "foo"]
|
||||
t3.insert ["bar", "ar" ]
|
||||
t3.insert ["baz", "a" ]
|
||||
t3.insert [Nothing, Nothing]
|
||||
t3 = connection.upload_table "T3" <| Materialized_Table.new [["s1", ["foobar", "bar", "baz", Nothing]], ["s2", ["foo", "ar", "a", Nothing]]]
|
||||
s1 = t3.at "s1"
|
||||
s2 = t3.at "s2"
|
||||
Test.specify "should handle Text operations" <|
|
||||
@ -128,19 +112,9 @@ spec prefix connection pending=Nothing =
|
||||
t2.at "c" . to_vector . should_equal [3]
|
||||
|
||||
Test.group prefix+"Joining Tables" pending=pending <|
|
||||
a = make_table "TA" ["x", "y"] ["INTEGER", "VARCHAR"]
|
||||
a.insert [0, "foo"]
|
||||
a.insert [1, "bar"]
|
||||
a.insert [7, "baz"]
|
||||
a.insert [3, "spam"]
|
||||
a.insert [6, "eggs"]
|
||||
b = make_table "TB" ["w", "z"] ["INTEGER", "VARCHAR"]
|
||||
b.insert [6, "foo"]
|
||||
b.insert [3, "foo"]
|
||||
b.insert [5, "bar"]
|
||||
b.insert [5, "spam"]
|
||||
b.insert [3, "bar"]
|
||||
b.insert [3, "eggs"]
|
||||
a = connection.upload_table "TA" <| Materialized_Table.new [["x", [0, 1, 7, 3, 6]], ["y", ["foo", "bar", "baz", "spam", "eggs"]]]
|
||||
b = connection.upload_table "TB" <| Materialized_Table.new [["w", [6, 3, 5, 5, 3, 3]], ["z", ["foo", "foo", "bar", "spam", "bar", "eggs"]]]
|
||||
|
||||
## The tests below use `sort`, because the SQL backend is not guaranteed
|
||||
to return the rows in any particular order. This is the `sort` from
|
||||
the Dataframes library, so it is independent of the library under
|
||||
@ -174,15 +148,9 @@ spec prefix connection pending=Nothing =
|
||||
r_2.columns.map .name . should_equal ['y_old', 'y_new']
|
||||
|
||||
Test.specify "should correctly handle multi-joins" <|
|
||||
ta = make_table "M_TA" ["id", "name"] ["INTEGER", "VARCHAR"]
|
||||
tb = make_table "M_TB" ["id", "name"] ["INTEGER", "VARCHAR"]
|
||||
tc = make_table "M_TC" ["id_a", "id_b"] ["INTEGER", "INTEGER"]
|
||||
ta.insert [0, "Foo"]
|
||||
ta.insert [1, "Hmm"]
|
||||
tb.insert [2, "Bar"]
|
||||
tb.insert [0, "Hmm"]
|
||||
tc.insert [0, 2]
|
||||
tc.insert [1, 0]
|
||||
ta = connection.upload_table "M_TA" <| Materialized_Table.new [["id", [0, 1]], ["name", ["Foo", "Hmm"]]]
|
||||
tb = connection.upload_table "M_TB" <| Materialized_Table.new [["id", [2, 0]], ["name", ["Bar", "Hmm"]]]
|
||||
tc = connection.upload_table "M_TC" <| Materialized_Table.new [["id_a", [0, 1]], ["id_b", [2, 0]]]
|
||||
ta_2 = ta.set_index "id"
|
||||
tb_2 = tb.set_index "id"
|
||||
res = (tc.join ta_2 on="id_a") . join tb_2 on="id_b" left_suffix="_a" right_suffix="_b"
|
||||
@ -192,12 +160,8 @@ spec prefix connection pending=Nothing =
|
||||
df . at "name_b" . to_vector . should_equal ["Bar", "Hmm"]
|
||||
|
||||
Test.group prefix+"Missing Values" pending=pending <|
|
||||
t4 = make_table "T4" ["a", "b", "c"] ["INT", "BOOLEAN", "VARCHAR"]
|
||||
t4.insert [0, True, ""]
|
||||
t4.insert [1, Nothing, "foo"]
|
||||
t4.insert [Nothing, True, "bar"]
|
||||
t4.insert [42, False, Nothing]
|
||||
t4.insert [Nothing, Nothing, Nothing]
|
||||
t4 = connection.upload_table "T4" <|
|
||||
Materialized_Table.new [["a", [0, 1, Nothing, 42, Nothing]], ["b", [True, Nothing, True, False, Nothing]], ["c", ["", "foo", "bar", Nothing, Nothing]]]
|
||||
Test.specify "fill_missing should replace nulls" <|
|
||||
t4.at 'a' . fill_missing 10 . to_vector . should_equal [0, 1, 10, 42, 10]
|
||||
t4.at 'b' . fill_missing False . to_vector . should_equal [True, False, True, False, False]
|
||||
@ -221,10 +185,8 @@ spec prefix connection pending=Nothing =
|
||||
d.at 'c' . to_vector . should_equal [""]
|
||||
|
||||
Test.specify "drop_missing_columns should drop columns that contain at least one missing row in a Table" <|
|
||||
t5 = make_table "T5" ["a", "b"] ["INT", "BOOLEAN", "VARCHAR"]
|
||||
t5.insert [1, True, "foo"]
|
||||
t5.insert [2, False, Nothing]
|
||||
t5.insert [3, Nothing, "aaa"]
|
||||
t5 = connection.upload_table "T5" <|
|
||||
Materialized_Table.new [["a", [1, 2, 3]], ["b", [True, False, Nothing]], ["c", ["foo", Nothing, "aaa"]]]
|
||||
|
||||
r = t5.drop_missing_columns
|
||||
r.columns.map .name . should_equal ["a"]
|
||||
@ -234,15 +196,9 @@ spec prefix connection pending=Nothing =
|
||||
empty.columns.length . should_equal 0
|
||||
empty.to_dataframe.columns.length . should_equal 0
|
||||
|
||||
Test.group prefix+"Aggregation" pending=pending <|
|
||||
t = make_table "T6" ['name', 'price', 'quantity'] ['VARCHAR', 'DOUBLE PRECISION', 'INTEGER']
|
||||
t.insert ["foo", 0.4, 10]
|
||||
t.insert ["bar", 3.5, 20]
|
||||
t.insert ["foo", Nothing, 30]
|
||||
t.insert ["baz", 6.7, 40]
|
||||
t.insert ["foo", Nothing, 50]
|
||||
t.insert ["bar", 97, 60]
|
||||
t.insert ["quux", Nothing, 70]
|
||||
Test.group prefix+"Old Aggregation" pending=pending <|
|
||||
t = connection.upload_table "T6" <|
|
||||
Materialized_Table.new [["name", ["foo", "bar", "foo", "baz", "foo", "bar", "quux"]], ["price", [0.4, 3.5, Nothing, 6.7, Nothing, 97, Nothing]], ["quantity", [10, 20, 30, 40, 50, 60, 70]]]
|
||||
agg = t.group by='name'
|
||||
## A helper which makes sure that the groups are ordered according to the index, using the Table library
|
||||
determinize col =
|
||||
@ -277,24 +233,17 @@ spec prefix connection pending=Nothing =
|
||||
|
||||
Test.group prefix+"Column-wide statistics" pending=pending <|
|
||||
Test.specify 'should allow computing basic column-wide stats' <|
|
||||
t7 = make_table "T7" ['price'] ['DOUBLE PRECISION']
|
||||
t7 = connection.upload_table "T7" <|
|
||||
Materialized_Table.new [['price', [0.4, 3.5, Nothing, 6.7, Nothing, 97, Nothing]]]
|
||||
price = t7.at 'price'
|
||||
[0.4, 3.5, Nothing, 6.7, Nothing, 97, Nothing] . each x->
|
||||
t7.insert [x]
|
||||
|
||||
price.sum.should_equal 107.6
|
||||
price.min.should_equal 0.4
|
||||
price.max.should_equal 97
|
||||
price.mean.should_equal 26.9
|
||||
|
||||
Test.group prefix+"Sorting" pending=pending <|
|
||||
df = make_table "clothes" ['id', 'name', 'quantity', 'rating', 'price'] ['INTEGER', 'VARCHAR', 'INTEGER', 'DOUBLE PRECISION', 'DOUBLE PRECISION']
|
||||
df.insert [1,'shoes',20,3.0,37.2]
|
||||
df.insert [2,'trousers',10,Nothing,42.1]
|
||||
df.insert [3,'dress',20,7.3,64.1]
|
||||
df.insert [4,'skirt',10,3.0,87.4]
|
||||
df.insert [5,'blouse',30,2.2,13.5]
|
||||
df.insert [6,'t-shirt',30,Nothing,64.2]
|
||||
df = connection.upload_table "clothes" <|
|
||||
Materialized_Table.new [["id", [1,2,3,4,5,6]], ["name", ["shoes","trousers","dress","skirt","blouse","t-shirt"]], ["quantity", [20,10,20,10,30,30]], ["rating", [3.0,Nothing,7.3,3.0,2.2,Nothing]], ["price", [37.2,42.1,64.1,87.4,13.5,64.2]]]
|
||||
|
||||
Test.specify "should allow sorting by a single column name" <|
|
||||
r_1 = df.sort by="quantity"
|
||||
@ -331,18 +280,13 @@ spec prefix connection pending=Nothing =
|
||||
r.should_fail_with No_Such_Column_Error
|
||||
|
||||
Test.specify 'should correctly reorder all kinds of columns and leave the original columns untouched' <|
|
||||
df = make_table "T8" ['ord', 'ints', 'reals', 'bools', 'texts'] ['INTEGER', 'INTEGER', 'DOUBLE PRECISION', 'BOOLEAN', 'VARCHAR']
|
||||
r = df.sort by='ord'
|
||||
df.insert [0, 1, 1.3, False, "foo"]
|
||||
df.insert [3, 2, 4.6, False, "foo"]
|
||||
df.insert [2, 3, 3.2, True, "bar"]
|
||||
df.insert [4, 4, 5.2, True, "baz"]
|
||||
df.insert [1, 5, 1.6, False, "spam"]
|
||||
|
||||
ints = [1, 2, 3, 4, 5]
|
||||
reals = [1.3, 4.6, 3.2, 5.2, 1.6]
|
||||
bools = [False, False, True, True, False]
|
||||
texts = ["foo", "foo", "bar", "baz", "spam"]
|
||||
df = connection.upload_table "T8" <|
|
||||
Materialized_Table.new [["ord", [0,3,2,4,1]], ["ints", ints], ["reals", reals], ["bools", bools], ["texts", texts]]
|
||||
r = df.sort by='ord'
|
||||
|
||||
r.at 'ints' . to_vector . should_equal [1, 5, 3, 2, 4]
|
||||
df.at 'ints' . to_vector . should_equal ints
|
||||
@ -369,10 +313,8 @@ spec prefix connection pending=Nothing =
|
||||
r_3.to_vector.should_equal [Nothing, Nothing, 7.3, 3.0, 3.0, 2.2]
|
||||
|
||||
Test.group prefix+"Index" pending=pending <|
|
||||
t0 = make_table "Tix" ['ix', 'c1'] ['INTEGER', 'INTEGER']
|
||||
t0.insert [1, 4]
|
||||
t0.insert [2, 5]
|
||||
t0.insert [3, 6]
|
||||
t0 = connection.upload_table "Tix" <|
|
||||
Materialized_Table.new [["ix", [1,2,3]], ["c1", [4,5,6]]]
|
||||
t = t0.set_index 'ix'
|
||||
Test.specify "should be accessible by `at` like other columns" <|
|
||||
t.at 'ix' . to_vector . should_equal t.index.to_vector
|
||||
@ -386,5 +328,88 @@ spec prefix connection pending=Nothing =
|
||||
df_col.to_vector . should_equal vec
|
||||
df_col.index.to_vector . should_equal [1, 2, 3]
|
||||
|
||||
tables = ["T1", "T2", "T3", "T4", "T5", "T6", "T7", "T8", "TA", "TB", "Big", "clothes", "M_TA", "M_TB", "M_TC", "Tix"]
|
||||
Test.group prefix+"Aggregation" pending=pending <|
|
||||
builders = [Vector.new_builder,Vector.new_builder,Vector.new_builder]
|
||||
insert v =
|
||||
builders.zip v .append
|
||||
insert ["foo", 0.4, 50]
|
||||
insert ["foo", 0.2, 10]
|
||||
insert ["foo", 0.4, 30]
|
||||
insert ["bar", 3.5, 20]
|
||||
insert ["foo", Nothing, 20]
|
||||
insert ["baz", 6.7, 40]
|
||||
insert ["foo", Nothing, 10]
|
||||
insert ["bar", 97, 60]
|
||||
insert ["quux", Nothing, 70]
|
||||
insert ["zzzz", Nothing, Nothing]
|
||||
insert ["zzzz", 1, 1]
|
||||
insert ["zzzz", 0, 0]
|
||||
insert ["zzzz", 0, 1]
|
||||
insert ["zzzz", 1, 0]
|
||||
insert ["zzzz", 0, 0]
|
||||
insert ["zzzz", Nothing, Nothing]
|
||||
t = connection.upload_table "T9" <|
|
||||
Materialized_Table.new [["name", builders.at 0 . to_vector], ["price", builders.at 1 . to_vector], ["quantity", builders.at 2 . to_vector]]
|
||||
|
||||
## A helper which makes sure that the groups in a materialized
|
||||
(InMemory) table are ordered according to a specified column or list
|
||||
of columns.
|
||||
determinize_by order_column table =
|
||||
table.sort by=order_column
|
||||
|
||||
Test.specify "should allow counting group sizes and elements" <|
|
||||
aggregates = [Count Nothing, Count_Not_Nothing "price", Count_Nothing "price"]
|
||||
|
||||
t1 = determinize_by "name" (t.aggregate ([Group_By "name"] + aggregates) . to_dataframe)
|
||||
t1.at "name" . to_vector . should_equal ["bar", "baz", "foo", "quux", "zzzz"]
|
||||
t1.at "Count" . to_vector . should_equal [2, 1, 5, 1, 7]
|
||||
t1.at "Count Not Nothing price" . to_vector . should_equal [2, 1, 3, 0, 5]
|
||||
t1.at "Count Nothing price" . to_vector . should_equal [0, 0, 2, 1, 2]
|
||||
|
||||
t2 = t.aggregate aggregates . to_dataframe
|
||||
t2.at "Count" . to_vector . should_equal [16]
|
||||
t2.at "Count Not Nothing price" . to_vector . should_equal [11]
|
||||
t2.at "Count Nothing price" . to_vector . should_equal [5]
|
||||
|
||||
Test.specify "should allow to count distinct values" <|
|
||||
aggregates = [Count_Distinct "quantity", Count_Distinct "price" (ignore_nothing=True), Count_Distinct "price" (ignore_nothing=False)]
|
||||
|
||||
t1 = determinize_by "name" (t.aggregate [Group_By "name"]+aggregates . to_dataframe)
|
||||
t1.at "name" . to_vector . should_equal ["bar", "baz", "foo", "quux", "zzzz"]
|
||||
# t1.at "Count Distinct quantity" . to_vector . should_equal [2, 1, 3, 0]
|
||||
# TODO
|
||||
|
||||
t2 = t.aggregate aggregates . to_dataframe
|
||||
t2 . at "Count Distinct quantity" . to_vector . should_equal [10]
|
||||
t2 . at "Count Distinct price" . to_vector . should_equal [7]
|
||||
#t2 . at "Count Distinct price 2" . to_vector . should_equal [8]
|
||||
|
||||
Test.specify "should allow to count distinct values over multiple fields" pending="TODO" <|
|
||||
aggregates = [Count_Distinct ["price", "quantity"]]
|
||||
|
||||
t1 = determinize_by "name" (t.aggregate [Group_By "name"]+aggregates . to_dataframe)
|
||||
t1.at "name" . to_vector . should_equal ["bar", "baz", "foo", "quux", "zzzz"]
|
||||
# t1.at "Count Distinct quantity" . to_vector . should_equal [2, 1, 3, 0]
|
||||
# TODO
|
||||
|
||||
t2 = t.aggregate aggregates . to_dataframe
|
||||
t2 . at "Count Distinct price quantity" . to_vector . should_equal [13]
|
||||
|
||||
Test.specify "should allow simple arithmetic aggregations" <|
|
||||
aggregates = [Sum "price" Nothing, Sum "quantity" Nothing, Average "price" Nothing]
|
||||
## TODO can check the datatypes
|
||||
|
||||
t1 = determinize_by "name" (t.aggregate ([Group_By "name" Nothing] + aggregates) . to_dataframe)
|
||||
t1.at "name" . to_vector . should_equal ["bar", "baz", "foo", "quux", "zzzz"]
|
||||
t1.at "Sum price" . to_vector . should_equal [100.5, 6.7, 1, Nothing, 2]
|
||||
t1.at "Sum quantity" . to_vector . should_equal [80, 40, 120, 70, 2]
|
||||
t1.at "Average price" . to_vector . should_equal [50.25, 6.7, (1/3), Nothing, (2/5)]
|
||||
|
||||
t2 = t.aggregate aggregates . to_dataframe
|
||||
t2.at "Sum price" . to_vector . should_equal [110.2]
|
||||
t2.at "Sum quantity" . to_vector . should_equal [312]
|
||||
t2.at "Average price" . to_vector . should_equal [(110.2 / 11)]
|
||||
|
||||
|
||||
tables = ["T1", "T2", "T3", "T4", "T5", "T6", "T7", "T8", "TA", "TB", "T9", "Big", "clothes", "M_TA", "M_TB", "M_TC", "Tix"]
|
||||
tables.each clean_table
|
16
test/Table_Tests/src/Database/Main.enso
Normal file
16
test/Table_Tests/src/Database/Main.enso
Normal file
@ -0,0 +1,16 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import Standard.Test
|
||||
import project.Database.Codegen_Spec
|
||||
import project.Database.Sqlite_Spec
|
||||
import project.Database.Postgresql_Spec
|
||||
import project.Database.Redshift_Spec
|
||||
|
||||
## Runs the `spec` of each imported database test module, one after another:
   `Codegen_Spec`, `Sqlite_Spec`, `Postgresql_Spec` and `Redshift_Spec`, in
   that order.
databases_spec =
    Codegen_Spec.spec
    Sqlite_Spec.spec
    Postgresql_Spec.spec
    Redshift_Spec.spec
|
||||
|
||||
|
||||
# Entry point: hands the combined `databases_spec` to `Test.Suite.run_main`.
main = Test.Suite.run_main here.databases_spec
|
82
test/Table_Tests/src/Database/Postgresql_Spec.enso
Normal file
82
test/Table_Tests/src/Database/Postgresql_Spec.enso
Normal file
@ -0,0 +1,82 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.System.Environment
|
||||
|
||||
from Standard.Database import all
|
||||
from Standard.Database.Connection.Connection import Sql_Error
|
||||
import Standard.Test
|
||||
import Standard.Table as Materialized_Table
|
||||
import project.Database.Common_Spec
|
||||
import project.Common_Table_Spec
|
||||
import project.Aggregate_Spec
|
||||
|
||||
## Checks table metadata reported for a scratch table named "Tinfo".

   The group creates the table, inserts three rows, runs two specs against it
   and finally drops it again.

   Arguments:
   - connection: the database connection used for every statement here.
   - pending: forwarded unchanged to `Test.group` as its `pending` argument.
postgres_specific_spec connection pending =
    # NOTE(review): nesting was reconstructed from a flattened listing - confirm
    # against the repository that the final DROP belongs to the group body.
    Test.group "[PostgreSQL] Info" pending=pending <|
        connection.execute_update 'CREATE TABLE "Tinfo" ("strs" VARCHAR, "ints" INTEGER, "bools" BOOLEAN, "reals" REAL, "doubles" DOUBLE PRECISION)'
        info_table = connection.access_table "Tinfo"
        info_table.insert ["a", Nothing, False, 1.2, 0.000000000001]
        info_table.insert ["abc", Nothing, Nothing, 1.3, Nothing]
        info_table.insert ["def", 42, True, 1.4, 10]

        Test.specify "should return Table information" <|
            summary = info_table.info
            summary.index . to_vector . should_equal ["strs", "ints", "bools", "reals", "doubles"]
            # Expected counts match the number of non-Nothing values inserted
            # into each column above.
            summary.at "Items Count" . to_vector . should_equal [3, 1, 2, 3, 2]
            summary.at "SQL Type" . to_vector . should_equal ["varchar", "int4", "bool", "float4", "float8"]

        Test.specify "should infer standard types correctly" <|
            info_table.at "strs" . sql_type . is_definitely_text . should_be_true
            info_table.at "ints" . sql_type . is_definitely_integer . should_be_true
            info_table.at "bools" . sql_type . is_definitely_boolean . should_be_true
            info_table.at "reals" . sql_type . is_definitely_double . should_be_true

        connection.execute_update 'DROP TABLE "Tinfo"'
|
||||
|
||||
## Runs every spec that applies to the PostgreSQL backend against `connection`
   and afterwards drops the tables recorded during the run.

   Arguments:
   - connection: the database connection handed to each spec and used for
     uploads and cleanup.
   - pending: forwarded to each spec as its `pending` argument; defaults to
     `Nothing`.
run_tests connection pending=Nothing =
    prefix = "[PostgreSQL] "
    table_seq = Ref.new 0
    created_tables = Vector.new_builder

    ## Uploads an in-memory table under a fresh name `table_<n>` and records
       that name for cleanup. Only element 0 and element 2 of each column
       description are passed on to `Materialized_Table.new`.
    table_builder columns =
        seq = Ref.get table_seq
        Ref.put table_seq (seq + 1)
        table_name = "table_" + seq.to_text

        local_table = Materialized_Table.new <| columns.map desc-> [desc.at 0, desc.at 2]
        uploaded = connection.upload_table table_name local_table
        created_tables.append table_name
        uploaded

    ## Issues a `DROP TABLE` for each given name, rethrowing any failure.
    clean_tables table_names =
        table_names.each table_name->
            drop_sql = 'DROP TABLE "' + table_name + '"'
            Panic.rethrow <| connection.execute_update drop_sql

    Common_Spec.spec prefix connection pending=pending
    here.postgres_specific_spec connection pending=pending
    Common_Table_Spec.spec prefix table_builder supports_case_sensitive_columns=True pending=pending

    # Every optional flag of the aggregate test selection is switched off.
    agg_selection = Aggregate_Spec.Test_Selection problem_handling=False advanced_stats=False text=False first_last=False std_dev=False multi_distinct=False
    agg_source = (Enso_Project.data / "data.csv") . read_csv
    agg_table = connection.upload_table "AggT1" agg_source
    created_tables.append agg_table.name
    # The second table is uploaded from `take_start 0` of the same data.
    empty_agg_table = connection.upload_table "AggT2" (agg_source.take_start 0)
    created_tables.append empty_agg_table.name
    to_local = .to_dataframe
    Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table to_local agg_selection pending=pending

    clean_tables created_tables.to_vector
|
||||
|
||||
## Reads the connection settings from the `ENSO_DATABASE_TEST_*` environment
   variables and runs the PostgreSQL tests.

   When `ENSO_DATABASE_TEST_DB_NAME` is not set, the tests are run with an
   error value in place of the connection and with `pending` set to an
   explanatory message. Otherwise a connection is opened and the tests run
   against it.
spec =
    db_name = Environment.get "ENSO_DATABASE_TEST_DB_NAME"
    db_host = Environment.get "ENSO_DATABASE_TEST_HOST"
    db_user = Environment.get "ENSO_DATABASE_TEST_DB_USER"
    db_password = Environment.get "ENSO_DATABASE_TEST_DB_PASSWORD"

    case db_name.is_nothing of
        False ->
            # The host part is only included in the URL when it is configured.
            connection_url = case db_host.is_nothing of
                False -> "postgresql://" + db_host + "/" + db_name
                True -> "postgresql:" + db_name
            live_connection = Database.connect connection_url user=db_user password=db_password
            here.run_tests live_connection
        True ->
            reason = "PostgreSQL test database is not configured. See README.md for instructions."
            missing_connection = Error.throw reason
            here.run_tests missing_connection pending=reason
|
||||
|
||||
# Entry point: hands this module's `spec` to `Test.Suite.run_main`.
main = Test.Suite.run_main here.spec
|
@ -4,7 +4,10 @@ import Standard.Base.System.Environment
|
||||
from Standard.Database import all
|
||||
from Standard.Database.Connection.Connection import Sql_Error
|
||||
import Standard.Test
|
||||
import project.Common_Spec
|
||||
import Standard.Table as Materialized_Table
|
||||
import project.Database.Common_Spec
|
||||
import project.Common_Table_Spec
|
||||
import project.Aggregate_Spec
|
||||
|
||||
redshift_specific_spec connection pending =
|
||||
Test.group "[Redshift] Info" pending=pending <|
|
||||
@ -25,6 +28,39 @@ redshift_specific_spec connection pending =
|
||||
t.at "reals" . sql_type . is_definitely_double . should_be_true
|
||||
connection.execute_update 'DROP TABLE "Tinfo"'
|
||||
|
||||
## Runs every spec that applies to the Redshift backend against `connection`
   and afterwards drops the tables recorded during the run.

   Arguments:
   - connection: the database connection handed to each spec and used for
     uploads and cleanup.
   - pending: forwarded to each spec as its `pending` argument; defaults to
     `Nothing`.
run_tests connection pending=Nothing =
    prefix = "[Redshift] "
    table_seq = Ref.new 0
    created_tables = Vector.new_builder

    ## Uploads an in-memory table under a fresh name `table_<n>` and records
       that name for cleanup. Only element 0 and element 2 of each column
       description are passed on to `Materialized_Table.new`.
    table_builder columns =
        seq = Ref.get table_seq
        Ref.put table_seq (seq + 1)
        table_name = "table_" + seq.to_text

        local_table = Materialized_Table.new <| columns.map desc-> [desc.at 0, desc.at 2]
        uploaded = connection.upload_table table_name local_table
        created_tables.append table_name
        uploaded

    ## Issues a `DROP TABLE` for each given name, rethrowing any failure.
    clean_tables table_names =
        table_names.each table_name->
            drop_sql = 'DROP TABLE "' + table_name + '"'
            Panic.rethrow <| connection.execute_update drop_sql

    Common_Spec.spec prefix connection pending=pending
    here.redshift_specific_spec connection pending=pending
    Common_Table_Spec.spec prefix table_builder supports_case_sensitive_columns=True pending=pending

    # Every optional flag of the aggregate test selection is switched off.
    agg_selection = Aggregate_Spec.Test_Selection problem_handling=False advanced_stats=False text=False first_last=False std_dev=False multi_distinct=False
    agg_source = (Enso_Project.data / "data.csv") . read_csv
    agg_table = connection.upload_table "AggT1" agg_source
    created_tables.append agg_table.name
    # The second table is uploaded from `take_start 0` of the same data.
    empty_agg_table = connection.upload_table "AggT2" (agg_source.take_start 0)
    created_tables.append empty_agg_table.name
    to_local = .to_dataframe
    Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table to_local agg_selection pending=pending

    clean_tables created_tables.to_vector
|
||||
|
||||
spec =
|
||||
credentials = Enso_Project.data / 'redshift_credentials.json'
|
||||
case credentials.exists of
|
||||
@ -36,13 +72,10 @@ spec =
|
||||
user = creds.get 'db_user'
|
||||
props = [access_key, secret_key]
|
||||
connection = Database.connect uri user=user custom_properties=props
|
||||
Common_Spec.spec "[Redshift] " connection Nothing
|
||||
here.redshift_specific_spec connection Nothing
|
||||
here.run_tests connection
|
||||
False ->
|
||||
msg = """
|
||||
Redshift connection is not set up. Please create a JSON file containing
|
||||
the credentials in `data/redshift_credentials.json`
|
||||
connection = Error.throw msg
|
||||
Common_Spec.spec "[Redshift] " connection msg
|
||||
here.redshift_specific_spec connection msg
|
||||
|
||||
here.run_tests connection pending=msg
|
@ -3,7 +3,10 @@ from Standard.Base import all
|
||||
from Standard.Database import all
|
||||
from Standard.Database.Connection.Connection import Sql_Error
|
||||
import Standard.Test
|
||||
import project.Common_Spec
|
||||
import Standard.Table as Materialized_Table
|
||||
import project.Database.Common_Spec
|
||||
import project.Common_Table_Spec
|
||||
import project.Aggregate_Spec
|
||||
|
||||
sqlite_specific_spec connection =
|
||||
Test.group "[SQLite] Error Handling" <|
|
||||
@ -31,6 +34,11 @@ sqlite_specific_spec connection =
|
||||
t.at "ints" . sql_type . is_definitely_integer . should_be_true
|
||||
t.at "bools" . sql_type . is_definitely_boolean . should_be_true
|
||||
t.at "reals" . sql_type . is_definitely_double . should_be_true
|
||||
|
||||
t.at "ints" . sql_type . is_definitely_text . should_be_false
|
||||
t.at "strs" . sql_type . is_definitely_integer . should_be_false
|
||||
t.at "reals" . sql_type . is_definitely_boolean . should_be_false
|
||||
t.at "bools" . sql_type . is_definitely_double . should_be_false
|
||||
connection.execute_update 'DROP TABLE "Tinfo"'
|
||||
|
||||
spec =
|
||||
@ -38,7 +46,29 @@ spec =
|
||||
file = Enso_Project.data / "sqlite_test.db"
|
||||
file.delete_if_exists
|
||||
connection = Database.open_sqlite_file file
|
||||
Common_Spec.spec "[SQLite] " connection
|
||||
prefix = "[SQLite] "
|
||||
|
||||
name_counter = Ref.new 0
|
||||
table_builder columns =
|
||||
ix = Ref.get name_counter
|
||||
Ref.put name_counter ix+1
|
||||
name = "table_"+ix.to_text
|
||||
|
||||
in_mem_table = Materialized_Table.new <| columns.map description-> [description.at 0, description.at 2]
|
||||
connection.upload_table name in_mem_table
|
||||
|
||||
Common_Spec.spec prefix connection
|
||||
here.sqlite_specific_spec connection
|
||||
Common_Table_Spec.spec prefix table_builder supports_case_sensitive_columns=False
|
||||
|
||||
selection = Aggregate_Spec.Test_Selection problem_handling=False advanced_stats=False text=False first_last=False std_dev=False multi_distinct=False
|
||||
agg_in_memory_table = (Enso_Project.data / "data.csv") . read_csv
|
||||
agg_table = connection.upload_table "AggT1" agg_in_memory_table
|
||||
empty_agg_table = connection.upload_table "AggT2" (agg_in_memory_table.take_start 0)
|
||||
materialize = .to_dataframe
|
||||
Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table materialize selection
|
||||
|
||||
connection.close
|
||||
file.delete
|
||||
|
||||
# Entry point: passes `here.spec` to `Test.Suite.run_main`.
main = Test.Suite.run_main here.spec
|
@ -1,42 +0,0 @@
|
||||
from Standard.Base import all
|
||||
from Standard.Database import all
|
||||
|
||||
import Standard.Test
|
||||
|
||||
import project.Common_Table_Spec
|
||||
|
||||
# Runs the shared table specs against a SQLite database stored in
# `sqlite_test.db` inside the project's data directory. Any existing file of
# that name is deleted before opening, and the file is deleted again once the
# specs have run and the connection is closed.
sqlite_spec =
    Enso_Project.data.create_directory
    file = Enso_Project.data / "sqlite_test.db"
    file.delete_if_exists
    connection = Database.open_sqlite_file file

    name_counter = Ref.new 0
    # Creates a uniquely named table from `columns` and fills it row by row.
    # Each column description is read as: first = name, second = type,
    # position 2 = the values.
    table_builder columns =
        ix = Ref.get name_counter
        Ref.put name_counter ix+1
        name = "table_"+ix.to_text
        quote x = '"' + x + '"'
        # TODO this is a hack with no sanitization, just for testing; it should be removed when proper create table is supported by the library
        column_definitions = columns.map col->
            column_name = col.first
            typ = case col.second of
                Integer -> "INT"
                # `col.second` is a type, not Text, so it is converted
                # explicitly; otherwise building this message would fail
                # before the intended panic could be raised.
                _ -> Panic.throw "The provided type "+col.second.to_text+" is not currently supported by the test suite. It may need to be extended."
            quote column_name + " " + typ
        sql = "CREATE TABLE " + quote name + " (" + (column_definitions.join ", ") + ")"
        Panic.rethrow <| connection.execute_update sql
        table = Panic.rethrow <| connection.access_table name

        # The row count is taken from the first column's values.
        row_number = columns.first.at 2 . length
        0.up_to row_number . each row_ix->
            row = columns.map col-> col.at 2 . at row_ix
            table.insert row
        table

    Common_Table_Spec.spec "[SQLite] " table_builder supports_case_sensitive_columns=False

    connection.close
    file.delete
|
||||
|
||||
# Entry point: passes `here.sqlite_spec` to `Test.Suite.run_main`.
main = Test.Suite.run_main here.sqlite_spec
|
24
test/Table_Tests/src/In_Memory_Tests.enso
Normal file
24
test/Table_Tests/src/In_Memory_Tests.enso
Normal file
@ -0,0 +1,24 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import Standard.Test
|
||||
|
||||
import project.Model_Spec
|
||||
import project.Column_Spec
|
||||
import project.Csv_Spec
|
||||
import project.Json_Spec
|
||||
import project.Table_Spec
|
||||
import project.Spreadsheet_Spec
|
||||
import project.Aggregate_Column_Spec
|
||||
import project.Aggregate_Spec
|
||||
|
||||
# Aggregated spec for the in-memory backend: invokes the `spec` of each
# imported spec module, one after another, in the order listed below.
in_memory_spec =
    Column_Spec.spec
    Csv_Spec.spec
    Json_Spec.spec
    Spreadsheet_Spec.spec
    Table_Spec.spec
    Model_Spec.spec
    Aggregate_Column_Spec.spec
    Aggregate_Spec.spec
|
||||
|
||||
# Entry point: passes `here.in_memory_spec` to `Test.Suite.run_main`.
main = Test.Suite.run_main here.in_memory_spec
|
@ -2,23 +2,9 @@ from Standard.Base import all
|
||||
|
||||
import Standard.Test
|
||||
|
||||
import project.Database_Spec
|
||||
import project.Model_Spec
|
||||
import project.Column_Spec
|
||||
import project.Csv_Spec
|
||||
import project.Json_Spec
|
||||
import project.Table_Spec
|
||||
import project.Spreadsheet_Spec
|
||||
import project.Aggregate_Column_Spec
|
||||
import project.Aggregate_Spec
|
||||
import project.In_Memory_Tests
|
||||
import project.Database.Main as Database_Tests
|
||||
|
||||
main = Test.Suite.run_main <|
|
||||
Column_Spec.spec
|
||||
Csv_Spec.spec
|
||||
Json_Spec.spec
|
||||
Spreadsheet_Spec.spec
|
||||
Table_Spec.spec
|
||||
Database_Spec.sqlite_spec
|
||||
Model_Spec.spec
|
||||
Aggregate_Column_Spec.spec
|
||||
Aggregate_Spec.spec
|
||||
In_Memory_Tests.in_memory_spec
|
||||
Database_Tests.databases_spec
|
||||
|
@ -638,7 +638,7 @@ spec =
|
||||
table_builder columns =
|
||||
Table.new <| columns.map description-> [description.at 0, description.at 2]
|
||||
|
||||
Common_Table_Spec.spec "" table_builder supports_case_sensitive_columns=True
|
||||
Common_Table_Spec.spec "[In-Memory] " table_builder supports_case_sensitive_columns=True
|
||||
|
||||
Test.group "Use First Row As Names" <|
|
||||
expect_column_names names table =
|
||||
|
Loading…
Reference in New Issue
Block a user