Adjust Database connection to use query/read to access data. (#3727)

Adjust the Database connection API to align with the new [design](https://github.com/enso-org/design/blob/wip/jd/database-read/epics/basic-libraries/database-read/design.md#querying-tables); a short usage sketch follows the list below.
- `query` replaces the old `access_table` and is expanded to support raw SQL queries.
- `read` replaces `execute_query` and matches the API of `query`.
- `to_dataframe` is renamed to `read`.
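A minimal usage sketch of the new API (a rough example only — the imports, file path and table name are illustrative, not part of this change):

```
from Standard.Base import all
from Standard.Database import all

example =
    connection = Database.connect (SQLite (File.new "db.sqlite"))

    # Set up a lazy query against an existing table; a plain Text name is
    # checked against the `tables` list before falling back to raw SQL.
    t1 = connection.query "Customers"

    # Query with an explicit raw SQL statement, giving it a friendly alias.
    t2 = connection.query (Raw_SQL "SELECT * FROM Customers WHERE age > 20") alias="adults"

    # `read` runs the query and loads the result into memory (replaces `execute_query`).
    local = connection.read "Customers" limit=100

    # Any database Table or Column can also be materialized later (replaces `to_dataframe`).
    t2.read max_rows=10
```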

# Important Notes
Added support for `++` to concatenate a plain `Text` without wrapping it in `SQL.code` first.
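For illustration, this lets the SQL generators drop many explicit `code` wrappers. A before/after sketch based on the dialect changes further down (assuming `code` from `Standard.Database.Data.SQL` is in scope and `arg` is a SQL `Builder`):

```
# Before: every Text fragment had to be lifted into a Builder explicitly.
agg_count_is_null_old arg =
    code "COUNT(CASE WHEN " ++ arg.paren ++ code " IS NULL THEN 1 END)"

# After: a plain Text on the right-hand side of `++` is wrapped automatically.
agg_count_is_null_new arg =
    code "COUNT(CASE WHEN " ++ arg.paren ++ " IS NULL THEN 1 END)"
```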
James Dunkerley 2022-09-23 08:35:08 +01:00 committed by GitHub
parent 096fcfee82
commit 6f54e80970
21 changed files with 269 additions and 268 deletions


@ -200,6 +200,7 @@
find start and end of a period of time containing the provided time.][3695]
- [Implemented `work_days_until` for counting work days between dates and
`add_work_days` which allows shifting a date by a number of work days.][3726]
- [Added `query` and `read` functions to Database connections.][3727]
[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -319,6 +320,7 @@
[3691]: https://github.com/enso-org/enso/pull/3691
[3695]: https://github.com/enso-org/enso/pull/3695
[3726]: https://github.com/enso-org/enso/pull/3726
[3727]: https://github.com/enso-org/enso/pull/3727
#### Enso Compiler


@ -2,17 +2,19 @@ from Standard.Base import all
import Standard.Database.Internal.IR
import Standard.Database.Data.SQL
import Standard.Database.Data.SQL_Query
import Standard.Database.Data.Table as Database_Table
import Standard.Table.Data.Table as Materialized_Table
from Standard.Table import Column_Selector, Column_Name_Mapping
from Standard.Database.Data.SQL import SQL_Type, Statement
from Standard.Database.Data.SQL import SQL_Type, Statement, Statement_Data
from Standard.Database.Internal.JDBC_Connection import create_table_statement, handle_sql_errors
from Standard.Database.Internal.Result_Set import read_column, result_set_to_table
from Standard.Database.Errors import SQL_Error
polyglot java import java.lang.UnsupportedOperationException
polyglot java import java.util.UUID
type Connection
## PRIVATE
@ -32,17 +34,6 @@ type Connection
close : Nothing
close self = self.jdbc_connection.close
## UNSTABLE
Accesses a table within the database, returning an object that can be
used to create queries for that table.
Arguments:
- name: name of the table to access
access_table : Text -> Database_Table
access_table self name = handle_sql_errors <|
columns = self.fetch_columns name
Database_Table.make_table self name columns
## Returns the list of databases (or catalogs) for the connection.
databases : [Text]
@ -111,18 +102,43 @@ type Connection
if all_fields then renamed else
renamed.select_columns (Column_Selector.By_Name ["Database", "Schema", "Name", "Type", "Description"])
## ADVANCED
Executes a raw query and returns the result as an in-memory Table.
## Set up a query returning a Table object, which can be used to work with data within the database or load it into memory.
Arguments:
- query: either raw SQL code as Text or an instance of SQL.Statement
representing the query to execute.
- expected_types: an optional array of expected types of each column;
meant only for internal use.
execute_query : Text | Statement -> Vector SQL_Type -> Materialized_Table
execute_query self query expected_types=Nothing =
self.jdbc_connection.with_prepared_statement query stmt->
- query: name of the table or sql statement to query.
If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query.
- alias: optionally specify a friendly alias for the query.
query : Text | SQL_Query -> Text -> Database_Table
query self query alias="" = handle_sql_errors <| case query of
Text ->
self.query alias=alias <|
if self.tables.at 'Name' . to_vector . contains query then (SQL_Query.Table_Name query) else
SQL_Query.Raw_SQL query
SQL_Query.Raw_SQL raw_sql ->
columns = self.jdbc_connection.fetch_columns raw_sql
name = if alias == "" then (UUID.randomUUID.to_text) else alias
ctx = IR.context_for_query raw_sql name
Database_Table.make_table self name columns ctx
SQL_Query.Table_Name name ->
ctx = IR.context_for_table name (if alias == "" then name else alias)
columns = self.jdbc_connection.fetch_columns (self.dialect.generate_sql (IR.Select_All ctx))
Database_Table.make_table self name columns ctx
## Execute the query and load the results into memory as a Table.
Arguments:
- query: name of the table or sql statement to query.
If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query.
- limit: the maximum number of rows to return.
read : Text | SQL_Query -> Integer | Nothing -> Materialized_Table
read self query limit=Nothing =
self.query query . read max_rows=limit
## PRIVATE
Internal read function for a statement with optional types.
read_statement : Statement -> (Nothing | Vector SQL_Type) -> Table
read_statement self statement expected_types=Nothing =
self.jdbc_connection.with_prepared_statement statement stmt->
result_set_to_table stmt.executeQuery expected_types
## ADVANCED
@ -140,20 +156,6 @@ type Connection
Panic.catch UnsupportedOperationException stmt.executeLargeUpdate _->
stmt.executeUpdate
## PRIVATE
A helper function that fetches column names and sql types associated with
them for a table in the database.
Arguments:
- table_name: The name of the table to fetch the column metadata for.
# fetch_columns : Text -> Vector [Text, SQL_Type]
fetch_columns : Text -> Vector Any
fetch_columns self table_name =
query = IR.Select_All (IR.context_for_table table_name)
compiled = self.dialect.generate_sql query
self.jdbc_connection.fetch_columns compiled
## PRIVATE
UNSTABLE
This is a prototype function used in our test suites. It may change.
@ -176,7 +178,7 @@ type Connection
create_sql = create_table_statement name table temporary
create_table = self.execute_update create_sql
db_table = if create_table.is_error then create_table else self.access_table name
db_table = if create_table.is_error then create_table else self.query (SQL_Query.Table_Name name)
if db_table.is_error.not then
pairs = db_table.internal_columns.map col->[col.name, IR.Constant col.sql_type Nothing]
insert_query = self.dialect.generate_sql <| IR.Insert name pairs


@ -74,10 +74,9 @@ type Column
Arguments:
- max_rows: specifies a maximum amount of rows to fetch; if not set, all
available rows are fetched.
to_dataframe : (Nothing | Integer) -> Materialized_Column.Column
to_dataframe self max_rows=Nothing =
df = self.to_table.to_dataframe max_rows
df.at self.name
read : (Nothing | Integer) -> Materialized_Column.Column
read self max_rows=Nothing =
self.to_table.read max_rows . at self.name
## UNSTABLE
@ -87,9 +86,7 @@ type Column
## We remove the index to avoid fetching index data that will not be
used anyway when constructing the raw Vector.
without_ix = self.to_table.set_index []
df = without_ix . to_dataframe
raw_column = df.at self.name
raw_column.to_vector
without_ix . read . at self.name . to_vector
## UNSTABLE


@ -278,7 +278,9 @@ type Builder
Arguments:
- other: The code fragment to append to `self`.
++ : Builder -> Builder
++ self other = Builder_Data (self.fragments ++ other.fragments)
++ self other = case other of
Text -> if other == "" then self else Builder_Data (self.fragments ++ (code other).fragments)
_ -> Builder_Data (self.fragments ++ other.fragments)
## UNSTABLE
@ -298,10 +300,7 @@ type Builder
Wraps the code fragment in parentheses.
paren : Builder
paren self =
l = code "("
r = code ")"
l ++ self ++ r
paren self = code "(" ++ self ++ ")"
## UNSTABLE


@ -0,0 +1,8 @@
from Standard.Base import Text
type SQL_Query
## Query a whole table or view.
Table_Name name:Text
## Raw SQL query statement.
Raw_SQL sql:Text


@ -4,6 +4,7 @@ from Standard.Base.Error.Problem_Behavior import Report_Warning
import Standard.Database.Internal.Helpers
import Standard.Database.Internal.Aggregate_Helper
import Standard.Database.Internal.IR
from Standard.Database.Data.SQL_Query import Raw_SQL
from Standard.Database.Data.SQL import Statement, SQL_Type
import Standard.Table.Data.Column as Materialized_Column
@ -49,7 +50,7 @@ type Table
- format_terminal: whether ANSI-terminal formatting should be used
display : Integer -> Boolean -> Text
display self show_rows=10 format_terminal=False =
df = self.reset_index.to_dataframe max_rows=show_rows
df = self.reset_index.read max_rows=show_rows
indices_count = self.context.meta_index.length
all_rows_count = self.row_count
display_dataframe df indices_count all_rows_count format_terminal
@ -401,7 +402,7 @@ type Table
result as one could expect if the limit was applied before the filters.
t1 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A"]) . limit 5
t2 = t1.where (t1.at 'A' > 5)
t2.to_dataframe
t2.read
limit : Integer -> Table
limit self max_rows =
new_ctx = self.context.set_limit max_rows
@ -715,13 +716,13 @@ type Table
self.updated_context_and_columns new_ctx new_columns
## Parsing values is not supported in database tables, the table has to be
materialized first with `to_dataframe`.
loaded into memory first with `read`.
parse_values : Data_Formatter -> (Nothing | [Column_Type_Selection]) -> Problem_Behavior -> Table
parse_values self value_formatter=Data_Formatter column_types=Nothing on_problems=Report_Warning =
## Avoid unused arguments warning. We cannot rename arguments to `_`,
because we need to keep the API consistent with the in-memory table.
_ = [value_formatter, column_types, on_problems]
msg = "Parsing values is not supported in database tables, the table has to be materialized first with `to_dataframe`."
msg = "Parsing values is not supported in database tables, the table has to be materialized first with `read`."
Error.throw (Unsupported_Database_Operation_Error_Data msg)
## UNSTABLE
@ -746,7 +747,7 @@ type Table
cols = self.internal_columns.map (c -> [c.name, make_count_expr c.expression])
query = IR.Select [[all_rows_column_name, rows_expr]]+cols self.context
sql = self.connection.dialect.generate_sql query
table = self.connection.execute_query sql
table = self.connection.read_statement sql
all_rows = table.at all_rows_column_name . at 0
kept_columns = self.internal_columns . filter c->
all_rows == table.at c.name . at 0
@ -765,7 +766,7 @@ type Table
new_ctx = IR.subquery_as_ctx setup.first
query = IR.Select [[column_name, expr]] new_ctx
sql = self.connection.dialect.generate_sql query
table = self.connection.execute_query sql
table = self.connection.read_statement sql
table.at column_name . at 0
## UNSTABLE
@ -775,8 +776,8 @@ type Table
Arguments:
- max_rows: specifies a maximum amount of rows to fetch; if not set, all
available rows are fetched.
to_dataframe : (Integer | Nothing) -> Materialized_Table.Table
to_dataframe self max_rows=Nothing =
read : (Integer | Nothing) -> Materialized_Table.Table
read self max_rows=Nothing =
case self.context.meta_index.length > 1 of
True -> Error.throw <| Illegal_State_Error_Data "Multi-indexes are not implemented in the dataframes, if you want to materialize such a Table, remove the index first using `set_index`."
False ->
@ -788,7 +789,7 @@ type Table
False ->
sql = preprocessed.to_sql
expected_types = preprocessed.internal_columns.map .sql_type
table = self.connection.execute_query sql expected_types
table = self.connection.read_statement sql expected_types
case self.context.meta_index.length == 1 of
False -> table
True ->
@ -834,7 +835,7 @@ type Table
[column.name, IR.Operation "COUNT" [column.expression]]
query = IR.Select new_columns new_ctx
self.connection.dialect.generate_sql query
count_table = self.connection.execute_query count_query
count_table = self.connection.read_statement count_query
counts = if cols.is_empty then [] else count_table.columns.map c-> c.at 0
types = cols.map c-> c.sql_type.name
Materialized_Table.new [["Column", cols.map .name], ["Items Count", counts], ["SQL Type", types]] . set_index "Column"
@ -905,7 +906,7 @@ type Table
insert self values =
table_name = case self.context.from_spec of
IR.From_Table name _ -> name
_ -> Error.throw <| Illegal_State_Error_Data "Inserting can only be performed on tables as returned by `access_table`, any further processing is not allowed."
_ -> Error.throw <| Illegal_State_Error_Data "Inserting can only be performed on tables as returned by `query`, any further processing is not allowed."
# TODO [RW] before removing the PRIVATE tag, add a check that no bad stuff was done to the table as described above
pairs = self.internal_columns.zip values col-> value->
[col.name, IR.Constant col.sql_type value]
@ -968,12 +969,12 @@ type Table
example_to_csv =
connection = Database.connect (SQLite (File.new "db.sqlite"))
table = connection.access_table "Table"
table = connection.query (Table_Name "Table")
table.write (enso_project.data / "example_csv_output.csv")
write : File|Text -> File_Format -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> Nothing ! Column_Mismatch | Illegal_Argument_Error | File_Not_Found | IO_Error
write self path format=Auto_Detect on_existing_file=Existing_File_Behavior.Backup match_columns=Match_Columns.By_Name on_problems=Report_Warning =
# TODO This should ideally be done in a streaming manner, or at least respect the row limits.
self.to_dataframe.write path format on_existing_file match_columns on_problems
self.read.write path format on_existing_file match_columns on_problems
type Integrity_Error
@ -1000,10 +1001,10 @@ type Integrity_Error
- connection: The connection to a database.
- table_name: The name of the table to get.
- columns: The names of the columns to get.
# make_table : Connection -> Text -> Vector [Text, SQL_Type] -> Table
make_table : Connection -> Text -> Vector -> Table
make_table connection table_name columns =
ctx = IR.context_for_table table_name
- ctx: The context to use for the table.
# make_table : Connection -> Text -> Vector [Text, SQL_Type] -> IR.Context -> Table
make_table : Connection -> Text -> Vector -> IR.Context -> Table
make_table connection table_name columns ctx =
cols = columns.map (p -> Internal_Column_Data p.first p.second (IR.Column table_name p.first))
Table_Data table_name connection cols ctx


@ -46,8 +46,8 @@ make_binary_op name =
arguments ->
case arguments.length == 2 of
True ->
op = code " "+name+" "
(arguments.at 0)++op++(arguments.at 1) . paren
op = " " + name + " "
((arguments.at 0) ++ op ++ (arguments.at 1)).paren
False ->
Error.throw <| Illegal_State_Error_Data ("Invalid amount of arguments for operation " + name)
@ -110,7 +110,7 @@ make_right_unary_op name =
arguments ->
case arguments.length == 1 of
True ->
(arguments.at 0)++(code " "+name) . paren
(arguments.at 0)++(" " + name) . paren
False ->
Error.throw ("Invalid amount of arguments for operation " + name)
@ -181,8 +181,7 @@ base_dialect =
generate_expression : Internal_Dialect -> IR.Expression -> Builder
generate_expression dialect expr = case expr of
IR.Column origin name ->
dot = code '.'
dialect.wrap_identifier origin ++ dot ++ dialect.wrap_identifier name
dialect.wrap_identifier origin ++ '.' ++ dialect.wrap_identifier name
IR.Constant sql_type value -> SQL.interpolation sql_type value
IR.Operation kind arguments ->
op = dialect.operation_map.get_or_else kind (Error.throw <| Unsupported_Database_Operation_Error kind)
@ -214,6 +213,8 @@ generate_from_part : Internal_Dialect -> From_Spec -> Builder
generate_from_part dialect from_spec = case from_spec of
IR.From_Table name as_name ->
dialect.wrap_identifier name ++ alias dialect as_name
IR.From_Query raw_sql as_name ->
code raw_sql . paren ++ alias dialect as_name
IR.Join kind left_spec right_spec on ->
left = generate_from_part dialect left_spec
right = generate_from_part dialect right_spec
@ -223,7 +224,7 @@ generate_from_part dialect from_spec = case from_spec of
IR.Join_Right -> "RIGHT JOIN"
IR.Join_Cross -> "CROSS JOIN"
ons = SQL.join " AND " (on.map (generate_expression dialect)) . prefix_if_present " ON "
left ++ (code " "+prefix+" ") ++ right ++ ons
left ++ (" " + prefix + " ") ++ right ++ ons
IR.Sub_Query columns context as_name ->
sub = generate_query dialect (IR.Select columns context)
sub.paren ++ alias dialect as_name
@ -238,15 +239,15 @@ generate_from_part dialect from_spec = case from_spec of
generate_order : Internal_Dialect -> Order_Descriptor -> Builder
generate_order dialect order_descriptor =
order_suffix = case order_descriptor.direction of
Sort_Direction.Ascending -> code " ASC"
Sort_Direction.Descending -> code " DESC"
Sort_Direction.Ascending -> " ASC"
Sort_Direction.Descending -> " DESC"
nulls_suffix = case order_descriptor.nulls_order of
Nothing -> SQL.empty
IR.Nulls_First -> code " NULLS FIRST"
IR.Nulls_Last -> code " NULLS LAST"
Nothing -> ""
IR.Nulls_First -> " NULLS FIRST"
IR.Nulls_Last -> " NULLS LAST"
collation = case order_descriptor.collation of
Nothing -> SQL.empty
collation_name -> code ' COLLATE "'+collation_name+'"'
Nothing -> ""
collation_name -> ' COLLATE "' + collation_name + '"'
base_expression = generate_expression dialect order_descriptor.expression
base_expression ++ collation ++ order_suffix ++ nulls_suffix
@ -265,8 +266,8 @@ generate_select_context dialect ctx =
where_part = (SQL.join " AND " (gen_exprs ctx.where_filters)) . prefix_if_present " WHERE "
group_part = (SQL.join ", " (gen_exprs ctx.groups)) . prefix_if_present " GROUP BY "
limit_part = case ctx.limit of
Nothing -> SQL.empty
Integer -> code " LIMIT "+ctx.limit.to_text
Nothing -> ""
Integer -> " LIMIT " + ctx.limit.to_text
orders = ctx.orders.map (generate_order dialect)
order_part = (SQL.join ", " orders) . prefix_if_present " ORDER BY "
@ -287,7 +288,7 @@ generate_insert_query dialect table_name pairs =
values = SQL.join ", " <| pairs.map (.second >> generate_expression dialect)
into = dialect.wrap_identifier table_name
code "INSERT INTO " ++ into ++ code " (" ++ names ++ code ") VALUES (" ++ values ++ code ")"
code "INSERT INTO " ++ into ++ " (" ++ names ++ ") VALUES (" ++ values ++ ")"
## PRIVATE
@ -301,9 +302,9 @@ generate_query dialect query = case query of
IR.Select columns ctx ->
gen_column pair = (generate_expression dialect pair.second) ++ alias dialect pair.first
cols = SQL.join ", " (columns.map gen_column)
(code "SELECT ") ++ cols ++ generate_select_context dialect ctx
code "SELECT " ++ cols ++ generate_select_context dialect ctx
IR.Select_All ctx ->
(code "SELECT * ") ++ generate_select_context dialect ctx
code "SELECT * " ++ generate_select_context dialect ctx
IR.Insert table_name pairs ->
generate_insert_query dialect table_name pairs
_ -> Error.throw <| Unsupported_Database_Operation_Error "Unsupported query type."
@ -337,19 +338,19 @@ make_concat make_raw_concat_expr make_contains_expr has_quote args =
separator = args.at 1
prefix = args.at 2
suffix = args.at 3
append = code " || "
append = " || "
possibly_quoted = case has_quote of
True ->
quote = args.at 4
includes_separator = separator ++ code " != '' AND " ++ make_contains_expr expr separator
includes_separator = separator ++ " != '' AND " ++ make_contains_expr expr separator
## We use the assumption that `has_quote` is True iff `quote` is not empty.
includes_quote = make_contains_expr expr quote
is_empty = expr ++ code " = ''"
needs_quoting = includes_separator.paren ++ code " OR " ++ includes_quote.paren ++ code " OR " ++ is_empty.paren
escaped = code "replace(" ++ expr ++ code ", " ++ quote ++ code ", " ++ quote ++ append ++ quote ++ code ")"
is_empty = expr ++ " = ''"
needs_quoting = includes_separator.paren ++ " OR " ++ includes_quote.paren ++ " OR " ++ is_empty.paren
escaped = code "replace(" ++ expr ++ ", " ++ quote ++ ", " ++ quote ++ append ++ quote ++ ")"
quoted = quote ++ append ++ escaped ++ append ++ quote
code "CASE WHEN " ++ needs_quoting ++ code " THEN " ++ quoted ++ code " ELSE " ++ expr ++ code " END"
code "CASE WHEN " ++ needs_quoting ++ " THEN " ++ quoted ++ " ELSE " ++ expr ++ " END"
False -> expr
transformed_expr = code "CASE WHEN " ++ expr ++ code " IS NULL THEN '' ELSE " ++ possibly_quoted.paren ++ code " END"
transformed_expr = code "CASE WHEN " ++ expr ++ " IS NULL THEN '' ELSE " ++ possibly_quoted.paren ++ " END"
concatenated = make_raw_concat_expr transformed_expr separator
prefix.paren ++ append ++ concatenated ++ append ++ suffix.paren


@ -80,7 +80,7 @@ type JDBC_Connection
Given a prepared statement, gets the column names and types for the
result set.
fetch_columns : PreparedStatement -> Any
fetch_columns : Text | Statement -> Any
fetch_columns self statement =
self.with_prepared_statement statement stmt->
metadata = stmt.executeQuery.getMetaData
@ -183,9 +183,9 @@ create_table_statement name table temporary =
column_types = table.columns.map col-> default_storage_type col.storage_type
column_names = table.columns.map .name
col_makers = column_names.zip column_types name-> typ->
Base_Generator.wrap_in_quotes name ++ SQL.code " " ++ SQL.code typ.name
create_prefix = if temporary then "CREATE TEMPORARY TABLE " else "CREATE TABLE "
(SQL.code create_prefix ++ Base_Generator.wrap_in_quotes name ++ SQL.code " (" ++ (SQL.join ", " col_makers) ++ SQL.code ")").build
Base_Generator.wrap_in_quotes name ++ " " ++ typ.name
create_prefix = SQL.code <| if temporary then "CREATE TEMPORARY TABLE " else "CREATE TABLE "
(create_prefix ++ Base_Generator.wrap_in_quotes name ++ " (" ++ (SQL.join ", " col_makers) ++ ")").build
## PRIVATE
Returns the default database type corresponding to an in-memory storage type.


@ -1,6 +1,7 @@
from Standard.Base import all
from Standard.Database.Data.SQL import SQL_Type, Statement
import Standard.Database.Data.SQL_Query
import Standard.Database.Internal.JDBC_Connection
import Standard.Database.Data.Dialect
@ -31,16 +32,6 @@ type Postgres_Connection
close : Nothing
close self = self.connection.close
## UNSTABLE
Accesses a table within the database, returning an object that can be
used to create queries for that table.
Arguments:
- name: name of the table to access
access_table : Text -> Database_Table
access_table self name = self.connection.access_table name
## Returns the list of databases (or catalogs) for the connection.
databases : [Text]
databases self =
@ -94,19 +85,29 @@ type Postgres_Connection
tables self name_like=Nothing database=self.database schema=self.schema types=Nothing all_fields=False =
self.connection.tables name_like database schema types all_fields
## ADVANCED
Executes a raw query and returns the result as an in-memory Table.
## Set up a query returning a Table object, which can be used to work with data within the database or load it into memory.
Arguments:
- query: either raw SQL code as Text or an instance of
.Statement
representing the query to execute.
- expected_types: an optional array of expected types of each column;
meant only for internal use.
execute_query : Text | Statement -> Vector SQL_Type -> Materialized_Table
execute_query self query expected_types=Nothing =
self.connection.execute_query query expected_types
- query: name of the table or sql statement to query.
If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query.
- alias: optionally specify a friendly alias for the query.
query : Text | SQL_Query -> Text -> Database_Table
query self query alias="" = self.connection.query query alias
## Execute the query and load the results into memory as a Table.
Arguments:
- query: name of the table or sql statement to query.
If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query.
- limit: the maximum number of rows to return.
read : Text | SQL_Query -> Integer | Nothing -> Table
read self query limit=Nothing = self.connection.read query limit
## PRIVATE
Internal read function for a statement with optional types.
read_statement : Statement -> (Nothing | Vector SQL_Type) -> Table
read_statement self statement expected_types=Nothing =
self.connection.read_statement statement expected_types
## ADVANCED
@ -121,18 +122,6 @@ type Postgres_Connection
execute_update self query =
self.connection.execute_update query
## PRIVATE
A helper function that fetches column names and sql types associated with
them for a table in the database.
Arguments:
- table_name: The name of the table to fetch the column metadata for.
# fetch_columns : Text -> Vector [Text, SQL_Type]
fetch_columns : Text -> Vector Any
fetch_columns self table_name =
self.connection.fetch_columns table_name
## PRIVATE
UNSTABLE
This is a prototype function used in our test suites. It may change.


@ -99,42 +99,42 @@ resolve_target_sql_type aggregate = case aggregate of
## PRIVATE
agg_count_is_null = Base_Generator.lift_unary_op "COUNT_IS_NULL" arg->
code "COUNT(CASE WHEN " ++ arg.paren ++ code " IS NULL THEN 1 END)"
code "COUNT(CASE WHEN " ++ arg.paren ++ " IS NULL THEN 1 END)"
## PRIVATE
agg_count_empty = Base_Generator.lift_unary_op "COUNT_EMPTY" arg->
code "COUNT(CASE WHEN (" ++ arg.paren ++ code " IS NULL) OR (" ++ arg.paren ++ code " = '') THEN 1 END)"
code "COUNT(CASE WHEN (" ++ arg.paren ++ " IS NULL) OR (" ++ arg.paren ++ " = '') THEN 1 END)"
## PRIVATE
agg_count_not_empty = Base_Generator.lift_unary_op "COUNT_NOT_EMPTY" arg->
code "COUNT(CASE WHEN (" ++ arg.paren ++ code " IS NOT NULL) AND (" ++ arg.paren ++ code " != '') THEN 1 END)"
code "COUNT(CASE WHEN (" ++ arg.paren ++ " IS NOT NULL) AND (" ++ arg.paren ++ " != '') THEN 1 END)"
## PRIVATE
agg_median = Base_Generator.lift_unary_op "MEDIAN" arg->
median = code "percentile_cont(0.5) WITHIN GROUP (ORDER BY " ++ arg ++ code ")"
median = code "percentile_cont(0.5) WITHIN GROUP (ORDER BY " ++ arg ++ ")"
## TODO Technically, this check may not be necessary if the input column has
type INTEGER, because it is impossible to represent a NaN in that type.
However, currently the column type inference is not tested well-enough to
rely on this, so leaving an uniform approach regardless of type. This
could be revisited when further work on column types takes place.
See issue: https://www.pivotaltracker.com/story/show/180854759
has_nan = code "bool_or(" ++ arg ++ code " = double precision 'NaN')"
code "CASE WHEN " ++ has_nan ++ code " THEN 'NaN' ELSE " ++ median ++ code " END"
has_nan = code "bool_or(" ++ arg ++ " = double precision 'NaN')"
code "CASE WHEN " ++ has_nan ++ " THEN 'NaN' ELSE " ++ median ++ " END"
## PRIVATE
agg_mode = Base_Generator.lift_unary_op "MODE" arg->
code "mode() WITHIN GROUP (ORDER BY " ++ arg ++ code ")"
code "mode() WITHIN GROUP (ORDER BY " ++ arg ++ ")"
agg_percentile = Base_Generator.lift_binary_op "PERCENTILE" p-> expr->
percentile = code "percentile_cont(" ++ p ++ code ") WITHIN GROUP (ORDER BY " ++ expr ++ code ")"
percentile = code "percentile_cont(" ++ p ++ ") WITHIN GROUP (ORDER BY " ++ expr ++ ")"
## TODO Technically, this check may not be necessary if the input column has
type INTEGER, because it is impossible to represent a NaN in that type.
However, currently the column type inference is not tested well-enough to
rely on this, so leaving an uniform approach regardless of type. This
could be revisited when further work on column types takes place.
See issue: https://www.pivotaltracker.com/story/show/180854759
has_nan = code "bool_or(" ++ expr ++ code " = double precision 'NaN')"
code "CASE WHEN " ++ has_nan ++ code " THEN 'NaN' ELSE " ++ percentile ++ code " END"
has_nan = code "bool_or(" ++ expr ++ " = double precision 'NaN')"
code "CASE WHEN " ++ has_nan ++ " THEN 'NaN' ELSE " ++ percentile ++ " END"
## PRIVATE
These are written in a not most-efficient way, but a way that makes them
@ -152,31 +152,31 @@ make_first_aggregator reverse ignore_null args =
result_expr = args.head
order_bys = args.tail
filter_clause = if ignore_null.not then code "" else
code " FILTER (WHERE " ++ result_expr.paren ++ code " IS NOT NULL)"
filter_clause = if ignore_null.not then "" else
code " FILTER (WHERE " ++ result_expr.paren ++ " IS NOT NULL)"
order_clause =
code " ORDER BY " ++ SQL.join "," order_bys
index_expr = case reverse of
True -> if ignore_null.not then code "COUNT(*)" else
code "COUNT(" ++ result_expr ++ code ")"
False -> code "1"
True -> if ignore_null.not then "COUNT(*)" else
code "COUNT(" ++ result_expr ++ ")"
False -> "1"
code "(array_agg(" ++ result_expr.paren ++ order_clause ++ code ")" ++ filter_clause ++ code ")[" ++ index_expr ++ code "]"
code "(array_agg(" ++ result_expr.paren ++ order_clause ++ ")" ++ filter_clause ++ ")[" ++ index_expr ++ "]"
agg_shortest = Base_Generator.lift_unary_op "SHORTEST" arg->
order_clause =
code " ORDER BY char_length(" ++ arg ++ code ") ASC NULLS LAST"
code "(array_agg(" ++ arg.paren ++ order_clause ++ code "))[1]"
code " ORDER BY char_length(" ++ arg ++ ") ASC NULLS LAST"
code "(array_agg(" ++ arg.paren ++ order_clause ++ "))[1]"
agg_longest = Base_Generator.lift_unary_op "LONGEST" arg->
order_clause =
code " ORDER BY char_length(" ++ arg ++ code ") DESC NULLS LAST"
code "(array_agg(" ++ arg.paren ++ order_clause ++ code "))[1]"
code " ORDER BY char_length(" ++ arg ++ ") DESC NULLS LAST"
code "(array_agg(" ++ arg.paren ++ order_clause ++ "))[1]"
## PRIVATE
concat_ops =
make_raw_concat_expr expr separator =
code "string_agg(" ++ expr ++ code ", " ++ separator ++ code ")"
code "string_agg(" ++ expr ++ ", " ++ separator ++ ")"
concat = Base_Generator.make_concat make_raw_concat_expr make_contains_expr
[["CONCAT", concat (has_quote=False)], ["CONCAT_QUOTE_IF_NEEDED", concat (has_quote=True)]]
@ -186,34 +186,34 @@ agg_count_distinct args = if args.is_empty then (Error.throw (Illegal_Argument_E
case args.length == 1 of
True ->
## A single null value will be skipped.
code "COUNT(DISTINCT " ++ args.first ++ code ")"
code "COUNT(DISTINCT " ++ args.first ++ ")"
False ->
## A tuple of nulls is not a null, so it will not be skipped - but
we want to ignore all-null columns. So we manually filter them
out.
count = code "COUNT(DISTINCT (" ++ SQL.join ", " args ++ code "))"
are_nulls = args.map arg-> arg.paren ++ code " IS NULL"
all_nulls_filter = code " FILTER (WHERE NOT (" ++ SQL.join " AND " are_nulls ++ code "))"
count = code "COUNT(DISTINCT (" ++ SQL.join ", " args ++ "))"
are_nulls = args.map arg-> arg.paren ++ " IS NULL"
all_nulls_filter = code " FILTER (WHERE NOT (" ++ SQL.join " AND " are_nulls ++ "))"
(count ++ all_nulls_filter).paren
## PRIVATE
agg_count_distinct_include_null args =
## If we always count as tuples, then even null fields are counted.
code "COUNT(DISTINCT (" ++ SQL.join ", " args ++ code ", 0))"
code "COUNT(DISTINCT (" ++ SQL.join ", " args ++ ", 0))"
## PRIVATE
starts_with = Base_Generator.lift_binary_op "starts_with" str-> sub->
res = str ++ (code " LIKE CONCAT(") ++ sub ++ (code ", '%')")
res = str ++ " LIKE CONCAT(" ++ sub ++ ", '%')"
res.paren
## PRIVATE
ends_with = Base_Generator.lift_binary_op "ends_with" str-> sub->
res = str ++ (code " LIKE CONCAT('%', ") ++ sub ++ (code ")")
res = str ++ " LIKE CONCAT('%', " ++ sub ++ ")"
res.paren
## PRIVATE
make_contains_expr expr substring =
code "position(" ++ substring ++ code " in " ++ expr ++ code ") > 0"
code "position(" ++ substring ++ " in " ++ expr ++ ") > 0"
## PRIVATE
contains = Base_Generator.lift_binary_op "contains" make_contains_expr


@ -1,6 +1,7 @@
from Standard.Base import all
from Standard.Database.Data.SQL import SQL_Type, Statement
import Standard.Database.Data.SQL_Query
import Standard.Database.Internal.JDBC_Connection
import Standard.Database.Data.Dialect
@ -27,16 +28,6 @@ type SQLite_Connection
close : Nothing
close self = self.connection.close
## UNSTABLE
Accesses a table within the database, returning an object that can be
used to create queries for that table.
Arguments:
- name: name of the table to access
access_table : Text -> Database_Table
access_table self name = self.connection.access_table name
## Returns the list of databases (or catalogs) for the connection.
databases : [Text]
databases self = [Nothing]
@ -87,19 +78,29 @@ type SQLite_Connection
tables self name_like=Nothing database=self.database schema=self.schema types=Nothing all_fields=False =
self.connection.tables name_like database schema types all_fields
## ADVANCED
Executes a raw query and returns the result as an in-memory Table.
## Set up a query returning a Table object, which can be used to work with data within the database or load it into memory.
Arguments:
- query: either raw SQL code as Text or an instance of
.Statement
representing the query to execute.
- expected_types: an optional array of expected types of each column;
meant only for internal use.
execute_query : Text | Statement -> Vector SQL_Type -> Materialized_Table
execute_query self query expected_types=Nothing =
self.connection.execute_query query expected_types
- query: name of the table or sql statement to query.
If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query.
- alias: optionally specify a friendly alias for the query.
query : Text | SQL_Query -> Text -> Database_Table
query self query alias="" = self.connection.query query alias
## Execute the query and load the results into memory as a Table.
Arguments:
- query: name of the table or sql statement to query.
If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query.
- limit: the maximum number of rows to return.
read : Text | SQL_Query -> Integer | Nothing -> Table
read self query limit=Nothing = self.connection.read query limit
## PRIVATE
Internal read function for a statement with optional types.
read_statement : Statement -> (Nothing | Vector SQL_Type) -> Table
read_statement self statement expected_types=Nothing =
self.connection.read_statement statement expected_types
## ADVANCED
@ -114,18 +115,6 @@ type SQLite_Connection
execute_update self query =
self.connection.execute_update query
## PRIVATE
A helper function that fetches column names and sql types associated with
them for a table in the database.
Arguments:
- table_name: The name of the table to fetch the column metadata for.
# fetch_columns : Text -> Vector [Text, SQL_Type]
fetch_columns : Text -> Vector Any
fetch_columns self table_name =
self.connection.fetch_columns table_name
## PRIVATE
UNSTABLE
This is a prototype function used in our test suites. It may change.


@ -107,31 +107,31 @@ unsupported name =
## PRIVATE
agg_count_is_null = Base_Generator.lift_unary_op "COUNT_IS_NULL" arg->
code "COALESCE(SUM(" ++ arg.paren ++ code " IS NULL), 0)"
code "COALESCE(SUM(" ++ arg.paren ++ " IS NULL), 0)"
## PRIVATE
agg_count_empty = Base_Generator.lift_unary_op "COUNT_EMPTY" arg->
code "COALESCE(SUM((" ++ arg.paren ++ code " IS NULL) OR (" ++ arg.paren ++ code " == '')), 0)"
code "COALESCE(SUM((" ++ arg.paren ++ " IS NULL) OR (" ++ arg.paren ++ " == '')), 0)"
## PRIVATE
agg_count_not_empty = Base_Generator.lift_unary_op "COUNT_NOT_EMPTY" arg->
code "COALESCE(SUM((" ++ arg.paren ++ code " IS NOT NULL) AND (" ++ arg.paren ++ code " != '')), 0)"
code "COALESCE(SUM((" ++ arg.paren ++ " IS NOT NULL) AND (" ++ arg.paren ++ " != '')), 0)"
## PRIVATE
agg_stddev_pop = Base_Generator.lift_unary_op "STDDEV_POP" arg->
sum_of_squares = code "SUM(" ++ arg.paren ++ code "*" ++ arg.paren ++ code ")"
square_of_sums = code "SUM(" ++ arg ++ code ") * SUM(" ++ arg ++ code ")"
n = code "CAST(COUNT(" ++ arg ++ code ") AS REAL)"
var = code "(" ++ sum_of_squares ++ code " - (" ++ square_of_sums ++ code " / " ++ n ++ code ")) / " ++ n
code "SQRT(" ++ var ++ code ")"
sum_of_squares = code "SUM(" ++ arg.paren ++ "*" ++ arg.paren ++ ")"
square_of_sums = code "SUM(" ++ arg ++ ") * SUM(" ++ arg ++ ")"
n = code "CAST(COUNT(" ++ arg ++ ") AS REAL)"
var = code "(" ++ sum_of_squares ++ " - (" ++ square_of_sums ++ " / " ++ n ++ ")) / " ++ n
code "SQRT(" ++ var ++ ")"
## PRIVATE
agg_stddev_samp = Base_Generator.lift_unary_op "STDDEV_SAMP" arg->
sum_of_squares = code "SUM(" ++ arg.paren ++ code "*" ++ arg.paren ++ code ")"
square_of_sums = code "SUM(" ++ arg ++ code ") * SUM(" ++ arg ++ code ")"
n = code "CAST(COUNT(" ++ arg ++ code ") AS REAL)"
var = code "(" ++ sum_of_squares ++ code " - (" ++ square_of_sums ++ code " / " ++ n ++ code ")) / (" ++ n ++ code " - 1)"
code "SQRT(" ++ var ++ code ")"
sum_of_squares = code "SUM(" ++ arg.paren ++ "*" ++ arg.paren ++ ")"
square_of_sums = code "SUM(" ++ arg ++ ") * SUM(" ++ arg ++ ")"
n = code "CAST(COUNT(" ++ arg ++ ") AS REAL)"
var = code "(" ++ sum_of_squares ++ " - (" ++ square_of_sums ++ " / " ++ n ++ ")) / (" ++ n ++ " - 1)"
code "SQRT(" ++ var ++ ")"
## PRIVATE
This is a prototype that doesn't work correctly. Left for reference for
@ -152,45 +152,45 @@ window_aggregate window_type ignore_null args =
order_exprs = args.tail
filter_clause = if ignore_null.not then code "" else
code " FILTER (WHERE " ++ result_expr.paren ++ code " IS NOT NULL)"
code " FILTER (WHERE " ++ result_expr.paren ++ " IS NOT NULL)"
code window_type+"(" ++ result_expr ++ code ")" ++ filter_clause ++ code " OVER (ORDER BY " ++ SQL.join "," order_exprs ++ code ")"
code window_type+"(" ++ result_expr ++ ")" ++ filter_clause ++ " OVER (ORDER BY " ++ SQL.join "," order_exprs ++ ")"
## PRIVATE
concat_ops =
make_raw_concat_expr expr separator =
code "group_concat(" ++ expr ++ code ", " ++ separator ++ code ")"
code "group_concat(" ++ expr ++ ", " ++ separator ++ ")"
concat = Base_Generator.make_concat make_raw_concat_expr make_contains_expr
[["CONCAT", concat (has_quote=False)], ["CONCAT_QUOTE_IF_NEEDED", concat (has_quote=True)]]
## PRIVATE
agg_count_distinct args = case args.length == 1 of
True -> code "COUNT(DISTINCT (" ++ args.first ++ code "))"
True -> code "COUNT(DISTINCT (" ++ args.first ++ "))"
False -> Error.throw (Illegal_Argument_Error_Data "COUNT_DISTINCT supports only single arguments in SQLite.")
## PRIVATE
agg_count_distinct_include_null args = case args.length == 1 of
True ->
arg = args.first
count = code "COUNT(DISTINCT " ++ arg ++ code ")"
all_nulls_case = code "CASE WHEN COUNT(CASE WHEN " ++ arg ++ code "IS NULL THEN 1 END) > 0 THEN 1 ELSE 0 END"
count ++ code " + " ++ all_nulls_case
count = code "COUNT(DISTINCT " ++ arg ++ ")"
all_nulls_case = code "CASE WHEN COUNT(CASE WHEN " ++ arg ++ "IS NULL THEN 1 END) > 0 THEN 1 ELSE 0 END"
count ++ " + " ++ all_nulls_case
False -> Error.throw (Illegal_Argument_Error_Data "COUNT_DISTINCT supports only single arguments in SQLite.")
## PRIVATE
starts_with = Base_Generator.lift_binary_op "starts_with" str-> sub->
res = str ++ (code " LIKE (") ++ sub ++ (code " || '%')")
res = str ++ " LIKE (" ++ sub ++ " || '%')"
res.paren
## PRIVATE
ends_with = Base_Generator.lift_binary_op "ends_with" str-> sub->
res = str ++ (code " LIKE ('%' || ") ++ sub ++ (code ")")
res = str ++ " LIKE ('%' || " ++ sub ++ ")"
res.paren
## PRIVATE
make_contains_expr expr substring =
code "instr(" ++ expr ++ code ", " ++ substring ++ code ") > 0"
code "instr(" ++ expr ++ ", " ++ substring ++ ") > 0"
## PRIVATE
contains = Base_Generator.lift_binary_op "contains" make_contains_expr


@ -1,5 +1,6 @@
import Standard.Database.Data.Table
import Standard.Database.Data.Column
import Standard.Database.Data.SQL_Query
import Standard.Database.Connection.Database
import Standard.Database.Connection.Credentials
@ -24,3 +25,4 @@ from Standard.Database.Connection.Database export connect
from Standard.Database.Connection.Postgres_Options export Postgres_Options, Postgres
from Standard.Database.Connection.SQLite_Options export SQLite_Options, SQLite, In_Memory
from Standard.Database.Connection.Redshift_Options export Redshift_Options, Redshift, AWS_Profile, AWS_Key
from Standard.Database.Data.SQL_Query export SQL_Query, Table_Name, Raw_SQL


@ -30,7 +30,7 @@ prepare_visualization x max_rows=1000 = Helpers.recover_errors <| case x of
Database_Table.Table_Data _ _ _ _ ->
# Materialize a table with indices as normal columns (because dataframe does not support multi-indexing).
df = x.reset_index.to_dataframe max_rows
df = x.reset_index.read max_rows
# Then split into actual columns and indices.
vis_df = df.select_columns (By_Name (x.columns.map .name))
indices = df.select_columns (By_Name (x.indices.map .name)) . columns


@ -26,7 +26,7 @@ spec =
table3 = ["T3", [["A", int], ["E", bool], ["F", int]]]
tables = Map.from_vector [table1, table2, table3]
Fake_Test_Connection.make Dialect.sqlite tables
t1 = test_connection.access_table "T1"
t1 = test_connection.query (Table_Name "T1")
Test.group "[Codegen] JSON serialization" <|
Test.specify "should serialize Tables and Columns to their SQL representation" <|
q1 = t1.where (t1.at "A" == 42) . to_json
@ -100,8 +100,8 @@ spec =
c2.to_sql.prepare . should_equal ['SELECT "T1"."B" AS "B" FROM "T1" AS "T1" WHERE ("T1"."A" = "T1"."C")', []]
Test.group "[Codegen] Joining Tables" <|
t2 = test_connection.access_table "T2"
t3 = test_connection.access_table "T3"
t2 = test_connection.query (Table_Name "T2")
t3 = test_connection.query (Table_Name "T3")
Test.specify "should allow joining tables index-on-index" <|
r1 = t1.set_index 'A' . join (t2.set_index 'D')
r1.to_sql.prepare . should_equal ['SELECT "T1"."B" AS "B", "T1"."C" AS "C", "T2"."E" AS "E", "T2"."F" AS "F" FROM (SELECT "T1"."B" AS "B", "T1"."C" AS "C", "T1"."A" AS "A", "T1"."A" AS "A_1" FROM "T1" AS "T1") AS "T1" LEFT JOIN (SELECT "T2"."E" AS "E", "T2"."F" AS "F", "T2"."D" AS "D" FROM "T2" AS "T2") AS "T2" ON ("T1"."A_1" = "T2"."D")', []]
@ -124,8 +124,8 @@ spec =
table2 = ["T2", [["X", int], ["A", int], ["B", int]]]
tables = Map.from_vector [table1, table2]
Fake_Test_Connection.make Dialect.sqlite tables
t1 = connection.access_table "T1"
t2 = connection.access_table "T2"
t1 = connection.query (Table_Name "T1")
t2 = connection.query (Table_Name "T2")
(t1.set_index "X").join (t2.set_index "X") . should_fail_with Illegal_State_Error_Data
Test.specify "should ensure that name suffixes are distinct" <|


@ -26,8 +26,8 @@ spec prefix connection pending=Nothing =
t1 = upload "T1" (Materialized_Table.new [["a", [1, 4]], ["b", [2, 5]], ["c", [3, 6]]])
Test.group prefix+"Basic Table Access" pending=pending <|
Test.specify "should allow to materialize tables and columns into local memory" <|
df = t1.to_dataframe
a = t1.at 'a' . to_dataframe
df = t1.read
a = t1.at 'a' . read
df.at 'a' . to_vector . should_equal [1, 4]
a.to_vector . should_equal [1, 4]
Test.specify "should allow to materialize columns directly into a Vector" <|
@ -35,7 +35,7 @@ spec prefix connection pending=Nothing =
v . should_equal [1, 4]
Test.specify "should preserve indexes when materializing tables" <|
# TODO add multi indexes when implemented
df = t1.set_index 'a' . to_dataframe
df = t1.set_index 'a' . read
df.at 'b' . to_vector . should_equal [2, 5]
df.columns.map .name . should_equal ['b', 'c']
ix = df.index
@ -44,7 +44,7 @@ spec prefix connection pending=Nothing =
Test.specify "should preserve indexes when materializing columns" <|
# TODO add multi indexes when implemented
b = t1.set_index 'a' . at 'b'
col = b . to_dataframe
col = b . read
col.to_vector . should_equal [2, 5]
ix = col.index
@ -56,14 +56,13 @@ spec prefix connection pending=Nothing =
ix2.to_vector . should_equal [1, 4]
Test.specify "should work correctly when there are no columns" <|
empty = t1.select_columns (By_Name [])
empty.to_dataframe.column_count . should_equal 0
empty.to_dataframe.row_count . should_equal empty.row_count
empty.read.column_count . should_equal 0
empty.read.row_count . should_equal empty.row_count
Test.specify "should handle bigger result sets" <|
n = 1000
original = Materialized_Table.new [["a", Vector.new n ix->ix], ["b", Vector.new n ix-> ix * 3.1415926], ["c", Vector.new n ix-> ix.to_text]]
table = upload "Big" original
materialized = table.to_dataframe
materialized.row_count . should_equal n
table.read.row_count . should_equal n
Test.group prefix+"Mapping Operations" pending=pending <|
t2 = upload "T2" <| Materialized_Table.new [["x", [1, 4, 5, Nothing]], ["y", [2, 3, 5, Nothing]], ["b", [False, False, True, Nothing]]]
@ -117,7 +116,7 @@ spec prefix connection pending=Nothing =
Test.group prefix+"Masking Tables" pending=pending <|
Test.specify "should allow to select rows from a table or column based on an expression" <|
t2 = t1.where (t1.at "a" == 1)
df = t2.to_dataframe
df = t2.read
df.at "a" . to_vector . should_equal [1]
df.at "b" . to_vector . should_equal [2]
df.at "c" . to_vector . should_equal [3]
@ -134,24 +133,24 @@ spec prefix connection pending=Nothing =
the Dataframes library, so it is independent of the library under
testing here.
Test.specify "should allow joining tables index-on-index" <|
r_1 = a.set_index 'x' . join (b.set_index 'w') . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'y', Sort_Column.Name 'z'])
r_1 = a.set_index 'x' . join (b.set_index 'w') . read . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'y', Sort_Column.Name 'z'])
r_1.at 'y' . to_vector . should_equal ['bar', 'baz', 'eggs', 'foo', 'spam', 'spam', 'spam']
r_1.at 'z' . to_vector . should_equal [Nothing, Nothing, 'foo', Nothing, 'bar', 'eggs', 'foo']
r_2 = a.set_index 'y' . join (b.set_index 'z') drop_unmatched=True . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'x', Sort_Column.Name 'w'])
r_2 = a.set_index 'y' . join (b.set_index 'z') drop_unmatched=True . read . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'x', Sort_Column.Name 'w'])
r_2.at 'x' . to_vector . should_equal [0, 0, 1, 1, 3, 6]
r_2.at 'w' . to_vector . should_equal [3, 6, 3, 5, 5, 3]
Test.specify "should allow joining tables column-on-index" <|
r_1 = a.join (b.set_index 'w') on='x' . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'y', Sort_Column.Name 'z'])
r_1 = a.join (b.set_index 'w') on='x' . read . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'y', Sort_Column.Name 'z'])
r_1.at 'y' . to_vector . should_equal ['bar', 'baz', 'eggs', 'foo', 'spam', 'spam', 'spam']
r_1.at 'z' . to_vector . should_equal [Nothing, Nothing, 'foo', Nothing, 'bar', 'eggs', 'foo']
r_2 = a.join (b.set_index 'z') drop_unmatched=True on='y' . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'x', Sort_Column.Name 'w'])
r_2 = a.join (b.set_index 'z') drop_unmatched=True on='y' . read . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'x', Sort_Column.Name 'w'])
r_2.at 'x' . to_vector . should_equal [0, 0, 1, 1, 3, 6]
r_2.at 'w' . to_vector . should_equal [3, 6, 3, 5, 5, 3]
Test.specify "should allow self-joins and append suffixes to disambiguate column names" <|
r_1 = a.join (a.set_index 'x') on='x' . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'x'])
r_1 = a.join (a.set_index 'x') on='x' . read . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'x'])
r_1.columns.map .name . should_equal ['x', 'y_left', 'y_right']
r_1.at 'x' . to_vector . should_equal [0, 1, 3, 6, 7]
expected_y = ['foo', 'bar', 'spam', 'eggs', 'baz']
@ -169,7 +168,7 @@ spec prefix connection pending=Nothing =
tb_2 = tb.set_index "id"
res = (tc.join ta_2 on="id_a") . join tb_2 on="id_b" left_suffix="_a" right_suffix="_b"
sel = res.select_columns (By_Name ["name_a", "name_b"])
df = sel.to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "name_a"])
df = sel.read . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "name_a"])
df . at "name_a" . to_vector . should_equal ["Foo", "Hmm"]
df . at "name_b" . to_vector . should_equal ["Bar", "Hmm"]
@ -193,7 +192,7 @@ spec prefix connection pending=Nothing =
col.drop_missing.to_vector . should_equal [0, 1, 42]
Test.specify "drop_missing_rows should drop rows that contain at least one missing column in a Table" <|
d = t4.drop_missing_rows.to_dataframe
d = t4.drop_missing_rows.read
d.at 'a' . to_vector . should_equal [0]
d.at 'b' . to_vector . should_equal [True]
d.at 'c' . to_vector . should_equal [""]
@ -208,7 +207,7 @@ spec prefix connection pending=Nothing =
empty = t4.drop_missing_columns
empty.column_count . should_equal 0
empty.to_dataframe.column_count . should_equal 0
empty.read.column_count . should_equal 0
Test.group prefix+"Column-wide statistics" pending=pending <|
Test.specify 'should allow computing basic column-wide stats' <|
@ -289,7 +288,7 @@ spec prefix connection pending=Nothing =
col = t.index+10
vec = [11, 12, 13]
col.to_vector . should_equal vec
df_col = col.to_dataframe
df_col = col.read
df_col.to_vector . should_equal vec
df_col.index.to_vector . should_equal [1, 2, 3]
@ -327,13 +326,13 @@ spec prefix connection pending=Nothing =
returned in lower case.
aggregates = [Count "count", Count_Not_Nothing "price" "count not nothing price", Count_Nothing "price" "count nothing price"]
t1 = determinize_by "name" (t.aggregate ([Group_By "name"] + aggregates) . to_dataframe)
t1 = determinize_by "name" (t.aggregate ([Group_By "name"] + aggregates) . read)
t1.at "name" . to_vector . should_equal ["bar", "baz", "foo", "quux", "zzzz"]
t1.at "count" . to_vector . should_equal [2, 1, 5, 1, 7]
t1.at "count not nothing price" . to_vector . should_equal [2, 1, 3, 0, 5]
t1.at "count nothing price" . to_vector . should_equal [0, 0, 2, 1, 2]
t2 = t.aggregate aggregates . to_dataframe
t2 = t.aggregate aggregates . read
t2.at "count" . to_vector . should_equal [16]
t2.at "count not nothing price" . to_vector . should_equal [11]
t2.at "count nothing price" . to_vector . should_equal [5]
@ -344,13 +343,13 @@ spec prefix connection pending=Nothing =
aggregates = [Sum "price" "sum price", Sum "quantity" "sum quantity", Average "price" "avg price"]
## TODO can check the datatypes
t1 = determinize_by "name" (t.aggregate ([Group_By "name" Nothing] + aggregates) . to_dataframe)
t1 = determinize_by "name" (t.aggregate ([Group_By "name" Nothing] + aggregates) . read)
t1.at "name" . to_vector . should_equal ["bar", "baz", "foo", "quux", "zzzz"]
t1.at "sum price" . to_vector . should_equal [100.5, 6.7, 1, Nothing, 2]
t1.at "sum quantity" . to_vector . should_equal [80, 40, 120, 70, 2]
t1.at "avg price" . to_vector . should_equal [50.25, 6.7, (1/3), Nothing, (2/5)]
t2 = t.aggregate aggregates . to_dataframe
t2 = t.aggregate aggregates . read
t2.at "sum price" . to_vector . should_equal [110.2]
t2.at "sum quantity" . to_vector . should_equal [312]
t2.at "avg price" . to_vector . should_equal [(110.2 / 11)]


@ -1,33 +1,45 @@
from Standard.Base import all
from Standard.Database import SQL_Query, Raw_SQL, Table_Name
import Standard.Table.Data.Table as Materialized_Table
import Standard.Database.Data.Table as Database_Table
from Standard.Database.Data.SQL import Statement, SQL_Type
import Standard.Database.Internal.IR
type Fake_Test_Connection
# type Fake_Test_Connection_Data (tables : Map Text (Vector [Text, SQL_Type])) (dialect : Text)
Fake_Test_Connection_Data tables dialect
## PRIVATE
access_table : Text -> Database_Table
access_table self name =
## Set up a query returning a Table object, which can be used to work with data within the database or load it into memory.
Arguments:
- query: name of the table or sql statement to query.
If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query.
- alias: optionally specify a friendly alias for the query.
query : Text | SQL_Query -> Text -> Database_Table
query self query alias="" = case query of
Text -> self.query (Table_Name query) alias
Raw_SQL _ ->
Error.throw (Illegal_Argument_Error "Cannot query a fake connection with raw SQL")
Table_Name name ->
columns = self.tables.get name
Database_Table.make_table self name columns
Database_Table.make_table self name columns (IR.context_for_table name)
## Execute the query and load the results into memory as a Table.
Arguments:
- query: name of the table or sql statement to query.
If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query.
- alias: optionally specify a friendly alias for the query.
- limit: the maximum number of rows to return (default 1000).
read : Text | SQL_Query -> Text -> Integer | Nothing -> Table
read self _ _="" _=Nothing =
Error.throw "Materialization not supported on fake connection."
## PRIVATE
close : Nothing
close self = Nothing
## PRIVATE
explain_query_plan : Statement -> Text
explain_query_plan self _ =
Error.throw "Materialization not supported on fake connection."
## PRIVATE
execute_query : Text | Statement -> Materialized_Table
execute_query self _ =
Error.throw "Materialization not supported on fake connection."
## PRIVATE
execute_update : Text | Statement -> Integer
execute_update self _ =


@ -39,14 +39,14 @@ postgres_specific_spec connection db_name pending =
Test.specify "should allow changing schema" <|
new_connection = connection.set_schema "information_schema"
new_schema = new_connection.execute_query "SELECT current_schema()" . at 0 . to_vector . first
new_schema = new_connection.read (Raw_SQL "SELECT current_schema()") . at 0 . to_vector . first
new_schema . should_equal "information_schema"
databases = connection.databases.filter d->((d!=db_name) && (d!='rdsadmin'))
pending_database = if databases.length != 0 then Nothing else "Cannot tests changing database unless two databases defined."
Test.specify "should allow changing database" pending=pending_database <|
new_connection = connection.set_database databases.first
new_database = new_connection.execute_query "SELECT current_database()" . at 0 . to_vector . first
new_database = new_connection.read (Raw_SQL "SELECT current_database()") . at 0 . to_vector . first
new_database . should_equal databases.first
Test.group "[PostgreSQL] Tables and Table Types" <|
@ -91,7 +91,7 @@ postgres_specific_spec connection db_name pending =
Test.group "[PostgreSQL] Info" pending=pending <|
tinfo = Name_Generator.random_name "Tinfo"
connection.execute_update 'CREATE TEMPORARY TABLE "'+tinfo+'" ("strs" VARCHAR, "ints" INTEGER, "bools" BOOLEAN, "reals" REAL, "doubles" DOUBLE PRECISION)'
t = connection.access_table tinfo
t = connection.query (Table_Name tinfo)
t.insert ["a", Nothing, False, 1.2, 0.000000000001]
t.insert ["abc", Nothing, Nothing, 1.3, Nothing]
t.insert ["def", 42, True, 1.4, 10]
@ -118,7 +118,7 @@ postgres_specific_spec connection db_name pending =
Test.group "[PostgreSQL] Table.aggregate should correctly infer result types" pending=pending <|
name = Name_Generator.random_name "Ttypes"
connection.execute_update 'CREATE TEMPORARY TABLE "'+name+'" ("txt" VARCHAR, "i1" SMALLINT, "i2" INT, "i3" BIGINT, "i4" NUMERIC, "r1" REAL, "r2" DOUBLE PRECISION, "bools" BOOLEAN)'
t = connection.access_table name
t = connection.query (Table_Name name)
Test.specify "Concatenate, Shortest and Longest" <|
r = t.aggregate [Concatenate "txt", Shortest "txt", Longest "txt"]
r.columns.at 0 . sql_type . should_equal SQL_Type.text
@ -169,7 +169,7 @@ run_tests connection db_name pending=Nothing =
table_names.each name->
sql = 'DROP TABLE "' + name + '"'
Panic.rethrow <| connection.execute_update sql
materialize = .to_dataframe
materialize = .read
Common_Spec.spec prefix connection pending=pending
postgres_specific_spec connection db_name pending=pending


@ -17,7 +17,7 @@ redshift_specific_spec connection pending =
Test.group "[Redshift] Info" pending=pending <|
tinfo = Name_Generator.random_name "Tinfo"
connection.execute_update 'CREATE TEMPORARY TABLE "'+tinfo+'" ("strs" VARCHAR, "ints" INTEGER, "bools" BOOLEAN, "reals" REAL)'
t = connection.access_table tinfo
t = connection.query (Table_Name tinfo)
t.insert ["a", Nothing, False, 1.2]
t.insert ["abc", Nothing, Nothing, 1.3]
t.insert ["def", 42, True, 1.4]
@ -51,7 +51,7 @@ run_tests connection pending=Nothing =
table_names.each name->
sql = 'DROP TABLE "' + name + '"'
Panic.rethrow <| connection.execute_update sql
materialize = .to_dataframe
materialize = .read
Common_Spec.spec prefix connection pending=pending
redshift_specific_spec connection pending=pending


@ -67,17 +67,17 @@ sqlite_specific_spec connection =
Test.group "[SQLite] Error Handling" <|
Test.specify "should wrap errors" <|
connection.execute_query "foobar" . should_fail_with SQL_Error_Data
connection.read (Raw_SQL "foobar") . should_fail_with SQL_Error_Data
connection.execute_update "foobar" . should_fail_with SQL_Error_Data
action = connection.execute_query "SELECT A FROM undefined_table"
action = connection.read (Raw_SQL "SELECT A FROM undefined_table")
action . should_fail_with SQL_Error_Data
action.catch.to_text . should_equal "There was an SQL error: '[SQLITE_ERROR] SQL error or missing database (no such table: undefined_table)'. [Query was: SELECT A FROM undefined_table]"
Test.group "[SQLite] Metadata" <|
tinfo = Name_Generator.random_name "Tinfo"
connection.execute_update 'CREATE TABLE "'+tinfo+'" ("strs" VARCHAR, "ints" INTEGER, "bools" BOOLEAN, "reals" REAL)'
t = connection.access_table tinfo
t = connection.query (Table_Name tinfo)
t.insert ["a", Nothing, False, 1.2]
t.insert ["abc", Nothing, Nothing, 1.3]
t.insert ["def", 42, True, 1.4]
@ -106,7 +106,7 @@ sqlite_spec connection prefix =
in_mem_table = Materialized_Table.new columns
connection.upload_table name in_mem_table
materialize = .to_dataframe
materialize = .read
Common_Spec.spec prefix connection
sqlite_specific_spec connection


@ -9,7 +9,7 @@ import Standard.Test
visualization_spec connection =
connection.execute_update 'CREATE TABLE "T" ("A" VARCHAR, "B" INTEGER, "C" INTEGER)'
t = connection.access_table "T"
t = connection.query (Table_Name "T")
Test.group "SQL Visualization" <|
Test.specify "should provide type metadata for interpolations" <|
q = t.where ((t.at "B" == 2) && (t.at "A" == True)) . at "C"