From 6f54e809700f8beb1f5c7803aef35f2ebd1c24d2 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Fri, 23 Sep 2022 08:35:08 +0100 Subject: [PATCH] Adjust Database connection to use query/read to access data. (#3727) Adjust Database connection API to align with new [design](https://github.com/enso-org/design/blob/wip/jd/database-read/epics/basic-libraries/database-read/design.md#querying-tables). - `query` replaces the old `access_table` and is expanded to support raw SQL queries. - `read` replaces `execute_query` and matches the API of `query`. - `to_dataframe` is renamed to `read`. # Important Notes Added support for `++` to concatenate a Text without wrapping in a `SQL.Code`. --- CHANGELOG.md | 2 + .../0.0.0-dev/src/Connection/Connection.enso | 76 ++++++++++--------- .../Database/0.0.0-dev/src/Data/Column.enso | 11 +-- .../Database/0.0.0-dev/src/Data/SQL.enso | 9 +-- .../0.0.0-dev/src/Data/SQL_Query.enso | 8 ++ .../Database/0.0.0-dev/src/Data/Table.enso | 35 ++++----- .../src/Internal/Base_Generator.enso | 51 +++++++------ .../src/Internal/JDBC_Connection.enso | 8 +- .../Postgres/Postgres_Connection.enso | 55 ++++++-------- .../Internal/Postgres/Postgres_Dialect.enso | 58 +++++++------- .../Internal/SQLite/SQLite_Connection.enso | 55 ++++++-------- .../src/Internal/SQLite/SQLite_Dialect.enso | 46 +++++------ .../Standard/Database/0.0.0-dev/src/Main.enso | 2 + .../0.0.0-dev/src/Table/Visualization.enso | 2 +- .../src/Database/Codegen_Spec.enso | 10 +-- .../Table_Tests/src/Database/Common_Spec.enso | 43 +++++------ .../Helpers/Fake_Test_Connection.enso | 42 ++++++---- .../src/Database/Postgres_Spec.enso | 10 +-- .../src/Database/Redshift_Spec.enso | 4 +- .../Table_Tests/src/Database/SQLite_Spec.enso | 8 +- test/Visualization_Tests/src/SQL_Spec.enso | 2 +- 21 files changed, 269 insertions(+), 268 deletions(-) create mode 100644 distribution/lib/Standard/Database/0.0.0-dev/src/Data/SQL_Query.enso diff --git a/CHANGELOG.md b/CHANGELOG.md index e1d2f19d75..19cf4403eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -200,6 +200,7 @@ find start and end of a period of time containing the provided time.][3695] - [Implemented `work_days_until` for counting work dys between dates and `add_work_days` which allows to shift a date by a number of work days.][3726] +- [Added `query` and `read` functions to Database connections.][3727] [debug-shortcuts]: https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug @@ -319,6 +320,7 @@ [3691]: https://github.com/enso-org/enso/pull/3691 [3695]: https://github.com/enso-org/enso/pull/3695 [3726]: https://github.com/enso-org/enso/pull/3726 +[3727]: https://github.com/enso-org/enso/pull/3727 #### Enso Compiler diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso index b69f230ab7..4b7bf70931 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso @@ -2,17 +2,19 @@ from Standard.Base import all import Standard.Database.Internal.IR import Standard.Database.Data.SQL +import Standard.Database.Data.SQL_Query import Standard.Database.Data.Table as Database_Table import Standard.Table.Data.Table as Materialized_Table from Standard.Table import Column_Selector, Column_Name_Mapping -from Standard.Database.Data.SQL import SQL_Type, Statement +from Standard.Database.Data.SQL import SQL_Type, Statement, Statement_Data from 
Standard.Database.Internal.JDBC_Connection import create_table_statement, handle_sql_errors from Standard.Database.Internal.Result_Set import read_column, result_set_to_table from Standard.Database.Errors import SQL_Error polyglot java import java.lang.UnsupportedOperationException +polyglot java import java.util.UUID type Connection ## PRIVATE @@ -32,17 +34,6 @@ type Connection close : Nothing close self = self.jdbc_connection.close - ## UNSTABLE - - Accesses a table within the database, returning an object that can be - used to create queries for that table. - - Arguments: - - name: name of the table to access - access_table : Text -> Database_Table - access_table self name = handle_sql_errors <| - columns = self.fetch_columns name - Database_Table.make_table self name columns ## Returns the list of databases (or catalogs) for the connection. databases : [Text] @@ -111,18 +102,43 @@ type Connection if all_fields then renamed else renamed.select_columns (Column_Selector.By_Name ["Database", "Schema", "Name", "Type", "Description"]) - ## ADVANCED - - Executes a raw query and returns the result as an in-memory Table. + ## Set up a query returning a Table object, which can be used to work with data within the database or load it into memory. Arguments: - - query: either raw SQL code as Text or an instance of SQL.Statement - representing the query to execute. - - expected_types: an optional array of expected types of each column; - meant only for internal use. - execute_query : Text | Statement -> Vector SQL_Type -> Materialized_Table - execute_query self query expected_types=Nothing = - self.jdbc_connection.with_prepared_statement query stmt-> + - query: name of the table or sql statement to query. + If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query. + - alias: optionally specify a friendly alias for the query. + query : Text | SQL_Query -> Text -> Database_Table + query self query alias="" = handle_sql_errors <| case query of + Text -> + self.query alias=alias <| + if self.tables.at 'Name' . to_vector . contains query then (SQL_Query.Table_Name query) else + SQL_Query.Raw_SQL query + SQL_Query.Raw_SQL raw_sql -> + columns = self.jdbc_connection.fetch_columns raw_sql + name = if alias == "" then (UUID.randomUUID.to_text) else alias + ctx = IR.context_for_query raw_sql name + Database_Table.make_table self name columns ctx + SQL_Query.Table_Name name -> + ctx = IR.context_for_table name (if alias == "" then name else alias) + columns = self.jdbc_connection.fetch_columns (self.dialect.generate_sql (IR.Select_All ctx)) + Database_Table.make_table self name columns ctx + + ## Execute the query and load the results into memory as a Table. + + Arguments: + - query: name of the table or sql statement to query. + If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query. + - limit: the maximum number of rows to return. + read : Text | SQL_Query -> Text -> Integer | Nothing -> Table + read self query limit=Nothing = + self.query query . read max_rows=limit + + ## PRIVATE + Internal read function for a statement with optional types. 
+ read_statement : Statement -> (Nothing | Vector SQL_Type) -> Table + read_statement self statement expected_types=Nothing = + self.jdbc_connection.with_prepared_statement statement stmt-> result_set_to_table stmt.executeQuery expected_types ## ADVANCED @@ -140,20 +156,6 @@ type Connection Panic.catch UnsupportedOperationException stmt.executeLargeUpdate _-> stmt.executeUpdate - ## PRIVATE - - A helper function that fetches column names and sql types associated with - them for a table in the database. - - Arguments: - - table_name: The name of the table to fetch the column metadata for. - # fetch_columns : Text -> Vector [Text, SQL_Type] - fetch_columns : Text -> Vector Any - fetch_columns self table_name = - query = IR.Select_All (IR.context_for_table table_name) - compiled = self.dialect.generate_sql query - self.jdbc_connection.fetch_columns compiled - ## PRIVATE UNSTABLE This is a prototype function used in our test suites. It may change. @@ -176,7 +178,7 @@ type Connection create_sql = create_table_statement name table temporary create_table = self.execute_update create_sql - db_table = if create_table.is_error then create_table else self.access_table name + db_table = if create_table.is_error then create_table else self.query (SQL_Query.Table_Name name) if db_table.is_error.not then pairs = db_table.internal_columns.map col->[col.name, IR.Constant col.sql_type Nothing] insert_query = self.dialect.generate_sql <| IR.Insert name pairs diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso index a0769cb35c..a61a92cf93 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso @@ -74,10 +74,9 @@ type Column Arguments: - max_rows: specifies a maximum amount of rows to fetch; if not set, all available rows are fetched. - to_dataframe : (Nothing | Integer) -> Materialized_Column.Column - to_dataframe self max_rows=Nothing = - df = self.to_table.to_dataframe max_rows - df.at self.name + read : (Nothing | Integer) -> Materialized_Column.Column + read self max_rows=Nothing = + self.to_table.read max_rows . at self.name ## UNSTABLE @@ -87,9 +86,7 @@ type Column ## We remove the index to avoid fetching index data that will not be used anyway when constructing the raw Vector. without_ix = self.to_table.set_index [] - df = without_ix . to_dataframe - raw_column = df.at self.name - raw_column.to_vector + without_ix . read . at self.name . to_vector ## UNSTABLE diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/SQL.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/SQL.enso index b25b6392ea..08dc910be4 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/SQL.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/SQL.enso @@ -278,7 +278,9 @@ type Builder Arguments: - other: The code fragment to append to `self`. ++ : Builder -> Builder - ++ self other = Builder_Data (self.fragments ++ other.fragments) + ++ self other = case other of + Text -> if other == "" then self else Builder_Data (self.fragments ++ (code other).fragments) + _ -> Builder_Data (self.fragments ++ other.fragments) ## UNSTABLE @@ -298,10 +300,7 @@ type Builder Wraps the code fragment in parentheses. 
paren : Builder - paren self = - l = code "(" - r = code ")" - l ++ self ++ r + paren self = code "(" ++ self ++ ")" ## UNSTABLE diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/SQL_Query.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/SQL_Query.enso new file mode 100644 index 0000000000..90589680c8 --- /dev/null +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/SQL_Query.enso @@ -0,0 +1,8 @@ +from Standard.Base import Text + +type SQL_Query + ## Query a whole table or view. + Table_Name name:Text + + ## Raw SQL query statement. + Raw_SQL sql:Text diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index 8940c8d08c..50e2bb8318 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -4,6 +4,7 @@ from Standard.Base.Error.Problem_Behavior import Report_Warning import Standard.Database.Internal.Helpers import Standard.Database.Internal.Aggregate_Helper import Standard.Database.Internal.IR +from Standard.Database.Data.SQL_Query import Raw_SQL from Standard.Database.Data.SQL import Statement, SQL_Type import Standard.Table.Data.Column as Materialized_Column @@ -49,7 +50,7 @@ type Table - format_terminal: whether ANSI-terminal formatting should be used display : Integer -> Boolean -> Text display self show_rows=10 format_terminal=False = - df = self.reset_index.to_dataframe max_rows=show_rows + df = self.reset_index.read max_rows=show_rows indices_count = self.context.meta_index.length all_rows_count = self.row_count display_dataframe df indices_count all_rows_count format_terminal @@ -401,7 +402,7 @@ type Table result as one could expect if the limit was applied before the filters. t1 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A"]) . limit 5 t2 = t1.where (t1.at 'A' > 5) - t2.to_dataframe + t2.read limit : Integer -> Table limit self max_rows = new_ctx = self.context.set_limit max_rows @@ -715,13 +716,13 @@ type Table self.updated_context_and_columns new_ctx new_columns ## Parsing values is not supported in database tables, the table has to be - materialized first with `to_dataframe`. + loaded into memory first with `read`. parse_values : Data_Formatter -> (Nothing | [Column_Type_Selection]) -> Problem_Behavior -> Table parse_values self value_formatter=Data_Formatter column_types=Nothing on_problems=Report_Warning = ## Avoid unused arguments warning. We cannot rename arguments to `_`, because we need to keep the API consistent with the in-memory table. _ = [value_formatter, column_types, on_problems] - msg = "Parsing values is not supported in database tables, the table has to be materialized first with `to_dataframe`." + msg = "Parsing values is not supported in database tables, the table has to be materialized first with `read`." Error.throw (Unsupported_Database_Operation_Error_Data msg) ## UNSTABLE @@ -746,7 +747,7 @@ type Table cols = self.internal_columns.map (c -> [c.name, make_count_expr c.expression]) query = IR.Select [[all_rows_column_name, rows_expr]]+cols self.context sql = self.connection.dialect.generate_sql query - table = self.connection.execute_query sql + table = self.connection.read_statement sql all_rows = table.at all_rows_column_name . at 0 kept_columns = self.internal_columns . filter c-> all_rows == table.at c.name . 
at 0 @@ -765,7 +766,7 @@ type Table new_ctx = IR.subquery_as_ctx setup.first query = IR.Select [[column_name, expr]] new_ctx sql = self.connection.dialect.generate_sql query - table = self.connection.execute_query sql + table = self.connection.read_statement sql table.at column_name . at 0 ## UNSTABLE @@ -775,8 +776,8 @@ type Table Arguments: - max_rows: specifies a maximum amount of rows to fetch; if not set, all available rows are fetched. - to_dataframe : (Integer | Nothing) -> Materialized_Table.Table - to_dataframe self max_rows=Nothing = + read : (Integer | Nothing) -> Materialized_Table.Table + read self max_rows=Nothing = case self.context.meta_index.length > 1 of True -> Error.throw <| Illegal_State_Error_Data "Multi-indexes are not implemented in the dataframes, if you want to materialize such a Table, remove the index first using `set_index`." False -> @@ -788,7 +789,7 @@ type Table False -> sql = preprocessed.to_sql expected_types = preprocessed.internal_columns.map .sql_type - table = self.connection.execute_query sql expected_types + table = self.connection.read_statement sql expected_types case self.context.meta_index.length == 1 of False -> table True -> @@ -834,7 +835,7 @@ type Table [column.name, IR.Operation "COUNT" [column.expression]] query = IR.Select new_columns new_ctx self.connection.dialect.generate_sql query - count_table = self.connection.execute_query count_query + count_table = self.connection.read_statement count_query counts = if cols.is_empty then [] else count_table.columns.map c-> c.at 0 types = cols.map c-> c.sql_type.name Materialized_Table.new [["Column", cols.map .name], ["Items Count", counts], ["SQL Type", types]] . set_index "Column" @@ -905,7 +906,7 @@ type Table insert self values = table_name = case self.context.from_spec of IR.From_Table name _ -> name - _ -> Error.throw <| Illegal_State_Error_Data "Inserting can only be performed on tables as returned by `access_table`, any further processing is not allowed." + _ -> Error.throw <| Illegal_State_Error_Data "Inserting can only be performed on tables as returned by `query`, any further processing is not allowed." # TODO [RW] before removing the PRIVATE tag, add a check that no bad stuff was done to the table as described above pairs = self.internal_columns.zip values col-> value-> [col.name, IR.Constant col.sql_type value] @@ -968,12 +969,12 @@ type Table example_to_csv = connection = Database.connect (SQLite (File.new "db.sqlite")) - table = connection.access_table "Table" + table = connection.query (Table_Name "Table") table.write (enso_project.data / "example_csv_output.csv") write : File|Text -> File_Format -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> Nothing ! Column_Mismatch | Illegal_Argument_Error | File_Not_Found | IO_Error write self path format=Auto_Detect on_existing_file=Existing_File_Behavior.Backup match_columns=Match_Columns.By_Name on_problems=Report_Warning = # TODO This should ideally be done in a streaming manner, or at least respect the row limits. - self.to_dataframe.write path format on_existing_file match_columns on_problems + self.read.write path format on_existing_file match_columns on_problems type Integrity_Error @@ -1000,10 +1001,10 @@ type Integrity_Error - connection: The connection to a database. - table_name: The name of the table to get. - columns: The names of the columns to get. 
-# make_table : Connection -> Text -> Vector [Text, SQL_Type] -> Table -make_table : Connection -> Text -> Vector -> Table -make_table connection table_name columns = - ctx = IR.context_for_table table_name + - ctx: The context to use for the table. +# make_table : Connection -> Text -> Vector [Text, SQL_Type] -> IR.Context -> Table +make_table : Connection -> Text -> Vector -> IR.Context -> Table +make_table connection table_name columns ctx = cols = columns.map (p -> Internal_Column_Data p.first p.second (IR.Column table_name p.first)) Table_Data table_name connection cols ctx diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso index 16538ccd8c..cc47aa199a 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso @@ -46,8 +46,8 @@ make_binary_op name = arguments -> case arguments.length == 2 of True -> - op = code " "+name+" " - (arguments.at 0)++op++(arguments.at 1) . paren + op = " " + name + " " + ((arguments.at 0) ++ op ++ (arguments.at 1)).paren False -> Error.throw <| Illegal_State_Error_Data ("Invalid amount of arguments for operation " + name) @@ -110,7 +110,7 @@ make_right_unary_op name = arguments -> case arguments.length == 1 of True -> - (arguments.at 0)++(code " "+name) . paren + (arguments.at 0)++(" " + name) . paren False -> Error.throw ("Invalid amount of arguments for operation " + name) @@ -181,8 +181,7 @@ base_dialect = generate_expression : Internal_Dialect -> IR.Expression -> Builder generate_expression dialect expr = case expr of IR.Column origin name -> - dot = code '.' - dialect.wrap_identifier origin ++ dot ++ dialect.wrap_identifier name + dialect.wrap_identifier origin ++ '.' ++ dialect.wrap_identifier name IR.Constant sql_type value -> SQL.interpolation sql_type value IR.Operation kind arguments -> op = dialect.operation_map.get_or_else kind (Error.throw <| Unsupported_Database_Operation_Error kind) @@ -214,6 +213,8 @@ generate_from_part : Internal_Dialect -> From_Spec -> Builder generate_from_part dialect from_spec = case from_spec of IR.From_Table name as_name -> dialect.wrap_identifier name ++ alias dialect as_name + IR.From_Query raw_sql as_name -> + code raw_sql . paren ++ alias dialect as_name IR.Join kind left_spec right_spec on -> left = generate_from_part dialect left_spec right = generate_from_part dialect right_spec @@ -223,7 +224,7 @@ generate_from_part dialect from_spec = case from_spec of IR.Join_Right -> "RIGHT JOIN" IR.Join_Cross -> "CROSS JOIN" ons = SQL.join " AND " (on.map (generate_expression dialect)) . 
prefix_if_present " ON " - left ++ (code " "+prefix+" ") ++ right ++ ons + left ++ (" " + prefix + " ") ++ right ++ ons IR.Sub_Query columns context as_name -> sub = generate_query dialect (IR.Select columns context) sub.paren ++ alias dialect as_name @@ -238,15 +239,15 @@ generate_from_part dialect from_spec = case from_spec of generate_order : Internal_Dialect -> Order_Descriptor -> Builder generate_order dialect order_descriptor = order_suffix = case order_descriptor.direction of - Sort_Direction.Ascending -> code " ASC" - Sort_Direction.Descending -> code " DESC" + Sort_Direction.Ascending -> " ASC" + Sort_Direction.Descending -> " DESC" nulls_suffix = case order_descriptor.nulls_order of - Nothing -> SQL.empty - IR.Nulls_First -> code " NULLS FIRST" - IR.Nulls_Last -> code " NULLS LAST" + Nothing -> "" + IR.Nulls_First -> " NULLS FIRST" + IR.Nulls_Last -> " NULLS LAST" collation = case order_descriptor.collation of - Nothing -> SQL.empty - collation_name -> code ' COLLATE "'+collation_name+'"' + Nothing -> "" + collation_name -> ' COLLATE "' + collation_name + '"' base_expression = generate_expression dialect order_descriptor.expression base_expression ++ collation ++ order_suffix ++ nulls_suffix @@ -265,8 +266,8 @@ generate_select_context dialect ctx = where_part = (SQL.join " AND " (gen_exprs ctx.where_filters)) . prefix_if_present " WHERE " group_part = (SQL.join ", " (gen_exprs ctx.groups)) . prefix_if_present " GROUP BY " limit_part = case ctx.limit of - Nothing -> SQL.empty - Integer -> code " LIMIT "+ctx.limit.to_text + Nothing -> "" + Integer -> " LIMIT " + ctx.limit.to_text orders = ctx.orders.map (generate_order dialect) order_part = (SQL.join ", " orders) . prefix_if_present " ORDER BY " @@ -287,7 +288,7 @@ generate_insert_query dialect table_name pairs = values = SQL.join ", " <| pairs.map (.second >> generate_expression dialect) into = dialect.wrap_identifier table_name - code "INSERT INTO " ++ into ++ code " (" ++ names ++ code ") VALUES (" ++ values ++ code ")" + code "INSERT INTO " ++ into ++ " (" ++ names ++ ") VALUES (" ++ values ++ ")" ## PRIVATE @@ -301,9 +302,9 @@ generate_query dialect query = case query of IR.Select columns ctx -> gen_column pair = (generate_expression dialect pair.second) ++ alias dialect pair.first cols = SQL.join ", " (columns.map gen_column) - (code "SELECT ") ++ cols ++ generate_select_context dialect ctx + code "SELECT " ++ cols ++ generate_select_context dialect ctx IR.Select_All ctx -> - (code "SELECT * ") ++ generate_select_context dialect ctx + code "SELECT * " ++ generate_select_context dialect ctx IR.Insert table_name pairs -> generate_insert_query dialect table_name pairs _ -> Error.throw <| Unsupported_Database_Operation_Error "Unsupported query type." @@ -337,19 +338,19 @@ make_concat make_raw_concat_expr make_contains_expr has_quote args = separator = args.at 1 prefix = args.at 2 suffix = args.at 3 - append = code " || " + append = " || " possibly_quoted = case has_quote of True -> quote = args.at 4 - includes_separator = separator ++ code " != '' AND " ++ make_contains_expr expr separator + includes_separator = separator ++ " != '' AND " ++ make_contains_expr expr separator ## We use the assumption that `has_quote` is True iff `quote` is not empty. 
includes_quote = make_contains_expr expr quote - is_empty = expr ++ code " = ''" - needs_quoting = includes_separator.paren ++ code " OR " ++ includes_quote.paren ++ code " OR " ++ is_empty.paren - escaped = code "replace(" ++ expr ++ code ", " ++ quote ++ code ", " ++ quote ++ append ++ quote ++ code ")" + is_empty = expr ++ " = ''" + needs_quoting = includes_separator.paren ++ " OR " ++ includes_quote.paren ++ " OR " ++ is_empty.paren + escaped = code "replace(" ++ expr ++ ", " ++ quote ++ ", " ++ quote ++ append ++ quote ++ ")" quoted = quote ++ append ++ escaped ++ append ++ quote - code "CASE WHEN " ++ needs_quoting ++ code " THEN " ++ quoted ++ code " ELSE " ++ expr ++ code " END" + code "CASE WHEN " ++ needs_quoting ++ " THEN " ++ quoted ++ " ELSE " ++ expr ++ " END" False -> expr - transformed_expr = code "CASE WHEN " ++ expr ++ code " IS NULL THEN '' ELSE " ++ possibly_quoted.paren ++ code " END" + transformed_expr = code "CASE WHEN " ++ expr ++ " IS NULL THEN '' ELSE " ++ possibly_quoted.paren ++ " END" concatenated = make_raw_concat_expr transformed_expr separator prefix.paren ++ append ++ concatenated ++ append ++ suffix.paren diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/JDBC_Connection.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/JDBC_Connection.enso index b561530f11..91ed30f7df 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/JDBC_Connection.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/JDBC_Connection.enso @@ -80,7 +80,7 @@ type JDBC_Connection Given a prepared statement, gets the column names and types for the result set. - fetch_columns : PreparedStatement -> Any + fetch_columns : Text | Statement -> Any fetch_columns self statement = self.with_prepared_statement statement stmt-> metadata = stmt.executeQuery.getMetaData @@ -183,9 +183,9 @@ create_table_statement name table temporary = column_types = table.columns.map col-> default_storage_type col.storage_type column_names = table.columns.map .name col_makers = column_names.zip column_types name-> typ-> - Base_Generator.wrap_in_quotes name ++ SQL.code " " ++ SQL.code typ.name - create_prefix = if temporary then "CREATE TEMPORARY TABLE " else "CREATE TABLE " - (SQL.code create_prefix ++ Base_Generator.wrap_in_quotes name ++ SQL.code " (" ++ (SQL.join ", " col_makers) ++ SQL.code ")").build + Base_Generator.wrap_in_quotes name ++ " " ++ typ.name + create_prefix = SQL.code <| if temporary then "CREATE TEMPORARY TABLE " else "CREATE TABLE " + (create_prefix ++ Base_Generator.wrap_in_quotes name ++ " (" ++ (SQL.join ", " col_makers) ++ ")").build ## PRIVATE Returns the default database type corresponding to an in-memory storage type. 
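A minimal usage sketch of the new `query`/`read` API added in Connection.enso above (the table name "Customers" and the raw SQL text are hypothetical; the calls follow the signatures introduced in this patch and assume `from Standard.Base import all` and `from Standard.Database import all` are in scope):

```
example_query_read =
    connection = Database.connect (SQLite (File.new "db.sqlite"))
    # A Text argument is checked against `connection.tables`: treated as a
    # table name if present, otherwise as a raw SQL query.
    t1 = connection.query "Customers"
    # Explicit raw SQL, with an optional friendly alias for the query.
    t2 = connection.query (SQL_Query.Raw_SQL "SELECT * FROM Customers") alias="All Customers"
    # `read` loads results into memory as a materialized Table, optionally limited.
    m1 = connection.read "Customers" limit=100
    # On a database Table, `read` replaces the old `to_dataframe`.
    m2 = t2.read max_rows=10
```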
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Connection.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Connection.enso index ff4eb5d6f8..6348247f52 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Connection.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Connection.enso @@ -1,6 +1,7 @@ from Standard.Base import all from Standard.Database.Data.SQL import SQL_Type, Statement +import Standard.Database.Data.SQL_Query import Standard.Database.Internal.JDBC_Connection import Standard.Database.Data.Dialect @@ -31,16 +32,6 @@ type Postgres_Connection close : Nothing close self = self.connection.close - ## UNSTABLE - - Accesses a table within the database, returning an object that can be - used to create queries for that table. - - Arguments: - - name: name of the table to access - access_table : Text -> Database_Table - access_table self name = self.connection.access_table name - ## Returns the list of databases (or catalogs) for the connection. databases : [Text] databases self = @@ -94,19 +85,29 @@ type Postgres_Connection tables self name_like=Nothing database=self.database schema=self.schema types=Nothing all_fields=False = self.connection.tables name_like database schema types all_fields - ## ADVANCED - - Executes a raw query and returns the result as an in-memory Table. + ## Set up a query returning a Table object, which can be used to work with data within the database or load it into memory. Arguments: - - query: either raw SQL code as Text or an instance of - .Statement - representing the query to execute. - - expected_types: an optional array of expected types of each column; - meant only for internal use. - execute_query : Text | Statement -> Vector SQL_Type -> Materialized_Table - execute_query self query expected_types=Nothing = - self.connection.execute_query query expected_types + - query: name of the table or sql statement to query. + If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query. + - alias: optionally specify a friendly alias for the query. + query : Text | SQL_Query -> Text -> Database_Table + query self query alias="" = self.connection.query query alias + + ## Execute the query and load the results into memory as a Table. + + Arguments: + - query: name of the table or sql statement to query. + If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query. + - limit: the maximum number of rows to return. + read : Text | SQL_Query -> Integer | Nothing -> Table + read self query limit=Nothing = self.connection.read query limit + + ## PRIVATE + Internal read function for a statement with optional types. + read_statement : Statement -> (Nothing | Vector SQL_Type) -> Table + read_statement self statement expected_types=Nothing = + self.connection.read_statement statement expected_types ## ADVANCED @@ -121,18 +122,6 @@ type Postgres_Connection execute_update self query = self.connection.execute_update query - ## PRIVATE - - A helper function that fetches column names and sql types associated with - them for a table in the database. - - Arguments: - - table_name: The name of the table to fetch the column metadata for. 
- # fetch_columns : Text -> Vector [Text, SQL_Type] - fetch_columns : Text -> Vector Any - fetch_columns self table_name = - self.connection.fetch_columns table_name - ## PRIVATE UNSTABLE This is a prototype function used in our test suites. It may change. diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso index 9bfe6944ea..9d561c2e10 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso @@ -99,42 +99,42 @@ resolve_target_sql_type aggregate = case aggregate of ## PRIVATE agg_count_is_null = Base_Generator.lift_unary_op "COUNT_IS_NULL" arg-> - code "COUNT(CASE WHEN " ++ arg.paren ++ code " IS NULL THEN 1 END)" + code "COUNT(CASE WHEN " ++ arg.paren ++ " IS NULL THEN 1 END)" ## PRIVATE agg_count_empty = Base_Generator.lift_unary_op "COUNT_EMPTY" arg-> - code "COUNT(CASE WHEN (" ++ arg.paren ++ code " IS NULL) OR (" ++ arg.paren ++ code " = '') THEN 1 END)" + code "COUNT(CASE WHEN (" ++ arg.paren ++ " IS NULL) OR (" ++ arg.paren ++ " = '') THEN 1 END)" ## PRIVATE agg_count_not_empty = Base_Generator.lift_unary_op "COUNT_NOT_EMPTY" arg-> - code "COUNT(CASE WHEN (" ++ arg.paren ++ code " IS NOT NULL) AND (" ++ arg.paren ++ code " != '') THEN 1 END)" + code "COUNT(CASE WHEN (" ++ arg.paren ++ " IS NOT NULL) AND (" ++ arg.paren ++ " != '') THEN 1 END)" ## PRIVATE agg_median = Base_Generator.lift_unary_op "MEDIAN" arg-> - median = code "percentile_cont(0.5) WITHIN GROUP (ORDER BY " ++ arg ++ code ")" + median = code "percentile_cont(0.5) WITHIN GROUP (ORDER BY " ++ arg ++ ")" ## TODO Technically, this check may not be necessary if the input column has type INTEGER, because it is impossible to represent a NaN in that type. However, currently the column type inference is not tested well-enough to rely on this, so leaving an uniform approach regardless of type. This could be revisited when further work on column types takes place. See issue: https://www.pivotaltracker.com/story/show/180854759 - has_nan = code "bool_or(" ++ arg ++ code " = double precision 'NaN')" - code "CASE WHEN " ++ has_nan ++ code " THEN 'NaN' ELSE " ++ median ++ code " END" + has_nan = code "bool_or(" ++ arg ++ " = double precision 'NaN')" + code "CASE WHEN " ++ has_nan ++ " THEN 'NaN' ELSE " ++ median ++ " END" ## PRIVATE agg_mode = Base_Generator.lift_unary_op "MODE" arg-> - code "mode() WITHIN GROUP (ORDER BY " ++ arg ++ code ")" + code "mode() WITHIN GROUP (ORDER BY " ++ arg ++ ")" agg_percentile = Base_Generator.lift_binary_op "PERCENTILE" p-> expr-> - percentile = code "percentile_cont(" ++ p ++ code ") WITHIN GROUP (ORDER BY " ++ expr ++ code ")" + percentile = code "percentile_cont(" ++ p ++ ") WITHIN GROUP (ORDER BY " ++ expr ++ ")" ## TODO Technically, this check may not be necessary if the input column has type INTEGER, because it is impossible to represent a NaN in that type. However, currently the column type inference is not tested well-enough to rely on this, so leaving an uniform approach regardless of type. This could be revisited when further work on column types takes place. 
See issue: https://www.pivotaltracker.com/story/show/180854759 - has_nan = code "bool_or(" ++ expr ++ code " = double precision 'NaN')" - code "CASE WHEN " ++ has_nan ++ code " THEN 'NaN' ELSE " ++ percentile ++ code " END" + has_nan = code "bool_or(" ++ expr ++ " = double precision 'NaN')" + code "CASE WHEN " ++ has_nan ++ " THEN 'NaN' ELSE " ++ percentile ++ " END" ## PRIVATE These are written in a not most-efficient way, but a way that makes them @@ -152,31 +152,31 @@ make_first_aggregator reverse ignore_null args = result_expr = args.head order_bys = args.tail - filter_clause = if ignore_null.not then code "" else - code " FILTER (WHERE " ++ result_expr.paren ++ code " IS NOT NULL)" + filter_clause = if ignore_null.not then "" else + code " FILTER (WHERE " ++ result_expr.paren ++ " IS NOT NULL)" order_clause = code " ORDER BY " ++ SQL.join "," order_bys index_expr = case reverse of - True -> if ignore_null.not then code "COUNT(*)" else - code "COUNT(" ++ result_expr ++ code ")" - False -> code "1" + True -> if ignore_null.not then "COUNT(*)" else + code "COUNT(" ++ result_expr ++ ")" + False -> "1" - code "(array_agg(" ++ result_expr.paren ++ order_clause ++ code ")" ++ filter_clause ++ code ")[" ++ index_expr ++ code "]" + code "(array_agg(" ++ result_expr.paren ++ order_clause ++ ")" ++ filter_clause ++ ")[" ++ index_expr ++ "]" agg_shortest = Base_Generator.lift_unary_op "SHORTEST" arg-> order_clause = - code " ORDER BY char_length(" ++ arg ++ code ") ASC NULLS LAST" - code "(array_agg(" ++ arg.paren ++ order_clause ++ code "))[1]" + code " ORDER BY char_length(" ++ arg ++ ") ASC NULLS LAST" + code "(array_agg(" ++ arg.paren ++ order_clause ++ "))[1]" agg_longest = Base_Generator.lift_unary_op "LONGEST" arg-> order_clause = - code " ORDER BY char_length(" ++ arg ++ code ") DESC NULLS LAST" - code "(array_agg(" ++ arg.paren ++ order_clause ++ code "))[1]" + code " ORDER BY char_length(" ++ arg ++ ") DESC NULLS LAST" + code "(array_agg(" ++ arg.paren ++ order_clause ++ "))[1]" ## PRIVATE concat_ops = make_raw_concat_expr expr separator = - code "string_agg(" ++ expr ++ code ", " ++ separator ++ code ")" + code "string_agg(" ++ expr ++ ", " ++ separator ++ ")" concat = Base_Generator.make_concat make_raw_concat_expr make_contains_expr [["CONCAT", concat (has_quote=False)], ["CONCAT_QUOTE_IF_NEEDED", concat (has_quote=True)]] @@ -186,34 +186,34 @@ agg_count_distinct args = if args.is_empty then (Error.throw (Illegal_Argument_E case args.length == 1 of True -> ## A single null value will be skipped. - code "COUNT(DISTINCT " ++ args.first ++ code ")" + code "COUNT(DISTINCT " ++ args.first ++ ")" False -> ## A tuple of nulls is not a null, so it will not be skipped - but we want to ignore all-null columns. So we manually filter them out. - count = code "COUNT(DISTINCT (" ++ SQL.join ", " args ++ code "))" - are_nulls = args.map arg-> arg.paren ++ code " IS NULL" - all_nulls_filter = code " FILTER (WHERE NOT (" ++ SQL.join " AND " are_nulls ++ code "))" + count = code "COUNT(DISTINCT (" ++ SQL.join ", " args ++ "))" + are_nulls = args.map arg-> arg.paren ++ " IS NULL" + all_nulls_filter = code " FILTER (WHERE NOT (" ++ SQL.join " AND " are_nulls ++ "))" (count ++ all_nulls_filter).paren ## PRIVATE agg_count_distinct_include_null args = ## If we always count as tuples, then even null fields are counted. 
- code "COUNT(DISTINCT (" ++ SQL.join ", " args ++ code ", 0))" + code "COUNT(DISTINCT (" ++ SQL.join ", " args ++ ", 0))" ## PRIVATE starts_with = Base_Generator.lift_binary_op "starts_with" str-> sub-> - res = str ++ (code " LIKE CONCAT(") ++ sub ++ (code ", '%')") + res = str ++ " LIKE CONCAT(" ++ sub ++ ", '%')" res.paren ## PRIVATE ends_with = Base_Generator.lift_binary_op "ends_with" str-> sub-> - res = str ++ (code " LIKE CONCAT('%', ") ++ sub ++ (code ")") + res = str ++ " LIKE CONCAT('%', " ++ sub ++ ")" res.paren ## PRIVATE make_contains_expr expr substring = - code "position(" ++ substring ++ code " in " ++ expr ++ code ") > 0" + code "position(" ++ substring ++ " in " ++ expr ++ ") > 0" ## PRIVATE contains = Base_Generator.lift_binary_op "contains" make_contains_expr diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Connection.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Connection.enso index 09b6e40bcf..738cbbfb58 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Connection.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Connection.enso @@ -1,6 +1,7 @@ from Standard.Base import all from Standard.Database.Data.SQL import SQL_Type, Statement +import Standard.Database.Data.SQL_Query import Standard.Database.Internal.JDBC_Connection import Standard.Database.Data.Dialect @@ -27,16 +28,6 @@ type SQLite_Connection close : Nothing close self = self.connection.close - ## UNSTABLE - - Accesses a table within the database, returning an object that can be - used to create queries for that table. - - Arguments: - - name: name of the table to access - access_table : Text -> Database_Table - access_table self name = self.connection.access_table name - ## Returns the list of databases (or catalogs) for the connection. databases : [Text] databases self = [Nothing] @@ -87,19 +78,29 @@ type SQLite_Connection tables self name_like=Nothing database=self.database schema=self.schema types=Nothing all_fields=False = self.connection.tables name_like database schema types all_fields - ## ADVANCED - - Executes a raw query and returns the result as an in-memory Table. + ## Set up a query returning a Table object, which can be used to work with data within the database or load it into memory. Arguments: - - query: either raw SQL code as Text or an instance of - .Statement - representing the query to execute. - - expected_types: an optional array of expected types of each column; - meant only for internal use. - execute_query : Text | Statement -> Vector SQL_Type -> Materialized_Table - execute_query self query expected_types=Nothing = - self.connection.execute_query query expected_types + - query: name of the table or sql statement to query. + If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query. + - alias: optionally specify a friendly alias for the query. + query : Text | SQL_Query -> Text -> Database_Table + query self query alias="" = self.connection.query query alias + + ## Execute the query and load the results into memory as a Table. + + Arguments: + - query: name of the table or sql statement to query. + If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query. + - limit: the maximum number of rows to return. 
+ read : Text | SQL_Query -> Integer | Nothing -> Table + read self query limit=Nothing = self.connection.read query limit + + ## PRIVATE + Internal read function for a statement with optional types. + read_statement : Statement -> (Nothing | Vector SQL_Type) -> Table + read_statement self statement expected_types=Nothing = + self.connection.read_statement statement expected_types ## ADVANCED @@ -114,18 +115,6 @@ type SQLite_Connection execute_update self query = self.connection.execute_update query - ## PRIVATE - - A helper function that fetches column names and sql types associated with - them for a table in the database. - - Arguments: - - table_name: The name of the table to fetch the column metadata for. - # fetch_columns : Text -> Vector [Text, SQL_Type] - fetch_columns : Text -> Vector Any - fetch_columns self table_name = - self.connection.fetch_columns table_name - ## PRIVATE UNSTABLE This is a prototype function used in our test suites. It may change. diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso index 49bc91ae9c..b86f954bf9 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso @@ -107,31 +107,31 @@ unsupported name = ## PRIVATE agg_count_is_null = Base_Generator.lift_unary_op "COUNT_IS_NULL" arg-> - code "COALESCE(SUM(" ++ arg.paren ++ code " IS NULL), 0)" + code "COALESCE(SUM(" ++ arg.paren ++ " IS NULL), 0)" ## PRIVATE agg_count_empty = Base_Generator.lift_unary_op "COUNT_EMPTY" arg-> - code "COALESCE(SUM((" ++ arg.paren ++ code " IS NULL) OR (" ++ arg.paren ++ code " == '')), 0)" + code "COALESCE(SUM((" ++ arg.paren ++ " IS NULL) OR (" ++ arg.paren ++ " == '')), 0)" ## PRIVATE agg_count_not_empty = Base_Generator.lift_unary_op "COUNT_NOT_EMPTY" arg-> - code "COALESCE(SUM((" ++ arg.paren ++ code " IS NOT NULL) AND (" ++ arg.paren ++ code " != '')), 0)" + code "COALESCE(SUM((" ++ arg.paren ++ " IS NOT NULL) AND (" ++ arg.paren ++ " != '')), 0)" ## PRIVATE agg_stddev_pop = Base_Generator.lift_unary_op "STDDEV_POP" arg-> - sum_of_squares = code "SUM(" ++ arg.paren ++ code "*" ++ arg.paren ++ code ")" - square_of_sums = code "SUM(" ++ arg ++ code ") * SUM(" ++ arg ++ code ")" - n = code "CAST(COUNT(" ++ arg ++ code ") AS REAL)" - var = code "(" ++ sum_of_squares ++ code " - (" ++ square_of_sums ++ code " / " ++ n ++ code ")) / " ++ n - code "SQRT(" ++ var ++ code ")" + sum_of_squares = code "SUM(" ++ arg.paren ++ "*" ++ arg.paren ++ ")" + square_of_sums = code "SUM(" ++ arg ++ ") * SUM(" ++ arg ++ ")" + n = code "CAST(COUNT(" ++ arg ++ ") AS REAL)" + var = code "(" ++ sum_of_squares ++ " - (" ++ square_of_sums ++ " / " ++ n ++ ")) / " ++ n + code "SQRT(" ++ var ++ ")" ## PRIVATE agg_stddev_samp = Base_Generator.lift_unary_op "STDDEV_SAMP" arg-> - sum_of_squares = code "SUM(" ++ arg.paren ++ code "*" ++ arg.paren ++ code ")" - square_of_sums = code "SUM(" ++ arg ++ code ") * SUM(" ++ arg ++ code ")" - n = code "CAST(COUNT(" ++ arg ++ code ") AS REAL)" - var = code "(" ++ sum_of_squares ++ code " - (" ++ square_of_sums ++ code " / " ++ n ++ code ")) / (" ++ n ++ code " - 1)" - code "SQRT(" ++ var ++ code ")" + sum_of_squares = code "SUM(" ++ arg.paren ++ "*" ++ arg.paren ++ ")" + square_of_sums = code "SUM(" ++ arg ++ ") * SUM(" ++ arg ++ ")" + n = code "CAST(COUNT(" ++ arg ++ ") AS REAL)" + var = code "(" ++ 
sum_of_squares ++ " - (" ++ square_of_sums ++ " / " ++ n ++ ")) / (" ++ n ++ " - 1)" + code "SQRT(" ++ var ++ ")" ## PRIVATE This is a prototype that doesn't work correctly. Left for reference for @@ -152,45 +152,45 @@ window_aggregate window_type ignore_null args = order_exprs = args.tail filter_clause = if ignore_null.not then code "" else - code " FILTER (WHERE " ++ result_expr.paren ++ code " IS NOT NULL)" + code " FILTER (WHERE " ++ result_expr.paren ++ " IS NOT NULL)" - code window_type+"(" ++ result_expr ++ code ")" ++ filter_clause ++ code " OVER (ORDER BY " ++ SQL.join "," order_exprs ++ code ")" + code window_type+"(" ++ result_expr ++ ")" ++ filter_clause ++ " OVER (ORDER BY " ++ SQL.join "," order_exprs ++ ")" ## PRIVATE concat_ops = make_raw_concat_expr expr separator = - code "group_concat(" ++ expr ++ code ", " ++ separator ++ code ")" + code "group_concat(" ++ expr ++ ", " ++ separator ++ ")" concat = Base_Generator.make_concat make_raw_concat_expr make_contains_expr [["CONCAT", concat (has_quote=False)], ["CONCAT_QUOTE_IF_NEEDED", concat (has_quote=True)]] ## PRIVATE agg_count_distinct args = case args.length == 1 of - True -> code "COUNT(DISTINCT (" ++ args.first ++ code "))" + True -> code "COUNT(DISTINCT (" ++ args.first ++ "))" False -> Error.throw (Illegal_Argument_Error_Data "COUNT_DISTINCT supports only single arguments in SQLite.") ## PRIVATE agg_count_distinct_include_null args = case args.length == 1 of True -> arg = args.first - count = code "COUNT(DISTINCT " ++ arg ++ code ")" - all_nulls_case = code "CASE WHEN COUNT(CASE WHEN " ++ arg ++ code "IS NULL THEN 1 END) > 0 THEN 1 ELSE 0 END" - count ++ code " + " ++ all_nulls_case + count = code "COUNT(DISTINCT " ++ arg ++ ")" + all_nulls_case = code "CASE WHEN COUNT(CASE WHEN " ++ arg ++ "IS NULL THEN 1 END) > 0 THEN 1 ELSE 0 END" + count ++ " + " ++ all_nulls_case False -> Error.throw (Illegal_Argument_Error_Data "COUNT_DISTINCT supports only single arguments in SQLite.") ## PRIVATE starts_with = Base_Generator.lift_binary_op "starts_with" str-> sub-> - res = str ++ (code " LIKE (") ++ sub ++ (code " || '%')") + res = str ++ " LIKE (" ++ sub ++ " || '%')" res.paren ## PRIVATE ends_with = Base_Generator.lift_binary_op "ends_with" str-> sub-> - res = str ++ (code " LIKE ('%' || ") ++ sub ++ (code ")") + res = str ++ " LIKE ('%' || " ++ sub ++ ")" res.paren ## PRIVATE make_contains_expr expr substring = - code "instr(" ++ expr ++ code ", " ++ substring ++ code ") > 0" + code "instr(" ++ expr ++ ", " ++ substring ++ ") > 0" ## PRIVATE contains = Base_Generator.lift_binary_op "contains" make_contains_expr diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Main.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Main.enso index be76efe1a4..20201d0d55 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Main.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Main.enso @@ -1,5 +1,6 @@ import Standard.Database.Data.Table import Standard.Database.Data.Column +import Standard.Database.Data.SQL_Query import Standard.Database.Connection.Database import Standard.Database.Connection.Credentials @@ -24,3 +25,4 @@ from Standard.Database.Connection.Database export connect from Standard.Database.Connection.Postgres_Options export Postgres_Options, Postgres from Standard.Database.Connection.SQLite_Options export SQLite_Options, SQLite, In_Memory from Standard.Database.Connection.Redshift_Options export Redshift_Options, Redshift, AWS_Profile, AWS_Key +from Standard.Database.Data.SQL_Query export 
SQL_Query, Table_Name, Raw_SQL diff --git a/distribution/lib/Standard/Visualization/0.0.0-dev/src/Table/Visualization.enso b/distribution/lib/Standard/Visualization/0.0.0-dev/src/Table/Visualization.enso index 3547c75c3e..34b58a3ce7 100644 --- a/distribution/lib/Standard/Visualization/0.0.0-dev/src/Table/Visualization.enso +++ b/distribution/lib/Standard/Visualization/0.0.0-dev/src/Table/Visualization.enso @@ -30,7 +30,7 @@ prepare_visualization x max_rows=1000 = Helpers.recover_errors <| case x of Database_Table.Table_Data _ _ _ _ -> # Materialize a table with indices as normal columns (because dataframe does not support multi-indexing). - df = x.reset_index.to_dataframe max_rows + df = x.reset_index.read max_rows # Then split into actual columns and indices. vis_df = df.select_columns (By_Name (x.columns.map .name)) indices = df.select_columns (By_Name (x.indices.map .name)) . columns diff --git a/test/Table_Tests/src/Database/Codegen_Spec.enso b/test/Table_Tests/src/Database/Codegen_Spec.enso index 3c2c9a6365..38a22908dd 100644 --- a/test/Table_Tests/src/Database/Codegen_Spec.enso +++ b/test/Table_Tests/src/Database/Codegen_Spec.enso @@ -26,7 +26,7 @@ spec = table3 = ["T3", [["A", int], ["E", bool], ["F", int]]] tables = Map.from_vector [table1, table2, table3] Fake_Test_Connection.make Dialect.sqlite tables - t1 = test_connection.access_table "T1" + t1 = test_connection.query (Table_Name "T1") Test.group "[Codegen] JSON serialization" <| Test.specify "should serialize Tables and Columns to their SQL representation" <| q1 = t1.where (t1.at "A" == 42) . to_json @@ -100,8 +100,8 @@ spec = c2.to_sql.prepare . should_equal ['SELECT "T1"."B" AS "B" FROM "T1" AS "T1" WHERE ("T1"."A" = "T1"."C")', []] Test.group "[Codegen] Joining Tables" <| - t2 = test_connection.access_table "T2" - t3 = test_connection.access_table "T3" + t2 = test_connection.query (Table_Name "T2") + t3 = test_connection.query (Table_Name "T3") Test.specify "should allow joining tables index-on-index" <| r1 = t1.set_index 'A' . join (t2.set_index 'D') r1.to_sql.prepare . should_equal ['SELECT "T1"."B" AS "B", "T1"."C" AS "C", "T2"."E" AS "E", "T2"."F" AS "F" FROM (SELECT "T1"."B" AS "B", "T1"."C" AS "C", "T1"."A" AS "A", "T1"."A" AS "A_1" FROM "T1" AS "T1") AS "T1" LEFT JOIN (SELECT "T2"."E" AS "E", "T2"."F" AS "F", "T2"."D" AS "D" FROM "T2" AS "T2") AS "T2" ON ("T1"."A_1" = "T2"."D")', []] @@ -124,8 +124,8 @@ spec = table2 = ["T2", [["X", int], ["A", int], ["B", int]]] tables = Map.from_vector [table1, table2] Fake_Test_Connection.make Dialect.sqlite tables - t1 = connection.access_table "T1" - t2 = connection.access_table "T2" + t1 = connection.query (Table_Name "T1") + t2 = connection.query (Table_Name "T2") (t1.set_index "X").join (t2.set_index "X") . should_fail_with Illegal_State_Error_Data Test.specify "should ensure that name suffixes are distinct" <| diff --git a/test/Table_Tests/src/Database/Common_Spec.enso b/test/Table_Tests/src/Database/Common_Spec.enso index a181332e60..ca3dc954d0 100644 --- a/test/Table_Tests/src/Database/Common_Spec.enso +++ b/test/Table_Tests/src/Database/Common_Spec.enso @@ -26,8 +26,8 @@ spec prefix connection pending=Nothing = t1 = upload "T1" (Materialized_Table.new [["a", [1, 4]], ["b", [2, 5]], ["c", [3, 6]]]) Test.group prefix+"Basic Table Access" pending=pending <| Test.specify "should allow to materialize tables and columns into local memory" <| - df = t1.to_dataframe - a = t1.at 'a' . to_dataframe + df = t1.read + a = t1.at 'a' . read df.at 'a' . to_vector . 
should_equal [1, 4] a.to_vector . should_equal [1, 4] Test.specify "should allow to materialize columns directly into a Vector" <| @@ -35,7 +35,7 @@ spec prefix connection pending=Nothing = v . should_equal [1, 4] Test.specify "should preserve indexes when materializing tables" <| # TODO add multi indexes when implemented - df = t1.set_index 'a' . to_dataframe + df = t1.set_index 'a' . read df.at 'b' . to_vector . should_equal [2, 5] df.columns.map .name . should_equal ['b', 'c'] ix = df.index @@ -44,7 +44,7 @@ spec prefix connection pending=Nothing = Test.specify "should preserve indexes when materializing columns" <| # TODO add multi indexes when implemented b = t1.set_index 'a' . at 'b' - col = b . to_dataframe + col = b . read col.to_vector . should_equal [2, 5] ix = col.index @@ -56,14 +56,13 @@ spec prefix connection pending=Nothing = ix2.to_vector . should_equal [1, 4] Test.specify "should work correctly when there are no columns" <| empty = t1.select_columns (By_Name []) - empty.to_dataframe.column_count . should_equal 0 - empty.to_dataframe.row_count . should_equal empty.row_count + empty.read.column_count . should_equal 0 + empty.read.row_count . should_equal empty.row_count Test.specify "should handle bigger result sets" <| n = 1000 original = Materialized_Table.new [["a", Vector.new n ix->ix], ["b", Vector.new n ix-> ix * 3.1415926], ["c", Vector.new n ix-> ix.to_text]] table = upload "Big" original - materialized = table.to_dataframe - materialized.row_count . should_equal n + table.read.row_count . should_equal n Test.group prefix+"Mapping Operations" pending=pending <| t2 = upload "T2" <| Materialized_Table.new [["x", [1, 4, 5, Nothing]], ["y", [2, 3, 5, Nothing]], ["b", [False, False, True, Nothing]]] @@ -117,7 +116,7 @@ spec prefix connection pending=Nothing = Test.group prefix+"Masking Tables" pending=pending <| Test.specify "should allow to select rows from a table or column based on an expression" <| t2 = t1.where (t1.at "a" == 1) - df = t2.to_dataframe + df = t2.read df.at "a" . to_vector . should_equal [1] df.at "b" . to_vector . should_equal [2] df.at "c" . to_vector . should_equal [3] @@ -134,24 +133,24 @@ spec prefix connection pending=Nothing = the Dataframes library, so it is independent of the library under testing here. Test.specify "should allow joining tables index-on-index" <| - r_1 = a.set_index 'x' . join (b.set_index 'w') . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'y', Sort_Column.Name 'z']) + r_1 = a.set_index 'x' . join (b.set_index 'w') . read . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'y', Sort_Column.Name 'z']) r_1.at 'y' . to_vector . should_equal ['bar', 'baz', 'eggs', 'foo', 'spam', 'spam', 'spam'] r_1.at 'z' . to_vector . should_equal [Nothing, Nothing, 'foo', Nothing, 'bar', 'eggs', 'foo'] - r_2 = a.set_index 'y' . join (b.set_index 'z') drop_unmatched=True . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'x', Sort_Column.Name 'w']) + r_2 = a.set_index 'y' . join (b.set_index 'z') drop_unmatched=True . read . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'x', Sort_Column.Name 'w']) r_2.at 'x' . to_vector . should_equal [0, 0, 1, 1, 3, 6] r_2.at 'w' . to_vector . should_equal [3, 6, 3, 5, 5, 3] Test.specify "should allow joining tables column-on-index" <| - r_1 = a.join (b.set_index 'w') on='x' . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'y', Sort_Column.Name 'z']) + r_1 = a.join (b.set_index 'w') on='x' . read . 
order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'y', Sort_Column.Name 'z'])
             r_1.at 'y' . to_vector . should_equal ['bar', 'baz', 'eggs', 'foo', 'spam', 'spam', 'spam']
             r_1.at 'z' . to_vector . should_equal [Nothing, Nothing, 'foo', Nothing, 'bar', 'eggs', 'foo']
-            r_2 = a.join (b.set_index 'z') drop_unmatched=True on='y' . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'x', Sort_Column.Name 'w'])
+            r_2 = a.join (b.set_index 'z') drop_unmatched=True on='y' . read . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'x', Sort_Column.Name 'w'])
             r_2.at 'x' . to_vector . should_equal [0, 0, 1, 1, 3, 6]
             r_2.at 'w' . to_vector . should_equal [3, 6, 3, 5, 5, 3]
 
         Test.specify "should allow self-joins and append suffixes to disambiguate column names" <|
-            r_1 = a.join (a.set_index 'x') on='x' . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'x'])
+            r_1 = a.join (a.set_index 'x') on='x' . read . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'x'])
             r_1.columns.map .name . should_equal ['x', 'y_left', 'y_right']
             r_1.at 'x' . to_vector . should_equal [0, 1, 3, 6, 7]
             expected_y = ['foo', 'bar', 'spam', 'eggs', 'baz']
@@ -169,7 +168,7 @@ spec prefix connection pending=Nothing =
             tb_2 = tb.set_index "id"
             res = (tc.join ta_2 on="id_a") . join tb_2 on="id_b" left_suffix="_a" right_suffix="_b"
             sel = res.select_columns (By_Name ["name_a", "name_b"])
-            df = sel.to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "name_a"])
+            df = sel.read . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "name_a"])
             df . at "name_a" . to_vector . should_equal ["Foo", "Hmm"]
             df . at "name_b" . to_vector . should_equal ["Bar", "Hmm"]
 
@@ -193,7 +192,7 @@ spec prefix connection pending=Nothing =
             col.drop_missing.to_vector . should_equal [0, 1, 42]
 
         Test.specify "drop_missing_rows should drop rows that contain at least one missing column in a Table" <|
-            d = t4.drop_missing_rows.to_dataframe
+            d = t4.drop_missing_rows.read
             d.at 'a' . to_vector . should_equal [0]
             d.at 'b' . to_vector . should_equal [True]
             d.at 'c' . to_vector . should_equal [""]
@@ -208,7 +207,7 @@ spec prefix connection pending=Nothing =
 
             empty = t4.drop_missing_columns
             empty.column_count . should_equal 0
-            empty.to_dataframe.column_count . should_equal 0
+            empty.read.column_count . should_equal 0
 
     Test.group prefix+"Column-wide statistics" pending=pending <|
         Test.specify 'should allow computing basic column-wide stats' <|
@@ -289,7 +288,7 @@ spec prefix connection pending=Nothing =
             col = t.index+10
             vec = [11, 12, 13]
             col.to_vector . should_equal vec
-            df_col = col.to_dataframe
+            df_col = col.read
             df_col.to_vector . should_equal vec
             df_col.index.to_vector . should_equal [1, 2, 3]
 
@@ -327,13 +326,13 @@ spec prefix connection pending=Nothing =
                returned in lower case.
             aggregates = [Count "count", Count_Not_Nothing "price" "count not nothing price", Count_Nothing "price" "count nothing price"]
 
-            t1 = determinize_by "name" (t.aggregate ([Group_By "name"] + aggregates) . to_dataframe)
+            t1 = determinize_by "name" (t.aggregate ([Group_By "name"] + aggregates) . read)
             t1.at "name" . to_vector . should_equal ["bar", "baz", "foo", "quux", "zzzz"]
             t1.at "count" . to_vector . should_equal [2, 1, 5, 1, 7]
             t1.at "count not nothing price" . to_vector . should_equal [2, 1, 3, 0, 5]
             t1.at "count nothing price" . to_vector . should_equal [0, 0, 2, 1, 2]
 
-            t2 = t.aggregate aggregates . to_dataframe
+            t2 = t.aggregate aggregates . read
             t2.at "count" . to_vector . should_equal [16]
             t2.at "count not nothing price" . to_vector . should_equal [11]
             t2.at "count nothing price" . to_vector . should_equal [5]
@@ -344,13 +343,13 @@ spec prefix connection pending=Nothing =
             aggregates = [Sum "price" "sum price", Sum "quantity" "sum quantity", Average "price" "avg price"]
             ## TODO can check the datatypes
 
-            t1 = determinize_by "name" (t.aggregate ([Group_By "name" Nothing] + aggregates) . to_dataframe)
+            t1 = determinize_by "name" (t.aggregate ([Group_By "name" Nothing] + aggregates) . read)
             t1.at "name" . to_vector . should_equal ["bar", "baz", "foo", "quux", "zzzz"]
             t1.at "sum price" . to_vector . should_equal [100.5, 6.7, 1, Nothing, 2]
             t1.at "sum quantity" . to_vector . should_equal [80, 40, 120, 70, 2]
             t1.at "avg price" . to_vector . should_equal [50.25, 6.7, (1/3), Nothing, (2/5)]
 
-            t2 = t.aggregate aggregates . to_dataframe
+            t2 = t.aggregate aggregates . read
             t2.at "sum price" . to_vector . should_equal [110.2]
             t2.at "sum quantity" . to_vector . should_equal [312]
             t2.at "avg price" . to_vector . should_equal [(110.2 / 11)]
diff --git a/test/Table_Tests/src/Database/Helpers/Fake_Test_Connection.enso b/test/Table_Tests/src/Database/Helpers/Fake_Test_Connection.enso
index c4133f83d6..95247444af 100644
--- a/test/Table_Tests/src/Database/Helpers/Fake_Test_Connection.enso
+++ b/test/Table_Tests/src/Database/Helpers/Fake_Test_Connection.enso
@@ -1,33 +1,45 @@
 from Standard.Base import all
+from Standard.Database import SQL_Query, Raw_SQL, Table_Name
 import Standard.Table.Data.Table as Materialized_Table
 import Standard.Database.Data.Table as Database_Table
 from Standard.Database.Data.SQL import Statement, SQL_Type
+import Standard.Database.Internal.IR
 
 type Fake_Test_Connection
     # type Fake_Test_Connection_Data (tables : Map Text (Vector [Text, SQL_Type])) (dialect : Text)
     Fake_Test_Connection_Data tables dialect
 
-    ## PRIVATE
-    access_table : Text -> Database_Table
-    access_table self name =
-        columns = self.tables.get name
-        Database_Table.make_table self name columns
+    ## Set up a query returning a Table object, which can be used to work with data within the database or load it into memory.
+
+       Arguments:
+       - query: name of the table or sql statement to query.
+         If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query.
+       - alias: optionally specify a friendly alias for the query.
+    query : Text | SQL_Query -> Text -> Database_Table
+    query self query alias="" = case query of
+        Text -> self.query (Table_Name query) alias
+        Raw_SQL _ ->
+            Error.throw (Illegal_Argument_Error "Cannot query a fake connection with raw SQL")
+        Table_Name name ->
+            columns = self.tables.get name
+            Database_Table.make_table self name columns (IR.context_for_table name)
+
+    ## Execute the query and load the results into memory as a Table.
+
+       Arguments:
+       - query: name of the table or sql statement to query.
+         If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query.
+       - alias: optionally specify a friendly alias for the query.
+       - limit: the maximum number of rows to return (default 1000).
+    read : Text | SQL_Query -> Text -> Integer | Nothing -> Table
+    read self _ _="" _=Nothing =
+        Error.throw "Materialization not supported on fake connection."
 
     ## PRIVATE
     close : Nothing
     close self = Nothing
 
-    ## PRIVATE
-    explain_query_plan : Statement -> Text
-    explain_query_plan self _ =
-        Error.throw "Materialization not supported on fake connection."
-
-    ## PRIVATE
-    execute_query : Text | Statement -> Materialized_Table
-    execute_query self _ =
-        Error.throw "Materialization not supported on fake connection."
-
     ## PRIVATE
     execute_update : Text | Statement -> Integer
     execute_update self _ =
diff --git a/test/Table_Tests/src/Database/Postgres_Spec.enso b/test/Table_Tests/src/Database/Postgres_Spec.enso
index ab81f27952..39188fcd1c 100644
--- a/test/Table_Tests/src/Database/Postgres_Spec.enso
+++ b/test/Table_Tests/src/Database/Postgres_Spec.enso
@@ -39,14 +39,14 @@ postgres_specific_spec connection db_name pending =
 
     Test.specify "should allow changing schema" <|
         new_connection = connection.set_schema "information_schema"
-        new_schema = new_connection.execute_query "SELECT current_schema()" . at 0 . to_vector . first
+        new_schema = new_connection.read (Raw_SQL "SELECT current_schema()") . at 0 . to_vector . first
         new_schema . should_equal "information_schema"
 
     databases = connection.databases.filter d->((d!=db_name) && (d!='rdsadmin'))
     pending_database = if databases.length != 0 then Nothing else "Cannot tests changing database unless two databases defined."
     Test.specify "should allow changing database" pending=pending_database <|
         new_connection = connection.set_database databases.first
-        new_database = new_connection.execute_query "SELECT current_database()" . at 0 . to_vector . first
+        new_database = new_connection.read (Raw_SQL "SELECT current_database()") . at 0 . to_vector . first
         new_database . should_equal databases.first
 
     Test.group "[PostgreSQL] Tables and Table Types" <|
@@ -91,7 +91,7 @@ postgres_specific_spec connection db_name pending =
     Test.group "[PostgreSQL] Info" pending=pending <|
         tinfo = Name_Generator.random_name "Tinfo"
         connection.execute_update 'CREATE TEMPORARY TABLE "'+tinfo+'" ("strs" VARCHAR, "ints" INTEGER, "bools" BOOLEAN, "reals" REAL, "doubles" DOUBLE PRECISION)'
-        t = connection.access_table tinfo
+        t = connection.query (Table_Name tinfo)
         t.insert ["a", Nothing, False, 1.2, 0.000000000001]
         t.insert ["abc", Nothing, Nothing, 1.3, Nothing]
         t.insert ["def", 42, True, 1.4, 10]
@@ -118,7 +118,7 @@ postgres_specific_spec connection db_name pending =
     Test.group "[PostgreSQL] Table.aggregate should correctly infer result types" pending=pending <|
         name = Name_Generator.random_name "Ttypes"
        connection.execute_update 'CREATE TEMPORARY TABLE "'+name+'" ("txt" VARCHAR, "i1" SMALLINT, "i2" INT, "i3" BIGINT, "i4" NUMERIC, "r1" REAL, "r2" DOUBLE PRECISION, "bools" BOOLEAN)'
-        t = connection.access_table name
+        t = connection.query (Table_Name name)
         Test.specify "Concatenate, Shortest and Longest" <|
             r = t.aggregate [Concatenate "txt", Shortest "txt", Longest "txt"]
             r.columns.at 0 . sql_type . should_equal SQL_Type.text
@@ -169,7 +169,7 @@ run_tests connection db_name pending=Nothing =
         table_names.each name->
             sql = 'DROP TABLE "' + name + '"'
             Panic.rethrow <| connection.execute_update sql
-    materialize = .to_dataframe
+    materialize = .read
 
     Common_Spec.spec prefix connection pending=pending
     postgres_specific_spec connection db_name pending=pending
diff --git a/test/Table_Tests/src/Database/Redshift_Spec.enso b/test/Table_Tests/src/Database/Redshift_Spec.enso
index 1c8cef955a..607a2fa440 100644
--- a/test/Table_Tests/src/Database/Redshift_Spec.enso
+++ b/test/Table_Tests/src/Database/Redshift_Spec.enso
@@ -17,7 +17,7 @@ redshift_specific_spec connection pending =
     Test.group "[Redshift] Info" pending=pending <|
         tinfo = Name_Generator.random_name "Tinfo"
         connection.execute_update 'CREATE TEMPORARY TABLE "'+tinfo+'" ("strs" VARCHAR, "ints" INTEGER, "bools" BOOLEAN, "reals" REAL)'
-        t = connection.access_table tinfo
+        t = connection.query (Table_Name tinfo)
         t.insert ["a", Nothing, False, 1.2]
         t.insert ["abc", Nothing, Nothing, 1.3]
         t.insert ["def", 42, True, 1.4]
@@ -51,7 +51,7 @@ run_tests connection pending=Nothing =
         table_names.each name->
             sql = 'DROP TABLE "' + name + '"'
             Panic.rethrow <| connection.execute_update sql
-    materialize = .to_dataframe
+    materialize = .read
 
     Common_Spec.spec prefix connection pending=pending
     redshift_specific_spec connection pending=pending
diff --git a/test/Table_Tests/src/Database/SQLite_Spec.enso b/test/Table_Tests/src/Database/SQLite_Spec.enso
index b0563864fc..e8c4e44459 100644
--- a/test/Table_Tests/src/Database/SQLite_Spec.enso
+++ b/test/Table_Tests/src/Database/SQLite_Spec.enso
@@ -67,17 +67,17 @@ sqlite_specific_spec connection =
 
     Test.group "[SQLite] Error Handling" <|
         Test.specify "should wrap errors" <|
-            connection.execute_query "foobar" . should_fail_with SQL_Error_Data
+            connection.read (Raw_SQL "foobar") . should_fail_with SQL_Error_Data
             connection.execute_update "foobar" . should_fail_with SQL_Error_Data
 
-            action = connection.execute_query "SELECT A FROM undefined_table"
+            action = connection.read (Raw_SQL "SELECT A FROM undefined_table")
             action . should_fail_with SQL_Error_Data
             action.catch.to_text . should_equal "There was an SQL error: '[SQLITE_ERROR] SQL error or missing database (no such table: undefined_table)'. [Query was: SELECT A FROM undefined_table]"
 
     Test.group "[SQLite] Metadata" <|
         tinfo = Name_Generator.random_name "Tinfo"
         connection.execute_update 'CREATE TABLE "'+tinfo+'" ("strs" VARCHAR, "ints" INTEGER, "bools" BOOLEAN, "reals" REAL)'
-        t = connection.access_table tinfo
+        t = connection.query (Table_Name tinfo)
         t.insert ["a", Nothing, False, 1.2]
         t.insert ["abc", Nothing, Nothing, 1.3]
         t.insert ["def", 42, True, 1.4]
@@ -106,7 +106,7 @@ sqlite_spec connection prefix =
         in_mem_table = Materialized_Table.new columns
         connection.upload_table name in_mem_table
 
-    materialize = .to_dataframe
+    materialize = .read
 
     Common_Spec.spec prefix connection
    sqlite_specific_spec connection
diff --git a/test/Visualization_Tests/src/SQL_Spec.enso b/test/Visualization_Tests/src/SQL_Spec.enso
index 86e7dc9f5d..46612325cc 100644
--- a/test/Visualization_Tests/src/SQL_Spec.enso
+++ b/test/Visualization_Tests/src/SQL_Spec.enso
@@ -9,7 +9,7 @@ import Standard.Test
 
 visualization_spec connection =
     connection.execute_update 'CREATE TABLE "T" ("A" VARCHAR, "B" INTEGER, "C" INTEGER)'
-    t = connection.access_table "T"
+    t = connection.query (Table_Name "T")
     Test.group "SQL Visualization" <|
         Test.specify "should provide type metadata for interpolations" <|
             q = t.where ((t.at "B" == 2) && (t.at "A" == True)) . at "C"
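Usage sketch (editor's note, not part of the diff): the snippet below illustrates the replacement of `access_table`, `execute_query` and `to_dataframe` with `query` and `read`, mirroring the calls made in the updated tests. The `example` function and the table name "T" are assumed for illustration (as created in SQL_Spec above); the import line matches the one added to Fake_Test_Connection.enso.

    from Standard.Database import SQL_Query, Raw_SQL, Table_Name

    example connection =
        # Build a lazy Database Table from a table name or from raw SQL.
        t1 = connection.query (Table_Name "T")
        t2 = connection.query (Raw_SQL 'SELECT "A", "B" FROM "T"')
        # A plain Text argument is resolved against `connection.tables`.
        t3 = connection.query "T"
        # `read` loads results into an in-memory Table (old `execute_query`),
        # and `Table.read` replaces the old `to_dataframe`.
        m1 = connection.read (Raw_SQL 'SELECT COUNT(*) FROM "T"')
        m2 = t1.read
        [t1, t2, t3, m1, m2]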