From 2ce156738462ae3798161d2cef8adeb3ef728471 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 6 Nov 2023 17:41:47 +0100 Subject: [PATCH] Limit `max_rows` that are downloaded in `Table.read` by default, and warn if more rows are available (#8159) - Sets the default limit for `Table.read` in Database to be max 1000 rows. - The limit for in-memory compatible API still defaults to `Nothing`. - Adds a warning if there are more rows than limit. - Enables a few unrelated asserts. --- .../0.0.0-dev/src/Data/Time/Date_Range.enso | 5 +- .../0.0.0-dev/src/Connection/Connection.enso | 14 ++- .../Database/0.0.0-dev/src/Data/Column.enso | 15 +-- .../Database/0.0.0-dev/src/Data/Table.enso | 43 ++++--- .../Internal/Common/Lookup_Query_Helper.enso | 4 +- .../Postgres/Postgres_Connection.enso | 13 ++- .../Internal/SQLite/SQLite_Connection.enso | 13 ++- .../0.0.0-dev/src/Internal/Upload_Table.enso | 9 +- .../Table/0.0.0-dev/src/Data/Column.enso | 13 ++- .../Table/0.0.0-dev/src/Data/Table.enso | 33 +++--- .../Standard/Table/0.0.0-dev/src/Errors.enso | 20 ++++ .../0.0.0-dev/src/Excel/Excel_Workbook.enso | 5 +- .../src/Internal/Aggregate_Column_Helper.enso | 3 +- .../0.0.0-dev/src/Table/Visualization.enso | 2 +- .../Common_Table_Operations/Core_Spec.enso | 110 ++++++++++++++++++ .../src/Database/Common/Common_Spec.enso | 2 +- test/Tests/src/Data/Vector_Spec.enso | 2 - 17 files changed, 235 insertions(+), 71 deletions(-) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Date_Range.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Date_Range.enso index 1e95212068..d4310ea0da 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Date_Range.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Date_Range.enso @@ -17,6 +17,7 @@ import project.Nothing.Nothing from project.Data.Boolean import Boolean, False, True from project.Data.Filter_Condition import unify_condition_or_predicate, 
unify_condition_predicate_or_element from project.Data.Range.Extensions import all +from project.Runtime import assert polyglot java import org.enso.base.Time_Utils @@ -519,7 +520,7 @@ compute_length_step_days start end step increasing = diff = case increasing of True -> Time_Utils.days_between start end False -> Time_Utils.days_between end start - # assert (diff >= 0) + assert (diff >= 0) steps = diff . div step exact_fit = diff % step == 0 if exact_fit then steps else steps+1 @@ -530,7 +531,7 @@ compute_length_step_months start end step increasing = diff = case increasing of True -> Time_Utils.months_between start end False -> Time_Utils.months_between end start - # assert (diff >= 0) + assert (diff >= 0) steps = diff . div step exact_fit = case increasing of True -> start + Period.new months=steps*step == end diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso index 1240cfa63f..79b66c9114 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso @@ -251,8 +251,12 @@ type Connection Arguments: - query: name of the table or sql statement to query. - If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query. - - limit: the maximum number of rows to return. + If supplied as `Text`, the name is checked against the `tables` list to + determine if it is a table or a query. + - limit: the maximum number of rows to read. + If set to `Nothing`, all rows will be returned. + - warn_if_more_rows: if set to `True`, a warning is attached to the + result if the number of rows returned by the query exceeds `limit`. ? Side Effects @@ -262,9 +266,9 @@ type Connection `execute_update` for DML queries, or if they are supposed to return results, the `read` should be wrapped in an execution context check. 
@query make_table_name_selector - read : Text | SQL_Query -> Integer | Nothing -> Materialized_Table ! Table_Not_Found - read self query limit=Nothing = - self.query query . read max_rows=limit + read : Text | SQL_Query -> Integer | Nothing -> Boolean -> Materialized_Table ! Table_Not_Found + read self query (limit : Integer | Nothing = 1000) (warn_if_more_rows : Boolean = True) = + self.query query . read max_rows=limit warn_if_more_rows=warn_if_more_rows ## PRIVATE Creates a new empty table in the database and returns a query referencing diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso index 6a6aa5d4ad..b1c88a4c40 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso @@ -95,17 +95,18 @@ type Column Returns a materialized column containing rows of this column. Arguments: - - max_rows: specifies a maximum amount of rows to fetch; if not set, all - available rows are fetched. - read : (Nothing | Integer) -> Materialized_Column - read self max_rows=Nothing = - self.to_table.read max_rows . at self.name + - max_rows: specifies the maximum number of rows to read. + If `Nothing`, all available rows are returned. + - warn_if_more_rows: if set to `True`, a warning is attached to the + result if the number of rows returned by the query exceeds `max_rows`. + read : (Nothing | Integer) -> Boolean -> Materialized_Column + read self (max_rows : Integer | Nothing = 1000) (warn_if_more_rows:Boolean = True) = + self.to_table.read max_rows warn_if_more_rows . at 0 ## GROUP Standard.Base.Conversions Returns a vector containing all the elements in this column. to_vector : Vector Any - to_vector self = - self.to_table.read . at 0 . to_vector + to_vector self = self.read max_rows=Nothing . to_vector ## GROUP Standard.Base.Metadata Returns the `Value_Type` associated with that column. 
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index a0bc8cd3f0..aad233acec 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -10,6 +10,7 @@ import Standard.Base.Errors.Illegal_Argument.Illegal_Argument import Standard.Base.Errors.Illegal_State.Illegal_State import Standard.Base.Errors.Unimplemented.Unimplemented from Standard.Base.Metadata import make_single_choice +from Standard.Base.Runtime import assert from Standard.Base.Widget_Helpers import make_delimiter_selector import Standard.Table.Data.Calculations.Column_Operation.Column_Operation @@ -90,7 +91,7 @@ type Table - format_terminal: whether ANSI-terminal formatting should be used display : Integer -> Boolean -> Text display self show_rows=10 format_terminal=False = - df = self.read max_rows=show_rows + df = self.read max_rows=show_rows warn_if_more_rows=False all_rows_count = self.row_count display_dataframe df indices_count=0 all_rows_count format_terminal @@ -965,24 +966,25 @@ type Table In the database backend, it first materializes the table to in-memory. Arguments: - - max_rows: The maximum amount of rows to return. It is mainly meant for - the Database backend, to limit how many rows are downloaded. In the - in-memory backend it is only kept for API compatibility. - rows : Integer -> Vector Row - rows self max_rows=1000 = - self.read max_rows=max_rows . rows + - max_rows: specifies the maximum number of rows to read. + If `Nothing`, all available rows are returned. + - warn_if_more_rows: if set to `True`, a warning is attached to the + result if the number of rows returned by the query exceeds `max_rows`. 
+ rows : Integer | Nothing -> Boolean -> Vector Row + rows self (max_rows : Integer | Nothing = 1000) (warn_if_more_rows : Boolean = True) = + self.read max_rows=max_rows warn_if_more_rows=warn_if_more_rows . rows ## GROUP Standard.Base.Selections Returns the first row of the table. first_row : Row ! Index_Out_Of_Bounds first_row self = - self.read max_rows=1 . rows . first + self.read max_rows=1 warn_if_more_rows=False . rows . first ## GROUP Standard.Base.Selections Returns the second row of the table. second_row : Row ! Index_Out_Of_Bounds second_row self = - self.read max_rows=2 . rows . second + self.read max_rows=2 warn_if_more_rows=False . rows . second ## GROUP Standard.Base.Selections Returns the last row of the table. @@ -2238,11 +2240,14 @@ type Table Returns a materialized dataframe containing rows of this table. Arguments: - - max_rows: specifies a maximum amount of rows to fetch; if not set, all - available rows are fetched. - read : (Integer | Nothing) -> Materialized_Table - read self max_rows=Nothing = - preprocessed = self.limit max_rows + - max_rows: specifies the maximum number of rows to read. + If `Nothing`, all available rows are returned. + - warn_if_more_rows: if set to `True`, a warning is attached to the + result if the number of rows returned by the query exceeds `max_rows`. + read : (Integer | Nothing) -> Boolean -> Materialized_Table + read self (max_rows : Integer | Nothing = 1000) (warn_if_more_rows:Boolean = True) = + preprocessed = if max_rows.is_nothing then self else + if warn_if_more_rows then self.limit max_rows+1 else self.limit max_rows case preprocessed.internal_columns.is_empty of True -> Error.throw (Illegal_Argument.Error "Cannot create a table with no columns.") @@ -2252,9 +2257,9 @@ type Table materialized_table = self.connection.read_statement sql column_type_suggestions . 
catch SQL_Error sql_error-> Error.throw (self.connection.dialect.get_error_mapper.transform_custom_errors sql_error) + warnings_builder = Vector.new_builder expected_types = self.columns.map .value_type actual_types = materialized_table.columns.map .value_type - warnings_builder = Vector.new_builder expected_types.zip actual_types expected_type-> actual_type-> if expected_type == actual_type then Nothing else expected_type_kind = Meta.meta expected_type . constructor @@ -2265,14 +2270,18 @@ type Table However, bigger changes, like a Binary type column getting coerced to Mixed - _will_ still be reported. if expected_type_kind == actual_type_kind then Nothing else warnings_builder.append (Inexact_Type_Coercion.Warning expected_type actual_type) - Problem_Behavior.Report_Warning.attach_problems_before warnings_builder.to_vector materialized_table + result = if max_rows.is_nothing || materialized_table.row_count <= max_rows then materialized_table else + assert warn_if_more_rows "We may get more rows than we asked for _only_ if warn_if_more_rows=True" + warnings_builder.append (Not_All_Rows_Downloaded.Warning max_rows) + materialized_table.take max_rows + Problem_Behavior.Report_Warning.attach_problems_before warnings_builder.to_vector result ## PRIVATE Creates a query corresponding to this table. to_select_query : Query to_select_query self = cols = self.internal_columns.map (c -> [c.name, c.expression]) - # assert cols.not_empty + assert cols.not_empty Query.Select cols self.context ## Returns an SQL statement that will be used for materializing this table. 
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Lookup_Query_Helper.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Lookup_Query_Helper.enso index b61d8e8da7..4aba2a7690 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Lookup_Query_Helper.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Lookup_Query_Helper.enso @@ -72,7 +72,7 @@ check_initial_invariants base_table lookup_table lookup_columns allow_unmatched_ check_for_null_keys lookup_table key_column_names <| if allow_unmatched_rows then continuation else unmatched_rows = base_table.join lookup_table on=key_column_names join_kind=Join_Kind.Left_Exclusive . select_columns key_column_names - unmatched_example = unmatched_rows.read max_rows=1 + unmatched_example = unmatched_rows.read max_rows=1 warn_if_more_rows=False if unmatched_example.row_count == 0 then continuation else first_row = unmatched_example.rows.first Error.throw (Unmatched_Rows_In_Lookup.Error first_row.to_vector) @@ -193,7 +193,7 @@ precheck_for_duplicate_matches lookup_columns subquery_setup connection new_ctx Lookup_Column.Key_Column _ _ -> [subquery_setup.get_self_column ix] _ -> [] table_for_duplicate_check = Table.Value subquery_setup.new_table_name connection [subquery_setup.lookup_counter]+key_columns_for_duplicate_check new_ctx - duplicate_lookup_matches = table_for_duplicate_check.filter 0 (Filter_Condition.Greater than=1) . read max_rows=1 + duplicate_lookup_matches = table_for_duplicate_check.filter 0 (Filter_Condition.Greater than=1) . 
read max_rows=1 warn_if_more_rows=False case duplicate_lookup_matches.row_count > 0 of True -> first_example_row = duplicate_lookup_matches.read.rows.first.to_vector diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Connection.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Connection.enso index 35dd7b7c70..bf8716b585 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Connection.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Connection.enso @@ -148,8 +148,12 @@ type Postgres_Connection Arguments: - query: name of the table or sql statement to query. - If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query. - - limit: the maximum number of rows to return. + If supplied as `Text`, the name is checked against the `tables` list to + determine if it is a table or a query. + - limit: the maximum number of rows to read. + If set to `Nothing`, all rows will be returned. + - warn_if_more_rows: if set to `True`, a warning is attached to the + result if the number of rows returned by the query exceeds `limit`. ? Side Effects @@ -159,8 +163,9 @@ type Postgres_Connection `execute_update` for DML queries, or if they are supposed to return results, the `read` should be wrapped in an execution context check. @query make_table_name_selector - read : Text | SQL_Query -> Integer | Nothing -> Materialized_Table ! Table_Not_Found - read self query limit=Nothing = self.connection.read query limit + read : Text | SQL_Query -> Integer | Nothing -> Boolean -> Materialized_Table ! 
Table_Not_Found + read self query (limit : Integer | Nothing = 1000) (warn_if_more_rows : Boolean = True) = + self.connection.read query limit warn_if_more_rows ## GROUP Standard.Base.Output Creates a new empty table in the database and returns a query referencing diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Connection.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Connection.enso index 18d91c7dac..dbef85ef7a 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Connection.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Connection.enso @@ -141,8 +141,12 @@ type SQLite_Connection Arguments: - query: name of the table or sql statement to query. - If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query. - - limit: the maximum number of rows to return. + If supplied as `Text`, the name is checked against the `tables` list to + determine if it is a table or a query. + - limit: the maximum number of rows to read. + If set to `Nothing`, all rows will be returned. + - warn_if_more_rows: if set to `True`, a warning is attached to the + result if the number of rows returned by the query exceeds `limit`. ? Side Effects @@ -152,8 +156,9 @@ type SQLite_Connection `execute_update` for DML queries, or if they are supposed to return results, the `read` should be wrapped in an execution context check. @query make_table_name_selector - read : Text | SQL_Query -> Integer | Nothing -> Materialized_Table ! Table_Not_Found - read self query limit=Nothing = self.connection.read query limit + read : Text | SQL_Query -> Integer | Nothing -> Boolean -> Materialized_Table ! 
Table_Not_Found + read self query (limit : Integer | Nothing = 1000) (warn_if_more_rows : Boolean = True) = + self.connection.read query limit warn_if_more_rows ## GROUP Standard.Base.Output Creates a new empty table in the database and returns a query referencing diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload_Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload_Table.enso index b044a6002a..8e52b7c8fe 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload_Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload_Table.enso @@ -1,4 +1,5 @@ from Standard.Base import all +from Standard.Base.Runtime import assert import Standard.Base.Errors.Common.Dry_Run_Operation import Standard.Base.Errors.Common.Forbidden_Operation import Standard.Base.Errors.Illegal_Argument.Illegal_Argument @@ -228,7 +229,7 @@ type Non_Unique_Key_Recipe raise_duplicated_primary_key_error source_table primary_key original_panic = agg = source_table.aggregate [Aggregate_Column.Count]+(primary_key.map Aggregate_Column.Group_By) filtered = agg.filter column=0 (Filter_Condition.Greater than=1) - materialized = filtered.read max_rows=1 + materialized = filtered.read max_rows=1 warn_if_more_rows=False case materialized.row_count == 0 of ## If we couldn't find a duplicated key, we give up the translation and rethrow the original panic containing the SQL error. 
This could @@ -439,7 +440,7 @@ type Append_Helper ## PRIVATE check_rows_unmatched_in_target self ~continuation = - # assert key_columns.not_empty + assert self.key_columns.not_empty unmatched_rows = self.new_source_rows count = unmatched_rows.row_count if count != 0 then Error.throw (Unmatched_Rows.Error count) else continuation @@ -619,7 +620,7 @@ check_multiple_rows_match left_table right_table key_columns ~continuation = joined = left_table.join right_table on=key_columns join_kind=Join_Kind.Inner counted = joined.aggregate [Aggregate_Column.Count]+(key_columns.map (Aggregate_Column.Group_By _)) duplicates = counted.filter 0 (Filter_Condition.Greater than=1) - example = duplicates.read max_rows=1 + example = duplicates.read max_rows=1 warn_if_more_rows=False case example.row_count == 0 of True -> continuation False -> @@ -633,7 +634,7 @@ check_for_null_keys table key_columns ~continuation = keys = table.select_columns key_columns is_any_key_blank = keys.columns.map (_.is_nothing) . reduce (||) null_keys = table.filter is_any_key_blank Filter_Condition.Is_True - example = null_keys.read max_rows=1 + example = null_keys.read max_rows=1 warn_if_more_rows=False case example.row_count == 0 of True -> continuation False -> diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso index ce37abd972..a7658a780d 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso @@ -1984,11 +1984,14 @@ type Column ## Returns a column containing rows of this column. Arguments: - - max_rows: specifies a maximum amount of rows to fetch; if not set, all - available rows are fetched. - read : (Nothing | Integer) -> Column - read self max_rows=Nothing = - if max_rows.is_nothing then self else self.slice 0 max_rows + - max_rows: specifies the maximum number of rows to read. + If `Nothing`, all available rows are returned. 
+ - warn_if_more_rows: if set to `True`, a warning is attached to the + result if the number of rows returned by the query exceeds `max_rows`. + read : (Nothing | Integer) -> Boolean -> Column + read self (max_rows : Integer | Nothing = Nothing) (warn_if_more_rows:Boolean = True) = + if max_rows.is_nothing then self else + self.to_table.read max_rows warn_if_more_rows . at 0 ## GROUP Standard.Base.Conversions Returns a vector containing all the elements in this column. diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index 1e467fb866..695bc74e56 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -1676,15 +1676,13 @@ type Table In the database backend, it first materializes the table to in-memory. Arguments: - - max_rows: The maximum amount of rows to return. It is mainly meant for - the Database backend, to limit how many rows are downloaded. In the - in-memory backend it is only kept for API compatibility. - rows : Integer -> Vector Row - rows self max_rows=Nothing = - table = case max_rows of - Nothing -> self - _ : Integer -> self.slice 0 max_rows - proxy = Rows_View.Value table + - max_rows: specifies the maximum number of rows to read. + If `Nothing`, all available rows are returned. + - warn_if_more_rows: if set to `True`, a warning is attached to the + result if the number of rows returned by the query exceeds `max_rows`. + rows : Integer | Nothing -> Boolean -> Vector Row + rows self (max_rows : Integer | Nothing = Nothing) (warn_if_more_rows : Boolean = True) = + proxy = Rows_View.Value (self.read max_rows warn_if_more_rows) Vector.from_polyglot_array (Array_Proxy.from_proxy_object proxy) ## GROUP Standard.Base.Selections @@ -2144,12 +2142,19 @@ type Table table is now in-memory, regardless of its origin. 
Arguments: - - max_rows: specifies a maximum amount of rows to fetch; if not set, all - available rows are fetched. - read : (Integer | Nothing) -> Table - read self max_rows=Nothing = case max_rows of + - max_rows: specifies the maximum number of rows to read. + If `Nothing`, all available rows are returned. + - warn_if_more_rows: if set to `True`, a warning is attached to the + result if the number of rows returned by the query exceeds `max_rows`. + read : (Integer | Nothing) -> Boolean -> Table + read self (max_rows : Integer | Nothing = Nothing) (warn_if_more_rows:Boolean = True) = case max_rows of Nothing -> self - _ : Integer -> self.take (First max_rows) + _ : Integer -> + truncated = self.take (First max_rows) + needs_warning = warn_if_more_rows && self.row_count > max_rows + if needs_warning.not then truncated else + Problem_Behavior.Report_Warning.attach_problem_after truncated <| + Not_All_Rows_Downloaded.Warning max_rows ## GROUP Standard.Base.Metadata Returns a Table describing this table's contents. diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso index 1b1281ee2d..5cb645680b 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso @@ -777,3 +777,23 @@ type Null_Values_In_Key_Columns to_display_text self = suffix = if self.add_sql_suffix.not then "" else " The operation has been rolled back. Due to how NULL equality works in SQL, these rows would not be correctly matched to the target rows. Please use a key that does not contain NULLs." "The operation encountered input rows that contained Nothing values in key columns (for example, the row " + self.example_row.to_display_text + ")."+suffix + +## Indicates that the query may not have downloaded all rows that were + available. + + The count of extra rows is not included, because computing it would add too + much additional cost. + + ! 
In-memory + + The warning may also be reported in the in-memory backend, when + `Table.read` is called with a user-specified limit and some rows are + dropped. This is done to ensure both APIs behave consistently with `read`. +type Not_All_Rows_Downloaded + ## PRIVATE + Warning (max_rows:Integer) + + ## PRIVATE + to_display_text : Text + to_display_text self = + "The query has returned more than the maximum of "+self.max_rows.to_text+" rows, so some rows have been dropped from the result. If you want to get the full result, change the row limit." diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Workbook.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Workbook.enso index c1a0392411..e905e59f1f 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Workbook.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Workbook.enso @@ -159,10 +159,11 @@ type Excel_Workbook Arguments: - query: sheet name, range name or address to read from the workbook. - - limit: the maximum number of rows to return. + - limit: the maximum number of rows to read. + If set to `Nothing`, all rows will be returned. @query (self-> Single_Choice display=Display.Always values=(self.tables.at "Name" . to_vector . 
map t-> Option t t.pretty)) read : Text -> Integer | Nothing -> Table - read self query limit=Nothing = + read self query (limit : Integer | Nothing = Nothing) = java_headers = Excel_Reader.make_java_headers self.headers java_table = Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator-> case query of diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso index f5d7c526f7..78f728af58 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso @@ -1,4 +1,5 @@ from Standard.Base import all hiding First, Last +from Standard.Base.Runtime import assert import project.Data.Aggregate_Column.Aggregate_Column import project.Data.Column.Column @@ -56,7 +57,7 @@ prepare_aggregate_columns naming_helper aggregates table error_on_missing_column any missing columns will be reported as errors. Therefore, we can assume that all the columns were present. keys_problem_builder.attach_problems_before Problem_Behavior.Report_Error <| - # assert resolved_keys.find .is_nothing . is_nothing . not then + assert (resolved_keys.contains Nothing . not) problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns valid_resolved_aggregate_columns = aggregates.map (resolve_aggregate table problem_builder) . 
filter x-> x.is_nothing.not diff --git a/distribution/lib/Standard/Visualization/0.0.0-dev/src/Table/Visualization.enso b/distribution/lib/Standard/Visualization/0.0.0-dev/src/Table/Visualization.enso index 9ca17ce456..6ae391a24d 100644 --- a/distribution/lib/Standard/Visualization/0.0.0-dev/src/Table/Visualization.enso +++ b/distribution/lib/Standard/Visualization/0.0.0-dev/src/Table/Visualization.enso @@ -39,7 +39,7 @@ prepare_visualization y max_rows=1000 = make_json_for_table dataframe [index] all_rows_count _ : Database_Column -> prepare_visualization x.to_table max_rows _ : Database_Table -> - dataframe = x.read max_rows + dataframe = x.read max_rows warn_if_more_rows=False all_rows_count = x.row_count make_json_for_table dataframe [] all_rows_count _ : Function -> diff --git a/test/Table_Tests/src/Common_Table_Operations/Core_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Core_Spec.enso index 706c6d0395..fcd5a5c4e4 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Core_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Core_Spec.enso @@ -262,5 +262,115 @@ spec setup = table.rows . at 0 . at -4 . should_fail_with Index_Out_Of_Bounds table.rows . at 0 . at "unknown" . should_fail_with No_Such_Column + + Test.group prefix+"Table.read" <| + t_big = table_builder [["X", (0.up_to 1500)]] + t_small = table_builder [["X", (0.up_to 10)]] + + has_default_row_limit = setup.is_database + + Test.specify "should have a row limit by default and warn about it" <| + t_big.row_count . should_equal 1500 + t_small.row_count . should_equal 10 + + t1 = t_big.read + case has_default_row_limit of + True -> + t1.row_count . should_equal 1000 + w1 = Problems.expect_only_warning Not_All_Rows_Downloaded t1 + w1.max_rows . should_equal 1000 + False -> + t1.row_count . should_equal 1500 + Problems.assume_no_problems t1 + + t2 = t_small.read + t2.row_count . 
should_equal 10 + Problems.assume_no_problems t2 + + Test.specify "should allow to set the row limit" <| + t1 = t_big.read max_rows=23 + t1.row_count . should_equal 23 + w1 = Problems.expect_only_warning Not_All_Rows_Downloaded t1 + w1.max_rows . should_equal 23 + w1.to_display_text . should_contain "some rows have been dropped" + + t2 = t_big.read max_rows=1500 + t2.row_count . should_equal 1500 + Problems.assume_no_problems t2 + + t3 = t_small.read max_rows=1 + t3.row_count . should_equal 1 + w3 = Problems.expect_only_warning Not_All_Rows_Downloaded t3 + w3.max_rows . should_equal 1 + + Test.specify "should allow to have no row limit" <| + t1 = t_big.read max_rows=Nothing + t1.row_count . should_equal 1500 + Problems.assume_no_problems t1 + + Test.specify "should allow to turn off the warning" <| + t1 = t_big.read warn_if_more_rows=False + t1.row_count . should_equal (if has_default_row_limit then 1000 else 1500) + Problems.assume_no_problems t1 + + t2 = t_big.read max_rows=123 warn_if_more_rows=False + t2.row_count . should_equal 123 + Problems.assume_no_problems t2 + + t3 = t_big.read max_rows=12300 warn_if_more_rows=False + t3.row_count . should_equal 1500 + Problems.assume_no_problems t3 + + Test.specify "should also work as Column.read" <| + c1 = t_big.at "X" + c1.length . should_equal 1500 + + r2 = c1.read + case has_default_row_limit of + True -> + r2.length . should_equal 1000 + w2 = Problems.expect_only_warning Not_All_Rows_Downloaded r2 + w2.max_rows . should_equal 1000 + False -> + r2.length . should_equal 1500 + Problems.assume_no_problems r2 + + # to_vector always downloads the whole column, even if it's large + c1.to_vector.length . should_equal 1500 + + r3 = c1.read max_rows=10 + r3.length . should_equal 10 + Problems.expect_only_warning Not_All_Rows_Downloaded r3 + + r4 = c1.read max_rows=Nothing + r4.length . should_equal 1500 + Problems.assume_no_problems r4 + + r5 = c1.read max_rows=3 warn_if_more_rows=False + r5.length . 
should_equal 3 + Problems.assume_no_problems r5 + + if setup.is_database then Test.specify "should allow similar API on Connection.read" <| + connection = setup.connection + connection.query t_big.name . row_count . should_equal 1500 + + t1 = connection.read t_big.name + t1.row_count . should_equal 1000 + w1 = Problems.expect_only_warning Not_All_Rows_Downloaded t1 + w1.max_rows . should_equal 1000 + + t2 = connection.read t_big.name limit=42 + t2.row_count . should_equal 42 + w2 = Problems.expect_only_warning Not_All_Rows_Downloaded t2 + w2.max_rows . should_equal 42 + + t3 = connection.read t_big.name limit=Nothing + t3.row_count . should_equal 1500 + Problems.assume_no_problems t3 + + t4 = connection.read t_big.name warn_if_more_rows=False + t4.row_count . should_equal 1000 + Problems.assume_no_problems t4 + # A set of potentially problematic column names. weird_names = ['whitespace and \t others', 'foo "the bar" baz', "a 'X' c", "emoji? 😎 yep", "πŸ˜ŠπŸ’‘πŸŽ‰πŸŒ»", "Polskie znaki - np. Δ…Δ™Δ‡Ε›Γ³", 'acce\u0301nt a\u0301cutΓ©', 'SELECT \'A\',"B" FROM t;--', '"', "'", '”', 'one " quote', 'double "" quote', 'even \nnewlines could go here', 'and\r\nthat\rtoo', 'foo ” bar', 'foo ”the” bar', 'x”; --'] diff --git a/test/Table_Tests/src/Database/Common/Common_Spec.enso b/test/Table_Tests/src/Database/Common/Common_Spec.enso index 27d5beadf5..4d8a23f166 100644 --- a/test/Table_Tests/src/Database/Common/Common_Spec.enso +++ b/test/Table_Tests/src/Database/Common/Common_Spec.enso @@ -52,7 +52,7 @@ run_tests prefix connection upload = t2 = upload "T2" (Table.new [["d", [100, 200]]]) t1.set (t2.at "d") . 
should_fail_with Integrity_Error - Test.group prefix+"Table.query" <| + Test.group prefix+"Connection.query" <| name = t1.name Test.specify "should allow to access a Table by name" <| t2 = connection.query (SQL_Query.Table_Name name) diff --git a/test/Tests/src/Data/Vector_Spec.enso b/test/Tests/src/Data/Vector_Spec.enso index 23cadd1e7c..66bf5ff55d 100644 --- a/test/Tests/src/Data/Vector_Spec.enso +++ b/test/Tests/src/Data/Vector_Spec.enso @@ -458,8 +458,6 @@ type_spec name alter = Test.group name <| vec.take . should_equal [1] vec.drop . should_equal [2, 3, 4, 5, 6] - IO.println 'AAA' - IO.println (Meta.get_simple_type_name vec) vec.take (2.up_to 4) . should_equal [3, 4] vec.take (0.up_to 0) . should_equal [] vec.take (100.up_to 100) . should_fail_with Index_Out_Of_Bounds