From 2ce156738462ae3798161d2cef8adeb3ef728471 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= <radoslaw.wasko@enso.org>
Date: Mon, 6 Nov 2023 17:41:47 +0100
Subject: [PATCH] Limit `max_rows` that are downloaded in `Table.read` by
 default, and warn if more rows are available (#8159)

- Sets the default limit for `Table.read` in Database to be max 1000 rows.
- The in-memory backend keeps the same API, but its limit still defaults to `Nothing`.
- Adds a `Not_All_Rows_Downloaded` warning if there are more rows than the limit.
- Enables a few unrelated asserts.
---
 .../0.0.0-dev/src/Data/Time/Date_Range.enso   |   5 +-
 .../0.0.0-dev/src/Connection/Connection.enso  |  14 ++-
 .../Database/0.0.0-dev/src/Data/Column.enso   |  15 +--
 .../Database/0.0.0-dev/src/Data/Table.enso    |  43 ++++---
 .../Internal/Common/Lookup_Query_Helper.enso  |   4 +-
 .../Postgres/Postgres_Connection.enso         |  13 ++-
 .../Internal/SQLite/SQLite_Connection.enso    |  13 ++-
 .../0.0.0-dev/src/Internal/Upload_Table.enso  |   9 +-
 .../Table/0.0.0-dev/src/Data/Column.enso      |  13 ++-
 .../Table/0.0.0-dev/src/Data/Table.enso       |  33 +++---
 .../Standard/Table/0.0.0-dev/src/Errors.enso  |  20 ++++
 .../0.0.0-dev/src/Excel/Excel_Workbook.enso   |   5 +-
 .../src/Internal/Aggregate_Column_Helper.enso |   3 +-
 .../0.0.0-dev/src/Table/Visualization.enso    |   2 +-
 .../Common_Table_Operations/Core_Spec.enso    | 110 ++++++++++++++++++
 .../src/Database/Common/Common_Spec.enso      |   2 +-
 test/Tests/src/Data/Vector_Spec.enso          |   2 -
 17 files changed, 235 insertions(+), 71 deletions(-)

diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Date_Range.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Date_Range.enso
index 1e95212068..d4310ea0da 100644
--- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Date_Range.enso
+++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Date_Range.enso
@@ -17,6 +17,7 @@ import project.Nothing.Nothing
 from project.Data.Boolean import Boolean, False, True
 from project.Data.Filter_Condition import unify_condition_or_predicate, unify_condition_predicate_or_element
 from project.Data.Range.Extensions import all
+from project.Runtime import assert
 
 polyglot java import org.enso.base.Time_Utils
 
@@ -519,7 +520,7 @@ compute_length_step_days start end step increasing =
     diff = case increasing of
         True  -> Time_Utils.days_between start end
         False -> Time_Utils.days_between end start
-    # assert (diff >= 0)
+    assert (diff >= 0)
     steps = diff . div step
     exact_fit = diff % step == 0
     if exact_fit then steps else steps+1
@@ -530,7 +531,7 @@ compute_length_step_months start end step increasing =
     diff = case increasing of
         True  -> Time_Utils.months_between start end
         False -> Time_Utils.months_between end start
-    # assert (diff >= 0)
+    assert (diff >= 0)
     steps = diff . div step
     exact_fit = case increasing of
         True  -> start + Period.new months=steps*step == end
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso
index 1240cfa63f..79b66c9114 100644
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso
@@ -251,8 +251,12 @@ type Connection
 
        Arguments:
        - query: name of the table or sql statement to query.
-         If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query.
-       - limit: the maximum number of rows to return.
+         If supplied as `Text`, the name is checked against the `tables` list to
+         determine if it is a table or a query.
+       - limit: the maximum number of rows to read.
+         If set to `Nothing`, all rows will be returned.
+       - warn_if_more_rows: if set to `True`, a warning is attached to the
+         result if the number of rows returned by the query exceeds `limit`.
 
        ? Side Effects
 
@@ -262,9 +266,9 @@ type Connection
          `execute_update` for DML queries, or if they are supposed to return
          results, the `read` should be wrapped in an execution context check.
     @query make_table_name_selector
-    read : Text | SQL_Query -> Integer | Nothing -> Materialized_Table ! Table_Not_Found
-    read self query limit=Nothing =
-        self.query query . read max_rows=limit
+    read : Text | SQL_Query -> Integer | Nothing -> Boolean -> Materialized_Table ! Table_Not_Found
+    read self query (limit : Integer | Nothing = 1000) (warn_if_more_rows : Boolean = True) =
+        self.query query . read max_rows=limit warn_if_more_rows=warn_if_more_rows
 
     ## PRIVATE
        Creates a new empty table in the database and returns a query referencing
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso
index 6a6aa5d4ad..b1c88a4c40 100644
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso
@@ -95,17 +95,18 @@ type Column
        Returns a materialized column containing rows of this column.
 
        Arguments:
-       - max_rows: specifies a maximum amount of rows to fetch; if not set, all
-         available rows are fetched.
-    read : (Nothing | Integer) -> Materialized_Column
-    read self max_rows=Nothing =
-        self.to_table.read max_rows . at self.name
+       - max_rows: specifies the maximum number of rows to read.
+         If `Nothing`, all available rows are returned.
+       - warn_if_more_rows: if set to `True`, a warning is attached to the
+         result if the number of rows returned by the query exceeds `max_rows`.
+    read : (Nothing | Integer) -> Boolean -> Materialized_Column
+    read self (max_rows : Integer | Nothing = 1000) (warn_if_more_rows:Boolean = True) =
+        self.to_table.read max_rows warn_if_more_rows . at 0
 
     ## GROUP Standard.Base.Conversions
        Returns a vector containing all the elements in this column.
     to_vector : Vector Any
-    to_vector self =
-        self.to_table.read . at 0 . to_vector
+    to_vector self = self.read max_rows=Nothing . to_vector
 
     ## GROUP Standard.Base.Metadata
        Returns the `Value_Type` associated with that column.
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso
index a0bc8cd3f0..aad233acec 100644
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso
@@ -10,6 +10,7 @@ import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
 import Standard.Base.Errors.Illegal_State.Illegal_State
 import Standard.Base.Errors.Unimplemented.Unimplemented
 from Standard.Base.Metadata import make_single_choice
+from Standard.Base.Runtime import assert
 from Standard.Base.Widget_Helpers import make_delimiter_selector
 
 import Standard.Table.Data.Calculations.Column_Operation.Column_Operation
@@ -90,7 +91,7 @@ type Table
          - format_terminal: whether ANSI-terminal formatting should be used
     display : Integer -> Boolean -> Text
     display self show_rows=10 format_terminal=False =
-        df = self.read max_rows=show_rows
+        df = self.read max_rows=show_rows warn_if_more_rows=False
         all_rows_count = self.row_count
         display_dataframe df indices_count=0 all_rows_count format_terminal
 
@@ -965,24 +966,25 @@ type Table
        In the database backend, it first materializes the table to in-memory.
 
        Arguments:
-       - max_rows: The maximum amount of rows to return. It is mainly meant for
-         the Database backend, to limit how many rows are downloaded. In the
-         in-memory backend it is only kept for API compatibility.
-    rows : Integer -> Vector Row
-    rows self max_rows=1000 =
-        self.read max_rows=max_rows . rows
+       - max_rows: specifies the maximum number of rows to read.
+         If `Nothing`, all available rows are returned.
+       - warn_if_more_rows: if set to `True`, a warning is attached to the
+         result if the number of rows returned by the query exceeds `max_rows`.
+    rows : Integer | Nothing -> Boolean -> Vector Row
+    rows self (max_rows : Integer | Nothing = 1000) (warn_if_more_rows : Boolean = True) =
+        self.read max_rows=max_rows warn_if_more_rows=warn_if_more_rows . rows
 
     ## GROUP Standard.Base.Selections
        Returns the first row of the table.
     first_row : Row ! Index_Out_Of_Bounds
     first_row self =
-        self.read max_rows=1 . rows . first
+        self.read max_rows=1 warn_if_more_rows=False . rows . first
 
     ## GROUP Standard.Base.Selections
        Returns the second row of the table.
     second_row : Row ! Index_Out_Of_Bounds
     second_row self =
-        self.read max_rows=2 . rows . second
+        self.read max_rows=2 warn_if_more_rows=False . rows . second
 
     ## GROUP Standard.Base.Selections
        Returns the last row of the table.
@@ -2238,11 +2240,14 @@ type Table
        Returns a materialized dataframe containing rows of this table.
 
        Arguments:
-       - max_rows: specifies a maximum amount of rows to fetch; if not set, all
-         available rows are fetched.
-    read : (Integer | Nothing) -> Materialized_Table
-    read self max_rows=Nothing =
-        preprocessed = self.limit max_rows
+       - max_rows: specifies the maximum number of rows to read.
+         If `Nothing`, all available rows are returned.
+       - warn_if_more_rows: if set to `True`, a warning is attached to the
+         result if the number of rows returned by the query exceeds `max_rows`.
+    read : (Integer | Nothing) -> Boolean -> Materialized_Table
+    read self (max_rows : Integer | Nothing = 1000) (warn_if_more_rows:Boolean = True) =
+        preprocessed = if max_rows.is_nothing then self else
+            if warn_if_more_rows then self.limit max_rows+1 else self.limit max_rows
         case preprocessed.internal_columns.is_empty of
             True ->
                 Error.throw (Illegal_Argument.Error "Cannot create a table with no columns.")
@@ -2252,9 +2257,9 @@ type Table
                 materialized_table = self.connection.read_statement sql column_type_suggestions . catch SQL_Error sql_error->
                     Error.throw (self.connection.dialect.get_error_mapper.transform_custom_errors sql_error)
 
+                warnings_builder = Vector.new_builder
                 expected_types = self.columns.map .value_type
                 actual_types = materialized_table.columns.map .value_type
-                warnings_builder = Vector.new_builder
                 expected_types.zip actual_types expected_type-> actual_type->
                     if expected_type == actual_type then Nothing else
                         expected_type_kind = Meta.meta expected_type . constructor
@@ -2265,14 +2270,18 @@ type Table
                            However, bigger changes, like a Binary type column getting coerced to Mixed - _will_ still be reported.
                         if expected_type_kind == actual_type_kind then Nothing else
                             warnings_builder.append (Inexact_Type_Coercion.Warning expected_type actual_type)
-                Problem_Behavior.Report_Warning.attach_problems_before warnings_builder.to_vector materialized_table
+                result = if max_rows.is_nothing || materialized_table.row_count <= max_rows then materialized_table else
+                    assert warn_if_more_rows "We may get more rows than we asked for _only_ if warn_if_more_rows=True"
+                    warnings_builder.append (Not_All_Rows_Downloaded.Warning max_rows)
+                    materialized_table.take max_rows
+                Problem_Behavior.Report_Warning.attach_problems_before warnings_builder.to_vector result
 
     ## PRIVATE
        Creates a query corresponding to this table.
     to_select_query : Query
     to_select_query self =
         cols = self.internal_columns.map (c -> [c.name, c.expression])
-        # assert cols.not_empty
+        assert cols.not_empty
         Query.Select cols self.context
 
     ## Returns an SQL statement that will be used for materializing this table.
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Lookup_Query_Helper.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Lookup_Query_Helper.enso
index b61d8e8da7..4aba2a7690 100644
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Lookup_Query_Helper.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Lookup_Query_Helper.enso
@@ -72,7 +72,7 @@ check_initial_invariants base_table lookup_table lookup_columns allow_unmatched_
     check_for_null_keys lookup_table key_column_names <|
         if allow_unmatched_rows then continuation else
             unmatched_rows = base_table.join lookup_table on=key_column_names join_kind=Join_Kind.Left_Exclusive . select_columns key_column_names
-            unmatched_example = unmatched_rows.read max_rows=1
+            unmatched_example = unmatched_rows.read max_rows=1 warn_if_more_rows=False
             if unmatched_example.row_count == 0 then continuation else
                 first_row = unmatched_example.rows.first
                 Error.throw (Unmatched_Rows_In_Lookup.Error first_row.to_vector)
@@ -193,7 +193,7 @@ precheck_for_duplicate_matches lookup_columns subquery_setup connection new_ctx
         Lookup_Column.Key_Column _ _ -> [subquery_setup.get_self_column ix]
         _ -> []
     table_for_duplicate_check = Table.Value subquery_setup.new_table_name connection [subquery_setup.lookup_counter]+key_columns_for_duplicate_check new_ctx
-    duplicate_lookup_matches = table_for_duplicate_check.filter 0 (Filter_Condition.Greater than=1) . read max_rows=1
+    duplicate_lookup_matches = table_for_duplicate_check.filter 0 (Filter_Condition.Greater than=1) . read max_rows=1 warn_if_more_rows=False
     case duplicate_lookup_matches.row_count > 0 of
         True ->
             first_example_row = duplicate_lookup_matches.read.rows.first.to_vector
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Connection.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Connection.enso
index 35dd7b7c70..bf8716b585 100644
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Connection.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Connection.enso
@@ -148,8 +148,12 @@ type Postgres_Connection
 
        Arguments:
        - query: name of the table or sql statement to query.
-         If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query.
-       - limit: the maximum number of rows to return.
+         If supplied as `Text`, the name is checked against the `tables` list to
+         determine if it is a table or a query.
+       - limit: the maximum number of rows to read.
+         If set to `Nothing`, all rows will be returned.
+       - warn_if_more_rows: if set to `True`, a warning is attached to the
+         result if the number of rows returned by the query exceeds `limit`.
 
        ? Side Effects
 
@@ -159,8 +163,9 @@ type Postgres_Connection
          `execute_update` for DML queries, or if they are supposed to return
          results, the `read` should be wrapped in an execution context check.
     @query make_table_name_selector
-    read : Text | SQL_Query -> Integer | Nothing -> Materialized_Table ! Table_Not_Found
-    read self query limit=Nothing = self.connection.read query limit
+    read : Text | SQL_Query -> Integer | Nothing -> Boolean -> Materialized_Table ! Table_Not_Found
+    read self query (limit : Integer | Nothing = 1000) (warn_if_more_rows : Boolean = True) =
+        self.connection.read query limit warn_if_more_rows
 
     ## GROUP Standard.Base.Output
        Creates a new empty table in the database and returns a query referencing
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Connection.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Connection.enso
index 18d91c7dac..dbef85ef7a 100644
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Connection.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Connection.enso
@@ -141,8 +141,12 @@ type SQLite_Connection
 
        Arguments:
        - query: name of the table or sql statement to query.
-         If supplied as `Text`, the name is checked against the `tables` list to determine if it is a table or a query.
-       - limit: the maximum number of rows to return.
+         If supplied as `Text`, the name is checked against the `tables` list to
+         determine if it is a table or a query.
+       - limit: the maximum number of rows to read.
+         If set to `Nothing`, all rows will be returned.
+       - warn_if_more_rows: if set to `True`, a warning is attached to the
+         result if the number of rows returned by the query exceeds `limit`.
 
        ? Side Effects
 
@@ -152,8 +156,9 @@ type SQLite_Connection
          `execute_update` for DML queries, or if they are supposed to return
          results, the `read` should be wrapped in an execution context check.
     @query make_table_name_selector
-    read : Text | SQL_Query -> Integer | Nothing -> Materialized_Table ! Table_Not_Found
-    read self query limit=Nothing = self.connection.read query limit
+    read : Text | SQL_Query -> Integer | Nothing -> Boolean -> Materialized_Table ! Table_Not_Found
+    read self query (limit : Integer | Nothing = 1000) (warn_if_more_rows : Boolean = True) =
+        self.connection.read query limit warn_if_more_rows
 
     ## GROUP Standard.Base.Output
        Creates a new empty table in the database and returns a query referencing
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload_Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload_Table.enso
index b044a6002a..8e52b7c8fe 100644
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload_Table.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload_Table.enso
@@ -1,4 +1,5 @@
 from Standard.Base import all
+from Standard.Base.Runtime import assert
 import Standard.Base.Errors.Common.Dry_Run_Operation
 import Standard.Base.Errors.Common.Forbidden_Operation
 import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
@@ -228,7 +229,7 @@ type Non_Unique_Key_Recipe
 raise_duplicated_primary_key_error source_table primary_key original_panic =
     agg = source_table.aggregate [Aggregate_Column.Count]+(primary_key.map Aggregate_Column.Group_By)
     filtered = agg.filter column=0 (Filter_Condition.Greater than=1)
-    materialized = filtered.read max_rows=1
+    materialized = filtered.read max_rows=1 warn_if_more_rows=False
     case materialized.row_count == 0 of
         ## If we couldn't find a duplicated key, we give up the translation and
            rethrow the original panic containing the SQL error. This could
@@ -439,7 +440,7 @@ type Append_Helper
 
     ## PRIVATE
     check_rows_unmatched_in_target self ~continuation =
-        # assert key_columns.not_empty
+        assert self.key_columns.not_empty
         unmatched_rows = self.new_source_rows
         count = unmatched_rows.row_count
         if count != 0 then Error.throw (Unmatched_Rows.Error count) else continuation
@@ -619,7 +620,7 @@ check_multiple_rows_match left_table right_table key_columns ~continuation =
     joined = left_table.join right_table on=key_columns join_kind=Join_Kind.Inner
     counted = joined.aggregate [Aggregate_Column.Count]+(key_columns.map (Aggregate_Column.Group_By _))
     duplicates = counted.filter 0 (Filter_Condition.Greater than=1)
-    example = duplicates.read max_rows=1
+    example = duplicates.read max_rows=1 warn_if_more_rows=False
     case example.row_count == 0 of
         True -> continuation
         False ->
@@ -633,7 +634,7 @@ check_for_null_keys table key_columns ~continuation =
     keys = table.select_columns key_columns
     is_any_key_blank = keys.columns.map (_.is_nothing) . reduce (||)
     null_keys = table.filter is_any_key_blank Filter_Condition.Is_True
-    example = null_keys.read max_rows=1
+    example = null_keys.read max_rows=1 warn_if_more_rows=False
     case example.row_count == 0 of
         True -> continuation
         False ->
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso
index ce37abd972..a7658a780d 100644
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso
@@ -1984,11 +1984,14 @@ type Column
     ## Returns a column containing rows of this column.
 
        Arguments:
-       - max_rows: specifies a maximum amount of rows to fetch; if not set, all
-         available rows are fetched.
-    read : (Nothing | Integer) -> Column
-    read self max_rows=Nothing =
-        if max_rows.is_nothing then self else self.slice 0 max_rows
+       - max_rows: specifies the maximum number of rows to read.
+         If `Nothing`, all available rows are returned.
+       - warn_if_more_rows: if set to `True`, a warning is attached to the
+         result if the column contains more than `max_rows` rows.
+    read : (Nothing | Integer) -> Boolean -> Column
+    read self (max_rows : Integer | Nothing = Nothing) (warn_if_more_rows:Boolean = True) =
+        if max_rows.is_nothing then self else
+            self.to_table.read max_rows warn_if_more_rows . at 0
 
     ## GROUP Standard.Base.Conversions
        Returns a vector containing all the elements in this column.
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso
index 1e467fb866..695bc74e56 100644
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso
@@ -1676,15 +1676,13 @@ type Table
        In the database backend, it first materializes the table to in-memory.
 
        Arguments:
-       - max_rows: The maximum amount of rows to return. It is mainly meant for
-         the Database backend, to limit how many rows are downloaded. In the
-         in-memory backend it is only kept for API compatibility.
-    rows : Integer -> Vector Row
-    rows self max_rows=Nothing =
-        table = case max_rows of
-            Nothing -> self
-            _ : Integer -> self.slice 0 max_rows
-        proxy = Rows_View.Value table
+       - max_rows: specifies the maximum number of rows to read.
+         If `Nothing`, all available rows are returned.
+       - warn_if_more_rows: if set to `True`, a warning is attached to the
+         result if the table contains more than `max_rows` rows.
+    rows : Integer | Nothing -> Boolean -> Vector Row
+    rows self (max_rows : Integer | Nothing = Nothing) (warn_if_more_rows : Boolean = True) =
+        proxy = Rows_View.Value (self.read max_rows warn_if_more_rows)
         Vector.from_polyglot_array (Array_Proxy.from_proxy_object proxy)
 
     ## GROUP Standard.Base.Selections
@@ -2144,12 +2142,19 @@ type Table
        table is now in-memory, regardless of its origin.
 
        Arguments:
-       - max_rows: specifies a maximum amount of rows to fetch; if not set, all
-         available rows are fetched.
-    read : (Integer | Nothing) -> Table
-    read self max_rows=Nothing = case max_rows of
+       - max_rows: specifies the maximum number of rows to read.
+         If `Nothing`, all available rows are returned.
+       - warn_if_more_rows: if set to `True`, a warning is attached to the
+         result if the table contains more than `max_rows` rows.
+    read : (Integer | Nothing) -> Boolean -> Table
+    read self (max_rows : Integer | Nothing = Nothing) (warn_if_more_rows:Boolean = True) = case max_rows of
         Nothing -> self
-        _ : Integer -> self.take (First max_rows)
+        _ : Integer ->
+            truncated = self.take (First max_rows)
+            needs_warning = warn_if_more_rows && self.row_count > max_rows
+            if needs_warning.not then truncated else
+                Problem_Behavior.Report_Warning.attach_problem_after truncated <|
+                    Not_All_Rows_Downloaded.Warning max_rows
 
     ## GROUP Standard.Base.Metadata
        Returns a Table describing this table's contents.
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso
index 1b1281ee2d..5cb645680b 100644
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso
@@ -777,3 +777,23 @@ type Null_Values_In_Key_Columns
     to_display_text self =
         suffix = if self.add_sql_suffix.not then "" else " The operation has been rolled back. Due to how NULL equality works in SQL, these rows would not be correctly matched to the target rows. Please use a key that does not contain NULLs."
         "The operation encountered input rows that contained Nothing values in key columns (for example, the row " + self.example_row.to_display_text + ")."+suffix
+
+## Indicates that the query may not have downloaded all rows that were
+   available.
+
+   The count of extra rows is not included, because computing it would add too
+   much additional cost.
+
+   ! In-memory
+
+     The warning may also be reported in the in-memory backend, when
+     `Table.read` is called with a user-specified limit and some rows are
+     dropped. This keeps `read` behaving consistently across both backends.
+type Not_All_Rows_Downloaded
+    ## PRIVATE
+    Warning (max_rows:Integer)
+
+    ## PRIVATE
+    to_display_text : Text
+    to_display_text self =
+        "The query has returned more than the maximum of "+self.max_rows.to_text+" rows, so some rows have been dropped from the result. If you want to get the full result, change the row limit."
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Workbook.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Workbook.enso
index c1a0392411..e905e59f1f 100644
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Workbook.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Workbook.enso
@@ -159,10 +159,11 @@ type Excel_Workbook
 
        Arguments:
        - query: sheet name, range name or address to read from the workbook.
-       - limit: the maximum number of rows to return.
+       - limit: the maximum number of rows to read.
+         If set to `Nothing`, all rows will be returned.
     @query (self-> Single_Choice display=Display.Always values=(self.tables.at "Name" . to_vector . map t-> Option t t.pretty))
     read : Text -> Integer | Nothing -> Table
-    read self query limit=Nothing =
+    read self query (limit : Integer | Nothing = Nothing) =
         java_headers = Excel_Reader.make_java_headers self.headers
         java_table = Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator->
             case query of
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso
index f5d7c526f7..78f728af58 100644
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso
@@ -1,4 +1,5 @@
 from Standard.Base import all hiding First, Last
+from Standard.Base.Runtime import assert
 
 import project.Data.Aggregate_Column.Aggregate_Column
 import project.Data.Column.Column
@@ -56,7 +57,7 @@ prepare_aggregate_columns naming_helper aggregates table error_on_missing_column
        any missing columns will be reported as errors. Therefore, we can assume
        that all the columns were present.
     keys_problem_builder.attach_problems_before Problem_Behavior.Report_Error <|
-        # assert resolved_keys.find .is_nothing . is_nothing . not then
+        assert (resolved_keys.contains Nothing . not)
         problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns
         valid_resolved_aggregate_columns = aggregates.map (resolve_aggregate table problem_builder) . filter x-> x.is_nothing.not
 
diff --git a/distribution/lib/Standard/Visualization/0.0.0-dev/src/Table/Visualization.enso b/distribution/lib/Standard/Visualization/0.0.0-dev/src/Table/Visualization.enso
index 9ca17ce456..6ae391a24d 100644
--- a/distribution/lib/Standard/Visualization/0.0.0-dev/src/Table/Visualization.enso
+++ b/distribution/lib/Standard/Visualization/0.0.0-dev/src/Table/Visualization.enso
@@ -39,7 +39,7 @@ prepare_visualization y max_rows=1000 =
             make_json_for_table dataframe [index] all_rows_count
         _ : Database_Column -> prepare_visualization x.to_table max_rows
         _ : Database_Table ->
-            dataframe = x.read max_rows
+            dataframe = x.read max_rows warn_if_more_rows=False
             all_rows_count = x.row_count
             make_json_for_table dataframe [] all_rows_count
         _ : Function ->
diff --git a/test/Table_Tests/src/Common_Table_Operations/Core_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Core_Spec.enso
index 706c6d0395..fcd5a5c4e4 100644
--- a/test/Table_Tests/src/Common_Table_Operations/Core_Spec.enso
+++ b/test/Table_Tests/src/Common_Table_Operations/Core_Spec.enso
@@ -262,5 +262,115 @@ spec setup =
             table.rows . at 0 . at -4 . should_fail_with Index_Out_Of_Bounds
             table.rows . at 0 . at "unknown" . should_fail_with No_Such_Column
 
+
+    Test.group prefix+"Table.read" <|
+        t_big = table_builder [["X", (0.up_to 1500)]]
+        t_small = table_builder [["X", (0.up_to 10)]]
+
+        has_default_row_limit = setup.is_database
+
+        Test.specify "should have a row limit by default and warn about it" <|
+            t_big.row_count . should_equal 1500
+            t_small.row_count . should_equal 10
+
+            t1 = t_big.read
+            case has_default_row_limit of
+                True ->
+                    t1.row_count . should_equal 1000
+                    w1 = Problems.expect_only_warning Not_All_Rows_Downloaded t1
+                    w1.max_rows . should_equal 1000
+                False ->
+                    t1.row_count . should_equal 1500
+                    Problems.assume_no_problems t1
+
+            t2 = t_small.read
+            t2.row_count . should_equal 10
+            Problems.assume_no_problems t2
+
+        Test.specify "should allow to set the row limit" <|
+            t1 = t_big.read max_rows=23
+            t1.row_count . should_equal 23
+            w1 = Problems.expect_only_warning Not_All_Rows_Downloaded t1
+            w1.max_rows . should_equal 23
+            w1.to_display_text . should_contain "some rows have been dropped"
+
+            t2 = t_big.read max_rows=1500
+            t2.row_count . should_equal 1500
+            Problems.assume_no_problems t2
+
+            t3 = t_small.read max_rows=1
+            t3.row_count . should_equal 1
+            w3 = Problems.expect_only_warning Not_All_Rows_Downloaded t3
+            w3.max_rows . should_equal 1
+
+        Test.specify "should allow to have no row limit" <|
+            t1 = t_big.read max_rows=Nothing
+            t1.row_count . should_equal 1500
+            Problems.assume_no_problems t1
+
+        Test.specify "should allow to turn off the warning" <|
+            t1 = t_big.read warn_if_more_rows=False
+            t1.row_count . should_equal (if has_default_row_limit then 1000 else 1500)
+            Problems.assume_no_problems t1
+
+            t2 = t_big.read max_rows=123 warn_if_more_rows=False
+            t2.row_count . should_equal 123
+            Problems.assume_no_problems t2
+
+            t3 = t_big.read max_rows=12300 warn_if_more_rows=False
+            t3.row_count . should_equal 1500
+            Problems.assume_no_problems t3
+
+        Test.specify "should also work as Column.read" <|
+            c1 = t_big.at "X"
+            c1.length . should_equal 1500
+
+            r2 = c1.read
+            case has_default_row_limit of
+                True ->
+                    r2.length . should_equal 1000
+                    w2 = Problems.expect_only_warning Not_All_Rows_Downloaded r2
+                    w2.max_rows . should_equal 1000
+                False ->
+                    r2.length . should_equal 1500
+                    Problems.assume_no_problems r2
+
+            # to_vector always downloads the whole column, even if it's large
+            c1.to_vector.length . should_equal 1500
+
+            r3 = c1.read max_rows=10
+            r3.length . should_equal 10
+            Problems.expect_only_warning Not_All_Rows_Downloaded r3
+
+            r4 = c1.read max_rows=Nothing
+            r4.length . should_equal 1500
+            Problems.assume_no_problems r4
+
+            r5 = c1.read max_rows=3 warn_if_more_rows=False
+            r5.length . should_equal 3
+            Problems.assume_no_problems r5
+
+        if setup.is_database then Test.specify "should allow similar API on Connection.read" <|
+            connection = setup.connection
+            connection.query t_big.name . row_count . should_equal 1500
+
+            t1 = connection.read t_big.name
+            t1.row_count . should_equal 1000
+            w1 = Problems.expect_only_warning Not_All_Rows_Downloaded t1
+            w1.max_rows . should_equal 1000
+
+            t2 = connection.read t_big.name limit=42
+            t2.row_count . should_equal 42
+            w2 = Problems.expect_only_warning Not_All_Rows_Downloaded t2
+            w2.max_rows . should_equal 42
+
+            t3 = connection.read t_big.name limit=Nothing
+            t3.row_count . should_equal 1500
+            Problems.assume_no_problems t3
+
+            t4 = connection.read t_big.name warn_if_more_rows=False
+            t4.row_count . should_equal 1000
+            Problems.assume_no_problems t4
+
 # A set of potentially problematic column names.
 weird_names = ['whitespace and \t others', 'foo "the bar" baz', "a 'X' c", "emoji? 😎 yep", "😊💡🎉🌻", "Polskie znaki - np. ąęćśó", 'acce\u0301nt a\u0301cuté', 'SELECT \'A\',"B" FROM t;--', '"', "'", '”', 'one " quote', 'double "" quote', 'even \nnewlines could go here', 'and\r\nthat\rtoo', 'foo ” bar', 'foo ”the” bar', 'x”; --']
diff --git a/test/Table_Tests/src/Database/Common/Common_Spec.enso b/test/Table_Tests/src/Database/Common/Common_Spec.enso
index 27d5beadf5..4d8a23f166 100644
--- a/test/Table_Tests/src/Database/Common/Common_Spec.enso
+++ b/test/Table_Tests/src/Database/Common/Common_Spec.enso
@@ -52,7 +52,7 @@ run_tests prefix connection upload =
             t2 = upload "T2" (Table.new [["d", [100, 200]]])
             t1.set (t2.at "d") . should_fail_with Integrity_Error
 
-    Test.group prefix+"Table.query" <|
+    Test.group prefix+"Connection.query" <|
         name = t1.name
         Test.specify "should allow to access a Table by name" <|
             t2 = connection.query (SQL_Query.Table_Name name)
diff --git a/test/Tests/src/Data/Vector_Spec.enso b/test/Tests/src/Data/Vector_Spec.enso
index 23cadd1e7c..66bf5ff55d 100644
--- a/test/Tests/src/Data/Vector_Spec.enso
+++ b/test/Tests/src/Data/Vector_Spec.enso
@@ -458,8 +458,6 @@ type_spec name alter = Test.group name <|
         vec.take . should_equal [1]
         vec.drop . should_equal [2, 3, 4, 5, 6]
 
-        IO.println 'AAA'
-        IO.println (Meta.get_simple_type_name vec)
         vec.take (2.up_to 4) . should_equal [3, 4]
         vec.take (0.up_to 0) . should_equal []
         vec.take (100.up_to 100) . should_fail_with Index_Out_Of_Bounds