Moving away from Integer | Nothing to Rows_To_Read for limiting number of rows. (#9925)

- Added a new `Rows_To_Read` type with conversions from `Nothing` and integers.
- Updated `read` on `Table`, `Column`, `DB_Table` and `DB_Column`.
- Updated `Delimited_Format.Delimited` to use `Rows_To_Read` for `row_limit`.
- Updated `Excel_Format.Sheet` and `Excel_Format.Range` to use `Rows_To_Read` for `row_limit`.
- Updated `Excel_Workbook.read` to use `Rows_To_Read`.
- Updated `Connection.read` (in all connection types) to use `Rows_To_Read`.

![image](https://github.com/enso-org/enso/assets/4699705/553c027f-f4c3-4855-9f51-2c4bcaec48a0)

![image](https://github.com/enso-org/enso/assets/4699705/a06c3912-77e0-4c10-abb8-73aed667458d)
This commit is contained in:
James Dunkerley 2024-05-14 17:31:26 +01:00 committed by GitHub
parent 557d585216
commit b2aeb9fc84
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
20 changed files with 205 additions and 156 deletions

View File

@ -10,6 +10,7 @@ from Standard.Base.Metadata.Choice import Option
from Standard.Base.Metadata.Widget import Single_Choice, Vector_Editor
import Standard.Table.Internal.Column_Naming_Helper.Column_Naming_Helper
import Standard.Table.Rows_To_Read.Rows_To_Read
from Standard.Table import Table, Value_Type
import project.Column_Description.Column_Description
@ -251,9 +252,6 @@ type Connection
If supplied as `Text`, the name is checked against the `tables` list to
determine if it is a table or a query.
- limit: the maximum number of rows to read.
If set to `Nothing`, all rows will be returned.
- warn_if_more_rows: if set to `True`, a warning is attached to the
result if the number of rows returned by the query exceeds `limit`.
? Side Effects
@ -263,9 +261,10 @@ type Connection
`execute_update` for DML queries, or if they are supposed to return
results, the `read` should be wrapped in an execution context check.
@query make_table_name_selector
read : Text | SQL_Query -> Integer | Nothing -> Boolean -> Table ! Table_Not_Found
read self query (limit : Integer | Nothing = 1000) (warn_if_more_rows : Boolean = True) =
self.query query . read max_rows=limit warn_if_more_rows=warn_if_more_rows
@limit Rows_To_Read.default_widget
read : Text | SQL_Query -> Rows_To_Read -> Table ! Table_Not_Found
read self query (limit : Rows_To_Read = ..First_With_Warning 1000) =
self.query query . read max_rows=limit
## PRIVATE
Creates a new empty table in the database and returns a query referencing

View File

@ -11,6 +11,7 @@ import Standard.Table.Internal.Java_Problems
import Standard.Table.Internal.Problem_Builder.Problem_Builder
import Standard.Table.Internal.Value_Type_Helpers
import Standard.Table.Internal.Widget_Helpers
import Standard.Table.Rows_To_Read.Rows_To_Read
from Standard.Table import Auto, Column, Data_Formatter, Previous_Value, Sort_Column, Table, Value_Type
from Standard.Table.Column import default_date_period
from Standard.Table.Errors import Conversion_Failure, Floating_Point_Equality, Inexact_Type_Coercion, Invalid_Value_Type
@ -105,18 +106,16 @@ type DB_Column
Arguments:
- max_rows: specifies the maximum number of rows to read.
If `Nothing`, all available rows are returned.
- warn_if_more_rows: if set to `True`, a warning is attached to the
result if the number of rows returned by the query exceeds `max_rows`.
read : (Nothing | Integer) -> Boolean -> Column
read self (max_rows : Nothing | Integer = 1000) (warn_if_more_rows:Boolean = True) =
self.to_table.read max_rows warn_if_more_rows . at 0
@max_rows Rows_To_Read.default_widget
read : Rows_To_Read -> Column
read self (max_rows : Rows_To_Read = ..First_With_Warning 1000) =
self.to_table.read max_rows . at 0
## GROUP Standard.Base.Conversions
ICON convert
Returns a vector containing all the elements in this column.
to_vector : Vector Any
to_vector self = self.read max_rows=Nothing . to_vector
to_vector self = self.read (..All_Rows) . to_vector
## GROUP Standard.Base.Selections
ICON select_row
@ -154,7 +153,7 @@ type DB_Column
example_at = Examples.integer_column.get 0 -1
get : Integer -> Any -> Any | Nothing
get self (index : Integer) (~default=Nothing) =
self.read index+1 . get index default
self.read (..First index+1) . get index default
## GROUP Standard.Base.Metadata
Returns the `Value_Type` associated with that column.

View File

@ -35,6 +35,7 @@ import Standard.Table.Internal.Value_Type_Helpers
import Standard.Table.Internal.Widget_Helpers
import Standard.Table.Match_Columns as Match_Columns_Helpers
import Standard.Table.Row.Row
import Standard.Table.Rows_To_Read.Rows_To_Read
from Standard.Table import Aggregate_Column, Auto, Blank_Selector, Column_Ref, Data_Formatter, Join_Condition, Join_Kind, Match_Columns, Position, Previous_Value, Report_Unmatched, Set_Mode, Simple_Expression, Sort_Column, Table, Value_Type
from Standard.Table.Errors import all
from Standard.Table.Internal.Filter_Condition_Helpers import make_filter_column
@ -96,7 +97,7 @@ type DB_Table
- format_terminal: whether ANSI-terminal formatting should be used
display : Integer -> Boolean -> Text
display self show_rows=10 format_terminal=False =
data_fragment_with_warning = self.read max_rows=show_rows warn_if_more_rows=True
data_fragment_with_warning = self.read (..First_With_Warning show_rows)
has_more_rows = data_fragment_with_warning.has_warnings warning_type=Not_All_Rows_Downloaded
data_fragment_cleared = data_fragment_with_warning.remove_warnings Not_All_Rows_Downloaded
# `row_count` means another Database query is performed, so we only do it if we need to.
@ -1130,26 +1131,24 @@ type DB_Table
Arguments:
- max_rows: specifies the maximum number of rows to read.
If `Nothing`, all available rows are returned.
- warn_if_more_rows: if set to `True`, a warning is attached to the
result if the number of rows returned by the query exceeds `max_rows`.
rows : Integer | Nothing -> Boolean -> Vector Row
rows self (max_rows : Integer | Nothing = 1000) (warn_if_more_rows : Boolean = True) =
self.read max_rows=max_rows warn_if_more_rows=warn_if_more_rows . rows
@max_rows Rows_To_Read.default_widget
rows : Rows_To_Read -> Vector Row
rows self (max_rows : Rows_To_Read = (..First_With_Warning 1000)) =
self.read max_rows . rows
## GROUP Standard.Base.Selections
ICON select_row
Returns the first row of the table.
first_row : Row ! Index_Out_Of_Bounds
first_row self =
self.read max_rows=1 warn_if_more_rows=False . rows . first
self.read (..First 1) . rows . first
## GROUP Standard.Base.Selections
ICON select_row
Returns the second row of the table.
second_row : Row ! Index_Out_Of_Bounds
second_row self =
self.read max_rows=2 warn_if_more_rows=False . rows . second
self.read (..First 2) . rows . second
## GROUP Standard.Base.Selections
ICON select_row
@ -2547,40 +2546,36 @@ type DB_Table
Arguments:
- max_rows: specifies the maximum number of rows to read.
If `Nothing`, all available rows are returned.
- warn_if_more_rows: if set to `True`, a warning is attached to the
result if the number of rows returned by the query exceeds `max_rows`.
read : (Integer | Nothing) -> Boolean -> Table
read self (max_rows : Integer | Nothing = 1000) (warn_if_more_rows:Boolean = True) =
preprocessed = if max_rows.is_nothing then self else
if warn_if_more_rows then self.limit max_rows+1 else self.limit max_rows
case preprocessed.internal_columns.is_empty of
True ->
Error.throw (Illegal_Argument.Error "Cannot create a table with no columns.")
False ->
sql = preprocessed.to_sql
column_type_suggestions = preprocessed.internal_columns.map .sql_type_reference
materialized_table = self.connection.read_statement sql column_type_suggestions . catch SQL_Error sql_error->
Error.throw (self.connection.dialect.get_error_mapper.transform_custom_errors sql_error)
@max_rows Rows_To_Read.default_widget
read : Rows_To_Read -> Table
read self (max_rows : Rows_To_Read = ..First_With_Warning 1000) =
if self.internal_columns.is_empty then Error.throw (Illegal_Argument.Error "Cannot create a table with no columns.") else
preprocessed = case max_rows of
Rows_To_Read.All_Rows -> self
Rows_To_Read.First n -> self.limit n
Rows_To_Read.First_With_Warning n -> self.limit n+1
warnings_builder = Vector.new_builder
expected_types = self.columns.map .value_type
actual_types = materialized_table.columns.map .value_type
expected_types.zip actual_types expected_type-> actual_type->
if expected_type == actual_type then Nothing else
expected_type_kind = Meta.meta expected_type . constructor
actual_type_kind = Meta.meta actual_type . constructor
## We ignore simple approximations that our in-memory backend does - things like adding default
timezone (because we do not have Date_Time without timezone in-memory),
or changing Float32 to Float64 are silently ignored.
However, bigger changes, like a Binary type column getting coerced to Mixed - _will_ still be reported.
if expected_type_kind == actual_type_kind then Nothing else
warnings_builder.append (Inexact_Type_Coercion.Warning expected_type actual_type)
result = if max_rows.is_nothing || materialized_table.row_count <= max_rows then materialized_table else
assert warn_if_more_rows "We may get more rows than we asked for _only_ if warn_if_more_rows=True"
warnings_builder.append (Not_All_Rows_Downloaded.Warning max_rows)
materialized_table.take max_rows
Problem_Behavior.Report_Warning.attach_problems_before warnings_builder.to_vector result
sql = preprocessed.to_sql
column_type_suggestions = preprocessed.internal_columns.map .sql_type_reference
materialized_table = self.connection.read_statement sql column_type_suggestions . catch SQL_Error sql_error->
Error.throw (self.connection.dialect.get_error_mapper.transform_custom_errors sql_error)
warnings_builder = Vector.new_builder
expected_types = self.columns.map .value_type
actual_types = materialized_table.columns.map .value_type
expected_types.zip actual_types expected_type-> actual_type->
if expected_type == actual_type then Nothing else
expected_type_kind = Meta.meta expected_type . constructor
actual_type_kind = Meta.meta actual_type . constructor
## We ignore simple approximations that our in-memory backend does - things like adding default
timezone (because we do not have Date_Time without timezone in-memory),
or changing Float32 to Float64 are silently ignored.
However, bigger changes, like a Binary type column getting coerced to Mixed - _will_ still be reported.
if expected_type_kind == actual_type_kind then Nothing else
warnings_builder.append (Inexact_Type_Coercion.Warning expected_type actual_type)
result = max_rows.attach_warning materialized_table
Problem_Behavior.Report_Warning.attach_problems_before warnings_builder.to_vector result
## PRIVATE
Creates a query corresponding to this table.

View File

@ -72,7 +72,7 @@ check_initial_invariants base_table lookup_table lookup_columns allow_unmatched_
check_for_null_keys lookup_table key_column_names <|
if allow_unmatched_rows then continuation else
unmatched_rows = base_table.join lookup_table on=key_column_names join_kind=Join_Kind.Left_Exclusive . select_columns key_column_names
unmatched_example = unmatched_rows.read max_rows=1 warn_if_more_rows=False
unmatched_example = unmatched_rows.read (..First 1)
if unmatched_example.row_count == 0 then continuation else
first_row = unmatched_example.rows.first
Error.throw (Unmatched_Rows_In_Lookup.Error first_row.to_vector)
@ -197,7 +197,7 @@ precheck_for_duplicate_matches lookup_columns subquery_setup connection new_ctx
Lookup_Column.Key_Column _ _ -> [subquery_setup.get_self_column ix]
_ -> []
table_for_duplicate_check = DB_Table.Value subquery_setup.new_table_name connection [subquery_setup.lookup_counter]+key_columns_for_duplicate_check new_ctx
duplicate_lookup_matches = table_for_duplicate_check.filter 0 (Filter_Condition.Greater than=1) . read max_rows=1 warn_if_more_rows=False
duplicate_lookup_matches = table_for_duplicate_check.filter 0 (Filter_Condition.Greater than=1) . read (..First 1)
case duplicate_lookup_matches.row_count > 0 of
True ->
first_example_row = duplicate_lookup_matches.read.rows.first.to_vector

View File

@ -7,6 +7,7 @@ from Standard.Base.Metadata.Choice import Option
from Standard.Base.Metadata.Widget import Single_Choice
from Standard.Table import Table, Value_Type
import Standard.Table.Rows_To_Read.Rows_To_Read
import project.Column_Description.Column_Description
import project.Connection.Connection.Connection
@ -167,9 +168,6 @@ type Postgres_Connection
If supplied as `Text`, the name is checked against the `tables` list to
determine if it is a table or a query.
- limit: the maximum number of rows to read.
If set to `Nothing`, all rows will be returned.
- warn_if_more_rows: if set to `True`, a warning is attached to the
result if the number of rows returned by the query exceeds `limit`.
? Side Effects
@ -179,9 +177,10 @@ type Postgres_Connection
`execute_update` for DML queries, or if they are supposed to return
results, the `read` should be wrapped in an execution context check.
@query make_table_name_selector
read : Text | SQL_Query -> Integer | Nothing -> Boolean -> Table ! Table_Not_Found
read self query (limit : Integer | Nothing = 1000) (warn_if_more_rows : Boolean = True) =
self.connection.read query limit warn_if_more_rows
@limit Rows_To_Read.default_widget
read : Text | SQL_Query -> Rows_To_Read -> Table ! Table_Not_Found
read self query (limit : Rows_To_Read = ..First_With_Warning 1000) =
self.connection.read query limit
## GROUP Standard.Base.Output
ICON data_output

View File

@ -5,6 +5,7 @@ from Standard.Base.Metadata.Choice import Option
from Standard.Base.Metadata.Widget import Single_Choice
from Standard.Table import Table, Value_Type
import Standard.Table.Rows_To_Read.Rows_To_Read
import project.Column_Description.Column_Description
import project.Connection.Connection.Connection
@ -154,9 +155,6 @@ type SQLite_Connection
If supplied as `Text`, the name is checked against the `tables` list to
determine if it is a table or a query.
- limit: the maximum number of rows to read.
If set to `Nothing`, all rows will be returned.
- warn_if_more_rows: if set to `True`, a warning is attached to the
result if the number of rows returned by the query exceeds `limit`.
? Side Effects
@ -166,9 +164,10 @@ type SQLite_Connection
`execute_update` for DML queries, or if they are supposed to return
results, the `read` should be wrapped in an execution context check.
@query make_table_name_selector
read : Text | SQL_Query -> Integer | Nothing -> Boolean -> Table ! Table_Not_Found
read self query (limit : Integer | Nothing = 1000) (warn_if_more_rows : Boolean = True) =
self.connection.read query limit warn_if_more_rows
@limit Rows_To_Read.default_widget
read : Text | SQL_Query -> Rows_To_Read -> Table ! Table_Not_Found
read self query (limit : Rows_To_Read = ..First_With_Warning 1000) =
self.connection.read query limit
## GROUP Standard.Base.Output
ICON data_output

View File

@ -228,7 +228,7 @@ type Non_Unique_Key_Recipe
raise_duplicated_primary_key_error source_table primary_key original_panic =
agg = source_table.aggregate primary_key [Aggregate_Column.Count]
filtered = agg.filter column=-1 (Filter_Condition.Greater than=1)
materialized = filtered.read max_rows=1 warn_if_more_rows=False
materialized = filtered.read (..First 1)
case materialized.row_count == 0 of
## If we couldn't find a duplicated key, we give up the translation and
rethrow the original panic containing the SQL error. This could
@ -619,7 +619,7 @@ check_multiple_rows_match left_table right_table key_columns ~continuation =
joined = left_table.join right_table on=key_columns join_kind=Join_Kind.Inner
counted = joined.aggregate key_columns [Aggregate_Column.Count]
duplicates = counted.filter -1 (Filter_Condition.Greater than=1)
example = duplicates.read max_rows=1 warn_if_more_rows=False
example = duplicates.read (..First 1)
case example.row_count == 0 of
True -> continuation
False ->
@ -633,7 +633,7 @@ check_for_null_keys table key_columns ~continuation =
keys = table.select_columns key_columns
is_any_key_blank = keys.columns.map (_.is_nothing) . reduce (||)
null_keys = table.filter is_any_key_blank Filter_Condition.Is_True
example = null_keys.read max_rows=1 warn_if_more_rows=False
example = null_keys.read (..First 1)
case example.row_count == 0 of
True -> continuation
False ->

View File

@ -6,6 +6,7 @@ from Standard.Base.Metadata.Choice import Option
from Standard.Base.Metadata.Widget import Single_Choice
from Standard.Table import Table
import Standard.Table.Rows_To_Read.Rows_To_Read
import Standard.Database.Column_Description.Column_Description
import Standard.Database.Connection.Connection.Connection
@ -185,9 +186,6 @@ type Snowflake_Connection
If supplied as `Text`, the name is checked against the `tables` list to
determine if it is a table or a query.
- limit: the maximum number of rows to read.
If set to `Nothing`, all rows will be returned.
- warn_if_more_rows: if set to `True`, a warning is attached to the
result if the number of rows returned by the query exceeds `limit`.
? Side Effects
@ -197,9 +195,10 @@ type Snowflake_Connection
`execute_update` for DML queries, or if they are supposed to return
results, the `read` should be wrapped in an execution context check.
@query make_table_name_selector
read : Text | SQL_Query -> Integer | Nothing -> Boolean -> Table ! Table_Not_Found
read self query (limit : Integer | Nothing = 1000) (warn_if_more_rows : Boolean = True) =
self.connection.read query limit warn_if_more_rows
@limit Rows_To_Read.default_widget
read : Text | SQL_Query -> Rows_To_Read -> Table ! Table_Not_Found
read self query (limit : Rows_To_Read = ..First_With_Warning 1000) =
self.connection.read query limit
## GROUP Standard.Base.Output
ICON data_output

View File

@ -24,6 +24,7 @@ import project.Internal.Parse_Values_Helper
import project.Internal.Storage
import project.Internal.Value_Type_Helpers
import project.Internal.Widget_Helpers
import project.Rows_To_Read.Rows_To_Read
import project.Table.Table
import project.Value_Type.Auto
import project.Value_Type.Value_Type
@ -2140,13 +2141,11 @@ type Column
Arguments:
- max_rows: specifies the maximum number of rows to read.
If `Nothing`, all available rows are returned.
- warn_if_more_rows: if set to `True`, a warning is attached to the
result if the number of rows returned by the query exceeds `max_rows`.
read : (Nothing | Integer) -> Boolean -> Column
read self (max_rows : Nothing | Integer = Nothing) (warn_if_more_rows:Boolean = True) =
@max_rows Rows_To_Read.default_widget
read : Rows_To_Read -> Column
read self (max_rows : Rows_To_Read = ..All_Rows) =
if max_rows.is_nothing then self else
self.to_table.read max_rows warn_if_more_rows . at 0
self.to_table.read max_rows . at 0
## GROUP Standard.Base.Conversions
ICON convert

View File

@ -14,6 +14,7 @@ import project.Headers.Headers
import project.Internal.Delimited_Reader
import project.Internal.Delimited_Writer
import project.Match_Columns.Match_Columns
import project.Rows_To_Read.Rows_To_Read
import project.Table.Table
## Read delimited files such as CSVs into a Table.
@ -57,7 +58,8 @@ type Delimited_Format
defaults to `Nothing` which means that comments are disabled.
@delimiter make_file_read_delimiter_selector
@encoding Encoding.default_widget
Delimited (delimiter:Text=',') (encoding:Encoding=Encoding.utf_8) (skip_rows:Integer=0) (row_limit:Integer|Nothing=Nothing) (quote_style:Quote_Style=Quote_Style.With_Quotes) (headers:Headers=Headers.Detect_Headers) (value_formatter:Data_Formatter|Nothing=Data_Formatter.Value) (keep_invalid_rows:Boolean=True) (line_endings:Line_Ending_Style|Infer=Infer) (comment_character:Text|Nothing=Nothing)
@row_limit Rows_To_Read.default_widget
Delimited (delimiter:Text=',') (encoding:Encoding=Encoding.utf_8) (skip_rows:Integer=0) (row_limit:Rows_To_Read=..All_Rows) (quote_style:Quote_Style=Quote_Style.With_Quotes) (headers:Headers=Headers.Detect_Headers) (value_formatter:Data_Formatter|Nothing=Data_Formatter.Value) (keep_invalid_rows:Boolean=True) (line_endings:Line_Ending_Style|Infer=Infer) (comment_character:Text|Nothing=Nothing)
## PRIVATE
ADVANCED

View File

@ -15,6 +15,7 @@ import project.Internal.Excel_Reader
import project.Internal.Excel_Section.Excel_Section
import project.Internal.Excel_Writer
import project.Match_Columns.Match_Columns
import project.Rows_To_Read.Rows_To_Read
import project.Table.Table
## PRIVATE
@ -49,14 +50,14 @@ type Excel_Format
present. If the column names are not unique, numeric suffixes will be
appended to disambiguate them.
- skip_rows: The number of rows to skip before reading the data.
- row_limit: The maximum number of rows to read. If set to `Nothing`, all
rows are read.
- row_limit: The maximum number of rows to read.
- xls_format:
If set to `True`, the file is read as an Excel 95-2003 format.
If set to `False`, the file is read as an Excel 2007+ format.
`Infer` will attempt to deduce this from the extension of the filename.
@sheet (Text_Input display=Display.Always)
Sheet (sheet:(Integer|Text)=1) (headers:Headers=Headers.Detect_Headers) (skip_rows:Integer=0) (row_limit:(Integer|Nothing)=Nothing) (xls_format:Boolean|Infer=Infer)
@row_limit Rows_To_Read.default_widget
Sheet (sheet:(Integer|Text)=1) (headers:Headers=Headers.Detect_Headers) (skip_rows:Integer=0) (row_limit:Rows_To_Read=..All_Rows) (xls_format:Boolean|Infer=Infer)
## Reads a range from an Excel file as a `Table`.
@ -67,14 +68,14 @@ type Excel_Format
present. If the column names are not unique, numeric suffixes will be
appended to disambiguate them.
- skip_rows: The number of rows to skip before reading the data.
- row_limit: The maximum number of rows to read. If set to `Nothing`, all
rows are read.
- row_limit: The maximum number of rows to read.
- xls_format:
If set to `True`, the file is read as an Excel 95-2003 format.
If set to `False`, the file is read as an Excel 2007+ format.
`Infer` will attempt to deduce this from the extension of the filename.
@address Text_Input
Range (address:(Text|Excel_Range)) (headers:Headers=Headers.Detect_Headers) (skip_rows:Integer=0) (row_limit:(Integer|Nothing)=Nothing) (xls_format : Boolean | Infer = Infer)
@row_limit Rows_To_Read.default_widget
Range (address:(Text|Excel_Range)) (headers:Headers=Headers.Detect_Headers) (skip_rows:Integer=0) (row_limit:Rows_To_Read=..All_Rows) (xls_format : Boolean | Infer = Infer)
## PRIVATE
ADVANCED

View File

@ -19,6 +19,7 @@ import project.Internal.Excel_Section.Excel_Section
import project.Internal.Java_Problems
import project.Internal.Problem_Builder.Problem_Builder
import project.Match_Columns.Match_Columns
import project.Rows_To_Read.Rows_To_Read
import project.Table.Table
from project.Errors import Empty_Sheet
from project.Internal.Excel_Reader import handle_invalid_location
@ -206,14 +207,16 @@ type Excel_Workbook
- limit: the maximum number of rows to read.
If set to `..All_Rows` (the default), all rows will be returned.
@query (self-> Single_Choice display=Display.Always values=(self.tables.at "Name" . to_vector . map t-> Option t t.pretty))
read : Text | Excel_Range -> Headers -> Integer | Nothing -> Problem_Behavior -> Table
read self (query : Text | Excel_Range) (headers : Headers = Headers.Detect_Headers) (limit : Integer | Nothing = Nothing) (on_problems:Problem_Behavior=..Report_Warning) =
@limit Rows_To_Read.default_widget
read : Text | Excel_Range -> Headers -> Rows_To_Read -> Problem_Behavior -> Table
read self (query : Text | Excel_Range) (headers : Headers = Headers.Detect_Headers) (limit : Rows_To_Read = ..All_Rows) (on_problems:Problem_Behavior=..Report_Warning) =
java_headers = Excel_Reader.make_java_headers headers
java_limit = limit.rows_to_read
java_table = Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
self.with_java_workbook java_workbook-> case query of
_ : Excel_Range -> ExcelReader.readRange java_workbook query.java_range java_headers 0 limit java_problem_aggregator
_ : Text -> ExcelReader.readRangeByName java_workbook query java_headers 0 limit java_problem_aggregator
Table.Value java_table
_ : Excel_Range -> ExcelReader.readRange java_workbook query.java_range java_headers 0 java_limit java_problem_aggregator
_ : Text -> ExcelReader.readRangeByName java_workbook query java_headers 0 java_limit java_problem_aggregator
limit.attach_warning (Table.Value java_table)
## GROUP Standard.Base.Input
ICON data_input
@ -226,6 +229,7 @@ type Excel_Workbook
case section of
Excel_Section.Worksheet sheet headers skip_rows row_limit ->
java_headers = Excel_Reader.make_java_headers headers
java_limit = row_limit.rows_to_read
names = self.sheet_names
sheet_name = case sheet of
_ : Text -> if names.contains sheet then sheet else Error.throw (Illegal_Argument.Error "Worksheet not found.")
@ -235,15 +239,16 @@ type Excel_Workbook
_ -> Error.throw (Illegal_Argument.Error "Worksheet must be either Text or an Integer.")
java_table = Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator->
self.with_java_workbook java_workbook->
ExcelReader.readRangeByName java_workbook sheet_name java_headers skip_rows row_limit java_problem_aggregator
Table.Value java_table
ExcelReader.readRangeByName java_workbook sheet_name java_headers skip_rows java_limit java_problem_aggregator
row_limit.attach_warning (Table.Value java_table)
Excel_Section.Cell_Range address headers skip_rows row_limit ->
java_headers = Excel_Reader.make_java_headers headers
java_limit = row_limit.rows_to_read
java_table = Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator->
self.with_java_workbook java_workbook-> case address of
_ : Excel_Range -> ExcelReader.readRange java_workbook address.java_range java_headers skip_rows row_limit java_problem_aggregator
_ : Text -> ExcelReader.readRangeByName java_workbook address java_headers skip_rows row_limit java_problem_aggregator
Table.Value java_table
_ : Excel_Range -> ExcelReader.readRange java_workbook address.java_range java_headers skip_rows java_limit java_problem_aggregator
_ : Text -> ExcelReader.readRangeByName java_workbook address java_headers skip_rows java_limit java_problem_aggregator
row_limit.attach_warning (Table.Value java_table)
## ALIAS get, worksheet
GROUP Standard.Base.Input

View File

@ -9,6 +9,7 @@ import project.Delimited.Delimited_Format.Delimited_Format
import project.Delimited.Quote_Style.Quote_Style
import project.Headers.Headers
import project.Internal.Java_Problems
import project.Rows_To_Read.Rows_To_Read
import project.Table.Table
from project.Errors import Empty_File_Error, Mismatched_Quote, Parser_Error
@ -96,19 +97,15 @@ read_from_reader format java_reader on_problems max_columns=4096 =
Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
reader = prepare_reader format max_columns on_problems java_problem_aggregator
java_table = reader.read java_reader
Table.Value java_table
format.row_limit.attach_warning (Table.Value java_table)
## PRIVATE
prepare_reader format max_columns on_problems java_problem_aggregator newline_override=Nothing =
prepare_reader format:Delimited_Format max_columns on_problems java_problem_aggregator newline_override=Nothing =
java_headers = case format.headers of
Headers.Has_Headers -> DelimitedReader.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS
Headers.Detect_Headers -> DelimitedReader.HeaderBehavior.INFER
Headers.No_Headers -> DelimitedReader.HeaderBehavior.GENERATE_HEADERS
row_limit = case format.row_limit of
Nothing -> -1
_ : Integer -> format.row_limit
_ ->
Error.throw (Illegal_Argument.Error "`row_limit` should be Integer or Nothing.")
row_limit = format.row_limit.rows_to_read.if_nothing -1
quote_characters = case format.quote_style of
Quote_Style.No_Quotes -> Pair.new Nothing Nothing
Quote_Style.With_Quotes _ quote quote_escape -> Pair.new quote quote_escape

View File

@ -56,16 +56,18 @@ read_file file section on_problems xls_format=False =
reader java_file = case section of
Excel_Section.Worksheet sheet headers skip_rows row_limit ->
Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
java_limit = row_limit.rows_to_read
java_table = case sheet of
_ : Integer -> ExcelReader.readSheetByIndex java_file sheet (make_java_headers headers) skip_rows row_limit file_format java_problem_aggregator
_ : Text -> ExcelReader.readSheetByName java_file sheet (make_java_headers headers) skip_rows row_limit file_format java_problem_aggregator
Table.Value java_table
_ : Integer -> ExcelReader.readSheetByIndex java_file sheet (make_java_headers headers) skip_rows java_limit file_format java_problem_aggregator
_ : Text -> ExcelReader.readSheetByName java_file sheet (make_java_headers headers) skip_rows java_limit file_format java_problem_aggregator
row_limit.attach_warning (Table.Value java_table)
Excel_Section.Cell_Range address headers skip_rows row_limit ->
Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
java_limit = row_limit.rows_to_read
java_table = case address of
_ : Excel_Range -> ExcelReader.readRange java_file address.java_range (make_java_headers headers) skip_rows row_limit file_format java_problem_aggregator
_ : Text -> ExcelReader.readRangeByName java_file address (make_java_headers headers) skip_rows row_limit file_format java_problem_aggregator
Table.Value java_table
_ : Excel_Range -> ExcelReader.readRange java_file address.java_range (make_java_headers headers) skip_rows java_limit file_format java_problem_aggregator
_ : Text -> ExcelReader.readRangeByName java_file address (make_java_headers headers) skip_rows java_limit file_format java_problem_aggregator
row_limit.attach_warning (Table.Value java_table)
handle_reader file reader

View File

@ -4,6 +4,7 @@ from Standard.Base import all
import project.Excel.Excel_Range.Excel_Range
import project.Headers.Headers
import project.Rows_To_Read.Rows_To_Read
type Excel_Section
## Gets the data from a specific sheet.
@ -15,9 +16,9 @@ type Excel_Section
present. If the column names are not unique, numeric suffixes will be
appended to disambiguate them.
- skip_rows: The number of rows to skip before reading the data.
- row_limit: The maximum number of rows to read. If set to `Nothing`, all
rows are read.
Worksheet (sheet:(Integer|Text)=1) (headers:Headers=Headers.Detect_Headers) (skip_rows:Integer=0) (row_limit:(Integer|Nothing)=Nothing)
- row_limit: The maximum number of rows to read.
@row_limit Rows_To_Read.default_widget
Worksheet (sheet:(Integer|Text)=1) (headers:Headers=Headers.Detect_Headers) (skip_rows:Integer=0) (row_limit:Rows_To_Read=..All_Rows)
## Gets a specific range (taking either a defined name or external style
address) from the workbook.
@ -31,6 +32,6 @@ type Excel_Section
present. If the column names are not unique, numeric suffixes will be
appended to disambiguate them.
- skip_rows: The number of rows to skip before reading the data.
- row_limit: The maximum number of rows to read. If set to `Nothing`, all
rows are read.
Cell_Range (address:(Text|Excel_Range)) (headers:Headers=Headers.Detect_Headers) (skip_rows:Integer=0) (row_limit:(Integer|Nothing)=Nothing)
- row_limit: The maximum number of rows to read.
@row_limit Rows_To_Read.default_widget
Cell_Range (address:(Text|Excel_Range)) (headers:Headers=Headers.Detect_Headers) (skip_rows:Integer=0) (row_limit:Rows_To_Read=..All_Rows)

View File

@ -124,15 +124,17 @@ prepare_file_modification_strategy table section on_existing_file match_columns
case section of
Excel_Section.Worksheet sheet headers skip_rows row_limit ->
java_headers = Excel_Reader.make_java_headers headers
java_limit = row_limit.rows_to_write
workbook_to_modify->
ExcelWriter.writeTableToSheet workbook_to_modify sheet existing_data_mode skip_rows table.java_table row_limit java_headers
ExcelWriter.writeTableToSheet workbook_to_modify sheet existing_data_mode skip_rows table.java_table java_limit java_headers
Excel_Section.Cell_Range address headers skip_rows row_limit ->
java_headers = Excel_Reader.make_java_headers headers
java_limit = row_limit.rows_to_write
java_range = case address of
Excel_Range.Value java_range -> java_range
text : Text -> text
workbook_to_modify->
ExcelWriter.writeTableToRange workbook_to_modify java_range existing_data_mode skip_rows table.java_table row_limit java_headers
ExcelWriter.writeTableToRange workbook_to_modify java_range existing_data_mode skip_rows table.java_table java_limit java_headers
## PRIVATE
Handle and map the Java errors when writing an Excel file

View File

@ -0,0 +1,53 @@
from Standard.Base import all
from Standard.Base.Metadata import make_single_choice, Widget
import project.Table.Table
import project.Errors.Not_All_Rows_Downloaded
## PRIVATE
   Describes how many rows should be read from a data source and whether a
   warning should be attached when rows are left out.
type Rows_To_Read
    ## Read all rows.
    All_Rows

    ## Read the first specified number of rows.
    First rows:Integer=1000

    ## Read a limited number of rows, attaching a warning if there are more.
       By default, 1000 rows are read.
       This is the default for database reading.
    First_With_Warning rows:Integer=1000

    ## PRIVATE
       Widget offering the `All_Rows` and `First` options.
    default_widget -> Widget =
        options = [["All_Rows", "..All_Rows"], ["First", "..First"]]
        make_single_choice options

    ## PRIVATE
       Gets the number of rows to read (or Nothing if all rows).
       For the First_With_Warning case one extra row is requested, so that
       the presence of further rows can be detected.
    rows_to_read self -> Integer | Nothing = case self of
        Rows_To_Read.First_With_Warning limit -> limit + 1
        Rows_To_Read.First limit -> limit
        Rows_To_Read.All_Rows -> Nothing

    ## PRIVATE
       Gets the number of rows to write (or Nothing if all rows).
       Both limited variants yield exactly the requested number of rows.
    rows_to_write self -> Integer | Nothing = case self of
        Rows_To_Read.First_With_Warning limit -> limit
        Rows_To_Read.First limit -> limit
        Rows_To_Read.All_Rows -> Nothing

    ## PRIVATE
       For `First_With_Warning`, if `input` has more rows than the limit,
       returns its first `rows` rows with a `Not_All_Rows_Downloaded` warning
       attached. In every other case `input` is returned unchanged.
    attach_warning self input:Table -> Table = case self of
        Rows_To_Read.First_With_Warning limit ->
            fits = input.row_count <= limit
            if fits then input else
                truncated = input.take (First limit)
                Problem_Behavior.Report_Warning.attach_problem_after truncated <|
                    Not_All_Rows_Downloaded.Warning limit
        _ -> input
## PRIVATE
   Converts `Nothing` into `Rows_To_Read.All_Rows`.
Rows_To_Read.from (that:Nothing) = case that of
    Nothing -> Rows_To_Read.All_Rows
## PRIVATE
   Converts an `Integer` into `Rows_To_Read.First` with that many rows.
Rows_To_Read.from (that:Integer) =
    row_count = that
    Rows_To_Read.First row_count

View File

@ -58,6 +58,7 @@ import project.Match_Columns.Match_Columns
import project.Position.Position
import project.Prefix_Name.Prefix_Name
import project.Row.Row
import project.Rows_To_Read.Rows_To_Read
import project.Set_Mode.Set_Mode
import project.Simple_Expression.Simple_Expression
import project.Sort_Column.Sort_Column
@ -1832,12 +1833,10 @@ type Table
Arguments:
- max_rows: specifies the maximum number of rows to read.
If `..All_Rows` (the default) or `Nothing`, all available rows are returned.
- warn_if_more_rows: if set to `True`, a warning is attached to the
result if the number of rows returned by the query exceeds `max_rows`.
rows : Integer | Nothing -> Boolean -> Vector Row
rows self (max_rows : Integer | Nothing = Nothing) (warn_if_more_rows : Boolean = True) =
proxy = Rows_View.Value (self.read max_rows warn_if_more_rows)
@max_rows Rows_To_Read.default_widget
rows : Rows_To_Read -> Vector Row
rows self (max_rows : Rows_To_Read = ..All_Rows) =
    # Apply the row limit first, then expose the result through a row view.
    limited = self.read max_rows
    rows_view = Rows_View.Value limited
    Vector.from_polyglot_array (Array_Proxy.from_proxy_object rows_view)
## GROUP Standard.Base.Selections
@ -2371,18 +2370,16 @@ type Table
Arguments:
- max_rows: specifies the maximum number of rows to read.
If `..All_Rows` (the default) or `Nothing`, all available rows are returned.
- warn_if_more_rows: if set to `True`, a warning is attached to the
result if the number of rows returned by the query exceeds `max_rows`.
read : (Integer | Nothing) -> Boolean -> Table
read self (max_rows : Integer | Nothing = Nothing) (warn_if_more_rows:Boolean = True) = case max_rows of
Nothing -> self
_ : Integer ->
truncated = self.take (First max_rows)
needs_warning = warn_if_more_rows && self.row_count > max_rows
if needs_warning.not then truncated else
@max_rows Rows_To_Read.default_widget
read : Rows_To_Read -> Table
read self (max_rows : Rows_To_Read = ..All_Rows) = case max_rows of
# No limit: the table is returned unchanged.
Rows_To_Read.All_Rows -> self
# Plain limit: keep the first `n` rows; no warning is attached.
Rows_To_Read.First n -> self.take (First n)
# Limit with warning: truncate, then warn only if rows were actually dropped.
Rows_To_Read.First_With_Warning n ->
truncated = self.take (First n)
# `row_count` is taken from the untruncated table, so `<= n` means nothing was cut off.
if self.row_count <= n then truncated else
Problem_Behavior.Report_Warning.attach_problem_after truncated <|
Not_All_Rows_Downloaded.Warning max_rows
Not_All_Rows_Downloaded.Warning n
## ALIAS metadata, field info, column types
GROUP Standard.Base.Metadata

View File

@ -38,7 +38,7 @@ prepare_visualization y max_rows=1000 =
make_json_for_table dataframe [index] all_rows_count
_ : DB_Column -> prepare_visualization x.to_table max_rows
_ : DB_Table ->
dataframe = x.read max_rows warn_if_more_rows=False
dataframe = x.read (..First max_rows)
all_rows_count = x.row_count
make_json_for_table dataframe [] all_rows_count
_ : Function ->
@ -196,7 +196,7 @@ make_json_for_value val level=0 = case val of
"Row{" + (prepared.join ", ") + "}"
_ : DB_Column ->
if level != 0 then "Column{" +val.name + ": " + val.row_count + " rows}" else
materialise = val.read 5 warn_if_more_rows=False
materialise = val.read (..First 5)
truncated = materialise . map k-> k.to_text + ": " + (make_json_for_value (val.get k) level+1).to_text
prepared = if val.length > 5 then truncated + ["… " + (val.length - 5).to_text+ " items"] else truncated
"Column{" + val.name + ": " + prepared + "}"

View File

@ -369,36 +369,36 @@ add_specs suite_builder setup =
Problems.assume_no_problems t2
group_builder.specify "should allow to set the row limit" <|
t1 = data.t_big.read max_rows=23
t1 = data.t_big.read (..First_With_Warning 23)
t1.row_count . should_equal 23
w1 = Problems.expect_only_warning Not_All_Rows_Downloaded t1
w1.max_rows . should_equal 23
w1.to_display_text . should_contain "some rows have been dropped"
t2 = data.t_big.read max_rows=1500
t2 = data.t_big.read (..First_With_Warning 1500)
t2.row_count . should_equal 1500
Problems.assume_no_problems t2
t3 = data.t_small.read max_rows=1
t3 = data.t_small.read (..First_With_Warning 1)
t3.row_count . should_equal 1
w3 = Problems.expect_only_warning Not_All_Rows_Downloaded t3
w3.max_rows . should_equal 1
group_builder.specify "should allow to have no row limit" <|
t1 = data.t_big.read max_rows=Nothing
t1 = data.t_big.read (..All_Rows)
t1.row_count . should_equal 1500
Problems.assume_no_problems t1
group_builder.specify "should allow to turn off the warning" <|
t1 = data.t_big.read warn_if_more_rows=False
t1.row_count . should_equal (if has_default_row_limit then 1000 else 1500)
t1 = data.t_big.read (..First 1000)
t1.row_count . should_equal 1000
Problems.assume_no_problems t1
t2 = data.t_big.read max_rows=123 warn_if_more_rows=False
t2 = data.t_big.read (..First 123)
t2.row_count . should_equal 123
Problems.assume_no_problems t2
t3 = data.t_big.read max_rows=12300 warn_if_more_rows=False
t3 = data.t_big.read (..First 12300)
t3.row_count . should_equal 1500
Problems.assume_no_problems t3
@ -419,15 +419,15 @@ add_specs suite_builder setup =
# to_vector always downloads the whole column, even if its large
c1.to_vector.length . should_equal 1500
r3 = c1.read max_rows=10
r3 = c1.read (..First_With_Warning 10)
r3.length . should_equal 10
Problems.expect_only_warning Not_All_Rows_Downloaded r3
r4 = c1.read max_rows=Nothing
r4 = c1.read (..All_Rows)
r4.length . should_equal 1500
Problems.assume_no_problems r4
r5 = c1.read max_rows=3 warn_if_more_rows=False
r5 = c1.read (..First 3)
r5.length . should_equal 3
Problems.assume_no_problems r5
@ -439,16 +439,16 @@ add_specs suite_builder setup =
w1 = Problems.expect_only_warning Not_All_Rows_Downloaded t1
w1.max_rows . should_equal 1000
t2 = data.connection.read data.t_big.name limit=42
t2 = data.connection.read data.t_big.name (..First_With_Warning 42)
t2.row_count . should_equal 42
w2 = Problems.expect_only_warning Not_All_Rows_Downloaded t2
w2.max_rows . should_equal 42
t3 = data.connection.read data.t_big.name limit=Nothing
t3 = data.connection.read data.t_big.name (..All_Rows)
t3.row_count . should_equal 1500
Problems.assume_no_problems t3
t4 = data.connection.read data.t_big.name warn_if_more_rows=False
t4 = data.connection.read data.t_big.name (..First 1000)
t4.row_count . should_equal 1000
Problems.assume_no_problems t4