Excel_Workbook.read_many (#9759)

- Some minor linting fixes.
- Adjust the `headers` parameter to use a dedicated type.
![image](https://github.com/enso-org/enso/assets/4699705/989f464d-df95-410e-a03b-36661f1c4a37)
- Fix bug with `read` on an `Excel_Workbook` so the error is handled more gracefully instead of panicking to the UI.
![image](https://github.com/enso-org/enso/assets/4699705/23b4575f-daad-4719-a5cc-30d064bd7f7a)
- Fix bug when writing to a file with an `Excel_Format` with an invalid extension which was causing a panic.
![image](https://github.com/enso-org/enso/assets/4699705/dc0e055c-c1b6-482f-b129-eb69f6554d72)
- Add `read_many` to `Excel_Workbook` allowing reading more than one sheet at a time.
This commit is contained in:
James Dunkerley 2024-04-24 14:16:44 +01:00 committed by GitHub
parent 717f6bb330
commit fb9cf38914
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
20 changed files with 262 additions and 105 deletions

View File

@ -655,6 +655,8 @@
- [Added `recursive` option to `File.delete`.][9719]
- [Added `Vector.build`.][9725]
- [Added `Table.running` method][9577]
- [Added `Excel_Workbook.read_many` allowing reading more than one sheet at a
time.][9759]
[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -959,6 +961,7 @@
[9716]: https://github.com/enso-org/enso/pull/9716
[9719]: https://github.com/enso-org/enso/pull/9719
[9725]: https://github.com/enso-org/enso/pull/9725
[9759]: https://github.com/enso-org/enso/pull/9759
[9577]: https://github.com/enso-org/enso/pull/9577
#### Enso Compiler

View File

@ -76,19 +76,19 @@ type S3_File
content_length = translate_file_errors self <| S3.raw_head self.s3_path.bucket self.s3_path.key self.credentials . contentLength
if content_length.is_nothing then Error.throw (S3_Error.Error "ContentLength header is missing." self.uri) else content_length
## ICON folder_add
GROUP Output
## GROUP Output
ICON folder_add
Creates the directory represented by this file if it did not exist.
It also creates parent directories if they did not exist.
? S3 Handling of Directories
S3 does not have a native concept of directories.
TODO: Add more information about how S3 handles directories.
https://github.com/enso-org/enso/issues/9704
S3 does not have a native concept of directories.
create_directory : File
create_directory self =
## TODO Add more information about how S3 handles directories.
https://github.com/enso-org/enso/issues/9704
Unimplemented.throw "Creating S3 folders is currently not implemented."
## PRIVATE

View File

@ -40,10 +40,10 @@ import project.System.Output_Stream.Output_Stream
from project.Data.Boolean import Boolean, False, True
from project.Data.Index_Sub_Range.Index_Sub_Range import Last
from project.Data.Text.Extensions import all
from project.Enso_Cloud.Public_Utils import get_required_field
from project.Enso_Cloud.Internal.Enso_File_Helpers import all
from project.System.File_Format import Auto_Detect, Bytes, File_Format, Plain_Text_Format
from project.Enso_Cloud.Public_Utils import get_required_field
from project.System.File import find_extension_from_name
from project.System.File_Format import Auto_Detect, Bytes, File_Format, Plain_Text_Format
from project.System.File.Generic.File_Write_Strategy import generic_copy
type Enso_File
@ -310,8 +310,8 @@ type Enso_File
Asset_Cache.update file asset
file
## ICON folder_add
GROUP Output
## GROUP Output
ICON folder_add
Creates the directory represented by this file if it did not exist.
It also creates parent directories if they did not exist.

View File

@ -4,8 +4,8 @@ import project.Data.Json.JS_Object
import project.Enso_Cloud.Enso_File.Enso_Asset_Type
import project.Enso_Cloud.Enso_File.Enso_File
import project.Enso_Cloud.Errors.Enso_Cloud_Error
import project.Enso_Cloud.Internal.Existing_Enso_Asset.Existing_Enso_Asset
import project.Enso_Cloud.Internal.Existing_Enso_Asset.Asset_Cache
import project.Enso_Cloud.Internal.Existing_Enso_Asset.Existing_Enso_Asset
import project.Enso_Cloud.Internal.Utils
import project.Error.Error
import project.Errors.File_Error.File_Error

View File

@ -495,8 +495,8 @@ type File
is_directory : Boolean
is_directory self = @Builtin_Method "File.is_directory"
## ICON folder_add
GROUP Output
## GROUP Output
ICON folder_add
Creates the directory represented by this file if it did not exist.
It also creates parent directories if they did not exist.

View File

@ -10,6 +10,7 @@ from Standard.Base.Widget_Helpers import make_file_read_delimiter_selector
import project.Data_Formatter.Data_Formatter
import project.Delimited.Quote_Style.Quote_Style
import project.Headers.Headers
import project.Internal.Delimited_Reader
import project.Internal.Delimited_Writer
import project.Match_Columns.Match_Columns
@ -35,12 +36,10 @@ type Delimited_Format
does not include the header row (if applicable).
- quote_style: Specifies the style of quotes used for reading and
writing.
- headers: If set to `True`, the first row is used as column names. If
set to `False`, the column names are generated by adding increasing
numeric suffixes to the base name `Column` (i.e. `Column_1`,
`Column_2` etc.). If set to `Infer`, the process tries to infer if
headers are present on the first row. If the column names are not
unique, numeric suffixes will be appended to disambiguate them.
- headers: Specifies if the first row contains the column names. If set
to `Detect_Headers`, the process tries to infer if headers are
present. If the column names are not unique, numeric suffixes will be
appended to disambiguate them.
- value_formatter: Formatter to parse text values into numbers, dates,
times, etc. If `Nothing` values are left as Text.
- keep_invalid_rows: Specifies whether rows that contain less or more
@ -58,7 +57,7 @@ type Delimited_Format
defaults to `Nothing` which means that comments are disabled.
@delimiter make_file_read_delimiter_selector
@encoding Encoding.default_widget
Delimited (delimiter:Text=',') (encoding:Encoding=Encoding.utf_8) (skip_rows:Integer=0) (row_limit:Integer|Nothing=Nothing) (quote_style:Quote_Style=Quote_Style.With_Quotes) (headers:Boolean|Infer=Infer) (value_formatter:Data_Formatter|Nothing=Data_Formatter.Value) (keep_invalid_rows:Boolean=True) (line_endings:Line_Ending_Style|Infer=Infer) (comment_character:Text|Nothing=Nothing)
Delimited (delimiter:Text=',') (encoding:Encoding=Encoding.utf_8) (skip_rows:Integer=0) (row_limit:Integer|Nothing=Nothing) (quote_style:Quote_Style=Quote_Style.With_Quotes) (headers:Headers=Headers.Detect_Headers) (value_formatter:Data_Formatter|Nothing=Data_Formatter.Value) (keep_invalid_rows:Boolean=True) (line_endings:Line_Ending_Style|Infer=Infer) (comment_character:Text|Nothing=Nothing)
## PRIVATE
ADVANCED
@ -112,8 +111,8 @@ type Delimited_Format
## PRIVATE
Clone the instance with some properties overridden.
Note: This function is internal until such time as Atom cloning with modification is built into Enso.
clone : Quote_Style -> (Boolean|Infer) -> (Data_Formatter|Nothing) -> Boolean -> (Text|Nothing) -> (Text|Nothing) -> Delimited_Format
clone self (quote_style=self.quote_style) (headers=self.headers) (value_formatter=self.value_formatter) (keep_invalid_rows=self.keep_invalid_rows) (line_endings=self.line_endings) (comment_character=self.comment_character) =
clone : Quote_Style -> Headers -> (Data_Formatter|Nothing) -> Boolean -> (Text|Nothing) -> (Text|Nothing) -> Delimited_Format
clone self (quote_style=self.quote_style) (headers:Headers=self.headers) (value_formatter=self.value_formatter) (keep_invalid_rows=self.keep_invalid_rows) (line_endings=self.line_endings) (comment_character=self.comment_character) =
Delimited_Format.Delimited self.delimiter self.encoding self.skip_rows self.row_limit quote_style headers value_formatter keep_invalid_rows line_endings comment_character
## ICON data_input
@ -131,13 +130,13 @@ type Delimited_Format
## ICON data_input
Create a clone of this with first row treated as header.
with_headers : Delimited_Format
with_headers self = self.clone headers=True
with_headers self = self.clone headers=Headers.Has_Headers
## ICON data_input
Create a clone of this where the first row is treated as data, not a
header.
without_headers : Delimited_Format
without_headers self = self.clone headers=False
without_headers self = self.clone headers=Headers.No_Headers
## ICON data_input
Create a clone of this with value parsing.

View File

@ -247,13 +247,13 @@ type Unquoted_Characters_In_Output
## Indicates that a specified location was not valid.
type Invalid_Location
## PRIVATE
Error (location : Text | Any)
Error (location : Text | Any) (message:Text|Nothing=Nothing)
## PRIVATE
Pretty print the invalid location error.
to_display_text : Text
to_display_text self =
"The location '"+self.location.to_text+"' is not valid."
self.message.if_nothing ("The location '"+self.location.to_text+"' is not valid.")
## Indicates that some values did not match the expected datatype format.
@ -278,22 +278,29 @@ type Empty_File_Error
## PRIVATE
Pretty print the empty file error.
to_display_text : Text
to_display_text = "It is not allowed to create a Table with no columns, so an empty file could not have been loaded."
to_display_text self =
_ = self
"The file is empty so it cannot be loaded."
## PRIVATE
handle_java_exception =
Panic.catch EmptyFileException handler=(_ -> Error.throw Empty_File_Error)
## Indicates that an empty sheet was encountered, so no data could be loaded.
type Empty_Sheet_Error
type Empty_Sheet
## PRIVATE
Error
## PRIVATE
Pretty print the empty sheet error.
to_display_text : Text
to_display_text = "It is not allowed to create a Table with no columns, so an empty sheet could not have been loaded."
to_display_text self =
_ = self
"There is no data in the sheet."
## PRIVATE
handle_java_exception =
Panic.catch EmptySheetException handler=(_ -> Error.throw Empty_Sheet_Error)
Panic.catch EmptySheetException handler=(_ -> Error.throw Empty_Sheet.Error)
## Indicates that the column was already present in the table.
type Existing_Column

View File

@ -10,6 +10,7 @@ from Standard.Base.System.File_Format import parse_boolean_with_infer
import project.Excel.Excel_Range.Excel_Range
import project.Excel.Excel_Workbook.Excel_Workbook
import project.Headers.Headers
import project.Internal.Excel_Reader
import project.Internal.Excel_Section.Excel_Section
import project.Internal.Excel_Writer
@ -18,12 +19,12 @@ import project.Table.Table
## PRIVATE
Resolve the xls_format setting to a boolean.
should_treat_as_xls_format : (Boolean|Infer) -> File -> Boolean ! Illegal_Argument
should_treat_as_xls_format xls_format file =
should_treat_as_xls_format : (Boolean|Infer) -> File_Format_Metadata -> Boolean ! Illegal_Argument
should_treat_as_xls_format xls_format file:File_Format_Metadata =
if xls_format != Infer then xls_format else
inferred_xls_format = xls_format_from_metadata file
inferred_xls_format.if_nothing <|
Error.throw (Illegal_Argument.Error ("File not recognized as Excel file (" + file.name + ")"))
Error.throw (Illegal_Argument.Error ("File extension not recognized as Excel (" + file.name + "). Specify xls_format explicitly."))
## Read the file to a `Table` from an Excel file
type Excel_Format
@ -43,11 +44,10 @@ type Excel_Format
Arguments:
- sheet: The sheet number or name.
- headers: If set to `True`, the first row is used as column names. If
set to `False`, the column names are Excel column names. If set to
`Infer`, the process tries to infer if headers are present on the first
row. If the column names are not unique, numeric suffixes will be
appended to disambiguate them.
- headers: Specifies if the first row contains the column names. If set
to `Detect_Headers`, the process tries to infer if headers are
present. If the column names are not unique, numeric suffixes will be
appended to disambiguate them.
- skip_rows: The number of rows to skip before reading the data.
- row_limit: The maximum number of rows to read. If set to `Nothing`, all
rows are read.
@ -56,17 +56,16 @@ type Excel_Format
If set to `False`, the file is read as an Excel 2007+ format.
`Infer` will attempt to deduce this from the extension of the filename.
@sheet (Text_Input display=Display.Always)
Sheet (sheet:(Integer|Text)=1) (headers:(Boolean|Infer)=Infer) (skip_rows:Integer=0) (row_limit:(Integer|Nothing)=Nothing) (xls_format:Boolean|Infer=Infer)
Sheet (sheet:(Integer|Text)=1) (headers:Headers=Headers.Detect_Headers) (skip_rows:Integer=0) (row_limit:(Integer|Nothing)=Nothing) (xls_format:Boolean|Infer=Infer)
## Reads a range from an Excel file as a `Table`.
Arguments:
- address: A name of a range or an Excel-style address (e.g. Sheet1!A1:B2).
- headers: If set to `True`, the first row is used as column names. If
set to `False`, the column names are Excel column names. If set to
`Infer`, the process tries to infer if headers are present on the first
row. If the column names are not unique, numeric suffixes will be
appended to disambiguate them.
- headers: Specifies if the first row contains the column names. If set
to `Detect_Headers`, the process tries to infer if headers are
present. If the column names are not unique, numeric suffixes will be
appended to disambiguate them.
- skip_rows: The number of rows to skip before reading the data.
- row_limit: The maximum number of rows to read. If set to `Nothing`, all
rows are read.
@ -75,7 +74,7 @@ type Excel_Format
If set to `False`, the file is read as an Excel 2007+ format.
`Infer` will attempt to deduce this from the extension of the filename.
@address Text_Input
Range (address:(Text|Excel_Range)) (headers:(Boolean|Infer)=Infer) (skip_rows:Integer=0) (row_limit:(Integer|Nothing)=Nothing) (xls_format : Boolean | Infer = Infer)
Range (address:(Text|Excel_Range)) (headers:Headers=Headers.Detect_Headers) (skip_rows:Integer=0) (row_limit:(Integer|Nothing)=Nothing) (xls_format : Boolean | Infer = Infer)
## PRIVATE
ADVANCED

View File

@ -3,19 +3,25 @@ import Standard.Base.Errors.File_Error.File_Error
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.Errors.Illegal_State.Illegal_State
import Standard.Base.Metadata.Display
import Standard.Base.Metadata.Widget
import Standard.Base.Runtime.Managed_Resource.Managed_Resource
import Standard.Base.Runtime.Ref.Ref
import Standard.Base.System.File.Advanced.Temporary_File.Temporary_File
import Standard.Base.System.Input_Stream.Input_Stream
from Standard.Base.Data.Filter_Condition import sql_like_to_regex
from Standard.Base.Metadata.Choice import Option
from Standard.Base.Metadata.Widget import Single_Choice
from Standard.Base.Metadata.Widget import Single_Choice, Multiple_Choice
import project.Excel.Excel_Range.Excel_Range
import project.Headers.Headers
import project.Internal.Excel_Reader
import project.Internal.Excel_Section.Excel_Section
import project.Internal.Java_Problems
import project.Internal.Problem_Builder.Problem_Builder
import project.Match_Columns.Match_Columns
import project.Table.Table
from project.Errors import Empty_Sheet
from project.Internal.Excel_Reader import handle_invalid_location
polyglot java import java.io.File as Java_File
polyglot java import org.apache.poi.ss.usermodel.Workbook
@ -180,10 +186,11 @@ type Excel_Workbook
Arguments:
- query: sheet name, range name or address to read from the workbook.
- alias: optionally specify a friendly alias for the query (unused).
- headers: whether to use the first row as headers (default is `Infer`).
- headers: whether to use the first row as headers (default is
`Detect_Headers`).
@query (self-> Single_Choice display=Display.Always values=(self.tables.at "Name" . to_vector . map t-> Option t t.pretty))
query : Text -> Text -> Boolean | Infer -> Table
query self (query : Text) (alias : Text = "") (headers : Boolean | Infer = Infer) =
query : Text -> Text -> Headers -> Table
query self (query : Text) (alias : Text = "") (headers : Headers = Headers.Detect_Headers) =
_ = [alias]
self.read query headers=headers
@ -194,14 +201,15 @@ type Excel_Workbook
Arguments:
- query: sheet name, range name or address to read from the workbook.
- headers: whether to use the first row as headers (default is `Infer`).
- headers: whether to use the first row as headers (default is
`Detect_Headers`).
- limit: the maximum number of rows to read.
If set to `Nothing`, all rows will be returned.
@query (self-> Single_Choice display=Display.Always values=(self.tables.at "Name" . to_vector . map t-> Option t t.pretty))
read : Text | Excel_Range -> Boolean | Infer -> Integer | Nothing -> Table
read self (query : Text | Excel_Range) (headers : Boolean | Infer = Infer) (limit : Integer | Nothing = Nothing) =
read : Text | Excel_Range -> Headers -> Integer | Nothing -> Problem_Behavior -> Table
read self (query : Text | Excel_Range) (headers : Headers = Headers.Detect_Headers) (limit : Integer | Nothing = Nothing) (on_problems:Problem_Behavior=..Report_Warning) =
java_headers = Excel_Reader.make_java_headers headers
java_table = Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator->
java_table = Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
self.with_java_workbook java_workbook-> case query of
_ : Excel_Range -> ExcelReader.readRange java_workbook query.java_range java_headers 0 limit java_problem_aggregator
_ : Text -> ExcelReader.readRangeByName java_workbook query java_headers 0 limit java_problem_aggregator
@ -244,10 +252,11 @@ type Excel_Workbook
Arguments:
- name: the name of the worksheet to read.
- headers: whether to use the first row as headers (default is `Infer`).
- headers: whether to use the first row as headers (default is
`Detect_Headers`).
@name (self-> Single_Choice display=Display.Always values=(self.sheet_names.map t-> Option t t.pretty))
sheet : Text | Integer -> Boolean | Infer -> Table
sheet self name:(Text | Integer) headers:(Boolean | Infer)=Infer =
sheet : Text | Integer -> Headers -> Table
sheet self name:(Text | Integer) headers:Headers=Headers.Detect_Headers =
self.read_section (Excel_Section.Worksheet name headers 0 Nothing)
## PRIVATE
@ -289,12 +298,60 @@ type Excel_Workbook
## PRIVATE
with_java_workbook : (Workbook -> Any) -> Any ! Illegal_State
with_java_workbook self f = Illegal_State.handle_java_exception <|
with_java_workbook self f = Illegal_State.handle_java_exception <| handle_invalid_location <| Empty_Sheet.handle_java_exception <|
case self.excel_connection_resource_ref.get of
Nothing ->
Error.throw (Illegal_State.Error "The workbook is being used after it was closed.")
resource -> resource.with connection->
connection.withWorkbook f
## GROUP Standard.Base.Input
   ICON data_input
   Reads one or more sheets into a Table.

   Arguments:
   - sheet_names: the names of sheets, ranges or addresses to read.
     Defaults to all sheets. An invalid `sheet_name` will be reported as a
     problem.
   - headers: whether to use the first row as headers (default is
     `Detect_Headers`) for each sheet.
   - return: how to return the tables. Defaults to `Merged_Table` meaning
     the tables are merged into a single table.
   - on_problems: how to handle problems during reading. Defaults to
     `Report_Warning`.

   ! Error Conditions
     - If `sheet_names` is empty, an `Illegal_Argument` error is raised.
     - If `return` is `Merged_Table` and none of the requested sheets
       produced a table, an `Illegal_Argument` error is raised.
@sheet_names build_sheet_selector
read_many : Vector Text -> Headers -> Return_As -> Problem_Behavior -> Table
read_many self sheet_names:Vector=self.sheet_names (headers:Headers=..Detect_Headers) (return:Return_As=..Merged_Table) (on_problems:Problem_Behavior=..Report_Warning) =
    if sheet_names.is_empty then Error.throw (Illegal_Argument.Error "No sheets selected.") else
        # Entries of `tables` may be `Nothing` (presumably for sheets that
        # could not be read); they are skipped when merging below.
        tables = sheet_names.map on_problems=on_problems address-> self.read address headers on_problems=on_problems
        case return of
            Return_As.Table_Of_Tables -> Table.new [["Sheet Name", sheet_names], ["Table", tables]]
            Return_As.Merged_Table match ->
                # `Vector.find` raises `Not_Found` by default when no element
                # matches, so ask for `Nothing` explicitly; otherwise the
                # check below could never report "No valid sheets found.".
                first_tbl = tables.find (t-> t != Nothing) if_missing=Nothing
                if first_tbl == Nothing then Error.throw (Illegal_Argument.Error "No valid sheets found.") else
                    # Reserve every existing column name so that the added
                    # sheet-name column does not clash with any of them.
                    unique = first_tbl.column_naming_helper.create_unique_name_strategy
                    tables.each tbl-> if tbl != Nothing then unique.mark_used tbl.column_names
                    new_column_name = unique.make_unique "Sheet Name"

                    # Prepend the sheet name as the first column of each table.
                    with_names = tables.zip sheet_names tbl->name-> if tbl == Nothing then Nothing else tbl.set name new_column_name . reorder_columns [new_column_name]
                    result = Table.from_union (with_names.filter Filter_Condition.Not_Nothing) match keep_unmatched_columns=True

                    # Report any renaming done by the unique name strategy.
                    problem_builder = Problem_Builder.new
                    problem_builder.report_unique_name_strategy unique
                    problem_builder.attach_problems_after on_problems result
## PRIVATE
close_connection c = c.close
## PRIVATE
Builds a `Multiple_Choice` widget whose options are the sheet names of the
given workbook.
Arguments:
- workbook: the workbook whose `sheet_names` are offered as options.
- display: the display mode passed to the widget (defaults to
`Display.Always`).
build_sheet_selector workbook:Excel_Workbook display:Display=Display.Always -> Widget =
# Each option shows the sheet name and uses its `pretty` form as the value.
names = workbook.sheet_names.map n-> Option n n.pretty
Multiple_Choice display=display values=names
## Specifies how the tables read from multiple sheets are returned.
type Return_As
## Each sheet is returned as a row: the result has a `Sheet Name` column and
a `Table` column holding the table read for that sheet.
Table_Of_Tables
## All sheets are merged into a single table. A union operation is performed.
Arguments:
- match: how columns of the individual sheets are matched in the union
(defaults to `Match_Columns.By_Name`).
Merged_Table match:Match_Columns=Match_Columns.By_Name

View File

@ -0,0 +1,22 @@
from Standard.Base import all
## Determines how the first row of the data should be treated.
If `Detect_Headers` is used, the system will attempt to determine if the
first row of the data is a header row or not.
type Headers
## Attempt to detect if the data has headers or not.
Detect_Headers
## The first row of the data is the column names.
Has_Headers
## Treat the first row of data as a normal row.
No_Headers
## PRIVATE
Conversion from `Infer`: always yields `Headers.Detect_Headers`.
Headers.from (that:Infer) =
# The argument only selects this conversion; its value is not needed.
_ = that
Headers.Detect_Headers
## PRIVATE
Conversion from `Boolean`: `True` yields `Headers.Has_Headers` and `False`
yields `Headers.No_Headers`.
Headers.from (that:Boolean) = if that then Headers.Has_Headers else Headers.No_Headers

View File

@ -7,6 +7,7 @@ import Standard.Base.System.Input_Stream.Input_Stream
import project.Data_Formatter.Data_Formatter
import project.Delimited.Delimited_Format.Delimited_Format
import project.Delimited.Quote_Style.Quote_Style
import project.Headers.Headers
import project.Internal.Java_Problems
import project.Table.Table
from project.Errors import Empty_File_Error, Mismatched_Quote, Parser_Error
@ -100,9 +101,9 @@ read_from_reader format java_reader on_problems max_columns=4096 =
## PRIVATE
prepare_reader format max_columns on_problems java_problem_aggregator newline_override=Nothing =
java_headers = case format.headers of
True -> DelimitedReader.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS
Infer -> DelimitedReader.HeaderBehavior.INFER
False -> DelimitedReader.HeaderBehavior.GENERATE_HEADERS
Headers.Has_Headers -> DelimitedReader.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS
Headers.Detect_Headers -> DelimitedReader.HeaderBehavior.INFER
Headers.No_Headers -> DelimitedReader.HeaderBehavior.GENERATE_HEADERS
row_limit = case format.row_limit of
Nothing -> -1
_ : Integer -> format.row_limit
@ -118,7 +119,7 @@ prepare_reader format max_columns on_problems java_problem_aggregator newline_ov
value_parser = if format.value_formatter.is_nothing then base_parser else
wrapped = format.value_formatter.wrap_base_parser base_parser
TypeInferringParser.new format.value_formatter.get_specific_type_parsers wrapped
cell_type_guesser = if format.headers != Infer then Nothing else
cell_type_guesser = if format.headers != Headers.Detect_Headers then Nothing else
formatter = format.value_formatter.if_nothing Data_Formatter.Value
TypeInferringParser.new formatter.get_specific_type_parsers IdentityParser.new
newline = newline_override.if_nothing <| case format.line_endings of

View File

@ -7,6 +7,7 @@ import Standard.Base.System.Output_Stream.Output_Stream
import project.Data_Formatter.Data_Formatter
import project.Delimited.Delimited_Format.Delimited_Format
import project.Delimited.Quote_Style.Quote_Style
import project.Headers.Headers
import project.Internal.Delimited_Reader
import project.Internal.Delimited_Reader.Detected_Headers
import project.Internal.Java_Problems
@ -172,10 +173,10 @@ write_to_writer table format java_writer separator_override=Nothing needs_leadin
writer.write table.java_table
## PRIVATE
should_write_headers headers = case headers of
True -> True
Infer -> True
False -> False
should_write_headers headers:Headers = case headers of
Headers.Has_Headers -> True
Headers.Detect_Headers -> True
Headers.No_Headers -> False
## PRIVATE
The default line separator used for newly written delimited files, if no

View File

@ -1,13 +1,16 @@
private
from Standard.Base import all
import Standard.Base.Errors.File_Error.File_Error
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.System.Input_Stream.Input_Stream
import project.Excel.Excel_Range.Excel_Range
import project.Headers.Headers
import project.Internal.Excel_Section.Excel_Section
import project.Internal.Java_Problems
import project.Table.Table
from project.Errors import Duplicate_Output_Column_Names, Empty_Sheet_Error, Invalid_Column_Names, Invalid_Location
from project.Errors import Duplicate_Output_Column_Names, Empty_Sheet, Invalid_Column_Names, Invalid_Location
polyglot java import java.io.File as Java_File
polyglot java import org.apache.poi.poifs.filesystem.NotOLE2FileException
@ -18,23 +21,20 @@ polyglot java import org.enso.table.excel.ExcelHeaders
polyglot java import org.enso.table.read.ExcelReader
## PRIVATE
Convert Boolean|Infer to the correct HeaderBehavior
make_java_headers : (Boolean | Infer) -> Any
make_java_headers headers = case headers of
True -> ExcelHeaders.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS
Infer -> ExcelHeaders.HeaderBehavior.INFER
False -> ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES
Convert Headers to the correct HeaderBehavior
make_java_headers : Headers -> Any
make_java_headers headers:Headers = case headers of
Headers.Has_Headers -> ExcelHeaders.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS
Headers.Detect_Headers -> ExcelHeaders.HeaderBehavior.INFER
Headers.No_Headers -> ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES
## PRIVATE
Handle and map the Java errors when reading an Excel file
handle_reader : File -> (Java_File -> (Table | Vector)) -> (Table | Vector)
handle_reader file reader =
bad_argument caught_panic = Error.throw (Invalid_Location.Error caught_panic.payload.getCause)
handle_bad_argument = Panic.catch InvalidLocationException handler=bad_argument
# TODO [RW] handle InvalidFormatException
File_Error.handle_java_exceptions file <| handle_bad_argument <|
handle_bad_format file <| Empty_Sheet_Error.handle_java_exception <|
File_Error.handle_java_exceptions file <| handle_invalid_location <|
handle_bad_format file <| Empty_Sheet.handle_java_exception <|
java_file = Java_File.new file.path
reader java_file
@ -84,3 +84,13 @@ handle_bad_format_with_handler handler ~action =
Panic.catch UnsupportedFileFormatException handler=handler <|
Panic.catch NotOLE2FileException handler=handler <|
action
## PRIVATE
A helper that handles the Java exceptions reported when a sheet, range or
address is invalid.
Arguments:
- action: the computation to run; an `InvalidLocationException` panic
raised by it is converted into an `Invalid_Location` error.
handle_invalid_location ~action =
# Build the `Invalid_Location` error from the Java exception, keeping both
# the offending location and the message produced on the Java side.
bad_argument caught_panic =
java_exception = caught_panic.payload
Error.throw (Invalid_Location.Error java_exception.getLocation java_exception.getMessage)
Panic.catch InvalidLocationException handler=bad_argument <|
action

View File

@ -3,21 +3,21 @@ private
from Standard.Base import all
import project.Excel.Excel_Range.Excel_Range
import project.Headers.Headers
type Excel_Section
## Gets the data from a specific sheet.
Arguments:
- sheet: The sheet number or name.
- headers: If set to `True`, the first row is used as column names. If
set to `False`, the column names are Excel column names. If set to
`Infer`, the process tries to infer if headers are present on the first
row. If the column names are not unique, numeric suffixes will be
appended to disambiguate them.
- headers: Specifies if the first row contains the column names. If set
to `Detect_Headers`, the process tries to infer if headers are
present. If the column names are not unique, numeric suffixes will be
appended to disambiguate them.
- skip_rows: The number of rows to skip before reading the data.
- row_limit: The maximum number of rows to read. If set to `Nothing`, all
rows are read.
Worksheet (sheet:(Integer|Text)=1) (headers:(Boolean|Infer)=Infer) (skip_rows:Integer=0) (row_limit:(Integer|Nothing)=Nothing)
Worksheet (sheet:(Integer|Text)=1) (headers:Headers=Headers.Detect_Headers) (skip_rows:Integer=0) (row_limit:(Integer|Nothing)=Nothing)
## Gets a specific range (taking either a defined name or external style
address) from the workbook.
@ -26,12 +26,11 @@ type Excel_Section
Arguments:
- address: A name of a range or an Excel-style address (e.g. Sheet1!A1:B2).
- headers: If set to `True`, the first row is used as column names. If
set to `False`, the column names are Excel column names. If set to
`Infer`, the process tries to infer if headers are present on the first
row. If the column names are not unique, numeric suffixes will be
appended to disambiguate them.
- headers: Specifies if the first row contains the column names. If set
to `Detect_Headers`, the process tries to infer if headers are
present. If the column names are not unique, numeric suffixes will be
appended to disambiguate them.
- skip_rows: The number of rows to skip before reading the data.
- row_limit: The maximum number of rows to read. If set to `Nothing`, all
rows are read.
Cell_Range (address:(Text|Excel_Range)) (headers:(Boolean|Infer)=Infer) (skip_rows:Integer=0) (row_limit:(Integer|Nothing)=Nothing)
Cell_Range (address:(Text|Excel_Range)) (headers:Headers=Headers.Detect_Headers) (skip_rows:Integer=0) (row_limit:(Integer|Nothing)=Nothing)

View File

@ -11,6 +11,7 @@ export project.Delimited.Quote_Style.Quote_Style
export project.Excel.Excel_Format.Excel_Format
export project.Excel.Excel_Range.Excel_Range
export project.Excel.Excel_Workbook.Excel_Workbook
export project.Headers.Headers
export project.Join_Condition.Join_Condition
export project.Join_Kind.Join_Kind
export project.Match_Columns.Match_Columns
@ -28,4 +29,3 @@ from project.Constants export all
from project.Expression export expr
from project.Extensions.Column_Vector_Extensions export all
from project.Extensions.Table_Conversions export all

View File

@ -2678,7 +2678,7 @@ type Table
from Standard.Table import all
example_to_xlsx = Examples.inventory_table.write (enso_project.data / "example_xlsx_output.xlsx") (Excel_Format.Sheet "MySheetName")
@path (Widget.Text_Input display=Display.Always)
@path (Widget.File_Browse display=Display.Always)
@format Widget_Helpers.write_table_selector
write : Writable_File -> File_Format -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> File ! Column_Count_Mismatch | Illegal_Argument | File_Error
write self path:Writable_File format=Auto_Detect on_existing_file=Existing_File_Behavior.Backup match_columns=Match_Columns.By_Name on_problems=Report_Warning =

View File

@ -1,7 +1,14 @@
package org.enso.table.error;
public class InvalidLocationException extends RuntimeException {
public InvalidLocationException(String errorMessage) {
private final String location;
public InvalidLocationException(String location, String errorMessage) {
super(errorMessage);
this.location = location;
}
public String getLocation() {
return this.location;
}
}

View File

@ -111,7 +111,7 @@ public class ExcelReader {
workbook -> {
int sheetIndex = workbook.getSheetIndex(sheetName);
if (sheetIndex == -1) {
throw new InvalidLocationException("Unknown sheet '" + sheetName + "'.");
throw new InvalidLocationException(sheetName, "Unknown sheet '" + sheetName + "'.");
}
return readTable(
@ -153,7 +153,8 @@ public class ExcelReader {
int sheetCount = workbook.getNumberOfSheets();
if (index < 1 || index > sheetCount) {
throw new InvalidLocationException(
"Sheet index is not in valid range (1 to " + sheetCount + " inclusive).");
Integer.toString(index),
"Sheet " + index + " is out of range (1 to " + sheetCount + " inclusive).");
}
return readTable(
@ -236,7 +237,8 @@ public class ExcelReader {
excelRange = new ExcelRange(name == null ? rangeNameOrAddress : name.getRefersToFormula());
} catch (IllegalArgumentException e) {
throw new InvalidLocationException(
"Invalid range name or address '" + rangeNameOrAddress + "'.");
rangeNameOrAddress,
"Unknown sheet or range name or invalid address: '" + rangeNameOrAddress + "'.");
}
return readRange(workbook, excelRange, headers, skip_rows, row_limit, problemAggregator);
@ -287,7 +289,8 @@ public class ExcelReader {
throws InvalidLocationException {
int sheetIndex = workbook.getSheetIndex(excelRange.getSheetName());
if (sheetIndex == -1) {
throw new InvalidLocationException("Unknown sheet '" + excelRange.getSheetName() + "'.");
throw new InvalidLocationException(
excelRange.getSheetName(), "Unknown sheet '" + excelRange.getSheetName() + "'.");
}
return readTable(

View File

@ -179,7 +179,7 @@ public class ExcelWriter {
excelRange = new ExcelRange(name == null ? rangeNameOrAddress : name.getRefersToFormula());
} catch (IllegalArgumentException e) {
throw new InvalidLocationException(
"Invalid range name or address '" + rangeNameOrAddress + "'.");
rangeNameOrAddress, "Invalid range name or address '" + rangeNameOrAddress + "'.");
}
writeTableToRange(workbook, excelRange, existingDataMode, skipRows, table, rowLimit, headers);
}
@ -200,7 +200,8 @@ public class ExcelWriter {
ColumnCountMismatchException {
int sheetIndex = workbook.getSheetIndex(range.getSheetName());
if (sheetIndex == -1) {
throw new InvalidLocationException("Unknown sheet '" + range.getSheetName() + "'.");
throw new InvalidLocationException(
range.getSheetName(), "Unknown sheet '" + range.getSheetName() + "'.");
}
ExcelSheet sheet = new ExcelSheet(workbook, sheetIndex);

View File

@ -9,7 +9,7 @@ import Standard.Base.Runtime.Ref.Ref
from Standard.Table import Table, Match_Columns, Excel_Format, Excel_Range, Data_Formatter, Delimited_Format, Excel_Workbook
from Standard.Table.Errors import Invalid_Column_Names, Duplicate_Output_Column_Names, Invalid_Location, Range_Exceeded, Existing_Data, Column_Count_Mismatch, Column_Name_Mismatch, Empty_Sheet_Error
from Standard.Table.Errors import Invalid_Column_Names, Duplicate_Output_Column_Names, Invalid_Location, Range_Exceeded, Existing_Data, Column_Count_Mismatch, Column_Name_Mismatch, Empty_Sheet
from Standard.Test import all
@ -49,7 +49,7 @@ spec_fmt suite_builder header file read_method sheet_count=5 =
group_builder.specify "should give an informative error when reading an empty table" <|
t = read_method file (Excel_Format.Sheet "Empty")
t.should_fail_with Empty_Sheet_Error
t.should_fail_with Empty_Sheet
group_builder.specify "should gracefully handle duplicate column names and formulas" <|
t = read_method file (Excel_Format.Sheet "Duplicate Columns")
@ -73,8 +73,43 @@ spec_fmt suite_builder header file read_method sheet_count=5 =
t_3.at 'B' . to_vector . should_equal [Nothing, 30]
t_3.at 'C' . to_vector . should_equal [43.2, 54]
# Calls `read_many` with no sheet list and checks the single combined table it returns.
group_builder.specify "should let you read all sheets into a single table" <|
wb = read_method file
# `on_problems` is left as `_`; presumably filled in by `Problems.test_problem_handling` — confirm.
action = wb.read_many on_problems=_
tester table =
table.row_count . should_equal 25
# The first expected column is "Sheet Name", followed by the columns of the individual sheets.
table.column_names . should_equal ["Sheet Name", "Name", "Quantity", "Price", "A", "B", "C", "D", "E", "Student Name", "Enrolment Date", "Item", "Price 1"]
# Expected problems: `Empty_Sheet` and `Duplicate_Output_Column_Names` for "Price".
problems = [Empty_Sheet.Error, Duplicate_Output_Column_Names.Error ["Price"]]
Problems.test_problem_handling action problems tester
# Same as the single-table case but with `return=..Table_Of_Tables`: the result is expected
# to have 5 rows and the two columns "Sheet Name" and "Table".
group_builder.specify "should let you read all sheets into a table of tables" <|
wb = read_method file
# `on_problems` is left as `_`; presumably filled in by `Problems.test_problem_handling` — confirm.
action = wb.read_many return=..Table_Of_Tables on_problems=_
tester table =
table.row_count . should_equal 5
table.column_names . should_equal ["Sheet Name", "Table"]
# Expected problems: `Empty_Sheet` and `Duplicate_Output_Column_Names` for "Price".
problems = [Empty_Sheet.Error, Duplicate_Output_Column_Names.Error ["Price"]]
Problems.test_problem_handling action problems tester
# Passes an explicit list of two sheet names to `read_many` and expects a 12-row table
# with no problems attached.
group_builder.specify "should let you read some sheets from xlsx" <|
wb = read_method file
single_table = wb.read_many ["Simple", "Dates"]
single_table.row_count . should_equal 12
single_table.column_names . should_equal ["Sheet Name", "Name", "Quantity", "Price", "Student Name", "Enrolment Date"]
Problems.assume_no_problems single_table
# Adds a third name, "Not A Sheet", to the request. The row count and columns are expected
# to match the two-sheet case, with the bad name surfacing as a single `Invalid_Location` warning.
group_builder.specify "should let you read some sheets with a bad name from xlsx" <|
wb = read_method file
single_table = wb.read_many ["Simple", "Dates", "Not A Sheet"]
single_table.row_count . should_equal 12
single_table.column_names . should_equal ["Sheet Name", "Name", "Quantity", "Price", "Student Name", "Enrolment Date"]
single_table.has_warnings.should_be_true
# The warning must carry the offending name in `location` and mention it in its display text.
warning = Problems.expect_only_warning Invalid_Location single_table
warning.location . should_equal "Not A Sheet"
warning.to_display_text . should_contain "Unknown sheet or range name or invalid address: 'Not A Sheet'."
type Spec_Write_Data
Value ~data counter suffix
Value ~data counter suffix prefix
table self = self.data.at 0
clothes self = self.data.at 1
@ -85,15 +120,17 @@ type Spec_Write_Data
clothes = enso_project.data/'clothes.csv' . read
sub_clothes = clothes.select_columns [0, 1]
counter = Ref.new 0
Spec_Write_Data.Value [table, clothes, sub_clothes] counter suffix
prefix = "out_" + (Random.integer 1 10000).to_text + "_"
Spec_Write_Data.Value [table, clothes, sub_clothes] counter suffix prefix
# Deletes files matching a name pattern from the project's `data/transient` directory.
teardown self =
# NOTE(review): the "out*" line looks like the superseded form retained by the diff view — confirm.
enso_project.data/"transient" . list "out*" . each .delete
# Matches only names starting with `self.prefix`; a failed delete is printed with a
# "[CLEANUP]" tag instead of being propagated.
enso_project.data/"transient" . list (self.prefix+"*") . each f->
f.delete . catch Any error-> IO.println "[CLEANUP] Failed to delete "+f.to_text+": "+error.to_display_text
# Returns a new output file path under `data/transient`, numbered by the shared counter.
# NOTE(review): the header without `suffix` and the `"out" + ...` path line look like the
# superseded forms retained by the diff view — confirm.
create_out self =
# `suffix` defaults to `self.suffix` and is used as the file extension.
create_out self suffix=self.suffix =
# Increment the counter and store it back so each call gets a different number.
i = self.counter.get + 1
self.counter.put i
f = enso_project.data / "transient" / ("out" + i.to_text + "." + self.suffix)
f = enso_project.data / "transient" / (self.prefix + i.to_text + "." + suffix)
# Remove any file already at that path; a failure here is rethrown as a panic.
Panic.rethrow f.delete_if_exists
f
@ -161,8 +198,10 @@ spec_write suite_builder suffix test_sheet_name =
read_1 . should_equal data.table
read_2 = out.read (Excel_Format.Sheet "Sheet2")
read_2 . should_equal data.clothes
read_3 = out.read.sheet_names
written = out.read
read_3 = written.sheet_names
read_3 . should_equal ["Sheet2", "Sheet1"]
written.close
group_builder.specify 'should write a table to specific single cell location of an existing sheet' <|
out = data.create_out
@ -341,6 +380,11 @@ spec_write suite_builder suffix test_sheet_name =
written = out.read (Excel_Format.Sheet "Another") . select_columns [0, 1, 2]
written.should_equal expected
# Writing with an Excel format to a file whose extension is "notxls" must fail with
# `Illegal_Argument`, for both the `Workbook` and the `Sheet` format.
group_builder.specify 'should error gracefully if an unknown extension' <|
out = data.create_out suffix="notxls"
data.table.write out format=Excel_Format.Workbook on_problems=Report_Error . should_fail_with Illegal_Argument
data.table.write out format=Excel_Format.Sheet on_problems=Report_Error . should_fail_with Illegal_Argument
group_builder.specify 'should be able to write to a new dry run file' <|
out = data.create_out
temp = Context.Output.with_disabled <|
@ -832,6 +876,10 @@ add_specs suite_builder =
table_4 = xlsx_sheet.read . sheet "Sheet1"
check_table table_4
# Both routes must fail with `Invalid_Location`: reading the file with `Excel_Format.Sheet`,
# and calling `read` on the value returned by `xlsx_sheet.read`.
group_builder.specify "should error if you read by an invalid sheet name" <|
xlsx_sheet.read (Excel_Format.Sheet "NoSuchSheet") . should_fail_with Invalid_Location
xlsx_sheet.read . read "NoSuchSheet" . should_fail_with Invalid_Location
group_builder.specify "should let you read XLS by sheet index" <|
table = xls_sheet.read (Excel_Format.Sheet 1)
check_table table