From d0f3d7f098457220ce8014ff36cfdca2a89bee91 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Thu, 14 Nov 2024 19:31:47 +0000 Subject: [PATCH] `skip_rows` for `Excel_Workbook.read`. (#11560) - Adds `skip_rows` to `Excel_Workbook.read`. - Support for read by index. --- .../0.0.0-dev/src/Excel/Excel_Range.enso | 2 +- .../0.0.0-dev/src/Excel/Excel_Workbook.enso | 21 +++++++++++---- .../0.0.0-dev/src/Internal/Excel_Writer.enso | 4 +-- .../java/org/enso/table/read/ExcelReader.java | 2 +- test/Table_Tests/src/IO/Excel_Spec.enso | 26 +++++++++++++++++++ 5 files changed, 46 insertions(+), 9 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Range.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Range.enso index 29291b3c5b..2c30cdedb9 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Range.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Range.enso @@ -18,7 +18,7 @@ excel_2007_row_limit = 1048576 type Excel_Range ## PRIVATE Specifies a range within an Excel Workbook. - Value java_range:Java_Range + private Value java_range:Java_Range ## GROUP Standard.Base.Metadata ICON metadata diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Workbook.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Workbook.enso index 2815308816..353cab9e64 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Workbook.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Workbook.enso @@ -221,16 +221,20 @@ type Excel_Workbook `Detect_Headers`). - limit: the maximum number of rows to read. If set to `Nothing`, all rows will be returned. - @query (self-> Single_Choice display=Display.Always values=(self.tables.at "Name" . to_vector . 
map t-> Option t t.pretty)) + @query self-> _query_widget self @limit Rows_To_Read.default_widget - read : Text | Excel_Range -> Headers -> Rows_To_Read -> Problem_Behavior -> Table - read self (query : Text | Excel_Range) (headers : Headers = Headers.Detect_Headers) (limit : Rows_To_Read = ..All_Rows) (on_problems:Problem_Behavior=..Report_Warning) = + read : Text | Excel_Range | Integer -> Headers -> Rows_To_Read -> Integer -> Problem_Behavior -> Table + read self (query : Text | Excel_Range | Integer) (headers : Headers = ..Detect_Headers) (limit : Rows_To_Read = ..All_Rows) (skip_rows:Integer=0) (on_problems:Problem_Behavior=..Report_Warning) = java_headers = Excel_Reader.make_java_headers headers java_limit = limit.rows_to_read java_table = Java_Problems.with_problem_aggregator on_problems java_problem_aggregator-> self.with_java_workbook java_workbook-> case query of - _ : Excel_Range -> ExcelReader.readRange java_workbook query.java_range java_headers 0 java_limit java_problem_aggregator - _ : Text -> ExcelReader.readRangeByName java_workbook query java_headers 0 java_limit java_problem_aggregator + _ : Excel_Range -> ExcelReader.readRange java_workbook query.java_range java_headers skip_rows java_limit java_problem_aggregator + _ : Text -> ExcelReader.readRangeByName java_workbook query java_headers skip_rows java_limit java_problem_aggregator + _ : Integer -> + names = self.sheet_names + if (query < 1 || query > names.length) then Error.throw (Illegal_Argument.Error "Worksheet index out of range (1 - "+names.length.to_text+").") else + ExcelReader.readRangeByName java_workbook (names.at (query - 1)) java_headers skip_rows java_limit java_problem_aggregator limit.attach_warning (Table.Value java_table) ## PRIVATE @@ -378,3 +382,10 @@ type Return_As ## All sheets are merged into a single table. A union operation is performed. 
Merged_Table (columns_to_keep : Columns_To_Keep = Columns_To_Keep.In_Any) (match : Match_Columns = Match_Columns.By_Name) + +## PRIVATE + Creates a query widget for the `read` method, offering a sheet index, a sample range on the first sheet, and every name listed by `tables`. +private _query_widget wb:Excel_Workbook display:Display=..Always -> Widget = + default_address = "'" + wb.sheet_names.first.replace "'" "''" + "'!A1:E5" + options = [Option "By index" "1", Option "Cell range" "Excel_Range.from_address "+default_address.pretty] + (wb.tables.at "Name" . to_vector . map t-> Option t t.pretty) + Single_Choice display=display values=options diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Excel_Writer.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Excel_Writer.enso index 132dada58c..b47f7bab7f 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Excel_Writer.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Excel_Writer.enso @@ -131,8 +131,8 @@ prepare_file_modification_strategy table section on_existing_file match_columns java_headers = Excel_Reader.make_java_headers headers java_limit = row_limit.rows_to_write java_range = case address of - Excel_Range.Value java_range -> java_range - text : Text -> text + _ : Excel_Range -> address.java_range + _ : Text -> address workbook_to_modify-> ExcelWriter.writeTableToRange workbook_to_modify java_range existing_data_mode skip_rows table.java_table java_limit java_headers diff --git a/std-bits/table/src/main/java/org/enso/table/read/ExcelReader.java b/std-bits/table/src/main/java/org/enso/table/read/ExcelReader.java index 6a53baf283..1a8e1ab54c 100644 --- a/std-bits/table/src/main/java/org/enso/table/read/ExcelReader.java +++ b/std-bits/table/src/main/java/org/enso/table/read/ExcelReader.java @@ -279,7 +279,7 @@ public class ExcelReader { } } - private static Table readRange( + public static Table readRange( Workbook workbook, ExcelRange excelRange, ExcelHeaders.HeaderBehavior headers, diff --git a/test/Table_Tests/src/IO/Excel_Spec.enso 
b/test/Table_Tests/src/IO/Excel_Spec.enso index 43872f01f0..b4fba5dce3 100644 --- a/test/Table_Tests/src/IO/Excel_Spec.enso +++ b/test/Table_Tests/src/IO/Excel_Spec.enso @@ -58,6 +58,32 @@ spec_fmt suite_builder header file read_method sheet_count=5 = t.columns.map .name . should_equal ['Item', 'Price', 'Quantity', 'Price 1'] t.at 'Price 1' . to_vector . should_equal [20, 40, 0, 60, 0, 10] + group_builder.specify "should allow reading with limited rows and skipping rows from workbook" <| + wb = read_method file + t = wb.read 1 headers=..No_Headers + t.column_names.should_equal ['A', 'B', 'C'] + + t_1 = wb.read 1 headers=..No_Headers limit=3 + t_1.column_names . should_equal t.column_names + t_1.row_count . should_equal 3 + t_1.at 'A' . to_vector . should_equal ['Name', 'blouse', 't-shirt'] + t_1.at 'B' . to_vector . should_equal ['Quantity', 10, 20] + t_1.at 'C' . to_vector . should_equal ['Price', 22.3, 32] + + t_2 = wb.read 1 headers=..No_Headers skip_rows=3 + t_2.column_names . should_equal t.column_names + t_2.row_count . should_equal 4 + t_2.at 'A' . to_vector . should_equal ['trousers', 'shoes', 'skirt', 'dress'] + t_2.at 'B' . to_vector . should_equal [Nothing, 30, Nothing, 5] + t_2.at 'C' . to_vector . should_equal [43.2, 54, 31, Nothing] + + t_3 = wb.read 1 headers=..No_Headers limit=2 skip_rows=3 + t_3.column_names . should_equal t.column_names + t_3.row_count . should_equal 2 + t_3.at 'A' . to_vector . should_equal ['trousers', 'shoes'] + t_3.at 'B' . to_vector . should_equal [Nothing, 30] + t_3.at 'C' . to_vector . should_equal [43.2, 54] + group_builder.specify "should allow reading with cell range specified" <| t_1 = read_method file (..Range "Simple!B:C") t_1.columns.map .name . should_equal ['Quantity', 'Price']