diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a1473ab02..2c9c601845 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -147,6 +147,7 @@ - [Added `File_Format.Delimited` support to `Table.write` for new files.][3528] - [Adjusted `Database.connect` API to new design.][3542] - [Added `File_Format.Excel` support to `Table.write` for new files.][3551] +- [Added append support for `File_Format.Excel`.][3558] - [Added support for custom encodings in `File_Format.Delimited` writing.][3564] [debug-shortcuts]: @@ -233,8 +234,9 @@ [3528]: https://github.com/enso-org/enso/pull/3528 [3542]: https://github.com/enso-org/enso/pull/3542 [3551]: https://github.com/enso-org/enso/pull/3551 -[3564]: https://github.com/enso-org/enso/pull/3564 [3552]: https://github.com/enso-org/enso/pull/3552 +[3558]: https://github.com/enso-org/enso/pull/3558 +[3564]: https://github.com/enso-org/enso/pull/3564 #### Enso Compiler diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time.enso index d6c75e83ef..696727f60d 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time.enso @@ -468,6 +468,14 @@ type Time sign = self.internal_zoned_date_time.compareTo that.internal_zoned_date_time Ordering.from_sign sign + ## Checks if `self` equals `that`. + + Arguments: + - that: The other `Time` to compare against. 
+ == : Time -> Boolean + == that = + self.internal_zoned_date_time.equals that.internal_zoned_date_time + type Time_Error ## UNSTABLE diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/System/File.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/System/File.enso index 55a8ff65a6..cf45a14834 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/System/File.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/System/File.enso @@ -8,6 +8,7 @@ import Standard.Base.Data.Text.Text_Sub_Range from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding, Encoding_Error from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior, Report_Warning from Standard.Base.Runtime.Resource import all +from Standard.Base.Data.Time as Time_Module import Time export Standard.Base.System.File.Option @@ -21,6 +22,7 @@ polyglot java import java.nio.file.FileAlreadyExistsException polyglot java import java.nio.file.FileSystems polyglot java import java.nio.file.Path polyglot java import java.nio.file.StandardCopyOption +polyglot java import java.time.ZonedDateTime ## ALIAS New File @@ -361,6 +363,48 @@ type File exists : Boolean exists = @Builtin_Method "File.exists" + ## Gets the creation time of a file. + + > Example + Gets the creation time of a file. + + import Standard.Examples + + example_exists = Examples.csv.creation_time + creation_time : Time ! File_Error + creation_time = + here.handle_java_exceptions self <| + Time (self.creation_time_builtin) + + ## PRIVATE + + Builtin method that gets this file's creation time. + Recommended to use `File.creation_time` instead which handles potential + exceptions. + creation_time_builtin : File -> ZonedDateTime + creation_time_builtin = @Builtin_Method "File.creation_time_builtin" + + ## Gets the last modified time of a file. + + > Example + Gets the last modified time of a file. 
+ + import Standard.Examples + + example_exists = Examples.csv.last_modified_time + last_modified_time : Time ! File_Error + last_modified_time = + here.handle_java_exceptions self <| + Time (self.last_modified_time_builtin) + + ## PRIVATE + + Builtin method that gets this file's last modified time. + Recommended to use `File.last_modified_time` instead which handles + potential exceptions. + last_modified_time_builtin : File -> ZonedDateTime + last_modified_time_builtin = @Builtin_Method "File.last_modified_time_builtin" + ## Checks whether the file exists and is a directory. > Example diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index 85e56cf9e0..cac01243a6 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -22,10 +22,11 @@ from Standard.Base.Data.Text.Text_Ordering as Text_Ordering_Module import Text_O from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior, Report_Warning from Standard.Database.Error as Database_Errors import Unsupported_Database_Operation_Error -import Standard.Table.Data.Column_Mapping +import Standard.Table.Data.Column_Name_Mapping import Standard.Table.Data.Position import Standard.Table.Data.Sort_Column_Selector import Standard.Table.Data.Sort_Column +import Standard.Table.Data.Match_Columns polyglot java import java.sql.JDBCType @@ -297,8 +298,8 @@ type Table other, a Duplicate_Output_Column_Names. 
> Example - rename_columns : Column_Mapping -> Problem_Behavior -> Table - rename_columns (column_map=(Column_Mapping.By_Position ["Column"])) (on_problems=Report_Warning) = + rename_columns : Column_Name_Mapping -> Problem_Behavior -> Table + rename_columns (column_map=(Column_Name_Mapping.By_Position ["Column"])) (on_problems=Report_Warning) = new_names = Table_Helpers.rename_columns internal_columns=self.internal_columns mapping=column_map on_problems=on_problems if new_names.is_error then new_names else new_columns = self.internal_columns.map_with_index i->c->(c.rename (new_names.at i)) @@ -870,12 +871,13 @@ type Table specific type and uses the default settings for that type to be used. Details of this type are below. - on_existing_file: Specified how to handle if the file already exists. - - column_matching: Specifies how to map columns against an existing file. - If `Column_Matching.By_Name` - the columns are mapped by name against - an existing file. - If `Column_Matching.By_Position` - the columns are mapped by position - against an existing file. - If there is a mismatch, then a `Column_Mismatch` error is raised. + - match_columns: Specifies how to match columns against an existing file. + If `Match_Columns.By_Name` - the columns are mapped by name against an + existing file. If there is a mismatch, then a `Column_Name_Mismatch` + error is raised. + If `Match_Columns.By_Position` - the columns are mapped by position + against an existing file. If there is a mismatch, then a + `Column_Count_Mismatch` error is raised. - on_problems: Specifies how to handle if a problem occurs, raising as a warning by default. The specific issues depend on the `File_Format` argument. 
@@ -914,10 +916,10 @@ type Table connection = Database.connect (SQLite (File.new "db.sqlite")) table = connection.access_table "Table" table.write (Enso_Project.data / "example_csv_output.csv") - write : File|Text -> File_Format -> Existing_File_Behavior -> Column_Mapping -> Problem_Behavior -> Nothing ! Column_Mismatch | Illegal_Argument_Error | File_Not_Found | Io_Error - write path format=File_Format.Auto on_existing_file=Existing_File_Behavior.Backup column_mapping=Column_Mapping.By_Name on_problems=Report_Warning = + write : File|Text -> File_Format -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> Nothing ! Column_Mismatch | Illegal_Argument_Error | File_Not_Found | Io_Error + write path format=File_Format.Auto on_existing_file=Existing_File_Behavior.Backup match_columns=Match_Columns.By_Name on_problems=Report_Warning = # TODO This should ideally be done in a streaming manner, or at least respect the row limits. - self.to_dataframe.write path format on_existing_file column_mapping on_problems + self.to_dataframe.write path format on_existing_file match_columns on_problems type Integrity_Error diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso index 34d6ac1894..0321519044 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso @@ -2,6 +2,7 @@ from Standard.Base import all import Standard.Table.Data.Table import Standard.Table.Data.Storage +import Standard.Base.Data.Ordering.Comparator polyglot java import org.enso.table.data.table.Column as Java_Column polyglot java import org.enso.table.operations.OrderBuilder @@ -996,16 +997,12 @@ type Column Examples.decimal_column.sort comparator=my_comparator sort : Sort_Direction -> Boolean -> (Any -> Any -> Ordering) | Nothing -> Column sort order=Sort_Direction.Ascending missing_last=True comparator=Nothing = - 
comparator_to_java cmp x y = cmp x y . to_sign order_bool = case order of Sort_Direction.Ascending -> True Sort_Direction.Descending -> False - java_cmp = case comparator of - Nothing -> Nothing - cmp -> comparator_to_java cmp + java_cmp = Comparator.new comparator rule = OrderBuilder.OrderRule.new self.java_column java_cmp order_bool missing_last - fallback_cmp = comparator_to_java .compare_to - mask = OrderBuilder.buildOrderMask [rule].to_array fallback_cmp + mask = OrderBuilder.buildOrderMask [rule].to_array new_col = self.java_column.applyMask mask Column new_col diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column_Mapping.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column_Name_Mapping.enso similarity index 98% rename from distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column_Mapping.enso rename to distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column_Name_Mapping.enso index e2ae1fdaf1..0d93d478db 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column_Mapping.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column_Name_Mapping.enso @@ -2,7 +2,7 @@ from Standard.Base import all ## Specifies a selection of columns from the table and the new name for them to become. -type Column_Mapping +type Column_Name_Mapping ## Selects columns based on their names. diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Match_Columns.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Match_Columns.enso new file mode 100644 index 0000000000..4741bbb95f --- /dev/null +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Match_Columns.enso @@ -0,0 +1,15 @@ +from Standard.Base import all + +## Specifies how to join columns in the table to existing data. +type Match_Columns + ## Columns are matched by Name against an existing file. 
+ A `Column_Name_Mismatch` error occurs if any column name in the existing + data could not be matched to the new data, or any column name in the new + data was not found in the existing data. + type By_Name + + ## Columns are matched by Position against the existing data. + Note: column names are not compared. + A `Column_Count_Mismatch` error occurs if the existing data has a + different number of columns than the table. + type By_Position diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index 76941d2870..8776b7c1cf 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -21,8 +21,9 @@ from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_For from Standard.Base.Data.Text.Text_Ordering as Text_Ordering_Module import Text_Ordering from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior, Report_Warning from Standard.Table.Error as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, Duplicate_Type_Selector +import Standard.Table.Data.Match_Columns -import Standard.Table.Data.Column_Mapping +import Standard.Table.Data.Column_Name_Mapping import Standard.Table.Data.Position import Standard.Table.Data.Sort_Column_Selector import Standard.Table.Data.Sort_Column @@ -459,9 +460,9 @@ type Table > Example Rename the first column to "FirstColumn" - table.rename_columns (Column_Mapping.By_Position ["FirstColumn"]) - rename_columns : Column_Mapping -> Problem_Behavior -> Table - rename_columns (column_map=(Column_Mapping.By_Position ["Column"])) (on_problems=Report_Warning) = + table.rename_columns (Column_Name_Mapping.By_Position ["FirstColumn"]) + rename_columns : Column_Name_Mapping -> Problem_Behavior -> Table + rename_columns (column_map=(Column_Name_Mapping.By_Position ["Column"])) (on_problems=Report_Warning) = 
new_names = Table_Helpers.rename_columns internal_columns=self.columns mapping=column_map on_problems=on_problems if new_names.is_error then new_names else new_columns = self.columns.map_with_index i->c->(c.rename (new_names.at i)) @@ -493,7 +494,7 @@ type Table Nothing -> Nothing _ -> val.to_text new_names = self.columns.map mapper - self.take_end (self.length - 1) . rename_columns (Column_Mapping.By_Position new_names) on_problems=on_problems + self.take_end (self.length - 1) . rename_columns (Column_Name_Mapping.By_Position new_names) on_problems=on_problems ## ALIAS group, summarize @@ -1059,12 +1060,13 @@ type Table specific type and uses the default settings for that type to be used. Details of this type are below. - on_existing_file: Specified how to handle if the file already exists. - - column_matching: Specifies how to map columns against an existing file. - If `Column_Matching.By_Name` - the columns are mapped by name against - an existing file. - If `Column_Matching.By_Position` - the columns are mapped by position - against an existing file. - If there is a mismatch, then a `Column_Mismatch` error is raised. + - match_columns: Specifies how to match columns against an existing file. + If `Match_Columns.By_Name` - the columns are mapped by name against an + existing file. If there is a mismatch, then a `Column_Name_Mismatch` + error is raised. + If `Match_Columns.By_Position` - the columns are mapped by position + against an existing file. If there is a mismatch, then a + `Column_Count_Mismatch` error is raised. - on_problems: Specifies how to handle if a problem occurs, raising as a warning by default. The specific issues depend on the `File_Format` argument. @@ -1108,9 +1110,9 @@ type Table import Standard.Table example_to_xlsx = Examples.inventory_table.write (Enso_Project.data / "example_xlsx_output.xlsx") File_Format.Excel - write : File|Text -> File_Format -> Existing_File_Behavior -> Column_Mapping -> Problem_Behavior -> Nothing ! 
Column_Mismatch | Illegal_Argument_Error | File_Not_Found | Io_Error - write path format=File_Format.Auto on_existing_file=Existing_File_Behavior.Backup column_mapping=Column_Mapping.By_Name on_problems=Report_Warning = - format.write_table (File.new path) self on_existing_file column_mapping on_problems + write : File|Text -> File_Format -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> Nothing ! Column_Mismatch | Illegal_Argument_Error | File_Not_Found | Io_Error + write path format=File_Format.Auto on_existing_file=Existing_File_Behavior.Backup match_columns=Match_Columns.By_Name on_problems=Report_Warning = + format.write_table (File.new path) self on_existing_file match_columns on_problems ## Creates a text representation of the table using the CSV format. to_csv : Text diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Error.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Error.enso index 135ba6e68c..b509e10a87 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Error.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Error.enso @@ -187,3 +187,18 @@ type Range_Exceeded message Range_Exceeded.to_display_text : Text Range_Exceeded.to_display_text = self.message + +## Indicates that the existing table has a different number of columns to the + new table. +type Column_Count_Mismatch expected actual + +Column_Count_Mismatch.to_display_text : Text +Column_Count_Mismatch.to_display_text = + "Expected " + self.expected.to_text + " columns, got " + self.actual.to_text + "." + +## Indicates that the existing table has a different set of column names to the + new table. 
+type Column_Name_Mismatch expected actual message + +Column_Name_Mismatch.to_display_text : Text +Column_Name_Mismatch.to_display_text = self.message diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso index ed20dfb25f..8c4eb8bd59 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso @@ -8,7 +8,7 @@ from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Prob import Standard.Table.Data.Position from Standard.Table.Error as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Column_Selectors, Input_Indices_Already_Matched, Too_Many_Column_Names_Provided, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, No_Input_Columns_Selected from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column_Selector, By_Name, By_Index, By_Column -import Standard.Table.Data.Column_Mapping +import Standard.Table.Data.Column_Name_Mapping import Standard.Table.Internal.Unique_Name_Strategy import Standard.Table.Internal.Problem_Builder import Standard.Base.Data.Ordering.Natural_Order @@ -129,7 +129,7 @@ reorder_columns internal_columns selector position on_problems = operation. By default, a warning is issued, but the operation proceeds. If set to `Report_Error`, the operation fails with a dataflow error. If set to `Ignore`, the operation proceeds without errors or warnings. 
-rename_columns : Vector -> Column_Mapping -> Problem_Behavior -> Map +rename_columns : Vector -> Column_Name_Mapping -> Problem_Behavior -> Map rename_columns internal_columns mapping on_problems = unique = Unique_Name_Strategy.new col_count = internal_columns.length @@ -158,9 +158,9 @@ rename_columns internal_columns mapping on_problems = new_names mapped = case mapping of - Column_Mapping.By_Column vec -> name_mapper (vec.map r-> [r.at 0 . name, r.at 1]) (Text_Matcher case_sensitive=True) - Column_Mapping.By_Name map ms -> name_mapper map.to_vector ms - Column_Mapping.By_Index map -> + Column_Name_Mapping.By_Column vec -> name_mapper (vec.map r-> [r.at 0 . name, r.at 1]) (Text_Matcher case_sensitive=True) + Column_Name_Mapping.By_Name map ms -> name_mapper map.to_vector ms + Column_Name_Mapping.By_Index map -> good_indices = here.validate_indices col_count map.keys problem_builder index_map = Map.from_vector <| good_indices.map p->[p.at 0, map.get (p.at 1)] @@ -170,7 +170,7 @@ rename_columns internal_columns mapping on_problems = if target.is_nothing then Nothing else unique.make_unique target new_names - Column_Mapping.By_Position vec -> + Column_Name_Mapping.By_Position vec -> good_names = case vec.length > col_count of True -> problem_builder.report_other_warning (Too_Many_Column_Names_Provided (vec.drop_start col_count)) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso index 2a37b7b0e2..e006840e2e 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso @@ -5,16 +5,20 @@ import Standard.Base.System.File.Option from Standard.Table.Io.File_Format import Infer import Standard.Table.Data.Table -from Standard.Table.Error as Error_Module import Invalid_Location, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Range_Exceeded, Existing_Data +from Standard.Table.Error as Error_Module import 
Invalid_Location, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Range_Exceeded, Existing_Data, Column_Count_Mismatch, Column_Name_Mismatch import Standard.Base.Error.Common as Errors +import Standard.Table.Data.Match_Columns polyglot java import org.enso.table.excel.ExcelRange as Java_Range polyglot java import org.enso.table.excel.ExcelHeaders polyglot java import org.enso.table.read.ExcelReader polyglot java import org.enso.table.write.ExcelWriter +polyglot java import org.enso.table.write.ExistingDataMode polyglot java import org.enso.table.error.ExistingDataException polyglot java import org.enso.table.error.RangeExceededException polyglot java import org.enso.table.error.InvalidLocationException +polyglot java import org.enso.table.error.ColumnCountMismatchException +polyglot java import org.enso.table.error.ColumnNameMismatchException polyglot java import java.lang.IllegalArgumentException polyglot java import java.lang.IllegalStateException @@ -195,28 +199,35 @@ read_excel file section headers on_problems xls_format=False = Excel_Range _ -> ExcelReader.readRange stream address.java_range (here.make_java_headers headers) skip_rows row_limit xls_format Text -> ExcelReader.readRangeByName stream address (here.make_java_headers headers) skip_rows row_limit xls_format - here.read_excel_file file reader + here.handle_reader file reader -write_excel : File -> Table -> Existing_File_Behavior -> (Sheet | Cell_Range) -> (Boolean|Infer) -> Problem_Behavior -> Boolean -write_excel file table on_existing_file section headers _ xls_format=False = - if on_existing_file == Existing_File_Behavior.Append then Errors.unimplemented "Appending to an existing File_Format.Delimited file is not implemented yet." else - workbook = if file.exists.not then ExcelWriter.createWorkbook xls_format else - here.read_excel_file file stream->(ExcelReader.getWorkbook stream xls_format) +## PRIVATE + Writes a Table to an Excel file. 
- replace = (on_existing_file == Existing_File_Behavior.Overwrite) || (on_existing_file == Existing_File_Behavior.Backup) - java_headers = here.make_java_headers headers - if ExcelWriter.getEnsoToTextCallback == Nothing then ExcelWriter.getEnsoToTextCallback (.to_text) - result = here.handle_writer <| case section of - Sheet sheet skip_rows row_limit -> - ExcelWriter.writeTableToSheet workbook sheet replace skip_rows table.java_table row_limit java_headers - Cell_Range address skip_rows row_limit -> case address of - Excel_Range _ -> ExcelWriter.writeTableToRange workbook address.java_range replace skip_rows table.java_table row_limit java_headers - Text -> ExcelWriter.writeTableToRange workbook address replace skip_rows table.java_table row_limit java_headers + Arguments: +write_excel : File -> Table -> Existing_File_Behavior -> (Sheet | Cell_Range) -> (Boolean|Infer) -> Match_Columns -> Problem_Behavior -> Boolean +write_excel file table on_existing_file section headers match_columns _ xls_format=False = + workbook = if file.exists.not then ExcelWriter.createWorkbook xls_format else + here.handle_reader file stream->(ExcelReader.getWorkbook stream xls_format) - if result.is_error then result else - write_stream stream = stream.with_java_stream java_stream-> - workbook.write java_stream - on_existing_file.write file write_stream + existing_data_mode = here.make_java_existing_data_mode on_existing_file match_columns + java_headers = here.make_java_headers headers + ExcelWriter.setEnsoToTextCallbackIfUnset (.to_text) + result = here.handle_writer <| case section of + Sheet sheet skip_rows row_limit -> + ExcelWriter.writeTableToSheet workbook sheet existing_data_mode skip_rows table.java_table row_limit java_headers + Cell_Range address skip_rows row_limit -> case address of + Excel_Range java_range -> ExcelWriter.writeTableToRange workbook java_range existing_data_mode skip_rows table.java_table row_limit java_headers + Text -> ExcelWriter.writeTableToRange workbook 
address existing_data_mode skip_rows table.java_table row_limit java_headers + + if result.is_error then result else + write_stream stream = stream.with_java_stream java_stream-> + workbook.write java_stream + case on_existing_file of + Existing_File_Behavior.Append -> + ## Special handling - have successfully added the extra sheet/range so now overwrite file with backup. + Existing_File_Behavior.Backup.write file write_stream + _ -> on_existing_file.write file write_stream ## PRIVATE prepare_reader_table : Problem_Behavior -> Any -> Table @@ -237,7 +248,19 @@ make_java_headers headers = case headers of False -> ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES ## PRIVATE -read_excel_file file reader = +make_java_existing_data_mode : Existing_File_Behavior -> Match_Columns -> ExcelWriter.ExistingDataMode +make_java_existing_data_mode on_existing_file match_columns = case on_existing_file of + Existing_File_Behavior.Error -> ExistingDataMode.ERROR + Existing_File_Behavior.Overwrite -> ExistingDataMode.REPLACE + Existing_File_Behavior.Backup -> ExistingDataMode.REPLACE + Existing_File_Behavior.Append -> case match_columns of + Match_Columns.By_Name -> ExistingDataMode.APPEND_BY_NAME + Match_Columns.By_Position -> ExistingDataMode.APPEND_BY_INDEX + +## PRIVATE + Handle and map the Java errors when reading an Excel file +handle_reader : File -> (Input_Stream -> (Table | Vector)) -> (Table | Vector) +handle_reader file reader = bad_format caught_panic = Error.throw (File.Io_Error file caught_panic.payload.cause.getMessage) handle_bad_format = Panic.catch UnsupportedFileFormatException handler=bad_format @@ -249,6 +272,7 @@ read_excel_file file reader = stream.with_java_stream reader ## PRIVATE + Handle and map the Java errors when writing an Excel file handle_writer ~writer = bad_location caught_panic = Error.throw (Invalid_Location caught_panic.payload.cause.getCause) handle_bad_location = Panic.catch InvalidLocationException handler=bad_location @@ -259,7 +283,23 @@ 
handle_writer ~writer = throw_existing_data caught_panic = Error.throw (Existing_Data caught_panic.payload.cause.getMessage) handle_existing_data = Panic.catch ExistingDataException handler=throw_existing_data + throw_column_count_mismatch caught_panic = + cause = caught_panic.payload.cause + Error.throw (Column_Count_Mismatch cause.getExpected cause.getActual) + handle_column_count_mismatch = Panic.catch ColumnCountMismatchException handler=throw_column_count_mismatch + + throw_column_name_mismatch caught_panic = + cause = caught_panic.payload.cause + Error.throw (Column_Name_Mismatch (Vector.Vector cause.getMissing) (Vector.Vector cause.getExtras) cause.getMessage) + handle_column_name_mismatch = Panic.catch ColumnNameMismatchException handler=throw_column_name_mismatch + + ## Illegal argument can occur if appending in an invalid mode + illegal_argument caught_panic = Error.throw (Illegal_Argument_Error caught_panic.payload.cause.getMessage caught_panic.payload.cause) + handle_illegal_argument = Panic.catch IllegalArgumentException handler=illegal_argument + + ## Should be impossible - occurs if no fallback serializer is provided. 
throw_illegal_state caught_panic = Panic.throw (Illegal_State_Error caught_panic.payload.cause.getMessage) handle_illegal_state = Panic.catch IllegalStateException handler=throw_illegal_state - handle_illegal_state <| handle_bad_location <| handle_range_exceeded <| handle_existing_data <| writer + handle_illegal_state <| handle_column_name_mismatch <| handle_column_count_mismatch <| handle_bad_location <| + handle_illegal_argument <| handle_range_exceeded <| handle_existing_data <| writer diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso index ff663e9623..cc91bff896 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso @@ -2,6 +2,7 @@ from Standard.Base import all import Standard.Table import Standard.Base.Error.Common as Errors +import Standard.Table.Data.Match_Columns from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding import Standard.Base.Runtime.Ref @@ -47,10 +48,10 @@ type Auto materialised.read file on_problems ## Implements the `Table.write` for this `File_Format`. - write_table : File -> Table -> Existing_File_Behavior -> Column_Mapping -> Problem_Behavior -> Nothing - write_table file table on_existing_file column_mapping on_problems = + write_table : File -> Table -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> Nothing + write_table file table on_existing_file match_columns on_problems = materialised = self.materialise file - materialised.write_table file table on_existing_file column_mapping on_problems + materialised.write_table file table on_existing_file match_columns on_problems ## Reads the file to a `Vector` of bytes. type Bytes @@ -62,7 +63,7 @@ type Bytes file.read_bytes ## Implements the `Table.write` for this `File_Format`. 
- write_table : File -> Table -> Existing_File_Behavior -> Column_Mapping -> Problem_Behavior -> Nothing + write_table : File -> Table -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> Nothing write_table _ _ _ _ _ = Error.throw (Illegal_Argument_Error "Saving a Table as Bytes is not supported.") @@ -76,7 +77,7 @@ type Plain_Text file.read_text self.encoding on_problems ## Implements the `Table.write` for this `File_Format`. - write_table : File -> Table -> Existing_File_Behavior -> Column_Mapping -> Problem_Behavior -> Nothing + write_table : File -> Table -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> Nothing write_table _ _ _ _ _ = Error.throw (Illegal_Argument_Error "Saving a Table as Plain_Text is not directly supported. You may convert the Table to a Text using `Text.from` and then use `Text.write` to write it.") @@ -116,7 +117,7 @@ type Delimited Delimited_Reader.read_file self file on_problems ## Implements the `Table.write` for this `File_Format`. - write_table : File -> Table -> Existing_File_Behavior -> Column_Mapping -> Problem_Behavior -> Nothing + write_table : File -> Table -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> Nothing write_table file table on_existing_file _ on_problems = Delimited_Writer.write_file table self file on_existing_file on_problems @@ -190,13 +191,14 @@ type Excel Excel_Module.read_excel file self.section self.headers on_problems format ## Implements the `Table.write` for this `File_Format`. 
- write_table : File -> Table -> Existing_File_Behavior -> Column_Mapping -> Problem_Behavior -> Nothing - write_table file table on_existing_file _ on_problems = + write_table : File -> Table -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> Nothing + write_table file table on_existing_file match_columns on_problems = format = Excel.is_xls_format self.xls_format file + case self.section of Excel_Module.Sheet_Names -> Error.throw (Illegal_Argument_Error "Sheet_Names cannot be used for `write`.") Excel_Module.Range_Names -> Error.throw (Illegal_Argument_Error "Range_Names cannot be used for `write`.") - _ -> Excel_Module.write_excel file table on_existing_file self.section self.headers on_problems format + _ -> Excel_Module.write_excel file table on_existing_file self.section self.headers match_columns on_problems format ## PRIVATE Resolve the xls_format setting to a boolean. diff --git a/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoFile.java b/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoFile.java index 20ea2a1927..f060d24550 100644 --- a/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoFile.java +++ b/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoFile.java @@ -19,6 +19,8 @@ import java.io.InputStream; import java.io.OutputStream; import java.nio.file.CopyOption; import java.nio.file.OpenOption; +import java.time.ZonedDateTime; +import java.time.ZoneOffset; /** * A wrapper for {@link TruffleFile} objects exposed to the language. 
For methods documentation @@ -64,6 +66,20 @@ public class EnsoFile implements TruffleObject { return truffleFile.exists(); } + @Builtin.Method(name = "creation_time_builtin") + @Builtin.WrapException(from = IOException.class, to = PolyglotError.class, propagate = true) + @Builtin.ReturningGuestObject + public ZonedDateTime getCreationTime() throws IOException { + return ZonedDateTime.ofInstant(truffleFile.getCreationTime().toInstant(), ZoneOffset.UTC); + } + + @Builtin.Method(name = "last_modified_time_builtin") + @Builtin.WrapException(from = IOException.class, to = PolyglotError.class, propagate = true) + @Builtin.ReturningGuestObject + public ZonedDateTime getLastModifiedTime() throws IOException { + return ZonedDateTime.ofInstant(truffleFile.getLastModifiedTime().toInstant(), ZoneOffset.UTC); + } + @Builtin.Method(name = "parent") public EnsoFile getParent() { return new EnsoFile(this.truffleFile.getParent()); diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java index 146cb6caee..87f5c473c1 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java @@ -1,7 +1,6 @@ package org.enso.table.data.column.storage; import java.util.BitSet; -import java.util.Comparator; import org.enso.table.data.column.operation.map.MapOpStorage; import org.enso.table.data.column.operation.map.MapOperation; @@ -217,8 +216,7 @@ public class BoolStorage extends Storage { @Override public Storage runZip(BoolStorage storage, Storage arg) { - if (arg instanceof BoolStorage) { - BoolStorage v = (BoolStorage) arg; + if (arg instanceof BoolStorage v) { BitSet missing = v.isMissing.get(0, storage.size); missing.or(storage.isMissing); BitSet out = v.values.get(0, storage.size); @@ -261,8 +259,7 @@ public class BoolStorage extends Storage { @Override 
public Storage runZip(BoolStorage storage, Storage arg) { - if (arg instanceof BoolStorage) { - BoolStorage v = (BoolStorage) arg; + if (arg instanceof BoolStorage v) { BitSet missing = v.isMissing.get(0, storage.size); missing.or(storage.isMissing); BitSet out = v.values.get(0, storage.size); @@ -302,12 +299,6 @@ public class BoolStorage extends Storage { return mask; } - @SuppressWarnings("unchecked") - @Override - public Comparator getDefaultComparator() { - return Comparator.naturalOrder(); - } - @Override public BoolStorage slice(int offset, int limit) { int newSize = Math.min(size - offset, limit); diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java index 9d67d3da86..cf0039eeaf 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java @@ -1,7 +1,6 @@ package org.enso.table.data.column.storage; import java.util.BitSet; -import java.util.Comparator; import org.enso.table.data.column.builder.object.NumericBuilder; import org.enso.table.data.column.operation.map.MapOpStorage; @@ -161,11 +160,6 @@ public class DoubleStorage extends NumericStorage { return new DoubleStorage(newData, total, newMissing); } - @Override - public Comparator getDefaultComparator() { - return Comparator.naturalOrder(); - } - public BitSet getIsMissing() { return isMissing; } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java index 9c041f7f99..bdc412b60f 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java @@ -1,6 +1,7 @@ package org.enso.table.data.column.storage; -import java.util.*; 
+import java.util.BitSet; +import java.util.OptionalLong; import java.util.stream.LongStream; import org.enso.table.data.column.builder.object.NumericBuilder; @@ -220,12 +221,6 @@ public class LongStorage extends NumericStorage { return new LongStorage(newData, total, newMissing); } - @SuppressWarnings("unchecked") - @Override - public Comparator getDefaultComparator() { - return Comparator.naturalOrder(); - } - public BitSet getIsMissing() { return isMissing; } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/ObjectStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/ObjectStorage.java index a0ca0df248..d9d5ae10ff 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/ObjectStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/ObjectStorage.java @@ -6,7 +6,6 @@ import org.enso.table.data.index.Index; import org.enso.table.data.mask.OrderMask; import java.util.BitSet; -import java.util.Comparator; /** A column storing arbitrary objects. */ public class ObjectStorage extends Storage { @@ -123,11 +122,6 @@ public class ObjectStorage extends Storage { return data; } - @Override - public Comparator getDefaultComparator() { - return null; - } - private static MapOpStorage buildOps() { MapOpStorage ops = new MapOpStorage<>(); ops.add( diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java index 962ea03776..804d44f002 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java @@ -266,12 +266,6 @@ public abstract class Storage { */ public abstract Storage countMask(int[] counts, int total); - /** - * @return a comparator comparing objects in this storage in a natural order. May be {@code null} - * to specify no natural ordering. 
- */ - public abstract Comparator getDefaultComparator(); - /** @return a copy of the storage containing a slice of the original data */ public abstract Storage slice(int offset, int limit); diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/StorageListView.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/StorageListView.java index 098e748848..375d845cd6 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/StorageListView.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/StorageListView.java @@ -1,7 +1,10 @@ package org.enso.table.data.column.storage; import java.lang.reflect.Array; -import java.util.*; +import java.util.Collection; +import java.util.List; +import java.util.ListIterator; +import java.util.Objects; /** * Wraps a storage in a list. Used for exposing a polyglot array interface back to Enso. This list @@ -70,10 +73,9 @@ public class StorageListView implements List { @SuppressWarnings("rawtypes") @Override public boolean equals(Object obj) { - if (!(obj instanceof List)) { + if (!(obj instanceof List that)) { return false; } - List that = (List) obj; if (that.size() != size()) { return false; } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java index 67610da3cf..022ecf736f 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java @@ -1,7 +1,6 @@ package org.enso.table.data.column.storage; import java.util.BitSet; -import java.util.Comparator; import org.enso.table.data.column.builder.object.StringBuilder; import org.enso.table.data.column.operation.map.MapOpStorage; import org.enso.table.data.column.operation.map.MapOperation; @@ -77,11 +76,6 @@ public class StringStorage extends ObjectStorage { return new 
StringStorage(storage.getData(), total); } - @Override - public Comparator getDefaultComparator() { - return Comparator.naturalOrder(); - } - private static MapOpStorage buildOps() { MapOpStorage t = ObjectStorage.ops.makeChild(); t.add( diff --git a/std-bits/table/src/main/java/org/enso/table/error/ColumnCountMismatchException.java b/std-bits/table/src/main/java/org/enso/table/error/ColumnCountMismatchException.java new file mode 100644 index 0000000000..2bedcc123a --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/error/ColumnCountMismatchException.java @@ -0,0 +1,20 @@ +package org.enso.table.error; + +public class ColumnCountMismatchException extends Exception { + private final int expected; + private final int actual; + + public ColumnCountMismatchException(int expected, int actual) { + super(String.format("Expected %d columns, got %d.", expected, actual)); + this.expected = expected; + this.actual = actual; + } + + public int getExpected() { + return expected; + } + + public int getActual() { + return actual; + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/error/ColumnNameMismatchException.java b/std-bits/table/src/main/java/org/enso/table/error/ColumnNameMismatchException.java new file mode 100644 index 0000000000..0e7657b0b8 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/error/ColumnNameMismatchException.java @@ -0,0 +1,30 @@ +package org.enso.table.error; + +import java.util.Arrays; + +public class ColumnNameMismatchException extends Exception { + private final String[] missing; + private final String[] extras; + + public ColumnNameMismatchException(String[] missingNames, String[] extraNames) { + super( + "Columns mismatch." + + (missingNames.length == 0 + ? "" + : " Missing from new data: " + Arrays.toString(missingNames)) + + (extraNames.length == 0 + ? 
"" + : " Extras in new data: " + Arrays.toString(extraNames))); + + this.missing = missingNames; + this.extras = extraNames; + } + + public String[] getMissing() { + return missing; + } + + public String[] getExtras() { + return extras; + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/excel/ExcelRange.java b/std-bits/table/src/main/java/org/enso/table/excel/ExcelRange.java index fd1173e878..de6ec7d084 100644 --- a/std-bits/table/src/main/java/org/enso/table/excel/ExcelRange.java +++ b/std-bits/table/src/main/java/org/enso/table/excel/ExcelRange.java @@ -1,6 +1,6 @@ package org.enso.table.excel; -import org.apache.poi.ss.usermodel.CellType; +import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.util.CellReference; import java.util.Optional; @@ -324,6 +324,18 @@ public class ExcelRange { return isWholeColumn() ? Integer.MAX_VALUE : bottomRow - topRow + 1; } + public int getLastNonEmptyRow(ExcelSheet sheet) { + int lastRow = + Math.min(sheet.getLastRow(), isWholeColumn() ? sheet.getLastRow() : bottomRow) + 1; + + while (lastRow > topRow + && sheet.get(lastRow - 1).isEmpty(leftColumn, isWholeRow() ? -1 : rightColumn)) { + lastRow--; + } + + return lastRow - 1; + } + public boolean isSingleCell() { return this.singleCell; } @@ -343,4 +355,14 @@ public class ExcelRange { return sheetNameEscaped + "!" + range; } + + public ExcelRange getAbsoluteRange(Workbook workbook) { + int topRow = isWholeColumn() ? 1 : getTopRow(); + int bottomRow = + isWholeColumn() ? workbook.getSpreadsheetVersion().getMaxRows() : getBottomRow(); + int leftColumn = isWholeRow() ? 1 : getLeftColumn(); + int rightColumn = + isWholeRow() ? 
workbook.getSpreadsheetVersion().getMaxColumns() : getRightColumn(); + return new ExcelRange(getSheetName(), leftColumn, topRow, rightColumn, bottomRow); + } } diff --git a/std-bits/table/src/main/java/org/enso/table/excel/ExcelRow.java b/std-bits/table/src/main/java/org/enso/table/excel/ExcelRow.java index d75158586f..3ff49eec2d 100644 --- a/std-bits/table/src/main/java/org/enso/table/excel/ExcelRow.java +++ b/std-bits/table/src/main/java/org/enso/table/excel/ExcelRow.java @@ -73,7 +73,9 @@ public class ExcelRow { public boolean isEmpty(int start, int end) { int currentEnd = end == -1 ? getLastColumn() : end; - for (int column = start; column <= currentEnd; column++) { + for (int column = Math.max(getFirstColumn(), start); + column <= Math.min(getLastColumn(), currentEnd); + column++) { if (!isEmpty(column)) { return false; } diff --git a/std-bits/table/src/main/java/org/enso/table/operations/OrderBuilder.java b/std-bits/table/src/main/java/org/enso/table/operations/OrderBuilder.java index af685196e1..9c49789ce3 100644 --- a/std-bits/table/src/main/java/org/enso/table/operations/OrderBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/operations/OrderBuilder.java @@ -40,19 +40,11 @@ public class OrderBuilder { * Builds an index-comparing comparator, that will sort array indexes according to the specified * ordering of the underlying column. * - * @param fallbackComparator a base value comparator, used in case the column does not define a - * natural ordering. 
* @return a comparator with properties described above */ - public Comparator toComparator(Comparator fallbackComparator) { + public Comparator toComparator() { final Storage storage = column.getStorage(); Comparator itemCmp = customComparator; - if (itemCmp == null) { - itemCmp = column.getStorage().getDefaultComparator(); - } - if (itemCmp == null) { - itemCmp = fallbackComparator; - } if (!ascending) { itemCmp = itemCmp.reversed(); } @@ -63,9 +55,7 @@ public class OrderBuilder { } final Comparator cmp = itemCmp; - Comparator result = - (i, j) -> cmp.compare(storage.getItemBoxed(i), storage.getItemBoxed(j)); - return result; + return (i, j) -> cmp.compare(storage.getItemBoxed(i), storage.getItemBoxed(j)); } } @@ -77,18 +67,12 @@ public class OrderBuilder { * elements are then internally reordered according to the second rule etc. The ordering is * stable, i.e. if no rule disambiguates the ordering, the original position in the storage is * used instead. - * @param fallbackComparator a comparator that should be used for columns that do not define a - * natural ordering. * @return an order mask that will result in sorting any storage according to the specified rules. 
*/ - public static OrderMask buildOrderMask( - List rules, Comparator fallbackComparator) { + public static OrderMask buildOrderMask(List rules) { int size = rules.get(0).column.getSize(); Comparator comparator = - rules.stream() - .map(rule -> rule.toComparator(fallbackComparator)) - .reduce(Comparator::thenComparing) - .get(); + rules.stream().map(OrderRule::toComparator).reduce(Comparator::thenComparing).get(); int[] positions = IntStream.range(0, size).boxed().sorted(comparator).mapToInt(i -> i).toArray(); diff --git a/std-bits/table/src/main/java/org/enso/table/util/ColumnMapper.java b/std-bits/table/src/main/java/org/enso/table/util/ColumnMapper.java new file mode 100644 index 0000000000..9889d4cb5a --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/util/ColumnMapper.java @@ -0,0 +1,66 @@ +package org.enso.table.util; + +import org.enso.table.data.table.Column; +import org.enso.table.data.table.Table; +import org.enso.table.error.ColumnCountMismatchException; +import org.enso.table.error.ColumnNameMismatchException; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; +import java.util.stream.Collectors; + +public class ColumnMapper { + /** + * Match the table's columns against the list of column names and return a reordered table. + * + * @param table the table of data to append. + * @param columnNames set of existing column names. + * @return a new table with columns reordered to match to the given column names. + * @throws ColumnNameMismatchException if the names in the new table do not match the existing + * set. 
+ */ + public static Table mapColumnsByName(Table table, String[] columnNames) + throws ColumnNameMismatchException { + Column[] columns = new Column[columnNames.length]; + Set extras = + Arrays.stream(table.getColumns()).map(Column::getName).collect(Collectors.toSet()); + Set missing = new HashSet<>(); + + for (int i = 0; i < columnNames.length; i++) { + String name = columnNames[i]; + Column column = table.getColumnByName(name); + if (column == null) { + missing.add(name); + } else { + extras.remove(name); + columns[i] = column; + } + } + + if (missing.isEmpty() && extras.isEmpty()) { + return new Table(columns); + } else { + throw new ColumnNameMismatchException( + missing.toArray(String[]::new), extras.toArray(String[]::new)); + } + } + + /** + * Check the column count of the table; if it matches return the table, otherwise throw an exception. + * + * @param table the table of data to append. + * @param columnCount existing column count. + * @return the input table if column count matches. + * @throws ColumnCountMismatchException if the column counts do not match.
+ */ + public static Table mapColumnsByPosition(Table table, int columnCount) + throws ColumnCountMismatchException { + int column_length = table.getColumns().length; + if (column_length == columnCount) { + return table; + } + + throw new ColumnCountMismatchException(columnCount, column_length); + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/write/ExcelWriter.java b/std-bits/table/src/main/java/org/enso/table/write/ExcelWriter.java index 0555afe632..1c78d709c5 100644 --- a/std-bits/table/src/main/java/org/enso/table/write/ExcelWriter.java +++ b/std-bits/table/src/main/java/org/enso/table/write/ExcelWriter.java @@ -15,13 +15,12 @@ import org.enso.table.data.column.storage.LongStorage; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.table.Column; import org.enso.table.data.table.Table; -import org.enso.table.error.ExistingDataException; -import org.enso.table.error.RangeExceededException; -import org.enso.table.error.InvalidLocationException; +import org.enso.table.error.*; import org.enso.table.excel.ExcelHeaders; import org.enso.table.excel.ExcelRange; import org.enso.table.excel.ExcelRow; import org.enso.table.excel.ExcelSheet; +import org.enso.table.util.ColumnMapper; import java.time.LocalDate; import java.time.LocalDateTime; @@ -38,12 +37,14 @@ public class ExcelWriter { return ensoToTextCallback; } - public static void getEnsoToTextCallback(Function callback) { - ensoToTextCallback = callback; + public static void setEnsoToTextCallbackIfUnset(Function callback) { + if (ensoToTextCallback == null) { + ensoToTextCallback = callback; + } } - public static void writeTableToSheet(Workbook workbook, int sheetIndex, boolean replace, int firstRow, Table table, Long rowLimit, ExcelHeaders.HeaderBehavior headers) - throws ExistingDataException, IllegalStateException { + public static void writeTableToSheet(Workbook workbook, int sheetIndex, ExistingDataMode existingDataMode, int firstRow, Table table, Long rowLimit, 
ExcelHeaders.HeaderBehavior headers) + throws InvalidLocationException, RangeExceededException, ExistingDataException, IllegalStateException, ColumnNameMismatchException, ColumnCountMismatchException { if (sheetIndex == 0 || sheetIndex > workbook.getNumberOfSheets()) { int i = 1; while (workbook.getSheet("Sheet" + i) != null) { @@ -56,13 +57,9 @@ public class ExcelWriter { } writeTableToSheet(workbook, sheet, firstRow, 1, table, rowLimit, headers != ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES); - } else if (replace) { - if (headers == ExcelHeaders.HeaderBehavior.INFER) { - ExcelSheet excelSheet = new ExcelSheet(workbook, sheetIndex); - headers = shouldWriteHeaders(excelSheet, firstRow + 1, 1, -1) - ? ExcelHeaders.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS - : ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES; - } + } else if (existingDataMode == ExistingDataMode.REPLACE) { + headers = headers != ExcelHeaders.HeaderBehavior.INFER ? headers : + shouldWriteHeaders(new ExcelSheet(workbook, sheetIndex), firstRow + 1, 1, -1); String sheetName = workbook.getSheetName(sheetIndex - 1); workbook.removeSheetAt(sheetIndex - 1); @@ -70,35 +67,39 @@ public class ExcelWriter { Sheet sheet = workbook.createSheet(sheetName); workbook.setSheetOrder(sheetName, sheetIndex - 1); writeTableToSheet(workbook, sheet, firstRow, 1, table, rowLimit, headers != ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES); - } else { + } else if (existingDataMode == ExistingDataMode.ERROR){ throw new ExistingDataException("Sheet already exists, and cannot be replaced in current mode."); + } else { + // In Append Mode, so lets go to a Range based approach. 
+ ExcelRange range = new ExcelRange(workbook.getSheetName(sheetIndex - 1), 1, 1); + writeTableToRange(workbook, range, existingDataMode, firstRow, table, rowLimit, headers); } } - public static void writeTableToSheet(Workbook workbook, String sheetName, boolean replace, int firstRow, Table table, Long rowLimit, ExcelHeaders.HeaderBehavior headers) - throws ExistingDataException, IllegalStateException { + public static void writeTableToSheet(Workbook workbook, String sheetName, ExistingDataMode existingDataMode, int firstRow, Table table, Long rowLimit, ExcelHeaders.HeaderBehavior headers) + throws InvalidLocationException, RangeExceededException, ExistingDataException, IllegalStateException, ColumnNameMismatchException, ColumnCountMismatchException { int sheetIndex = workbook.getNumberOfSheets() == 0 ? -1 : workbook.getSheetIndex(sheetName); if (sheetIndex == -1) { writeTableToSheet(workbook, workbook.createSheet(sheetName), firstRow, 1, table, rowLimit, headers != ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES); - } else if (replace) { - if (headers == ExcelHeaders.HeaderBehavior.INFER) { - ExcelSheet excelSheet = new ExcelSheet(workbook, sheetIndex); - headers = shouldWriteHeaders(excelSheet, firstRow + 1, 1, -1) - ? ExcelHeaders.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS - : ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES; - } + } else if (existingDataMode == ExistingDataMode.REPLACE) { + headers = headers != ExcelHeaders.HeaderBehavior.INFER ? 
headers : + shouldWriteHeaders(new ExcelSheet(workbook, sheetIndex), firstRow + 1, 1, -1); workbook.removeSheetAt(sheetIndex); Sheet sheet = workbook.createSheet(sheetName); workbook.setSheetOrder(sheetName, sheetIndex); writeTableToSheet(workbook, sheet, firstRow, 1, table, rowLimit, headers != ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES); + } else if (existingDataMode == ExistingDataMode.ERROR){ + throw new ExistingDataException("Sheet already exists, and cannot be replaced in current mode."); } else { - throw new ExistingDataException("Sheet '" + sheetName + "' already exists, and cannot be replaced in current mode."); + // In Append Mode, so switch to appending from the top left cell of the sheet as this is equivalent to appending to the sheet. + ExcelRange range = new ExcelRange(sheetName, 1, 1); + writeTableToRange(workbook, range, existingDataMode, firstRow, table, rowLimit, headers); } } - public static void writeTableToRange(Workbook workbook, String rangeNameOrAddress, boolean replace, int skipRows, Table table, Long rowLimit, ExcelHeaders.HeaderBehavior headers) - throws InvalidLocationException, IllegalStateException, RangeExceededException, ExistingDataException { + public static void writeTableToRange(Workbook workbook, String rangeNameOrAddress, ExistingDataMode existingDataMode, int skipRows, Table table, Long rowLimit, ExcelHeaders.HeaderBehavior headers) + throws InvalidLocationException, IllegalStateException, RangeExceededException, ExistingDataException, ColumnNameMismatchException, ColumnCountMismatchException { Name name = workbook.getName(rangeNameOrAddress); ExcelRange excelRange; try { @@ -106,11 +107,11 @@ public class ExcelWriter { } catch (IllegalArgumentException e) { throw new InvalidLocationException("Invalid range name or address '" + rangeNameOrAddress + "'."); } - writeTableToRange(workbook, excelRange, replace, skipRows, table, rowLimit, headers); + writeTableToRange(workbook, excelRange, existingDataMode, skipRows, table, 
rowLimit, headers); } - public static void writeTableToRange(Workbook workbook, ExcelRange range, boolean replace, int skipRows, Table table, Long rowLimit, ExcelHeaders.HeaderBehavior headers) - throws InvalidLocationException, IllegalStateException, RangeExceededException, ExistingDataException { + public static void writeTableToRange(Workbook workbook, ExcelRange range, ExistingDataMode existingDataMode, int skipRows, Table table, Long rowLimit, ExcelHeaders.HeaderBehavior headers) + throws InvalidLocationException, IllegalStateException, RangeExceededException, ExistingDataException, ColumnNameMismatchException, ColumnCountMismatchException { int sheetIndex = workbook.getSheetIndex(range.getSheetName()); if (sheetIndex == -1) { throw new InvalidLocationException("Unknown sheet '" + range.getSheetName() + "'."); @@ -127,66 +128,15 @@ public class ExcelWriter { } } - if (range.isSingleCell()) { - ExcelRange expanded = ExcelRange.expandSingleCell(range, sheet); + ExcelRange expanded = range.isSingleCell() ? ExcelRange.expandSingleCell(range, sheet) : range; + headers = headers != ExcelHeaders.HeaderBehavior.INFER ? headers : + shouldWriteHeaders(sheet, expanded.getTopRow(), expanded.getLeftColumn(), expanded.getRightColumn()); - if (headers == ExcelHeaders.HeaderBehavior.INFER) { - headers = shouldWriteHeaders(sheet, expanded.getTopRow(), expanded.getLeftColumn(), expanded.getRightColumn()) - ? ExcelHeaders.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS - : ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES; - } - - // Expand to cover required size - int rowCount = (headers == ExcelHeaders.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS ? 
1 : 0) + table.rowCount(); - if (expanded.getColumnCount() < table.getColumns().length || expanded.getRowCount() < rowCount) { - expanded = new ExcelRange( - expanded.getSheetName(), - expanded.getLeftColumn(), - expanded.getTopRow(), - Math.max(expanded.getRightColumn(), expanded.getLeftColumn() + table.getColumns().length - 1), - Math.max(expanded.getBottomRow(), expanded.getTopRow() + rowCount - 1)); - } - - checkExistingRange(workbook, expanded, replace, sheet); + if ((existingDataMode == ExistingDataMode.APPEND_BY_NAME || existingDataMode == ExistingDataMode.APPEND_BY_INDEX) && + rangeIsNotEmpty(workbook, expanded, sheet)) { + appendRangeWithTable(workbook, range, existingDataMode, table, rowLimit, headers, sheet, expanded); } else { - // Check Size of Range - int rowCount = Math.min(Math.min(workbook.getSpreadsheetVersion().getMaxRows() - range.getTopRow() + 1, rowLimit == null ? Integer.MAX_VALUE : rowLimit.intValue()), table.rowCount()); - if (range.getColumnCount() < table.getColumns().length || range.getRowCount() < rowCount) { - throw new RangeExceededException("Range is too small to fit all columns."); - } - - if (headers == ExcelHeaders.HeaderBehavior.INFER) { - headers = shouldWriteHeaders(sheet, range.getTopRow(), range.getLeftColumn(), range.isWholeRow() ? -1 : range.getRightColumn()) - ? ExcelHeaders.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS - : ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES; - } - - checkExistingRange(workbook, range, replace, sheet); - } - - writeTableToSheet(workbook, sheet.getSheet(), range.getTopRow() - 1, range.getLeftColumn(), table, rowLimit, headers != ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES); - } - - private static void checkExistingRange(Workbook workbook, ExcelRange range, boolean replace, ExcelSheet sheet) throws ExistingDataException { - int topRow = range.isWholeColumn() ? 1 : range.getTopRow(); - int bottomRow = range.isWholeColumn() ? 
workbook.getSpreadsheetVersion().getMaxRows() : range.getBottomRow(); - int leftColumn = range.isWholeRow() ? 1 : range.getLeftColumn(); - int rightColumn = range.isWholeRow() ? workbook.getSpreadsheetVersion().getMaxColumns() : range.getRightColumn(); - - for (int row = topRow; row <= bottomRow; row++) { - ExcelRow excelRow = sheet.get(row); - if (excelRow != null) { - for (int column = leftColumn; column <= rightColumn; column++) { - Cell cell = excelRow.get(column); - if (cell != null) { - if (replace) { - cell.setBlank(); - } else { - throw new ExistingDataException("Range is not empty, and cannot be replaced in current mode."); - } - } - } - } + updateRangeWithTable(workbook, expanded, range.isSingleCell(), existingDataMode, table, rowLimit, headers, sheet); } } @@ -199,6 +149,106 @@ public class ExcelWriter { return xls_format ? new HSSFWorkbook() : new XSSFWorkbook(); } + private static void appendRangeWithTable(Workbook workbook, ExcelRange range, ExistingDataMode existingDataMode, Table table, Long rowLimit, ExcelHeaders.HeaderBehavior headers, ExcelSheet sheet, ExcelRange expanded) + throws RangeExceededException, ExistingDataException, ColumnNameMismatchException, ColumnCountMismatchException { + Table mappedTable = switch (existingDataMode) { + case APPEND_BY_INDEX -> ColumnMapper.mapColumnsByPosition(table, expanded.getColumnCount()); + case APPEND_BY_NAME -> { + if (headers == ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES) { + throw new IllegalArgumentException("Cannot append by name when headers are not present in the existing data."); + } + String[] currentHeaders = sheet.get(expanded.getTopRow()).getCellsAsText(expanded.getLeftColumn(), expanded.getRightColumn()); + yield ColumnMapper.mapColumnsByName(table, currentHeaders); + } + default -> + throw new IllegalArgumentException("Internal Error: appendRangeWithTable called with illegal existing data mode '" + existingDataMode + "'."); + }; + + if (range.isSingleCell()) { + int bottomRow = 
expanded.getBottomRow(); + int requiredRows = Math.min(mappedTable.rowCount(), rowLimit == null ? Integer.MAX_VALUE : rowLimit.intValue()); + expanded = new ExcelRange(expanded.getSheetName(), expanded.getLeftColumn(), bottomRow + 1, expanded.getRightColumn(), bottomRow + requiredRows); + } else { + int finalRow = expanded.getLastNonEmptyRow(sheet); + if (finalRow == expanded.getBottomRow()) { + throw new RangeExceededException("The range is already full."); + } + + expanded = new ExcelRange(expanded.getSheetName(), expanded.getLeftColumn(), finalRow + 1, expanded.getRightColumn(), expanded.getBottomRow()); + } + + updateRangeWithTable(workbook, expanded, false, existingDataMode, mappedTable, rowLimit, ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES, sheet); + } + + private static void updateRangeWithTable(Workbook workbook, ExcelRange range, boolean singleCell, ExistingDataMode existingDataMode, Table table, Long rowLimit, ExcelHeaders.HeaderBehavior headers, ExcelSheet sheet) + throws RangeExceededException, ExistingDataException { + boolean writeHeaders = headers == ExcelHeaders.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS; + int requiredRows = Math.min(table.rowCount(), rowLimit == null ? Integer.MAX_VALUE : rowLimit.intValue()) + (writeHeaders ? 1 : 0); + + if (singleCell) { + range = new ExcelRange( + range.getSheetName(), + range.getLeftColumn(), + range.getTopRow(), + Math.max(range.getRightColumn(), range.getLeftColumn() + table.getColumns().length - 1), + Math.max(range.getBottomRow(), range.getTopRow() + requiredRows - 1)); + } + + int finalRow = range.isWholeColumn() ? 
workbook.getSpreadsheetVersion().getMaxRows() : range.getBottomRow(); + int availableRows = finalRow - range.getTopRow() + 1; + if (range.getColumnCount() < table.getColumns().length || availableRows < requiredRows) { + throw new RangeExceededException("Range is too small to fit all data."); + } + + if (existingDataMode == ExistingDataMode.REPLACE) { + clearRange(workbook, range, sheet); + } else if (rangeIsNotEmpty(workbook, range, sheet)) { + throw new ExistingDataException("Range is not empty, and cannot be replaced in current mode."); + } + + writeTableToSheet(workbook, sheet.getSheet(), range.getTopRow() - 1, range.getLeftColumn(), table, rowLimit, writeHeaders); + } + + /*** + * Checks if a range contains any non-empty cells. + * @param workbook The workbook to check. + * @param range The range to check. + * @param sheet Sheet containing the range. + * @return True if any row in the range has a non-empty cell, otherwise returns False. + */ + private static boolean rangeIsNotEmpty(Workbook workbook, ExcelRange range, ExcelSheet sheet) { + ExcelRange fullRange = range.getAbsoluteRange(workbook); + for (int row = fullRange.getTopRow(); row <= fullRange.getBottomRow(); row++) { + ExcelRow excelRow = sheet.get(row); + if (excelRow != null && !excelRow.isEmpty(fullRange.getLeftColumn(), fullRange.getRightColumn())) { + return true; + } + } + return false; + } + + /*** + * Clears a range of any content. + * @param workbook The workbook to clear. + * @param range The range to clear. + * @param sheet Sheet containing the range.
+ */ + private static void clearRange(Workbook workbook, ExcelRange range, ExcelSheet sheet) { + ExcelRange fullRange = range.getAbsoluteRange(workbook); + for (int row = fullRange.getTopRow(); row <= fullRange.getBottomRow(); row++) { + ExcelRow excelRow = sheet.get(row); + if (excelRow != null) { + for (int column = fullRange.getLeftColumn(); column <= fullRange.getRightColumn(); column++) { + Cell cell = excelRow.get(column); + if (cell != null) { + cell.setBlank(); + } + } + } + } + } + + private static void writeTableToSheet(Workbook workbook, Sheet sheet, int firstRow, int firstColumn, Table table, Long rowLimit, boolean headers) throws IllegalStateException { int rowCount = Math.min(Math.min(workbook.getSpreadsheetVersion().getMaxRows() - firstRow, rowLimit == null ? Integer.MAX_VALUE : rowLimit.intValue()), table.rowCount()); @@ -297,28 +347,30 @@ public class ExcelWriter { /** * Determines if headers should be written for the given range in {@code INFER} mode. * - * Unlike in the {@code ExcelReader}, if empty this will default to True. + * Unlike in the {@code ExcelReader}, if empty this will default to having headers. * @param excelSheet the Excel sheet to check. * @param topRow top row index (1-based) of the range to check. * @param startCol start column index (1-based) of the range to check. * @param endCol end column index (1-based) of the range to check. If -1 will continue until end of row. - * @return true if the range has headers. + * @return USE_FIRST_ROW_AS_HEADERS if headers should be written, otherwise EXCEL_COLUMN_NAMES. */ - private static boolean shouldWriteHeaders(ExcelSheet excelSheet, int topRow, int startCol, int endCol) { + private static ExcelHeaders.HeaderBehavior shouldWriteHeaders(ExcelSheet excelSheet, int topRow, int startCol, int endCol) { ExcelRow row = excelSheet.get(topRow); - // If the first row is missing or empty, return true as defaults to writing headers. + // If the first row is missing or empty, should write headers.
if (row == null || row.isEmpty(startCol, endCol)) { - return true; + return ExcelHeaders.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS; } - // If the first row is not empty, check if all text. + // If the first row is not empty but not all text, should not write headers. if (row.getCellsAsText(startCol, endCol) == null) { - return false; + return ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES; } - // If the second row is missing or empty or contains text, return false. + // If the second row is present and not all text, should write headers; otherwise should not. ExcelRow nextRow = excelSheet.get(topRow + 1); - return (nextRow != null && nextRow.getCellsAsText(startCol, endCol) == null); + return (nextRow != null && nextRow.getCellsAsText(startCol, endCol) == null) + ? ExcelHeaders.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS + : ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES; } } diff --git a/std-bits/table/src/main/java/org/enso/table/write/ExistingDataMode.java b/std-bits/table/src/main/java/org/enso/table/write/ExistingDataMode.java new file mode 100644 index 0000000000..21c30d641c --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/write/ExistingDataMode.java @@ -0,0 +1,20 @@ +package org.enso.table.write; + +public enum ExistingDataMode { + /** Throw an ExistingDataException if the target already contains data. */ + ERROR, + /** The current data is replaced by the new data. */ + REPLACE, + /** + * The new data is appended to the current data based on column position. If the new data has a + * different number of columns than the existing data, a ColumnCountMismatch exception is + * thrown. + */ + APPEND_BY_INDEX, + /** + * The new data is appended to the current data based on column name. If an existing column cannot + * be found or if a new column does not exist in the existing table, a ColumnNameMismatchException + * is thrown. 
+ */ + APPEND_BY_NAME +} diff --git a/test/Table_Tests/data/TestSheet.xlsx b/test/Table_Tests/data/TestSheet.xlsx index 8a80b27916..79252b1947 100644 Binary files a/test/Table_Tests/data/TestSheet.xlsx and b/test/Table_Tests/data/TestSheet.xlsx differ diff --git a/test/Table_Tests/data/TestSheetOld.xls b/test/Table_Tests/data/TestSheetOld.xls index ff8fac5a59..5d438e2f7f 100644 Binary files a/test/Table_Tests/data/TestSheetOld.xls and b/test/Table_Tests/data/TestSheetOld.xls differ diff --git a/test/Table_Tests/src/Common_Table_Spec.enso b/test/Table_Tests/src/Common_Table_Spec.enso index b1231ea5f1..c3a9608f11 100644 --- a/test/Table_Tests/src/Common_Table_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Spec.enso @@ -3,7 +3,7 @@ import Standard.Test import Standard.Test.Problems import Standard.Base.Error.Problem_Behavior -import Standard.Table.Data.Column_Mapping +import Standard.Table.Data.Column_Name_Mapping from Standard.Table.Error as Table_Errors import all from Standard.Table.Data.Column_Selector as Column_Selector_Module import all from Standard.Base.Data.Text.Text_Ordering as Text_Ordering_Module import Text_Ordering @@ -433,45 +433,45 @@ spec prefix table_builder test_selection pending=Nothing = Test.specify "should work as shown in the doc examples" <| expect_column_names ["FirstColumn", "beta", "gamma", "delta"] <| - table.rename_columns (Column_Mapping.By_Position ["FirstColumn"]) + table.rename_columns (Column_Name_Mapping.By_Position ["FirstColumn"]) Test.specify "should work by index" <| map = Map.from_vector [[0, "FirstColumn"], [-2, "Another"]] expect_column_names ["FirstColumn", "beta", "Another", "delta"] <| - table.rename_columns (Column_Mapping.By_Index map) + table.rename_columns (Column_Name_Mapping.By_Index map) Test.specify "should work by position" <| vec = ["one", "two", "three"] expect_column_names ["one", "two", "three", "delta"] <| - table.rename_columns (Column_Mapping.By_Position vec) + table.rename_columns 
(Column_Name_Mapping.By_Position vec) Test.specify "should work by name" <| map = Map.from_vector [["alpha", "FirstColumn"], ["delta", "Another"]] expect_column_names ["FirstColumn", "beta", "gamma", "Another"] <| - table.rename_columns (Column_Mapping.By_Name map (Text_Matcher True)) + table.rename_columns (Column_Name_Mapping.By_Name map (Text_Matcher True)) Test.specify "should work by name case-insensitively" <| map = Map.from_vector [["ALPHA", "FirstColumn"], ["DELTA", "Another"]] expect_column_names ["FirstColumn", "beta", "gamma", "Another"] <| - table.rename_columns (Column_Mapping.By_Name map (Text_Matcher Case_Insensitive)) + table.rename_columns (Column_Name_Mapping.By_Name map (Text_Matcher Case_Insensitive)) Test.specify "should work by name using regex" <| map = Map.from_vector [["a.*", "FirstColumn"]] expect_column_names ["FirstColumn", "beta", "gamma", "delta"] <| - table.rename_columns (Column_Mapping.By_Name map Regex_Matcher) + table.rename_columns (Column_Name_Mapping.By_Name map Regex_Matcher) Test.specify "should work by name using regex substitution" <| map = Map.from_vector [["a(.*)", "$1"]] expect_column_names ["lpha", "beta", "gamma", "delta"] <| - table.rename_columns (Column_Mapping.By_Name map Regex_Matcher) + table.rename_columns (Column_Name_Mapping.By_Name map Regex_Matcher) Test.specify "should work by column" <| vec = [[table.at "alpha", "FirstColumn"], [table.at "delta", "Another"]] expect_column_names ["FirstColumn", "beta", "gamma", "Another"] <| - table.rename_columns (Column_Mapping.By_Column vec) + table.rename_columns (Column_Name_Mapping.By_Column vec) Test.specify "should correctly handle problems: duplicate columns" <| - map = Column_Mapping.By_Column [[table.at "alpha", "FirstColumn"], [table.at "alpha", "Another"]] + map = Column_Name_Mapping.By_Column [[table.at "alpha", "FirstColumn"], [table.at "alpha", "Another"]] action = table.rename_columns map on_problems=_ tester = expect_column_names ["FirstColumn", "beta", 
"gamma", "delta"] problems = [Duplicate_Column_Selectors ["alpha"]] @@ -479,49 +479,49 @@ spec prefix table_builder test_selection pending=Nothing = Test.specify "should correctly handle problems: unmatched names" <| weird_name = '.*?-!@#!"' - map = Column_Mapping.By_Name (Map.from_vector [["alpha", "FirstColumn"], ["omicron", "Another"], [weird_name, "Fixed"]]) + map = Column_Name_Mapping.By_Name (Map.from_vector [["alpha", "FirstColumn"], ["omicron", "Another"], [weird_name, "Fixed"]]) action = table.rename_columns map on_problems=_ tester = expect_column_names ["FirstColumn", "beta", "gamma", "delta"] problems = [Missing_Input_Columns [weird_name, "omicron"]] Problems.test_problem_handling action problems tester Test.specify "should correctly handle problems: out of bounds indices" <| - map = Column_Mapping.By_Index (Map.from_vector [[0, "FirstColumn"], [-1, "Another"], [100, "Boo"], [-200, "Nothing"], [300, "Here"]]) + map = Column_Name_Mapping.By_Index (Map.from_vector [[0, "FirstColumn"], [-1, "Another"], [100, "Boo"], [-200, "Nothing"], [300, "Here"]]) action = table.rename_columns map on_problems=_ tester = expect_column_names ["FirstColumn", "beta", "gamma", "Another"] problems = [Column_Indexes_Out_Of_Range [-200, 100, 300]] Problems.test_problem_handling action problems tester Test.specify "should correctly handle problems: aliased indices" <| - map = Column_Mapping.By_Index (Map.from_vector [[1, "FirstColumn"], [-3, "Another"]]) + map = Column_Name_Mapping.By_Index (Map.from_vector [[1, "FirstColumn"], [-3, "Another"]]) action = table.rename_columns map on_problems=_ tester = expect_column_names ["alpha", "Another", "gamma", "delta"] problems = [Input_Indices_Already_Matched [1]] Problems.test_problem_handling action problems tester Test.specify "should correctly handle problems: invalid names ''" <| - map = Column_Mapping.By_Index (Map.from_vector [[1, ""]]) + map = Column_Name_Mapping.By_Index (Map.from_vector [[1, ""]]) action = table.rename_columns 
map on_problems=_ tester = expect_column_names ["alpha", "Column_1", "gamma", "delta"] problems = [Invalid_Output_Column_Names [""]] Problems.test_problem_handling action problems tester Test.specify "should correctly handle problems: invalid names Nothing" <| - map = Column_Mapping.By_Position ["alpha", Nothing] + map = Column_Name_Mapping.By_Position ["alpha", Nothing] action = table.rename_columns map on_problems=_ tester = expect_column_names ["alpha", "Column_1", "gamma", "delta"] problems = [Invalid_Output_Column_Names [Nothing]] Problems.test_problem_handling action problems tester Test.specify "should correctly handle problems: duplicate names" <| - map = Column_Mapping.By_Position ["Test", "Test", "Test", "Test"] + map = Column_Name_Mapping.By_Position ["Test", "Test", "Test", "Test"] action = table.rename_columns map on_problems=_ tester = expect_column_names ["Test", "Test_1", "Test_2", "Test_3"] problems = [Duplicate_Output_Column_Names ["Test", "Test", "Test"]] Problems.test_problem_handling action problems tester Test.specify "should correctly handle problems: too many input names" <| - map = Column_Mapping.By_Position ["A", "B", "C", "D", "E", "F"] + map = Column_Name_Mapping.By_Position ["A", "B", "C", "D", "E", "F"] action = table.rename_columns map on_problems=_ tester = expect_column_names ["A", "B", "C", "D"] problems = [Too_Many_Column_Names_Provided ["E", "F"]] diff --git a/test/Table_Tests/src/Excel_Spec.enso b/test/Table_Tests/src/Excel_Spec.enso index 0e20e8c439..3d0e447dbf 100644 --- a/test/Table_Tests/src/Excel_Spec.enso +++ b/test/Table_Tests/src/Excel_Spec.enso @@ -3,12 +3,14 @@ import Standard.Base.System.File.Existing_File_Behavior from Standard.Base.System.File import File_Already_Exists_Error import Standard.Base.Data.Time.Date +import Standard.Table import Standard.Table.Io.File_Read import Standard.Table.Io.File_Format -import Standard.Table.Data.Column_Mapping +import Standard.Table.Data.Column_Name_Mapping +import 
Standard.Table.Data.Match_Columns from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Index from Standard.Table.Io.Excel import Excel_Range, Sheet_Names, Range_Names, Sheet, Cell_Range -from Standard.Table.Error as Table_Errors import Invalid_Output_Column_Names, Duplicate_Output_Column_Names, Invalid_Location, Range_Exceeded, Existing_Data +from Standard.Table.Error as Table_Errors import Invalid_Output_Column_Names, Duplicate_Output_Column_Names, Invalid_Location, Range_Exceeded, Existing_Data, Column_Count_Mismatch, Column_Name_Mismatch from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter import Standard.Test @@ -75,6 +77,7 @@ spec_write suffix test_sheet_name = https://www.pivotaltracker.com/story/show/181755990 no_dates = File_Format.Delimited "," value_formatter=(Data_Formatter date_formats=[] time_formats=[] datetime_formats=[]) out = Enso_Project.data / ('out.' + suffix) + out_bak = Enso_Project.data / ('out.' + suffix + '.bak') table = Enso_Project.data/'varied_column.csv' . read (format = no_dates) clothes = Enso_Project.data/'clothes.csv' . read (format = no_dates) sub_clothes = clothes.select_columns (By_Index [0, 1]) @@ -86,6 +89,29 @@ spec_write suffix test_sheet_name = written.should_equal table out.delete_if_exists + Test.specify 'should write a table to non-existent file in append mode as a new sheet with headers' <| + out.delete_if_exists + table.write out on_existing_file=Existing_File_Behavior.Append + written = out.read + written.should_equal table + out.delete_if_exists + + Test.specify 'should write a table to existing file in overwrite mode as a new sheet with headers' <| + out.delete_if_exists + (Enso_Project.data / test_sheet_name) . 
copy_to out + table.write out (File_Format.Excel (Sheet "Another")) on_existing_file=Existing_File_Behavior.Overwrite + written = out.read (File_Format.Excel (Sheet "Another")) + written.should_equal table + out.delete_if_exists + + Test.specify 'should write a table to existing file in overwrite mode as a new sheet without headers' <| + out.delete_if_exists + (Enso_Project.data / test_sheet_name) . copy_to out + table.write out (File_Format.Excel (Sheet "NoHeaders")) on_existing_file=Existing_File_Behavior.Overwrite + written = out.read (File_Format.Excel (Sheet "NoHeaders")) + written.should_equal (table.rename_columns (Column_Name_Mapping.By_Position ['A', 'B', 'C', 'D', 'E', 'F'])) + out.delete_if_exists + Test.specify 'should create new sheets at the start if index is 0' <| out.delete_if_exists table.write out (File_Format.Excel (Sheet 0)) @@ -139,24 +165,175 @@ spec_write suffix test_sheet_name = Test.specify 'should result in Existing_Data error if in Error mode and trying to replace' <| out.delete_if_exists (Enso_Project.data / test_sheet_name) . copy_to out + lmd = out.last_modified_time sub_clothes.write out (File_Format.Excel (Sheet 1)) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data sub_clothes.write out (File_Format.Excel (Sheet "Another")) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data sub_clothes.write out (File_Format.Excel (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data sub_clothes.write out (File_Format.Excel (Cell_Range "Sheet1!A9")) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data + out.last_modified_time.should_equal lmd out.delete_if_exists Test.specify 'should not allow adding a new sheet if in Error mode and not clashing' <| out.delete_if_exists (Enso_Project.data / test_sheet_name) . 
copy_to out + lmd = out.last_modified_time sub_clothes.write out (File_Format.Excel (Sheet "Testing")) on_existing_file=Existing_File_Behavior.Error . should_fail_with File_Already_Exists_Error + out.last_modified_time.should_equal lmd + out.delete_if_exists Test.specify 'should write a table to non-existent file as a new sheet without headers' <| out.delete_if_exists table.write out (File_Format.Excel (Sheet "Sheet1") headers=False) written = out.read - written.should_equal (table.rename_columns (Column_Mapping.By_Position ['A', 'B', 'C', 'D', 'E', 'F'])) + written.should_equal (table.rename_columns (Column_Name_Mapping.By_Position ['A', 'B', 'C', 'D', 'E', 'F'])) out.delete_if_exists + Test.specify 'should be able to append to a sheet by name' <| + out.delete_if_exists + (Enso_Project.data / test_sheet_name) . copy_to out + extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['CC',[True, False]], ['DD', ['2022-01-20', '2022-01-21']]] + expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]] + extra_another.write out (File_Format.Excel (Sheet "Another")) on_existing_file=Existing_File_Behavior.Append + written = out.read (File_Format.Excel (Sheet "Another")) . select_columns (By_Index [0, 1, 2]) + written.should_equal expected + out.delete_if_exists + + Test.specify 'should be able to append to a sheet by position' <| + out.delete_if_exists + (Enso_Project.data / test_sheet_name) . copy_to out + extra_another = Table.new [['A', ['d', 'e']], ['B',[4, 5]], ['C',[True, False]], ['D', ['2022-01-20', '2022-01-21']]] + expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]] + extra_another.write out (File_Format.Excel (Sheet "Another")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position + written = out.read (File_Format.Excel (Sheet "Another")) . 
select_columns (By_Index [0, 1, 2]) + written.should_equal expected + out.delete_if_exists + + Test.specify 'should be able to append to a sheet by name out of order' <| + out.delete_if_exists + (Enso_Project.data / test_sheet_name) . copy_to out + extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']]] + expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]] + extra_another.write out (File_Format.Excel (Sheet "Another")) on_existing_file=Existing_File_Behavior.Append + written = out.read (File_Format.Excel (Sheet "Another")) . select_columns (By_Index [0, 1, 2]) + written.should_equal expected + out.delete_if_exists + + Test.specify 'should be able to append to a single cell by name' <| + out.delete_if_exists + (Enso_Project.data / test_sheet_name) . copy_to out + extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['CC',[True, False]], ['DD', ['2022-01-20', '2022-01-21']]] + expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]] + extra_another.write out (File_Format.Excel (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Append + written = out.read (File_Format.Excel (Sheet "Another")) . select_columns (By_Index [0, 1, 2]) + written.should_equal expected + out.delete_if_exists + + Test.specify 'should be able to append to a single cell by position' <| + out.delete_if_exists + (Enso_Project.data / test_sheet_name) . 
copy_to out + extra_another = Table.new [['A', ['d', 'e']], ['B',[4, 5]], ['C',[True, False]], ['D', ['2022-01-20', '2022-01-21']]] + expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]] + extra_another.write out (File_Format.Excel (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position + written = out.read (File_Format.Excel (Sheet "Another")) . select_columns (By_Index [0, 1, 2]) + written.should_equal expected + out.delete_if_exists + + Test.specify 'should be able to append to a single cell by name out of order' <| + out.delete_if_exists + (Enso_Project.data / test_sheet_name) . copy_to out + extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']]] + expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]] + extra_another.write out (File_Format.Excel (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Append + written = out.read (File_Format.Excel (Sheet "Another")) . select_columns (By_Index [0, 1, 2]) + written.should_equal expected + out.delete_if_exists + + Test.specify 'should be able to append to a range by name' <| + out.delete_if_exists + (Enso_Project.data / test_sheet_name) . copy_to out + extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['CC',[True, False]], ['DD', ['2022-01-20', '2022-01-21']]] + expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]] + extra_another.write out (File_Format.Excel (Cell_Range "Another!A1:D6")) on_existing_file=Existing_File_Behavior.Append + written = out.read (File_Format.Excel (Sheet "Another")) . 
select_columns (By_Index [0, 1, 2]) + written.should_equal expected + out.delete_if_exists + + Test.specify 'should be able to append to a range by position' <| + out.delete_if_exists + (Enso_Project.data / test_sheet_name) . copy_to out + extra_another = Table.new [['A', ['d', 'e']], ['B',[4, 5]], ['C',[True, False]], ['D', ['2022-01-20', '2022-01-21']]] + expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]] + extra_another.write out (File_Format.Excel (Cell_Range "Another!A1:D6")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position + written = out.read (File_Format.Excel (Sheet "Another")) . select_columns (By_Index [0, 1, 2]) + written.should_equal expected + out.delete_if_exists + + Test.specify 'should be able to append to a range by name out of order' <| + out.delete_if_exists + (Enso_Project.data / test_sheet_name) . copy_to out + extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']]] + expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]] + extra_another.write out (File_Format.Excel (Cell_Range "Another!A1:D6")) on_existing_file=Existing_File_Behavior.Append + written = out.read (File_Format.Excel (Sheet "Another")) . select_columns (By_Index [0, 1, 2]) + written.should_equal expected + out.delete_if_exists + + Test.specify 'should fail to append to a sheet by name if missing columns' <| + out.delete_if_exists + (Enso_Project.data / test_sheet_name) . copy_to out + extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']]] + extra_another.write out (File_Format.Excel (Sheet "Another")) on_existing_file=Existing_File_Behavior.Append . 
should_fail_with Column_Name_Mismatch + out.delete_if_exists + + Test.specify 'should fail to append to a sheet by name if extra columns' <| + out.delete_if_exists + (Enso_Project.data / test_sheet_name) . copy_to out + lmd = out.last_modified_time + extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']], ['EE', ['2022-01-20', '2022-01-21']]] + extra_another.write out (File_Format.Excel (Sheet "Another")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Column_Name_Mismatch + out.last_modified_time.should_equal lmd + out.delete_if_exists + + Test.specify 'should fail to append to a sheet by name if no headers' <| + out.delete_if_exists + (Enso_Project.data / test_sheet_name) . copy_to out + lmd = out.last_modified_time + extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']], ['EE', ['2022-01-20', '2022-01-21']]] + extra_another.write out (File_Format.Excel (Sheet "NoHeaders")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Illegal_Argument_Error + extra_another.write out (File_Format.Excel (Sheet "Another") False) on_existing_file=Existing_File_Behavior.Append . should_fail_with Illegal_Argument_Error + out.last_modified_time.should_equal lmd + out.delete_if_exists + + Test.specify 'should fail to append to a sheet by position if too few columns' <| + out.delete_if_exists + (Enso_Project.data / test_sheet_name) . copy_to out + lmd = out.last_modified_time + extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']]] + extra_another.write out (File_Format.Excel (Sheet "Another")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position . 
should_fail_with Column_Count_Mismatch + out.last_modified_time.should_equal lmd + out.delete_if_exists + + Test.specify 'should fail to append to a sheet by position if too many columns' <| + out.delete_if_exists + (Enso_Project.data / test_sheet_name) . copy_to out + lmd = out.last_modified_time + extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']], ['EE', ['2022-01-20', '2022-01-21']]] + extra_another.write out (File_Format.Excel (Sheet "Another")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position . should_fail_with Column_Count_Mismatch + out.last_modified_time.should_equal lmd + out.delete_if_exists + + Test.specify 'should fail to append to a range by name if not large enough' <| + out.delete_if_exists + (Enso_Project.data / test_sheet_name) . copy_to out + lmd = out.last_modified_time + extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['CC',[True, False]], ['DD', ['2022-01-20', '2022-01-21']]] + extra_another.write out (File_Format.Excel (Cell_Range "Another!A1:D5")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Range_Exceeded + out.last_modified_time.should_equal lmd + out.delete_if_exists + + out.delete_if_exists + out_bak.delete_if_exists + spec = Test.group 'Excel Range' <| check_range excel_range sheet_name tlbr_vector single_cell=False = @@ -289,7 +466,7 @@ spec = check_table <| File.read xls_path File_Format.Excel Test.specify "should let you read the sheet names" <| - sheet_names = ["Sheet1", "Another"] + sheet_names = ["Sheet1", "Another", "NoHeaders"] xlsx_sheet.read (File_Format.Excel Sheet_Names) . should_equal sheet_names xls_sheet.read (File_Format.Excel Sheet_Names) . should_equal sheet_names @@ -420,5 +597,4 @@ spec = here.spec_write "xlsx" 'TestSheet.xlsx' here.spec_write "xls" 'TestSheetOld.xls' - main = Test.Suite.run_main here.spec