Add scaffolding for Table.write function (#3521)

Implements https://www.pivotaltracker.com/story/show/182309559. This task adds common scaffolding for `Table.write`, so that the particular implementations for the Delimited and Excel file formats can be done in parallel.
2024-11-23 08:08:34 +03:00 · 2022-06-14 13:29:03 +02:00 · 2022-06-14 13:29:03 +02:00 · e83c36d9d6
commit e83c36d9d6
parent 825eaed4f5
9 changed files with 139 additions and 159 deletions
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso
@ -9,6 +9,8 @@ import Standard.Table.Data.Table as Materialized_Table
 import Standard.Table.Internal.Java_Exports
 import Standard.Table.Internal.Table_Helpers
 import Standard.Table.Internal.Problem_Builder
+import Standard.Table.Io.File_Format
+import Standard.Base.System.File.Existing_File_Behavior

 import Standard.Table.Data.Aggregate_Column
 import Standard.Table.Internal.Aggregate_Column_Helper
@ -918,6 +920,65 @@ type Table
            False -> Error.throw <| Illegal_State_Error "The update unexpectedly affected "+affected_rows.to_text+" rows."
            True -> Nothing

+    ## This function writes the table into a file.
+
+       The specific behavior of the various `File_Format`s is specified below.
+
+       Arguments:
+       - path: The path to the output file.
+       - format: The format of the file.
+         If `File_Format.Auto` is specified, the file extension determines the
+         specific type, and the default settings for that type are used.
+         Details of this type are below.
+       - on_existing_file: Specifies what to do if the file already exists.
+       - column_mapping: Specifies how to map columns against an existing file.
+         If `Column_Mapping.By_Name` - the columns are mapped by name against
+         an existing file.
+         If `Column_Mapping.By_Position` - the columns are mapped by position
+         against an existing file.
+         If there is a mismatch, then a `Column_Mismatch` error is raised.
+       - on_problems: Specifies how to handle problems that occur; by default
+         they are reported as warnings. The specific issues depend on the
+         `File_Format` argument.
+
+       Returns:
+       - If an unsupported `File_Format` is specified, an
+         `Illegal_Argument_Error` is raised.
+       - If the path to the parent location cannot be found or the filename is
+         invalid, a `File_Not_Found` is raised.
+       - If another IO error occurs, such as access denied, an `Io_Error` is
+         raised.
+       - If appending and the columns do not match, a `Column_Mismatch` is
+         raised.
+       - Other specific errors or warnings that can be raised depend on the
+         format argument.
+       - Otherwise, the table is written to the file following the rules of
+         the format parameter.
+
+       ? `File_Format` write behaviors
+
+         - `File_Format.Auto`: The file format is determined by the file
+           extension of the path argument.
+         - `File_Format.Bytes` and `File_Format.Plain_Text`: The Table does
+           not support these types in the `write` function. If passed as
+           format, an `Illegal_Argument_Error` is raised. To write out the
+           table as plain text, the user needs to call the `Text.from Table`
+           method and then use the `Text.write` function.
+
+       > Example
+         Write a database table to a CSV file.
+
+             import Standard.Examples
+             import Standard.Database
+
+             example_to_csv =
+                 connection = Database.open_sqlite_file (File.new "db.sqlite")
+                 table = connection.access_table "Table"
+                 table.write (Enso_Project.data / "example_csv_output.csv")
+    write : File|Text -> File_Format -> Existing_File_Behavior -> Column_Mapping -> Problem_Behavior -> Nothing ! Column_Mismatch | Illegal_Argument_Error | File_Not_Found | Io_Error
+    write path format=File_Format.Auto on_existing_file=Existing_File_Behavior.Backup column_mapping=Column_Mapping.By_Name on_problems=Report_Warning =
+        # TODO This should ideally be done in a streaming manner, or at least respect the row limits.
+        this.to_dataframe.write path format on_existing_file column_mapping on_problems

 ## Represents a table with grouped rows.
 type Aggregate_Table
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso
@ -5,8 +5,9 @@ import Standard.Table.Data.Column
 import Standard.Visualization
 from Standard.Base.Data.Time.Date as Date_Module import Date
 import Standard.Table.Io.Spreadsheet_Write_Mode
-import Standard.Table.Io.Format
 import Standard.Table.Io.File_Format
+import Standard.Base.System.File
+import Standard.Base.System.File.Existing_File_Behavior
 import Standard.Table.Internal.Table_Helpers
 import Standard.Table.Internal.Aggregate_Column_Helper
 import Standard.Table.Internal.Parse_Values_Helper
@ -1308,45 +1309,69 @@ type Table
    write_json : File.File -> Nothing
    write_json file = this.to_json.to_text.write file

-    ## UNSTABLE
+    ## This function writes a table from memory into a file.

-       Writes the table to a specified file with the given serialization
-       settings.
+       The specific behavior of the various `File_Format`s is specified below.

       Arguments:
-       - file: the file to write to.
-       - format: the format settings to use.
+       - path: The path to the output file.
+       - format: The format of the file.
+         If `File_Format.Auto` is specified, the file extension determines the
+         specific type, and the default settings for that type are used.
+         Details of this type are below.
+       - on_existing_file: Specifies what to do if the file already exists.
+       - column_mapping: Specifies how to map columns against an existing file.
+         If `Column_Mapping.By_Name` - the columns are mapped by name against
+         an existing file.
+         If `Column_Mapping.By_Position` - the columns are mapped by position
+         against an existing file.
+         If there is a mismatch, then a `Column_Mismatch` error is raised.
+       - on_problems: Specifies how to handle problems that occur; by default
+         they are reported as warnings. The specific issues depend on the
+         `File_Format` argument.

+       Returns:
+       - If an unsupported `File_Format` is specified, an
+         `Illegal_Argument_Error` is raised.
+       - If the path to the parent location cannot be found or the filename is
+         invalid, a `File_Not_Found` is raised.
+       - If another IO error occurs, such as access denied, an `Io_Error` is
+         raised.
+       - If appending and the columns do not match, a `Column_Mismatch` is
+         raised.
+       - Other specific errors or warnings that can be raised depend on the
+         format argument.
+       - Otherwise, the table is written to the file following the rules of
+         the format parameter.
+
+       ? `File_Format` write behaviors
+
+         - `File_Format.Auto`: The file format is determined by the file
+           extension of the path argument.
+         - `File_Format.Bytes` and `File_Format.Plain_Text`: The Table does
+           not support these types in the `write` function. If passed as
+           format, an `Illegal_Argument_Error` is raised. To write out the
+           table as plain text, the user needs to call the `Text.from Table`
+           method and then use the `Text.write` function.

       > Example
         Write a table to a CSV file, without writing the header.

             import Standard.Examples
-             import Table
+             import Standard.Table

-             example_to_csv = Examples.inventory_table.write (Enso_Project.data / "example_csv_output.csv") (Table.Format.Csv include_header=False)
+             example_to_csv = Examples.inventory_table.write (Enso_Project.data / "example_csv_output.csv") (File_Format.Delimited delimiter="," headers=False)

       > Example
-         Write a table to an XLSX file, without writing the header.
+         Write a table to an XLSX file.

             import Standard.Examples
-             import Table
-
-             example_to_xlsx = Examples.inventory_table.write (Enso_Project.data / "example_xlsx_output.xlsx") (Table.Format.Xlsx include_header=False)
-
-       > Example
-         Write a table to a JSON file.
-
-             import Standard.Examples
-             import Table
-
-             example_to_json = Examples.inventory_table.write (Enso_Project.data / "example_output.json") Table.Format.Json
-    write : File.File -> Format.Format -> Nothing
-    write file format = case format of
-        Format.Csv header quote sep line max -> this.write_csv file header quote sep line max
-        Format.Xlsx sheet mode header max -> this.write_xlsx file sheet mode header max
-        Format.Json -> this.write_json file
+             import Standard.Table

+             example_to_xlsx = Examples.inventory_table.write (Enso_Project.data / "example_xlsx_output.xlsx") File_Format.Excel
+    write : File|Text -> File_Format -> Existing_File_Behavior -> Column_Mapping -> Problem_Behavior -> Nothing ! Column_Mismatch | Illegal_Argument_Error | File_Not_Found | Io_Error
+    write path format=File_Format.Auto on_existing_file=Existing_File_Behavior.Backup column_mapping=Column_Mapping.By_Name on_problems=Report_Warning =
+        format.write_table (File.new path) this on_existing_file column_mapping on_problems

 ## UNSTABLE

--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Format.enso
@ -45,6 +45,12 @@ type Auto
        materialised = this.materialise file
        materialised.read file on_problems

+    ## Implements the `Table.write` for this `File_Format`.
+    write_table : File -> Table -> Existing_File_Behavior -> Column_Mapping -> Problem_Behavior -> Nothing
+    write_table file table on_existing_file column_mapping on_problems =
+        materialised = this.materialise file
+        materialised.write_table file table on_existing_file column_mapping on_problems
+
 ## Reads the file to a `Vector` of bytes.
 type Bytes
    type Bytes
@ -54,6 +60,11 @@ type Bytes
    read file _ =
        file.read_bytes

+    ## Implements the `Table.write` for this `File_Format`.
+    write_table : File -> Table -> Existing_File_Behavior -> Column_Mapping -> Problem_Behavior -> Nothing
+    write_table _ _ _ _ _ =
+        Error.throw (Illegal_Argument_Error "Saving a Table as Bytes is not supported.")
+
 ## Reads the file to a `Text` with specified encoding.
 type Plain_Text
    type Plain_Text (encoding:Encoding=Encoding.utf_8)
@ -63,6 +74,11 @@ type Plain_Text
    read file on_problems =
        file.read_text this.encoding on_problems

+    ## Implements the `Table.write` for this `File_Format`.
+    write_table : File -> Table -> Existing_File_Behavior -> Column_Mapping -> Problem_Behavior -> Nothing
+    write_table _ _ _ _ _ =
+        Error.throw (Illegal_Argument_Error "Saving a Table as Plain_Text is not directly supported. You may convert the Table to a Text using `Text.from` and then use `Text.write` to write it.")
+
 ## Read delimited files such as CSVs into a Table.
 type Delimited
    ## Read delimited files such as CSVs into a Table.
@ -108,6 +124,11 @@ type Delimited
    read file on_problems =
        Delimited_Reader.read_file this file on_problems

+    ## Implements the `Table.write` for this `File_Format`.
+    write_table : File -> Table -> Existing_File_Behavior -> Column_Mapping -> Problem_Behavior -> Nothing
+    write_table _ _ _ _ _ =
+        Errors.unimplemented "`Table.write` for the `Delimited` format is not implemented yet."
+
    ## PRIVATE
     Clone the instance with some properties overridden.
     Note: This function is internal until such time as Atom cloning with modification is built into Enso.
@ -169,3 +190,8 @@ type Excel
            (extension.equals_ignore_case ".xls") || (extension.equals_ignore_case ".xlt")

        Excel_Module.read_excel file this.section on_problems format
+
+    ## Implements the `Table.write` for this `File_Format`.
+    write_table : File -> Table -> Existing_File_Behavior -> Column_Mapping -> Problem_Behavior -> Nothing
+    write_table _ _ _ _ _ =
+        Errors.unimplemented "`Table.write` for the `Excel` format is not implemented yet."
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Read.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/File_Read.enso
@ -23,10 +23,7 @@ import Standard.Table.Io.File_Format
     later, however, will still work.
 File.read : (Text | File) -> File_Format -> Problem_Behavior -> Any ! File_Error
 File.read path (format=File_Format.Auto) (on_problems=Report_Warning) =
-    file = case path of
-        Text -> (File.new path)
-        File.File -> path
-        _ -> Error.throw (Illegal_Argument_Error "path should be either a File or a Text")
+    file = File.new path
    file.read format on_problems

 ## Read a file using the specified file format
@ -41,3 +38,4 @@ File.read path (format=File_Format.Auto) (on_problems=Report_Warning) =
 File.File.read : File_Format -> Problem_Behavior -> Any ! File_Error
 File.File.read (format=File_Format.Auto) (on_problems=Report_Warning) =
    format.read this on_problems
+
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Format.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Format.enso
@ -1,86 +0,0 @@
-from Standard.Base import all
-import Standard.Table.Io.Spreadsheet_Write_Mode
-
-## Specifies the different output formats for serializing tables.
-type Format
-
-    ## UNSTABLE
-
-       Specifies the CSV output format settings.
-
-       Arguments:
-       - include_header: Specifies whether the first line of generated CSV
-         should contain the column names.
-       - always_quote: Specifies whether all fields in the resulting CSV should
-         be quoted. When this is set to `False`, only the fields containing the
-         `separator` in their contents will be quoted.
-       - separator: a sequence used to separate fields within a single row.
-       - line_ending: the style of line-endings to use in the generated CSV.
-       - max_rows_per_file: specifies the maximum number of rows that can be
-         written to a single file. If this option is set, instead of writing the
-         contents directly to a file, its name is parsed and a numbered series
-         of files with names based on it is written to instead. For example,
-         if file is `~/my_data/output.csv`, the table contains 250 rows, and
-         `max_rows_per_file` is set to `100`, 3 different files will be written:
-         - `~/my_data/output_1.csv`, containing rows 0 through 99;
-         - `~/my_data/output_2.csv`, containing rows 100 through 199;
-         - `~/my_data/output_3.csv`, containing rows 200 through 249.
-
-       > Example
-         Write a table to a CSV file, without writing the header.
-
-             import Standard.Examples
-             import Table
-
-             example_to_csv = Examples.inventory_table.write (Enso_Project.data / "example_csv_output.csv") (Table.Format.Csv include_header=False)
-    type Csv include_header=True always_quote=False separator=',' line_ending=Line_Ending_Style.Unix max_rows_per_file=Nothing
-
-    ## UNSTABLE
-
-       Specifies XLSX format settings.
-
-       Arguments:
-       - sheet: the name of the sheet to use for writing the data.
-       - write_mode: specifies this method's behavior if the specified file and
-         sheet already exist. Can be one of:
-         - Spreadsheet_Write_Mode.Create: this is the default value. This
-           setting will create a new sheet in the file, with a name chosen such
-           that the clash is avoided.
-         - Spreadsheet_Write_Mode.Overwrite: will result in removing all
-           contents of the existing sheet and replacing it with the new data.
-         - Spreadsheet_Write_Mode.Append: will append this data to the existing
-           sheet, such that the new data starts after the last row containing
-           any data.
-       - include_header: Specifies whether the first line of generated CSV
-         should contain the column names.
-       - max_rows_per_file: specifies the maximum number of rows that can be
-         written to a single file. If this option is set, instead of writing the
-         contents directly to the file, its name is parsed and a numbered series
-         of files with names based on it is written to instead. For example, if
-         the file is `~/my_data/output.xlsx`, the table contains 250 rows, and
-         `max_rows_per_file` is set to `100`, 3 different files will be written:
-         - `~/my_data/output_1.xlsx`, containing rows 0 through 99;
-         - `~/my_data/output_2.xlsx`, containing rows 100 through 199;
-         - `~/my_data/output_3.xlsx`, containing rows 200 through 249.
-
-       > Example
-         Write a table to an XLSX file, without writing the header.
-
-             import Standard.Examples
-             import Table
-
-             example_to_xlsx = Examples.inventory_table.write (Enso_Project.data / "example_xlsx_output.xlsx") (Table.Format.Xlsx include_header=False)
-    type Xlsx sheet='Data' write_mode=Spreadsheet_Write_Mode.Create include_header=True max_rows_per_file=Nothing
-
-    ## UNSTABLE
-
-       Specifies that the table should be written to a JSON file.
-
-       > Example
-         Write a table to a JSON file.
-
-             import Standard.Examples
-             import Table
-
-             example_to_json = Examples.inventory_table.write (Enso_Project.data / "example_output.json") Table.Format.Json
-    type Json
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso
@ -1,7 +1,6 @@
 from Standard.Base import all

 import Standard.Geo.Geo_Json
-import Standard.Table.Io.Format
 import Standard.Table.Io.File_Read
 import Standard.Table.Io.Excel
 import Standard.Table.Io.Spreadsheet
@ -14,7 +13,6 @@ import Standard.Table.Model
 from Standard.Table.Io.Excel export Excel_Section, Excel_Range, read_excel
 from Standard.Table.Io.Spreadsheet export all hiding Reader

-export Standard.Table.Io.Format
 export Standard.Table.Io.Spreadsheet_Write_Mode
 export Standard.Table.Data.Column
 export Standard.Table.Model
--- a/test/Table_Tests/src/Csv_Spec.enso
+++ b/test/Table_Tests/src/Csv_Spec.enso
@ -140,32 +140,5 @@ spec =
            out_2.delete_if_exists
            out_3.delete_if_exists

-        Test.specify 'should be possible through the write method' <|
-            varied_column = (Enso_Project.data / "varied_column.csv") . read
-            out = Enso_Project.data / 'out.csv'
-            out_1 = Enso_Project.data / 'out_1.csv'
-            out_2 = Enso_Project.data / 'out_2.csv'
-            out_3 = Enso_Project.data / 'out_3.csv'
-            out_1.delete_if_exists
-            out_2.delete_if_exists
-            out_3.delete_if_exists
-            varied_column.write out (Table.Format.Csv include_header=False separator=';' max_rows_per_file=3)
-            exp_1 = '''
-                2005-02-25;2005-02-25;1;1;1.0;1
-                2005-02-28;2005-02-28;2;2;2.0;2
-                4;2005-03-01;3;3;3.0;3\n
-            exp_2 = '''
-                2005-03-02;;4;4;4.0;4
-                ;2005-03-03;5;5;5.0;5
-                2005-03-04;2005-03-04;;6;6.25;6.25\n
-            exp_3 = '''
-                2005-03-07;2005-03-07;7;7;7.0;7
-                2005-03-08;2005-03-08;8;8;8.0;osiem\n
-            out_1.read_text.should_equal exp_1
-            out_2.read_text.should_equal exp_2
-            out_3.read_text.should_equal exp_3
-            out_1.delete_if_exists
-            out_2.delete_if_exists
-            out_3.delete_if_exists

 main = Test.Suite.run_main here.spec
--- a/test/Table_Tests/src/Json_Spec.enso
+++ b/test/Table_Tests/src/Json_Spec.enso
@ -19,11 +19,4 @@ spec = Test.group 'JSON conversion' <|
        (Json.parse out.read_text).to_table ['a', 'b', 'c'] . should_equal simple_empty
        out.delete_if_exists

-    Test.specify 'should write JSON tables to disk using the write method' <|
-        out = Enso_Project.data / 'out.json'
-        out.delete_if_exists
-        simple_empty.write out Table.Format.Json
-        (Json.parse out.read_text).to_table ['a', 'b', 'c'] . should_equal simple_empty
-        out.delete_if_exists
-
 main = Test.Suite.run_main here.spec
--- a/test/Table_Tests/src/Spreadsheet_Spec.enso
+++ b/test/Table_Tests/src/Spreadsheet_Spec.enso
@ -102,14 +102,6 @@ spec =
            read . should_equal (clothes.concat clothes)
            out.delete_if_exists

-        Test.specify 'should allow writing using the generic write method' <|
-            out.delete_if_exists
-            clothes.write out (Table.Format.Xlsx sheet='Foo')
-            clothes.write out (Table.Format.Xlsx sheet='Foo' write_mode=Table.Spreadsheet_Write_Mode.Append include_header=False)
-            read = out.read_xlsx sheet='Foo'
-            read . should_equal (clothes.concat clothes)
-            out.delete_if_exists
-
        Test.specify 'should write multiple files if row limit is specified' <|
            out_1 = Enso_Project.data / 'out_1.xlsx'
            out_2 = Enso_Project.data / 'out_2.xlsx'