mirror of
https://github.com/enso-org/enso.git
synced 2024-11-27 05:23:48 +03:00
Add scaffolding for Table.write
function (#3521)
Implements https://www.pivotaltracker.com/story/show/182309559 This task implements common scaffolding for the `Table.write`, so that the particular implementations for Delimited and Excel file formats can be done in parallel.
This commit is contained in:
parent
825eaed4f5
commit
e83c36d9d6
@ -9,6 +9,8 @@ import Standard.Table.Data.Table as Materialized_Table
|
||||
import Standard.Table.Internal.Java_Exports
|
||||
import Standard.Table.Internal.Table_Helpers
|
||||
import Standard.Table.Internal.Problem_Builder
|
||||
import Standard.Table.Io.File_Format
|
||||
import Standard.Base.System.File.Existing_File_Behavior
|
||||
|
||||
import Standard.Table.Data.Aggregate_Column
|
||||
import Standard.Table.Internal.Aggregate_Column_Helper
|
||||
@ -918,6 +920,65 @@ type Table
|
||||
False -> Error.throw <| Illegal_State_Error "The update unexpectedly affected "+affected_rows.to_text+" rows."
|
||||
True -> Nothing
|
||||
|
||||
## This function writes the table into a file.
|
||||
|
||||
The specific behavior of the various `File_Format`s is specified below.
|
||||
|
||||
Arguments:
|
||||
- path: The path to the output file.
|
||||
- format: The format of the file.
|
||||
If `File_Format.Auto` is specified, the file extension determines the
|
||||
specific type, and the default settings for that type are used.
|
||||
Details of this type are below.
|
||||
- on_existing_file: Specifies what to do if the file already exists.
|
||||
- column_mapping: Specifies how to map columns against an existing file.
|
||||
If `Column_Mapping.By_Name` - the columns are mapped by name against
|
||||
an existing file.
|
||||
If `Column_Mapping.By_Position` - the columns are mapped by position
|
||||
against an existing file.
|
||||
If there is a mismatch, then a `Column_Mismatch` error is raised.
|
||||
- on_problems: Specifies how to handle if a problem occurs, raising as a
|
||||
warning by default. The specific issues depend on the `File_Format`
|
||||
argument.
|
||||
|
||||
Returns:
|
||||
- If an unsupported `File_Format` is specified, an
|
||||
`Illegal_Argument_Error` is raised.
|
||||
- If the path to the parent location cannot be found or the filename is
|
||||
invalid, a `File_Not_Found` is raised.
|
||||
- If another IO error occurs, such as access denied, an `Io_Error` is
|
||||
raised.
|
||||
- If appending and the columns do not match, a `Column_Mismatch` is
|
||||
raised.
|
||||
- Other specific errors or warnings that can be raised depend on the
|
||||
format argument.
|
||||
- Otherwise, the file is written following the rules of the format
|
||||
parameter.
|
||||
|
||||
? `File_Format` write behaviors
|
||||
|
||||
- `File_Format.Auto`: The file format is determined by the file
|
||||
extension of the path argument.
|
||||
- `File_Format.Bytes` and `File_Format.Text`: The Table does not
|
||||
support these types in the `write` function. If passed as format, an
|
||||
`Illegal_Argument_Error` is raised. To write out the table as plain
|
||||
text, the user needs to call the `Text.from Table` method and then
|
||||
use the `Text.write` function.
|
||||
|
||||
> Example
|
||||
Write a database table to a CSV file.
|
||||
|
||||
import Standard.Examples
|
||||
import Standard.Database
|
||||
|
||||
example_to_csv =
|
||||
connection = Database.open_sqlite_file (File.new "db.sqlite")
|
||||
table = connection.access_table "Table"
|
||||
table.write (Enso_Project.data / "example_csv_output.csv")
|
||||
write : File|Text -> File_Format -> Existing_File_Behavior -> Column_Mapping -> Problem_Behavior -> Nothing ! Column_Mismatch | Illegal_Argument_Error | File_Not_Found | Io_Error
|
||||
write path format=File_Format.Auto on_existing_file=Existing_File_Behavior.Backup column_mapping=Column_Mapping.By_Name on_problems=Report_Warning =
|
||||
# TODO This should ideally be done in a streaming manner, or at least respect the row limits.
|
||||
this.to_dataframe.write path format on_existing_file column_mapping on_problems
|
||||
|
||||
## Represents a table with grouped rows.
|
||||
type Aggregate_Table
|
||||
|
@ -5,8 +5,9 @@ import Standard.Table.Data.Column
|
||||
import Standard.Visualization
|
||||
from Standard.Base.Data.Time.Date as Date_Module import Date
|
||||
import Standard.Table.Io.Spreadsheet_Write_Mode
|
||||
import Standard.Table.Io.Format
|
||||
import Standard.Table.Io.File_Format
|
||||
import Standard.Base.System.File
|
||||
import Standard.Base.System.File.Existing_File_Behavior
|
||||
import Standard.Table.Internal.Table_Helpers
|
||||
import Standard.Table.Internal.Aggregate_Column_Helper
|
||||
import Standard.Table.Internal.Parse_Values_Helper
|
||||
@ -1308,45 +1309,69 @@ type Table
|
||||
write_json : File.File -> Nothing
|
||||
write_json file = this.to_json.to_text.write file
|
||||
|
||||
## UNSTABLE
|
||||
## This function writes a table from memory into a file.
|
||||
|
||||
Writes the table to a specified file with the given serialization
|
||||
settings.
|
||||
The specific behavior of the various `File_Format`s is specified below.
|
||||
|
||||
Arguments:
|
||||
- file: the file to write to.
|
||||
- format: the format settings to use.
|
||||
- path: The path to the output file.
|
||||
- format: The format of the file.
|
||||
If `File_Format.Auto` is specified, the file extension determines the
|
||||
specific type, and the default settings for that type are used.
|
||||
Details of this type are below.
|
||||
- on_existing_file: Specifies what to do if the file already exists.
|
||||
- column_mapping: Specifies how to map columns against an existing file.
|
||||
If `Column_Mapping.By_Name` - the columns are mapped by name against
|
||||
an existing file.
|
||||
If `Column_Mapping.By_Position` - the columns are mapped by position
|
||||
against an existing file.
|
||||
If there is a mismatch, then a `Column_Mismatch` error is raised.
|
||||
- on_problems: Specifies how to handle if a problem occurs, raising as a
|
||||
warning by default. The specific issues depend on the `File_Format`
|
||||
argument.
|
||||
|
||||
Returns:
|
||||
- If an unsupported `File_Format` is specified, an
|
||||
`Illegal_Argument_Error` is raised.
|
||||
- If the path to the parent location cannot be found or the filename is
|
||||
invalid, a `File_Not_Found` is raised.
|
||||
- If another IO error occurs, such as access denied, an `Io_Error` is
|
||||
raised.
|
||||
- If appending and the columns do not match, a `Column_Mismatch` is
|
||||
raised.
|
||||
- Other specific errors or warnings that can be raised depend on the
|
||||
format argument.
|
||||
- Otherwise, the file is written following the rules of the format
|
||||
parameter.
|
||||
|
||||
? `File_Format` write behaviors
|
||||
|
||||
- `File_Format.Auto`: The file format is determined by the file
|
||||
extension of the path argument.
|
||||
- `File_Format.Bytes` and `File_Format.Text`: The Table does not
|
||||
support these types in the `write` function. If passed as format, an
|
||||
`Illegal_Argument_Error` is raised. To write out the table as plain
|
||||
text, the user needs to call the `Text.from Table` method and then
|
||||
use the `Text.write` function.
|
||||
|
||||
> Example
|
||||
Write a table to a CSV file, without writing the header.
|
||||
|
||||
import Standard.Examples
|
||||
import Table
|
||||
import Standard.Table
|
||||
|
||||
example_to_csv = Examples.inventory_table.write (Enso_Project.data / "example_csv_output.csv") (Table.Format.Csv include_header=False)
|
||||
example_to_csv = Examples.inventory_table.write (Enso_Project.data / "example_csv_output.csv") (File_Format.Delimited delimiter="," headers=False)
|
||||
|
||||
> Example
|
||||
Write a table to an XLSX file, without writing the header.
|
||||
Write a table to an XLSX file.
|
||||
|
||||
import Standard.Examples
|
||||
import Table
|
||||
|
||||
example_to_xlsx = Examples.inventory_table.write (Enso_Project.data / "example_xlsx_output.xlsx") (Table.Format.Xlsx include_header=False)
|
||||
|
||||
> Example
|
||||
Write a table to a JSON file.
|
||||
|
||||
import Standard.Examples
|
||||
import Table
|
||||
|
||||
example_to_json = Examples.inventory_table.write (Enso_Project.data / "example_output.json") Table.Format.Json
|
||||
write : File.File -> Format.Format -> Nothing
|
||||
write file format = case format of
|
||||
Format.Csv header quote sep line max -> this.write_csv file header quote sep line max
|
||||
Format.Xlsx sheet mode header max -> this.write_xlsx file sheet mode header max
|
||||
Format.Json -> this.write_json file
|
||||
import Standard.Table
|
||||
|
||||
example_to_xlsx = Examples.inventory_table.write (Enso_Project.data / "example_xlsx_output.xlsx") File_Format.Excel
|
||||
write : File|Text -> File_Format -> Existing_File_Behavior -> Column_Mapping -> Problem_Behavior -> Nothing ! Column_Mismatch | Illegal_Argument_Error | File_Not_Found | Io_Error
|
||||
write path format=File_Format.Auto on_existing_file=Existing_File_Behavior.Backup column_mapping=Column_Mapping.By_Name on_problems=Report_Warning =
|
||||
format.write_table (File.new path) this on_existing_file column_mapping on_problems
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
|
@ -45,6 +45,12 @@ type Auto
|
||||
materialised = this.materialise file
|
||||
materialised.read file on_problems
|
||||
|
||||
## Implements the `Table.write` for this `File_Format`.
|
||||
write_table : File -> Table -> Existing_File_Behavior -> Column_Mapping -> Problem_Behavior -> Nothing
|
||||
write_table file table on_existing_file column_mapping on_problems =
|
||||
materialised = this.materialise file
|
||||
materialised.write_table file table on_existing_file column_mapping on_problems
|
||||
|
||||
## Reads the file to a `Vector` of bytes.
|
||||
type Bytes
|
||||
type Bytes
|
||||
@ -54,6 +60,11 @@ type Bytes
|
||||
read file _ =
|
||||
file.read_bytes
|
||||
|
||||
## Implements the `Table.write` for this `File_Format`.
|
||||
write_table : File -> Table -> Existing_File_Behavior -> Column_Mapping -> Problem_Behavior -> Nothing
|
||||
write_table _ _ _ _ _ =
|
||||
Error.throw (Illegal_Argument_Error "Saving a Table as Bytes is not supported.")
|
||||
|
||||
## Reads the file to a `Text` with specified encoding.
|
||||
type Plain_Text
|
||||
type Plain_Text (encoding:Encoding=Encoding.utf_8)
|
||||
@ -63,6 +74,11 @@ type Plain_Text
|
||||
read file on_problems =
|
||||
file.read_text this.encoding on_problems
|
||||
|
||||
## Implements the `Table.write` for this `File_Format`.
|
||||
write_table : File -> Table -> Existing_File_Behavior -> Column_Mapping -> Problem_Behavior -> Nothing
|
||||
write_table _ _ _ _ _ =
|
||||
Error.throw (Illegal_Argument_Error "Saving a Table as Plain_Text is not directly supported. You may convert the Table to a Text using `Text.from` and then use `Text.write` to write it.")
|
||||
|
||||
## Read delimited files such as CSVs into a Table.
|
||||
type Delimited
|
||||
## Read delimited files such as CSVs into a Table.
|
||||
@ -108,6 +124,11 @@ type Delimited
|
||||
read file on_problems =
|
||||
Delimited_Reader.read_file this file on_problems
|
||||
|
||||
## Implements the `Table.write` for this `File_Format`.
|
||||
write_table : File -> Table -> Existing_File_Behavior -> Column_Mapping -> Problem_Behavior -> Nothing
|
||||
write_table _ _ _ _ _ =
|
||||
Errors.unimplemented "`Table.write` for the `Delimited` format is not implemented yet."
|
||||
|
||||
## PRIVATE
|
||||
Clone the instance with some properties overridden.
|
||||
Note: This function is internal until such time as Atom cloning with modification is built into Enso.
|
||||
@ -169,3 +190,8 @@ type Excel
|
||||
(extension.equals_ignore_case ".xls") || (extension.equals_ignore_case ".xlt")
|
||||
|
||||
Excel_Module.read_excel file this.section on_problems format
|
||||
|
||||
## Implements the `Table.write` for this `File_Format`.
|
||||
write_table : File -> Table -> Existing_File_Behavior -> Column_Mapping -> Problem_Behavior -> Nothing
|
||||
write_table _ _ _ _ _ =
|
||||
Errors.unimplemented "`Table.write` for the `Excel` format is not implemented yet."
|
||||
|
@ -23,10 +23,7 @@ import Standard.Table.Io.File_Format
|
||||
later, however, will still work.
|
||||
File.read : (Text | File) -> File_Format -> Problem_Behavior -> Any ! File_Error
|
||||
File.read path (format=File_Format.Auto) (on_problems=Report_Warning) =
|
||||
file = case path of
|
||||
Text -> (File.new path)
|
||||
File.File -> path
|
||||
_ -> Error.throw (Illegal_Argument_Error "path should be either a File or a Text")
|
||||
file = File.new path
|
||||
file.read format on_problems
|
||||
|
||||
## Read a file using the specified file format
|
||||
@ -41,3 +38,4 @@ File.read path (format=File_Format.Auto) (on_problems=Report_Warning) =
|
||||
File.File.read : File_Format -> Problem_Behavior -> Any ! File_Error
|
||||
File.File.read (format=File_Format.Auto) (on_problems=Report_Warning) =
|
||||
format.read this on_problems
|
||||
|
||||
|
@ -1,86 +0,0 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Table.Io.Spreadsheet_Write_Mode
|
||||
|
||||
## Specifies the different output formats for serializing tables.
|
||||
type Format
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Specifies the CSV output format settings.
|
||||
|
||||
Arguments:
|
||||
- include_header: Specifies whether the first line of generated CSV
|
||||
should contain the column names.
|
||||
- always_quote: Specifies whether all fields in the resulting CSV should
|
||||
be quoted. When this is set to `False`, only the fields containing the
|
||||
`separator` in their contents will be quoted.
|
||||
- separator: a sequence used to separate fields within a single row.
|
||||
- line_ending: the style of line-endings to use in the generated CSV.
|
||||
- max_rows_per_file: specifies the maximum number of rows that can be
|
||||
written to a single file. If this option is set, instead of writing the
|
||||
contents directly to a file, its name is parsed and a numbered series
|
||||
of files with names based on it is written to instead. For example,
|
||||
if file is `~/my_data/output.csv`, the table contains 250 rows, and
|
||||
`max_rows_per_file` is set to `100`, 3 different files will be written:
|
||||
- `~/my_data/output_1.csv`, containing rows 0 through 99;
|
||||
- `~/my_data/output_2.csv`, containing rows 100 through 199;
|
||||
- `~/my_data/output_3.csv`, containing rows 200 through 249.
|
||||
|
||||
> Example
|
||||
Write a table to a CSV file, without writing the header.
|
||||
|
||||
import Standard.Examples
|
||||
import Table
|
||||
|
||||
example_to_csv = Examples.inventory_table.write (Enso_Project.data / "example_csv_output.csv") (Table.Format.Csv include_header=False)
|
||||
type Csv include_header=True always_quote=False separator=',' line_ending=Line_Ending_Style.Unix max_rows_per_file=Nothing
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Specifies XLSX format settings.
|
||||
|
||||
Arguments:
|
||||
- sheet: the name of the sheet to use for writing the data.
|
||||
- write_mode: specifies this method's behavior if the specified file and
|
||||
sheet already exist. Can be one of:
|
||||
- Spreadsheet_Write_Mode.Create: this is the default value. This
|
||||
setting will create a new sheet in the file, with a name chosen such
|
||||
that the clash is avoided.
|
||||
- Spreadsheet_Write_Mode.Overwrite: will result in removing all
|
||||
contents of the existing sheet and replacing it with the new data.
|
||||
- Spreadsheet_Write_Mode.Append: will append this data to the existing
|
||||
sheet, such that the new data starts after the last row containing
|
||||
any data.
|
||||
- include_header: Specifies whether the first line of generated CSV
|
||||
should contain the column names.
|
||||
- max_rows_per_file: specifies the maximum number of rows that can be
|
||||
written to a single file. If this option is set, instead of writing the
|
||||
contents directly to the file, its name is parsed and a numbered series
|
||||
of files with names based on it is written to instead. For example, if
|
||||
the file is `~/my_data/output.xlsx`, the table contains 250 rows, and
|
||||
`max_rows_per_file` is set to `100`, 3 different files will be written:
|
||||
- `~/my_data/output_1.xlsx`, containing rows 0 through 99;
|
||||
- `~/my_data/output_2.xlsx`, containing rows 100 through 199;
|
||||
- `~/my_data/output_3.xlsx`, containing rows 200 through 249.
|
||||
|
||||
> Example
|
||||
Write a table to an XLSX file, without writing the header.
|
||||
|
||||
import Standard.Examples
|
||||
import Table
|
||||
|
||||
example_to_xlsx = Examples.inventory_table.write (Enso_Project.data / "example_xlsx_output.xlsx") (Table.Format.Xlsx include_header=False)
|
||||
type Xlsx sheet='Data' write_mode=Spreadsheet_Write_Mode.Create include_header=True max_rows_per_file=Nothing
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Specifies that the table should be written to a JSON file.
|
||||
|
||||
> Example
|
||||
Write a table to a JSON file.
|
||||
|
||||
import Standard.Examples
|
||||
import Table
|
||||
|
||||
example_to_json = Examples.inventory_table.write (Enso_Project.data / "example_output.json") Table.Format.Json
|
||||
type Json
|
@ -1,7 +1,6 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import Standard.Geo.Geo_Json
|
||||
import Standard.Table.Io.Format
|
||||
import Standard.Table.Io.File_Read
|
||||
import Standard.Table.Io.Excel
|
||||
import Standard.Table.Io.Spreadsheet
|
||||
@ -14,7 +13,6 @@ import Standard.Table.Model
|
||||
from Standard.Table.Io.Excel export Excel_Section, Excel_Range, read_excel
|
||||
from Standard.Table.Io.Spreadsheet export all hiding Reader
|
||||
|
||||
export Standard.Table.Io.Format
|
||||
export Standard.Table.Io.Spreadsheet_Write_Mode
|
||||
export Standard.Table.Data.Column
|
||||
export Standard.Table.Model
|
||||
|
@ -140,32 +140,5 @@ spec =
|
||||
out_2.delete_if_exists
|
||||
out_3.delete_if_exists
|
||||
|
||||
Test.specify 'should be possible through the write method' <|
|
||||
varied_column = (Enso_Project.data / "varied_column.csv") . read
|
||||
out = Enso_Project.data / 'out.csv'
|
||||
out_1 = Enso_Project.data / 'out_1.csv'
|
||||
out_2 = Enso_Project.data / 'out_2.csv'
|
||||
out_3 = Enso_Project.data / 'out_3.csv'
|
||||
out_1.delete_if_exists
|
||||
out_2.delete_if_exists
|
||||
out_3.delete_if_exists
|
||||
varied_column.write out (Table.Format.Csv include_header=False separator=';' max_rows_per_file=3)
|
||||
exp_1 = '''
|
||||
2005-02-25;2005-02-25;1;1;1.0;1
|
||||
2005-02-28;2005-02-28;2;2;2.0;2
|
||||
4;2005-03-01;3;3;3.0;3\n
|
||||
exp_2 = '''
|
||||
2005-03-02;;4;4;4.0;4
|
||||
;2005-03-03;5;5;5.0;5
|
||||
2005-03-04;2005-03-04;;6;6.25;6.25\n
|
||||
exp_3 = '''
|
||||
2005-03-07;2005-03-07;7;7;7.0;7
|
||||
2005-03-08;2005-03-08;8;8;8.0;osiem\n
|
||||
out_1.read_text.should_equal exp_1
|
||||
out_2.read_text.should_equal exp_2
|
||||
out_3.read_text.should_equal exp_3
|
||||
out_1.delete_if_exists
|
||||
out_2.delete_if_exists
|
||||
out_3.delete_if_exists
|
||||
|
||||
main = Test.Suite.run_main here.spec
|
||||
|
@ -19,11 +19,4 @@ spec = Test.group 'JSON conversion' <|
|
||||
(Json.parse out.read_text).to_table ['a', 'b', 'c'] . should_equal simple_empty
|
||||
out.delete_if_exists
|
||||
|
||||
Test.specify 'should write JSON tables to disk using the write method' <|
|
||||
out = Enso_Project.data / 'out.json'
|
||||
out.delete_if_exists
|
||||
simple_empty.write out Table.Format.Json
|
||||
(Json.parse out.read_text).to_table ['a', 'b', 'c'] . should_equal simple_empty
|
||||
out.delete_if_exists
|
||||
|
||||
main = Test.Suite.run_main here.spec
|
||||
|
@ -102,14 +102,6 @@ spec =
|
||||
read . should_equal (clothes.concat clothes)
|
||||
out.delete_if_exists
|
||||
|
||||
Test.specify 'should allow writing using the generic write method' <|
|
||||
out.delete_if_exists
|
||||
clothes.write out (Table.Format.Xlsx sheet='Foo')
|
||||
clothes.write out (Table.Format.Xlsx sheet='Foo' write_mode=Table.Spreadsheet_Write_Mode.Append include_header=False)
|
||||
read = out.read_xlsx sheet='Foo'
|
||||
read . should_equal (clothes.concat clothes)
|
||||
out.delete_if_exists
|
||||
|
||||
Test.specify 'should write multiple files if row limit is specified' <|
|
||||
out_1 = Enso_Project.data / 'out_1.xlsx'
|
||||
out_2 = Enso_Project.data / 'out_2.xlsx'
|
||||
|
Loading…
Reference in New Issue
Block a user