Restructure File.read into the new design (#3701)

Changes following Marcin's work. Should be back to very similar public API as before.

- Add an "interface" type: `Standard.Base.System.File_Format.File_Format`.
- All `File_Format` types now have a `can_read` method to decide if they can read a file.
- Move `Standard.Table.IO.File_Format.Text.Text_Data` to `Standard.Base.System.File_Format.Plain_Text_Format.Plain_Text`.
- Move `Standard.Table.IO.File_Format.Bytes` to `Standard.Base.System.File_Format.Bytes`.
- Move `Standard.Table.IO.File_Format.Infer` to `Standard.Base.System.File_Format.Infer`. **(doesn't belong here...)**
- Move `Standard.Table.IO.File_Format.Unsupported_File_Type` to `Standard.Base.Error.Common.Unsupported_File_Type`.
- Add `Infer`, `File_Format`, `Bytes`, `Plain_Text`, `Plain_Text_Format` to `Standard.Base` exports.
- Fold extension methods of `Standard.Base.Meta.Unresolved_Symbol` into type.
- Move `Standard.Table.IO.File_Format.Auto` to `Standard.Table.IO.Auto_Detect.Auto_Detect`.
- Added a `types` Vector of all the built in formats.
- `Auto_Detect` asks each type if they `can_read` a file.
- Broke up and moved `Standard.Table.IO.Excel` into `Standard.Table.Excel`:
- Moved `Standard.Table.IO.File_Format.Excel.Excel_Data` to `Standard.Table.Excel.Excel_Format.Excel_Format.Excel`.
- Renamed `Sheet` to `Worksheet`.
- Internal types `Reader` and `Writer` providing the actual read and write methods.
- Created `Standard.Table.Delimited` with similar structure to `Standard.Table.Excel`:
- Moved `Standard.Table.IO.File_Format.Delimited.Delimited_Data` to `Standard.Table.Delimited.Delimited_Format.Delimited_Format.Delimited`.
- Moved `Standard.Table.IO.Quote_Style` to `Standard.Table.Delimited.Quote_Style`.
- Moved the `Reader` and `Writer` internal types into here. Renamed methods to have unique names.
- Add `Aggregate_Column`, `Auto_Detect`, `Delimited`, `Delimited_Format`, `Excel`, `Excel_Format`, `Sheet_Names`, `Range_Names`, `Worksheet` and `Cell_Range` to `Standard.Table` exports.
This commit is contained in:
James Dunkerley 2022-09-15 15:48:46 +01:00 committed by GitHub
parent a04425576a
commit 0126f02e7b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
49 changed files with 969 additions and 924 deletions

View File

@ -672,3 +672,11 @@ type Time_Error
Arguments:
- error_message: The message for the error.
Time_Error_Data error_message
## Indicates that the given file's type is not supported.
type Unsupported_File_Type
    ## The error payload.

       Arguments:
       - filename: The name of the offending file. It is concatenated into the
         display message, so it is presumably a `Text` (TODO confirm at the
         call sites).
    Unsupported_File_Type_Data filename

    ## Builds the human-readable description of this error.
    to_display_text : Text
    to_display_text self =
        prefix = "The "
        suffix = " has a type that is not supported."
        prefix + self.filename + suffix

View File

@ -45,6 +45,7 @@ import project.Runtime.Extensions
import project.Runtime.State
import project.System.Environment
import project.System.File
import project.System.File_Format
import project.System.File.Existing_File_Behavior
import project.Data.Text.Regex
import project.Data.Text.Regex.Mode as Regex_Mode
@ -109,4 +110,5 @@ from project.Function export all
from project.Nothing export all
from project.Polyglot export all
from project.Runtime.Extensions export all
from project.System.File_Format export File_Format, Plain_Text_Format, Plain_Text, Bytes, Infer
from project.Data.Index_Sub_Range export First, Last

View File

@ -40,6 +40,32 @@ type Primitive
type Unresolved_Symbol
Unresolved_Symbol_Data value
## UNSTABLE
ADVANCED
Returns a new unresolved symbol with its name changed to the provided
argument.
Arguments:
- new_name: The new name for the unresolved symbol.
rename : Text -> Any
rename self new_name =
create_unresolved_symbol new_name self.scope
## UNSTABLE
ADVANCED
Returns the name of an unresolved symbol.
name : Text
name self = get_unresolved_symbol_name self.value
## UNSTABLE
ADVANCED
Returns the definition scope of an unresolved symbol.
scope : Any
scope self = get_unresolved_symbol_scope self.value
## UNSTABLE
ADVANCED
@ -142,33 +168,6 @@ get_unresolved_symbol_name symbol = @Builtin_Method "Meta.get_unresolved_symbol_
get_unresolved_symbol_scope : Unresolved_Symbol -> Module_Scope
get_unresolved_symbol_scope symbol = @Builtin_Method "Meta.get_unresolved_symbol_scope"
## UNSTABLE
ADVANCED
Returns a new unresolved symbol with its name changed to the provided
argument.
Arguments:
- new_name: The new name for the unresolved symbol.
Unresolved_Symbol.rename : Text -> Any
Unresolved_Symbol.rename self new_name =
create_unresolved_symbol new_name self.scope
## UNSTABLE
ADVANCED
Returns the name of an unresolved symbol.
Unresolved_Symbol.name : Text
Unresolved_Symbol.name self = get_unresolved_symbol_name self.value
## UNSTABLE
ADVANCED
Returns the definition scope of an unresolved symbol.
Unresolved_Symbol.scope : Any
Unresolved_Symbol.scope self = get_unresolved_symbol_scope self.value
# Constructor methods
## PRIVATE
@ -349,7 +348,7 @@ type Language
## UNSTABLE
ADVANCED
The Java laguage.
The Java language.
Java
## UNSTABLE

View File

@ -0,0 +1,42 @@
from Standard.Base import all
from Standard.Base.Error.Common import unimplemented
## The "interface" that concrete file formats follow. It only declares the
   shape of `read`; calling it here reports that it is unimplemented.
type File_Format
    ## PRIVATE
       Implements the `File.read` for this `File_Format`.

       Both arguments (the file and the problem behavior) are ignored here:
       this placeholder always reports that it is unimplemented.
    read : File -> Problem_Behavior -> Any
    read _ _ =
        unimplemented "This is an interface only."
## Reads a file as text.
type Plain_Text_Format
    ## Arguments:
       - encoding: The encoding handed to `File.read_text` when reading.
         Defaults to `Encoding.utf_8`.
    Plain_Text (encoding:Encoding=Encoding.utf_8)

    ## If the File_Format supports reading from the file, return a configured
       instance; otherwise return `Nothing`.

       Only the exact extensions `.txt` and `.log` are recognised.
    for_file : File -> Plain_Text_Format | Nothing
    for_file file =
        extension = file.extension
        case extension of
            ".txt" -> Plain_Text
            ".log" -> Plain_Text
            _ -> Nothing

    ## PRIVATE
       Implements the `File.read` for this `File_Format` by reading the file
       as text with the configured encoding.
    read : File -> Problem_Behavior -> Any
    read self file on_problems =
        encoding = self.encoding
        file.read_text encoding on_problems
## Reads a file as raw bytes.
type Bytes
    ## If the File_Format supports reading from the file, return a configured
       instance; otherwise return `Nothing`.

       Only the exact extension `.dat` is recognised.
    for_file : File -> Bytes | Nothing
    for_file file =
        extension = file.extension
        case extension of
            ".dat" -> Bytes
            _ -> Nothing

    ## PRIVATE
       Implements the `File.read` for this `File_Format`. The problem behavior
       argument is ignored.
    read : File -> Problem_Behavior -> Any
    read self file _ =
        file.read_bytes
## A setting to infer the default behaviour of some option.

   The type carries no data of its own; it is used as a marker value.
type Infer

View File

@ -5,8 +5,7 @@ import Standard.Database.Data.Sql
import Standard.Database.Data.Table as Database_Table
import Standard.Table.Data.Table as Materialized_Table
import Standard.Table.Data.Column_Name_Mapping
import Standard.Table.Data.Column_Selector
from Standard.Table import Column_Selector, Column_Name_Mapping
from Standard.Database.Data.Sql import Sql_Type, Sql_Type_Data
from Standard.Database.Internal.JDBC_Connection import create_table_statement, handle_sql_errors

View File

@ -1,7 +1,7 @@
from Standard.Base import all
import Standard.Base.Error.Common as Errors
import Standard.Table.Data.Aggregate_Column
from Standard.Table import Aggregate_Column
import Standard.Database.Data.Sql
import Standard.Database.Data.Internal.IR
import Standard.Database.Data.Dialect.Postgres

View File

@ -1,7 +1,7 @@
from Standard.Base import all
import Standard.Base.Error.Common as Errors
import Standard.Table.Data.Aggregate_Column
from Standard.Table import Aggregate_Column
import Standard.Database.Data.Sql
import Standard.Database.Data.Dialect
import Standard.Database.Data.Dialect.Postgres

View File

@ -8,25 +8,21 @@ import Standard.Database.Data.Sql
import Standard.Table.Data.Column as Materialized_Column
import Standard.Table.Data.Table as Materialized_Table
import Standard.Table.IO.File_Format
from Standard.Table import Auto_Detect, Aggregate_Column, Data_Formatter, Column_Name_Mapping, Sort_Column_Selector, Sort_Column, Match_Columns
from Standard.Table.Errors import No_Such_Column_Error, No_Such_Column_Error_Data
from Standard.Table.Data.Column_Selector import Column_Selector, By_Index, By_Name
import Standard.Table.Internal.Java_Exports
import Standard.Table.Internal.Table_Helpers
import Standard.Table.Internal.Problem_Builder
import Standard.Table.Data.Aggregate_Column
import Standard.Table.Internal.Aggregate_Column_Helper
from Standard.Database.Data.Column import Column, Aggregate_Column_Builder, Column_Data
from Standard.Database.Data.Internal.IR import Internal_Column, Internal_Column_Data
from Standard.Table.Errors import No_Such_Column_Error, No_Such_Column_Error_Data
from Standard.Table.Data.Column_Selector import Column_Selector, By_Index, By_Name
from Standard.Table.Data.Data_Formatter import Data_Formatter
from Standard.Database.Errors import Unsupported_Database_Operation_Error_Data
import Standard.Table.Data.Column_Name_Mapping
import Standard.Table.Data.Position
import Standard.Table.Data.Sort_Column_Selector
import Standard.Table.Data.Sort_Column
import Standard.Table.Data.Match_Columns
polyglot java import java.sql.JDBCType
@ -929,9 +925,9 @@ type Table
Arguments:
- path: The path to the output file.
- format: The format of the file.
If `File_Format.Auto` is specified; the file extension determines the
specific type and uses the default settings for that type to be used.
Details of this type are below.
If `Auto_Detect` is specified; the provided file determines the
specific type and configures it appropriately. Details of this type are
below.
- on_existing_file: Specifies what to do if the file already exists.
- match_columns: Specifies how to match columns against an existing file.
If `Match_Columns.By_Name` - the columns are mapped by name against an
@ -960,13 +956,12 @@ type Table
? `File_Format` write behaviors
- `File_Format.Auto`: The file format is determined by the file
extension of the path argument.
- `File_Format.Bytes` and `File_Format.Text`: The Table does not
support these types in the `write` function. If passed as format, an
`Illegal_Argument_Error` is raised. To write out the table as plain
text, the user needs to call the `Text.from Table` method and then
use the `Text.write` function.
- `Auto_Detect`: The file format is determined by the provided file.
- `Bytes` and `Plain_Text`: The Table does not support these types in
the `write` function. If passed as format, an
`Illegal_Argument_Error` is raised. To write out the table as plain
text, the user needs to call the `Text.from Table` method and then
use the `Text.write` function.
> Example
Write a database table to a CSV file.
@ -979,7 +974,7 @@ type Table
table = connection.access_table "Table"
table.write (enso_project.data / "example_csv_output.csv")
write : File|Text -> File_Format -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> Nothing ! Column_Mismatch | Illegal_Argument_Error | File_Not_Found | IO_Error
write self path format=File_Format.Auto on_existing_file=Existing_File_Behavior.Backup match_columns=Match_Columns.By_Name on_problems=Report_Warning =
write self path format=Auto_Detect on_existing_file=Existing_File_Behavior.Backup match_columns=Match_Columns.By_Name on_problems=Report_Warning =
# TODO This should ideally be done in a streaming manner, or at least respect the row limits.
self.to_dataframe.write path format on_existing_file match_columns on_problems

View File

@ -49,8 +49,7 @@
break ties in descending order.
import Standard.Examples
import Standard.Table.Data.Sort_Column_Selector
import Standard.Table.Data.Sort_Column
from Standard.Table import Sort_Column, Sort_Column_Selector
example_sort =
table = Examples.inventory_table

View File

@ -27,8 +27,7 @@
Sort the shop inventory based on the per-item price in descending order.
import Standard.Examples
import Standard.Table.Data.Sort_Column_Selector
import Standard.Table.Data.Sort_Column
from Standard.Table import Sort_Column_Selector, Sort_Column
example_sort =
table = Examples.inventory_table

View File

@ -121,7 +121,7 @@ type Data_Formatter
Arguments:
- locale: The locale to use when parsing dates and times.
with_locale : Locale -> Delimited
with_locale : Locale -> Data_Formatter_Data
with_locale self datetime_locale = self.clone datetime_locale=datetime_locale
## PRIVATE

View File

@ -7,14 +7,17 @@ import Standard.Base.Data.Text.Case
import Standard.Base.System.Platform
import Standard.Table.Data.Column
import Standard.Table.IO.File_Format
import Standard.Table.Internal.Table_Helpers
import Standard.Table.Internal.Aggregate_Column_Helper
import Standard.Table.Internal.Parse_Values_Helper
import Standard.Table.Internal.Delimited_Reader
import Standard.Table.Internal.Delimited_Writer
import Standard.Table.Internal.Problem_Builder
from Standard.Table.IO.Auto_Detect import Auto_Detect
from Standard.Table.Delimited.Delimited_Format import Delimited_Format, Delimited
import Standard.Table.Delimited.Delimited_Reader
import Standard.Table.Delimited.Delimited_Writer
from Standard.Table.Data.Column_Selector import Column_Selector, By_Index, By_Name
from Standard.Table.Data.Column_Type_Selection import Column_Type_Selection, Auto
from Standard.Table.Data.Data_Formatter import Data_Formatter, Data_Formatter_Data
@ -1062,9 +1065,9 @@ type Table
Arguments:
- path: The path to the output file.
- format: The format of the file.
If `File_Format.Auto` is specified; the file extension determines the
specific type and uses the default settings for that type to be used.
Details of this type are below.
If `Auto_Detect` is specified; the provided file determines the
specific type and configures it appropriately. Details of this type are
below.
- on_existing_file: Specifies what to do if the file already exists.
- match_columns: Specifies how to match columns against an existing file.
If `Match_Columns.By_Name` - the columns are mapped by name against an
@ -1093,10 +1096,9 @@ type Table
? `File_Format` write behaviors
- `File_Format.Auto`: The file format is determined by the file
extension of the path argument.
- `File_Format.Bytes` and `File_Format.Text`: The Table does not
support these types in the `write` function. If passed as format, an
- `Auto_Detect`: The file format is determined by the provided file.
- `Bytes` and `Plain_Text`: The Table does not support these types in
the `write` function. If passed as format, an
`Illegal_Argument_Error` is raised. To write out the table as plain
text, the user needs to call the `Text.from Table` method and then
use the `Text.write` function.
@ -1106,23 +1108,25 @@ type Table
import Standard.Examples
import Standard.Table
from Standard.Table import Delimited
example_to_csv = Examples.inventory_table.write (Enso_Project.data / "example_csv_output.csv") (File_Format.Delimited_Data delimiter="," headers=False)
example_to_csv = Examples.inventory_table.write (Enso_Project.data / "example_csv_output.csv") (Delimited delimiter="," headers=False)
> Example
Write a table to an XLSX file.
import Standard.Examples
import Standard.Table
from Standard.Table import Excel
example_to_xlsx = Examples.inventory_table.write (enso_project.data / "example_xlsx_output.xlsx") File_Format.Excel
example_to_xlsx = Examples.inventory_table.write (enso_project.data / "example_xlsx_output.xlsx") Excel
write : File|Text -> File_Format -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> Nothing ! Column_Mismatch | Illegal_Argument_Error | File_Not_Found | IO_Error
write self path format=File_Format.Auto on_existing_file=Existing_File_Behavior.Backup match_columns=Match_Columns.By_Name on_problems=Report_Warning =
write self path format=Auto_Detect on_existing_file=Existing_File_Behavior.Backup match_columns=Match_Columns.By_Name on_problems=Report_Warning =
format.write_table (File.new path) self on_existing_file match_columns on_problems
## Creates a text representation of the table using the CSV format.
to_csv : Text
to_csv self = Text.from self (File_Format.Delimited_Data delimiter=",")
to_csv self = Text.from self (Delimited delimiter=",")
## UNSTABLE
@ -1191,12 +1195,12 @@ print_table header rows indices_count format_term =
" " + y
([" " + header_line, divider] + row_lines).join '\n'
Table.from (that : Text) (format:File_Format.Delimited|File_Format.Fixed_Width = File_Format.Delimited_Data '\t') (on_problems:Problem_Behavior=Report_Warning) =
if format.is_a File_Format.Delimited_Data then Delimited_Reader.read_text that format on_problems else
Table.from (that : Text) (format:Delimited_Format = Delimited '\t') (on_problems:Problem_Behavior=Report_Warning) =
if format.is_a Delimited then Delimited_Reader.read_text that format on_problems else
Errors.unimplemented "Table.from for fixed-width files is not yet implemented."
Text.from (that : Table) (format:File_Format.Delimited|File_Format.Fixed_Width = File_Format.Delimited_Data '\t') =
if format.is_a File_Format.Delimited_Data then Delimited_Writer.write_text that format else
Text.from (that : Table) (format:Delimited_Format = Delimited '\t') =
if format.is_a Delimited then Delimited_Writer.write_text that format else
Errors.unimplemented "Text.from for fixed-width files is not yet implemented."
## PRIVATE

View File

@ -0,0 +1,121 @@
from Standard.Base import all
import Standard.Table.Data.Table
import Standard.Table.Data.Match_Columns
import Standard.Table.Delimited.Delimited_Reader
import Standard.Table.Delimited.Delimited_Writer
from Standard.Table.Data.Data_Formatter import Data_Formatter, Data_Formatter_Data
import Standard.Table.Delimited.Quote_Style
## Read delimited files such as CSVs into a Table.
type Delimited_Format
    ## Read delimited files such as CSVs into a Table.

       If a row does not match the first row's column count, the function raises
       an `Invalid_Row`. If a quote is opened and never closed, a
       `Mismatched_Quote` warning occurs.

       Arguments:
       - delimiter: The delimiter character to split the file into columns. An
         `Illegal_Argument_Error` error is returned if this is an empty string.
       - encoding: The encoding to use when reading the file.
       - skip_rows: The number of rows to skip from the top of the file.
       - row_limit: The maximum number of rows to read from the file. This count
         does not include the header row (if applicable).
       - quote_style: Specifies the style of quotes used for reading and
         writing.
       - headers: If set to `True`, the first row is used as column names. If
         set to `False`, the column names are generated by adding increasing
         numeric suffixes to the base name `Column` (i.e. `Column_1`,
         `Column_2` etc.). If set to `Infer`, the process tries to infer if
         headers are present on the first row. If the column names are not
         unique, numeric suffixes will be appended to disambiguate them.
       - value_formatter: Formatter to parse text values into numbers, dates,
         times, etc. If `Nothing` values are left as Text.
       - keep_invalid_rows: Specifies whether rows that contain less or more
         columns than expected should be kept (setting the missing columns to
         `Nothing` or dropping the excess columns) or dropped.
       - line_endings: Sets the line ending style to use. Defaults to `Infer` -
         when reading a file or appending to an existing file, the line endings
         are detected from file contents; when writing a new file in `Infer`
         mode the `Unix` line endings are used.
       - comment_character: Sets the character which indicates the start of a
         comment within a delimited file. Any line that begins with the comment
         character is skipped. The comment character is treated as any other
         character if it appears anywhere else than at the beginning of the
         line. This option is only applicable for read mode and does not affect
         writing. It defaults to `Nothing` which means that comments are
         disabled.
    Delimited (delimiter:Text) (encoding:Encoding=Encoding.utf_8) (skip_rows:Integer=0) (row_limit:Integer|Nothing=Nothing) (quote_style:Quote_Style=Quote_Style.With_Quotes) (headers:Boolean|Infer=Infer) (value_formatter:Data_Formatter|Nothing=Data_Formatter_Data) (keep_invalid_rows:Boolean=True) (line_endings:Line_Ending_Style=Infer) (comment_character:Text|Nothing=Nothing)

    ## If the File_Format supports reading from the file, return a configured instance.

       `.csv` yields a comma-delimited format and `.tsv` a tab-delimited one;
       any other extension yields `Nothing`.
    for_file : File -> Delimited_Format | Nothing
    for_file file =
        case file.extension of
            ".csv" -> Delimited ','
            ".tsv" -> Delimited '\t'
            _ -> Nothing

    ## Implements the `File.read` for this `File_Format`
    read : File -> Problem_Behavior -> Any
    read self file on_problems =
        Delimited_Reader.read_file self file on_problems

    ## Implements the `Table.write` for this `File_Format`.
    write_table : File -> Table -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> Nothing
    write_table self file table on_existing_file match_columns on_problems =
        Delimited_Writer.write_file table self file on_existing_file match_columns on_problems

    ## PRIVATE
       Clone the instance with some properties overridden.

       The `delimiter`, `encoding`, `skip_rows` and `row_limit` are always
       copied from this instance; every other field defaults to the current
       value and can be overridden by name.

       Note: This function is internal until such time as Atom cloning with modification is built into Enso.
    clone : Quote_Style -> (Boolean|Infer) -> (Data_Formatter|Nothing) -> Boolean -> Line_Ending_Style -> (Text|Nothing) -> Delimited_Format
    clone self (quote_style=self.quote_style) (headers=self.headers) (value_formatter=self.value_formatter) (keep_invalid_rows=self.keep_invalid_rows) (line_endings=self.line_endings) (comment_character=self.comment_character) =
        Delimited self.delimiter self.encoding self.skip_rows self.row_limit quote_style headers value_formatter keep_invalid_rows line_endings comment_character

    ## Create a clone of this with specified quoting settings.

       Arguments:
       - quote: The quote character. Defaults to `"`.
       - quote_escape: The escape for a quote inside a quoted value. Defaults
         to the value of `quote`.
       - always_quote: Passed through to `Quote_Style.With_Quotes`. Defaults to
         `False`.
    with_quotes : Text -> Text -> Boolean -> Delimited_Format
    with_quotes self quote='"' quote_escape=quote always_quote=False =
        self.clone quote_style=(Quote_Style.With_Quotes always_quote=always_quote quote=quote quote_escape=quote_escape)

    ## Create a clone of this with quoting disabled (`Quote_Style.No_Quotes`).
    without_quotes : Delimited_Format
    without_quotes self =
        self.clone quote_style=Quote_Style.No_Quotes

    ## Create a clone of this with first row treated as header.
    with_headers : Delimited_Format
    with_headers self = self.clone headers=True

    ## Create a clone of this where the first row is treated as data, not a
       header.
    without_headers : Delimited_Format
    without_headers self = self.clone headers=False

    ## Create a clone of this with value parsing.

       A custom `Data_Formatter` can be provided to customize parser options.
    with_parsing : Data_Formatter -> Delimited_Format
    with_parsing self (value_formatter=Data_Formatter_Data) =
        self.clone value_formatter=value_formatter

    ## Create a clone of this without value parsing.
    without_parsing : Delimited_Format
    without_parsing self =
        self.clone value_formatter=Nothing

    ## Creates a clone of this with a changed line ending style.
    with_line_endings : Line_Ending_Style -> Delimited_Format
    with_line_endings self line_endings=Infer =
        self.clone line_endings=line_endings

    ## Creates a clone of this with comment parsing enabled.
    with_comments : Text -> Delimited_Format
    with_comments self comment_character='#' =
        self.clone comment_character=comment_character

    ## Creates a clone of this with comment parsing disabled.
    without_comments : Delimited_Format
    without_comments self =
        self.clone comment_character=Nothing

View File

@ -4,9 +4,9 @@ from Standard.Base.Error.Problem_Behavior import Ignore, Report_Error
import Standard.Table.Data.Table
from Standard.Table.Errors import Duplicate_Output_Column_Names_Data, Invalid_Output_Column_Names_Data, Invalid_Row_Data, Mismatched_Quote, Parser_Error, Additional_Invalid_Rows_Data
from Standard.Table.IO.File_Format import Infer
from Standard.Table.Data.Data_Formatter import Data_Formatter_Data
import Standard.Table.IO.Quote_Style
from Standard.Table.Delimited.Delimited_Format import Delimited_Format
import Standard.Table.Delimited.Quote_Style
polyglot java import org.enso.base.encoding.NewlineDetector
polyglot java import org.enso.table.read.DelimitedReader
@ -35,7 +35,7 @@ polyglot java import org.enso.table.read.QuoteStrippingParser
operation. By default, a warning is issued, but the operation proceeds.
If set to `Report_Error`, the operation fails with a dataflow error.
If set to `Ignore`, the operation proceeds without errors or warnings.
read_file : Delimited -> File -> Problem_Behavior -> Any
read_file : Delimited_Format -> File -> Problem_Behavior -> Any
read_file format file on_problems =
## We use the default `max_columns` setting. If we want to be able to
read files with unlimited column limits (risking OutOfMemory
@ -44,7 +44,7 @@ read_file format file on_problems =
file.with_input_stream [File.Option.Read] stream->
read_stream format stream on_problems related_file=file
read_text : Text -> Delimited -> Problem_Behavior -> Table
read_text : Text -> Delimited_Format -> Problem_Behavior -> Table
read_text text format on_problems =
java_reader = StringReader.new text
read_from_reader format java_reader on_problems
@ -64,7 +64,7 @@ read_text text format on_problems =
integer.
- related_file: The file related to the provided `java_stream`, if available,
or `Nothing`. It is used for more detailed error reporting.
read_stream : Delimited -> Input_Stream -> Problem_Behavior -> Integer -> File | Nothing -> Any
read_stream : Delimited_Format -> Input_Stream -> Problem_Behavior -> Integer -> File | Nothing -> Any
read_stream format stream on_problems max_columns=default_max_columns related_file=Nothing =
handle_io_exception related_file <|
stream.with_stream_decoder format.encoding on_problems reporting_stream_decoder->
@ -86,16 +86,16 @@ read_stream format stream on_problems max_columns=default_max_columns related_fi
- max_columns: Specifies the limit of columns to read. The limit is set to
avoid `OutOfMemory` errors on malformed files. It must be a positive
integer.
read_from_reader : Delimited -> Reader -> Problem_Behavior -> Integer -> Any
read_from_reader : Delimited_Format -> Reader -> Problem_Behavior -> Integer -> Any
read_from_reader format java_reader on_problems max_columns=4096 =
Illegal_Argument_Error.handle_java_exception <| handle_parsing_failure <| handle_parsing_exception <|
reader = prepare_delimited_reader java_reader format max_columns on_problems
reader = prepare_reader java_reader format max_columns on_problems
result_with_problems = reader.read
parsing_problems = Vector.from_polyglot_array (result_with_problems.problems) . map translate_reader_problem
on_problems.attach_problems_after (Table.Table_Data result_with_problems.value) parsing_problems
## PRIVATE
prepare_delimited_reader java_reader format max_columns on_problems newline_override=Nothing =
prepare_reader java_reader format max_columns on_problems newline_override=Nothing =
java_headers = case format.headers of
True -> DelimitedReader.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS
Infer -> DelimitedReader.HeaderBehavior.INFER
@ -161,7 +161,7 @@ type Detected_File_Metadata
## PRIVATE
Reads the beginning of the file to detect the existing headers and column
count.
detect_metadata : File -> File_Format.Delimited -> Detected_Headers
detect_metadata : File -> Delimited_Format -> Detected_Headers
detect_metadata file format =
on_problems = Ignore
result = handle_io_exception file <| Illegal_Argument_Error.handle_java_exception <| handle_parsing_failure <| handle_parsing_exception <|
@ -176,7 +176,7 @@ detect_metadata file format =
read files with unlimited column limits (risking OutOfMemory
exceptions), we can catch the exception indicating the limit has been
reached and restart parsing with an increased limit.
reader = prepare_delimited_reader newline_detecting_reader format max_columns=default_max_columns on_problems newline_override=trailing_line_separator
reader = prepare_reader newline_detecting_reader format max_columns=default_max_columns on_problems newline_override=trailing_line_separator
defined_columns = reader.getDefinedColumnNames
headers = case defined_columns of
Nothing ->

View File

@ -3,13 +3,12 @@ import Standard.Base.System
import Standard.Base.Error.Common as Errors
import Standard.Table.Data.Table
from Standard.Table.Errors as Table_Errors import Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Row, Mismatched_Quote, Parser_Error, Additional_Invalid_Rows, Column_Count_Mismatch, Column_Name_Mismatch
from Standard.Table.IO.File_Format import Infer
from Standard.Table.Errors import Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Row, Mismatched_Quote, Parser_Error, Additional_Invalid_Rows, Column_Count_Mismatch, Column_Name_Mismatch
from Standard.Table.Data.Data_Formatter import Data_Formatter
import Standard.Table.Data.Storage
import Standard.Table.IO.Quote_Style
from Standard.Table.Internal.Delimited_Reader import Existing_Headers, No_Headers
import Standard.Table.Internal.Delimited_Reader
from Standard.Table.Delimited.Delimited_Format import Delimited_Format
import Standard.Table.Delimited.Quote_Style
from Standard.Table.Delimited.Delimited_Reader import Existing_Headers, No_Headers, detect_metadata
import Standard.Table.Data.Match_Columns
polyglot java import org.enso.table.write.DelimitedWriter
@ -31,7 +30,7 @@ polyglot java import java.io.IOException
operation. By default, a warning is issued, but the operation proceeds.
If set to `Report_Error`, the operation fails with a dataflow error.
If set to `Ignore`, the operation proceeds without errors or warnings.
write_file : Table -> File_Format.Delimited -> File -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> Any
write_file : Table -> Delimited_Format -> File -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> Any
write_file table format file on_existing_file match_columns on_problems =
case on_existing_file of
Existing_File_Behavior.Append ->
@ -45,11 +44,11 @@ write_file table format file on_existing_file match_columns on_problems =
against the ones already in the file.
If the file does not exist or is empty, it acts like a regular overwrite.
append_to_file : Table -> File_Format.Delimited -> File -> Match_Columns -> Problem_Behavior -> Any
append_to_file : Table -> Delimited_Format -> File -> Match_Columns -> Problem_Behavior -> Any
append_to_file table format file match_columns on_problems =
Column_Name_Mismatch.handle_java_exception <| Column_Count_Mismatch.handle_java_exception <| Panic.recover Illegal_Argument_Error_Data <|
inferring_format = format.with_line_endings Infer
metadata = Delimited_Reader.detect_metadata file inferring_format
metadata = detect_metadata file inferring_format
preexisting_headers = metadata.headers
effective_line_separator = case format.line_endings of
Infer -> metadata.line_separator.if_nothing default_line_separator_for_writing.to_text
@ -87,7 +86,7 @@ append_to_file table format file match_columns on_problems =
## PRIVATE
Returns a Text value representing the table in the delimited format.
write_text : Table -> File_Format.Delimited -> Text
write_text : Table -> Delimited_Format -> Text
write_text table format =
java_writer = StringWriter.new
write_to_writer table format java_writer
@ -108,7 +107,7 @@ write_text table format =
or `Nothing`. It is used for more detailed error reporting.
- separator_override: An optional override for the line separator to use
instead of the one from `format`.
write_to_stream : Table -> File_Format.Delimited -> Output_Stream -> Problem_Behavior -> File | Nothing -> Text | Nothing -> Boolean -> Any
write_to_stream : Table -> Delimited_Format -> Output_Stream -> Problem_Behavior -> File | Nothing -> Text | Nothing -> Boolean -> Any
write_to_stream table format stream on_problems related_file=Nothing separator_override=Nothing needs_leading_newline=False =
handle_io_exception ~action = Panic.catch IOException action caught_panic->
Error.throw (File.wrap_io_exception related_file caught_panic.payload.cause)
@ -129,7 +128,7 @@ write_to_stream table format stream on_problems related_file=Nothing separator_o
- java_writer: A Java `Writer` to which characters will be written.
- separator_override: An optional override for the line separator to use
instead of the one from `format`.
write_to_writer : Table -> File_Format.Delimited -> Writer -> Text | Nothing -> Boolean -> Any
write_to_writer : Table -> Delimited_Format -> Writer -> Text | Nothing -> Boolean -> Any
write_to_writer table format java_writer separator_override=Nothing needs_leading_newline=False =
column_formatters = Panic.recover Illegal_Argument_Error_Data <| case format.value_formatter of
Nothing -> table.columns.map column-> case column.storage_type of

View File

@ -210,14 +210,6 @@ type Leading_Zeros
type Duplicate_Type_Selector
Duplicate_Type_Selector_Data column:Text ambiguous:Boolean
## Indicates that the given file type is not supported by the `Auto` format.
type Unsupported_File_Type
Unsupported_File_Type_Data filename
to_display_text : Text
to_display_text self =
"The "+self.filename+" has a type that is not supported by the Auto format."
## Indicates that the target range contains existing data and the user did not
specify to overwrite.
type Existing_Data

View File

@ -0,0 +1,64 @@
from Standard.Base import all
import Standard.Table.Data.Table
from Standard.Table.Excel.Section import Excel_Section, Worksheet, Sheet_Names, Range_Names
import Standard.Table.Excel.Excel_Reader
import Standard.Table.Excel.Excel_Writer
## PRIVATE
   Resolve the xls_format setting to a boolean.

   Arguments:
   - xls_format: `True`, `False` or `Infer`. Any value other than `Infer` is
     returned unchanged.
   - file: The file whose extension is inspected when `xls_format` is `Infer`.

   When inferring, the extension is compared ignoring case (so `.XLSX` is
   treated like `.xlsx`): `.xlsx` and `.xlsm` resolve to `False`, `.xls` and
   `.xlt` resolve to `True`. Any other extension results in an
   `Illegal_Argument_Error` dataflow error.
should_treat_as_xls_format : (Boolean|Infer) -> File -> Boolean ! Illegal_Argument_Error
should_treat_as_xls_format xls_format file =
    if xls_format != Infer then xls_format else
        extension = file.extension
        has_extension candidate = candidate.equals_ignore_case extension
        if (has_extension ".xlsx") || (has_extension ".xlsm") then False else
            if (has_extension ".xls") || (has_extension ".xlt") then True else
                Error.throw (Illegal_Argument_Error_Data ("Unknown file extension for Excel file (" + extension + ")"))
## File format for reading an Excel file into a `Table` (or `Vector`) and for
   writing a `Table` to an Excel file.
type Excel_Format
    ## Read Excel files into a Table or Vector.

       Arguments:
       - section: The `Excel_Section` to read from the workbook.
         This can be one of:
         - `Sheet_Names` - outputs a `Vector` of sheet names.
         - `Range_Names` - outputs a `Vector` of range names.
         - `Worksheet` - outputs a `Table` containing the specified sheet.
         - `Cell_Range` - outputs a `Table` containing the specified range.
       - headers: If set to `True`, the first row is used as column names. If
         set to `False`, the column names are Excel column names. If set to
         `Infer`, the process tries to infer if headers are present on the first
         row. If the column names are not unique, numeric suffixes will be
         appended to disambiguate them.
       - xls_format:
         If set to `True`, the file is read as an Excel 95-2003 format.
         If set to `False`, the file is read as an Excel 2007+ format.
         `Infer` will attempt to deduce this from the extension of the filename.
    Excel (section:Excel_Section=Worksheet) (headers:(Boolean|Infer)=Infer) (xls_format:(Boolean|Infer)=Infer)

    ## If the File_Format supports reading from the file, return a configured
       instance.

       Returns `Nothing` when the Excel format cannot be inferred from the
       extension of `file`.
    for_file : File -> Excel_Format | Nothing
    for_file file =
        is_xls = should_treat_as_xls_format Infer file
        if is_xls.is_error then Nothing else
            Excel xls_format=is_xls

    ## Implements the `File.read` for this `File_Format`
    read : File -> Problem_Behavior -> Any
    read self file on_problems =
        format = should_treat_as_xls_format self.xls_format file
        Excel_Reader.read_file file self.section self.headers on_problems format

    ## Implements the `Table.write` for this `File_Format`.

       `Sheet_Names` and `Range_Names` sections cannot be written to and
       result in an `Illegal_Argument_Error` dataflow error.
    write_table : File -> Table -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> Nothing
    write_table self file table on_existing_file match_columns on_problems =
        format = should_treat_as_xls_format self.xls_format file

        case self.section of
            # The error value must be built with the `_Data` constructor, as
            # done everywhere else in this module.
            Sheet_Names -> Error.throw (Illegal_Argument_Error_Data "Sheet_Names cannot be used for `write`.")
            Range_Names -> Error.throw (Illegal_Argument_Error_Data "Range_Names cannot be used for `write`.")
            _ -> Excel_Writer.write_file file table on_existing_file self.section self.headers match_columns on_problems format

View File

@ -0,0 +1,77 @@
from Standard.Base import all
import Standard.Base.System.File.Option
import Standard.Table.Data.Table
from Standard.Table.Excel.Range import Excel_Range_Data
from Standard.Table.Excel.Section import Excel_Section, Sheet_Names, Range_Names, Worksheet, Cell_Range
from Standard.Table.Errors import Invalid_Location_Data, Duplicate_Output_Column_Names_Data, Invalid_Output_Column_Names_Data
polyglot java import org.enso.table.excel.ExcelHeaders
polyglot java import org.enso.table.read.ExcelReader
polyglot java import org.enso.table.error.InvalidLocationException
polyglot java import org.apache.poi.UnsupportedFileFormatException
polyglot java import org.enso.table.util.problems.DuplicateNames
polyglot java import org.enso.table.util.problems.InvalidNames
## PRIVATE
   Wraps the value produced by the Java Excel reader in a `Table` and attaches
   the problems reported alongside it according to `on_problems`.

   Arguments:
   - on_problems: The `Problem_Behavior` used to attach the problems.
   - result_with_problems: The Java result exposing `value` and `problems`.
prepare_reader_table : Problem_Behavior -> Any -> Table
prepare_reader_table on_problems result_with_problems =
    # Known Java problem classes are translated to their Enso counterparts;
    # anything else is passed through untouched.
    translate problem = case Java.is_instance problem DuplicateNames of
        True -> Duplicate_Output_Column_Names_Data (Vector.from_polyglot_array problem.duplicatedNames)
        False -> case Java.is_instance problem InvalidNames of
            True -> Invalid_Output_Column_Names_Data (Vector.from_polyglot_array problem.invalidNames)
            False -> problem
    java_problems = Vector.from_polyglot_array result_with_problems.problems
    translated = java_problems.map translate
    on_problems.attach_problems_after (Table.Table_Data result_with_problems.value) translated
## PRIVATE
   Translates the Enso `headers` setting into the matching Java
   `ExcelHeaders.HeaderBehavior` value.

   Arguments:
   - headers: `True`, `False` or `Infer`.
make_java_headers : (Boolean|Infer) -> ExcelHeaders.HeaderBehavior
make_java_headers headers =
    case headers of
        Infer -> ExcelHeaders.HeaderBehavior.INFER
        False -> ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES
        True -> ExcelHeaders.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS
## PRIVATE
   Opens `file` for reading, runs `reader` on its Java stream and maps the
   Java exceptions raised while doing so.

   Arguments:
   - file: The file to open for reading.
   - reader: Function applied to the Java input stream of `file`.
handle_reader : File -> (Input_Stream -> (Table | Vector)) -> (Table | Vector)
handle_reader file reader =
    on_bad_format caught = Error.throw (File.IO_Error file caught.payload.cause.getMessage)
    on_bad_location caught = Error.throw (Invalid_Location_Data caught.payload.cause.getCause)
    # The read itself stays the innermost (suspended) action so that every
    # handler below is installed before the stream is opened.
    File.handle_java_exceptions file <|
        Panic.catch InvalidLocationException handler=on_bad_location <|
            Panic.catch UnsupportedFileFormatException handler=on_bad_format <|
                file.with_input_stream [File.Option.Read] stream->
                    stream.with_java_stream reader
## PRIVATE
Reads an input Excel file according to the provided section.
`Sheet_Names` and `Range_Names` produce a `Vector`; `Worksheet` and
`Cell_Range` produce a `Table` built by `prepare_reader_table`.
Arguments:
- file: The File object to read.
- section: The part of the Excel document to read.
- headers: `True`, `False` or `Infer`. Converted with `make_java_headers`
and passed to the Java reader when a `Worksheet` or `Cell_Range` is read.
It is not used for `Sheet_Names` or `Range_Names`.
- on_problems: Specifies the behavior when a problem occurs during the
operation. By default, a warning is issued, but the operation proceeds.
If set to `Report_Error`, the operation fails with a dataflow error.
If set to `Ignore`, the operation proceeds without errors or warnings.
- xls_format: If `True` then the file is read in using Excel 95-2003 format
otherwise reads in Excel 2007+ format. Defaults to `False`.
read_file : File -> Excel_Section -> (Boolean|Infer) -> Problem_Behavior -> Boolean -> (Table | Vector)
read_file file section headers on_problems xls_format=False =
reader stream = case section of
Sheet_Names -> Vector.from_polyglot_array (ExcelReader.readSheetNames stream xls_format)
Range_Names -> Vector.from_polyglot_array (ExcelReader.readRangeNames stream xls_format)
Worksheet sheet skip_rows row_limit ->
prepare_reader_table on_problems <| case sheet of
Integer -> ExcelReader.readSheetByIndex stream sheet (make_java_headers headers) skip_rows row_limit xls_format
Text -> ExcelReader.readSheetByName stream sheet (make_java_headers headers) skip_rows row_limit xls_format
Cell_Range address skip_rows row_limit ->
prepare_reader_table on_problems <| case address of
Excel_Range_Data _ -> ExcelReader.readRange stream address.java_range (make_java_headers headers) skip_rows row_limit xls_format
Text -> ExcelReader.readRangeByName stream address (make_java_headers headers) skip_rows row_limit xls_format
handle_reader file reader

View File

@ -0,0 +1,78 @@
from Standard.Base import all
import Standard.Table.Data.Table
from Standard.Table.Excel.Excel_Reader import handle_reader, make_java_headers
from Standard.Table.Excel.Section import Worksheet, Cell_Range
from Standard.Table.Excel.Range import Excel_Range_Data
from Standard.Table.Errors import Invalid_Location_Data, Range_Exceeded_Data, Existing_Data_Data, Column_Count_Mismatch, Column_Name_Mismatch
import Standard.Table.Data.Match_Columns
polyglot java import org.enso.table.read.ExcelReader
polyglot java import org.enso.table.write.ExcelWriter
polyglot java import org.enso.table.write.ExistingDataMode
polyglot java import org.enso.table.error.ExistingDataException
polyglot java import org.enso.table.error.RangeExceededException
polyglot java import org.enso.table.error.InvalidLocationException
polyglot java import java.lang.IllegalStateException
## PRIVATE
   Selects the Java `ExistingDataMode` matching the requested
   `Existing_File_Behavior`.

   Arguments:
   - on_existing_file: The behavior requested for an existing file.
   - match_columns: Only inspected when `on_existing_file` is `Append`, where
     it chooses between appending by name and by position.
make_java_existing_data_mode : Existing_File_Behavior -> Match_Columns -> ExcelWriter.ExistingDataMode
make_java_existing_data_mode on_existing_file match_columns =
    case on_existing_file of
        Existing_File_Behavior.Append -> case match_columns of
            Match_Columns.By_Position -> ExistingDataMode.APPEND_BY_INDEX
            Match_Columns.By_Name -> ExistingDataMode.APPEND_BY_NAME
        Existing_File_Behavior.Backup -> ExistingDataMode.REPLACE
        Existing_File_Behavior.Overwrite -> ExistingDataMode.REPLACE
        Existing_File_Behavior.Error -> ExistingDataMode.ERROR
## PRIVATE
   Writes a Table to an Excel file.

   Arguments:
   - file: The file to write to. If it does not exist a new workbook is
     created, otherwise the existing workbook is loaded and modified.
   - table: The `Table` whose data is written.
   - on_existing_file: Translated by `make_java_existing_data_mode` to decide
     how existing data in the target is treated, and also used to write the
     resulting workbook to `file`. For `Append` the file is written using the
     `Backup` behavior.
   - section: The `Worksheet` or `Cell_Range` to write the table to.
   - headers: `True`, `False` or `Infer`; converted with `make_java_headers`.
   - match_columns: Passed to `make_java_existing_data_mode`, which only
     inspects it when appending.
   - on_problems: Currently ignored by this function.
   - xls_format: If `True` the workbook is created or read in Excel 95-2003
     format, otherwise in Excel 2007+ format. Defaults to `False`.

   If writing into the workbook results in an error, that error is returned
   and `file` is not written.
write_file : File -> Table -> Existing_File_Behavior -> (Worksheet | Cell_Range) -> (Boolean|Infer) -> Match_Columns -> Problem_Behavior -> Boolean -> Any
write_file file table on_existing_file section headers match_columns _ xls_format=False =
    workbook = if file.exists.not then ExcelWriter.createWorkbook xls_format else
        handle_reader file stream->(ExcelReader.getWorkbook stream xls_format)

    existing_data_mode = make_java_existing_data_mode on_existing_file match_columns
    java_headers = make_java_headers headers
    ExcelWriter.setEnsoToTextCallbackIfUnset (.to_text)
    result = handle_writer <| case section of
        Worksheet sheet skip_rows row_limit ->
            ExcelWriter.writeTableToSheet workbook sheet existing_data_mode skip_rows table.java_table row_limit java_headers
        Cell_Range address skip_rows row_limit -> case address of
            Excel_Range_Data java_range -> ExcelWriter.writeTableToRange workbook java_range existing_data_mode skip_rows table.java_table row_limit java_headers
            Text -> ExcelWriter.writeTableToRange workbook address existing_data_mode skip_rows table.java_table row_limit java_headers

    if result.is_error then result else
        write_stream stream = stream.with_java_stream java_stream->
            workbook.write java_stream
        case on_existing_file of
            Existing_File_Behavior.Append ->
                ## Special handling - have successfully added the extra sheet/range so now overwrite file with backup.
                Existing_File_Behavior.Backup.write file write_stream
            _ -> on_existing_file.write file write_stream
## PRIVATE
Handle and map the Java errors when writing an Excel file.
Arguments:
- writer: The suspended write action that is run inside the handlers.
Panics caused by `InvalidLocationException`, `RangeExceededException` and
`ExistingDataException` are turned into `Invalid_Location_Data`,
`Range_Exceeded_Data` and `Existing_Data_Data` dataflow errors respectively.
An `IllegalStateException` is re-thrown as an `Illegal_State_Error_Data`
panic. `Column_Name_Mismatch`, `Column_Count_Mismatch` and
`Illegal_Argument_Error` install their own `handle_java_exception` handlers.
handle_writer ~writer =
bad_location caught_panic = Error.throw (Invalid_Location_Data caught_panic.payload.cause.getCause)
handle_bad_location = Panic.catch InvalidLocationException handler=bad_location
throw_range_exceeded caught_panic = Error.throw (Range_Exceeded_Data caught_panic.payload.cause.getMessage)
handle_range_exceeded = Panic.catch RangeExceededException handler=throw_range_exceeded
throw_existing_data caught_panic = Error.throw (Existing_Data_Data caught_panic.payload.cause.getMessage)
handle_existing_data = Panic.catch ExistingDataException handler=throw_existing_data
## Should be impossible - occurs if no fallback serializer is provided.
throw_illegal_state caught_panic = Panic.throw (Illegal_State_Error_Data caught_panic.payload.cause.getMessage)
handle_illegal_state = Panic.catch IllegalStateException handler=throw_illegal_state
handle_illegal_state <| Column_Name_Mismatch.handle_java_exception <|
Column_Count_Mismatch.handle_java_exception <| handle_bad_location <|
Illegal_Argument_Error.handle_java_exception <| handle_range_exceeded <| handle_existing_data <|
writer

View File

@ -0,0 +1,134 @@
from Standard.Base import all
polyglot java import org.enso.table.excel.ExcelRange as Java_Range
## PRIVATE
   Guards a suspended computation behind a validation check.

   Arguments:
   - validation: Whether the check passed.
   - error_message: Suspended message, only evaluated when the check failed.
   - wrapped: Suspended computation, only evaluated when the check passed.

   Returns the value of `wrapped` when `validation` is `True`, otherwise an
   `Illegal_Argument_Error_Data` dataflow error carrying `error_message`.
validate : Boolean -> Text -> Any
validate validation ~error_message ~wrapped = case validation of
    True -> wrapped
    False -> Error.throw (Illegal_Argument_Error_Data error_message)
## PRIVATE
Largest column index (inclusive) accepted by `Excel_Range.is_valid_column`.
excel_2007_column_limit = 16384
## PRIVATE
Largest row index (inclusive) accepted by `Excel_Range.is_valid_row`.
excel_2007_row_limit = 1048576
type Excel_Range
## Specifies a range within an Excel Workbook.
Arguments:
- java_range: The underlying Java `ExcelRange` that the accessors below
delegate to.
Excel_Range_Data java_range:Java_Range
## Gets the name of the sheet.
sheet_name : Text
sheet_name self = self.java_range.getSheetName
## Gets the index (1-based) of the top row of the range.
Returns `Nothing` if referring to a complete column.
top_row : Integer | Nothing
top_row self = if self.java_range.isWholeColumn then Nothing else
self.java_range.getTopRow
## Gets the index (1-based) of the bottom row of the range.
Returns `Nothing` if referring to a complete column.
bottom_row : Integer | Nothing
bottom_row self = if self.java_range.isWholeColumn then Nothing else
self.java_range.getBottomRow
## Gets the index (1-based) of the left column of the range.
Returns `Nothing` if referring to a complete row.
left_column : Integer | Nothing
left_column self = if self.java_range.isWholeRow then Nothing else
self.java_range.getLeftColumn
## Gets the index (1-based) of the right column of the range.
Returns `Nothing` if referring to a complete row.
right_column : Integer | Nothing
right_column self = if self.java_range.isWholeRow then Nothing else
self.java_range.getRightColumn
## Checks whether the Excel_Range refers to a single cell.
is_single_cell : Boolean
is_single_cell self = self.java_range.isSingleCell
## Gets the address of this range in A1 format.
address : Text
address self = self.java_range.getAddress
## Displays the Excel_Range as the text `Excel_Range ` followed by its
address.
to_text : Text
to_text self = "Excel_Range " + self.address
## Validates if a column index (1-based) is within the valid range for
Excel.
Arguments:
- column: 1-based index to check.
is_valid_column : Integer -> Boolean
is_valid_column column =
(column > 0) && (column <= excel_2007_column_limit)
## Validates if a row index (1-based) is within the valid range for Excel.
Arguments:
- row: 1-based index to check.
is_valid_row : Integer -> Boolean
is_valid_row row =
(row > 0) && (row <= excel_2007_row_limit)
## Given a column name, parses to the index (1-based) or return index
unchanged.
Arguments:
- column: Either an `Integer`, returned as is, or a column name that is
parsed with the Java `parseA1Column`.
column_index : (Text|Integer) -> Integer
column_index column =
if column.is_an Integer then column else Java_Range.parseA1Column column
## Creates a Range from an address.
Arguments:
- address: The address text handed to the Java `ExcelRange` constructor.
from_address : Text -> Excel_Range
from_address address =
Illegal_Argument_Error.handle_java_exception <|
Excel_Range_Data (Java_Range.new address)
## Create a Range for a single cell.
Arguments:
- sheet: The sheet passed to the Java range.
- column: Column name or 1-based column index.
- row: 1-based row index.
Returns an `Illegal_Argument_Error_Data` error if the column or the row
fails `is_valid_column` / `is_valid_row`.
for_cell : Text -> (Text|Integer) -> Integer -> Excel_Range
for_cell sheet column row =
col_index = Excel_Range.column_index column
col_valid = validate (Excel_Range.is_valid_column col_index) ("Invalid column for Excel: " + column.to_text + ".")
row_valid = validate (Excel_Range.is_valid_row row) ("Invalid row for Excel: " + row.to_text + ".")
col_valid <| row_valid <|
Excel_Range_Data (Java_Range.new sheet col_index row)
## Create an Excel_Range for a range of cells.
Arguments:
- sheet: The sheet passed to the Java range.
- left: Column name or 1-based index of the left column.
- top: 1-based index of the top row.
- right: Column name or 1-based index of the right column.
- bottom: 1-based index of the bottom row.
Returns an `Illegal_Argument_Error_Data` error if any of the four bounds
fails validation.
for_range : Text -> (Text|Integer) -> Integer -> (Text|Integer) -> Integer -> Excel_Range
for_range sheet left top right bottom =
left_index = Excel_Range.column_index left
right_index = Excel_Range.column_index right
left_valid = validate (Excel_Range.is_valid_column left_index) ("Invalid left column for Excel: " + left.to_text + ".")
right_valid = validate (Excel_Range.is_valid_column right_index) ("Invalid right column for Excel: " + right.to_text + ".")
top_valid = validate (Excel_Range.is_valid_row top) ("Invalid top row for Excel: " + top.to_text + ".")
bottom_valid = validate (Excel_Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom.to_text + ".")
left_valid <| right_valid <| top_valid <| bottom_valid <|
Excel_Range_Data (Java_Range.new sheet left_index top right_index bottom)
## Create an Excel_Range for a set of columns.
Arguments:
- sheet: The sheet passed to the Java range.
- left: Column name or 1-based index of the left column.
- right: Column name or 1-based index of the right column. Defaults to
`left`.
for_columns : Text -> (Text|Integer) -> (Text|Integer) -> Excel_Range
for_columns sheet left (right=left) =
left_index = Excel_Range.column_index left
right_index = Excel_Range.column_index right
left_valid = validate (Excel_Range.is_valid_column left_index) ("Invalid left column for Excel: " + left.to_text + ".")
right_valid = validate (Excel_Range.is_valid_column right_index) ("Invalid right column for Excel: " + right.to_text + ".")
left_valid <| right_valid <|
Excel_Range_Data (Java_Range.forColumns sheet left_index right_index)
## Create an Excel_Range for a set of rows.
Arguments:
- sheet: The sheet passed to the Java range.
- top: 1-based index of the top row.
- bottom: 1-based index of the bottom row. Defaults to `top`.
for_rows : Text -> Integer -> Integer -> Excel_Range
for_rows sheet top (bottom=top) =
top_valid = validate (Excel_Range.is_valid_row top) ("Invalid top row for Excel: " + top.to_text + ".")
bottom_valid = validate (Excel_Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom.to_text + ".")
top_valid <| bottom_valid <|
Excel_Range_Data (Java_Range.forRows sheet top bottom)

View File

@ -0,0 +1,20 @@
from Standard.Base import all
from Standard.Table.Excel.Range import Excel_Range
type Excel_Section
## Gets a list of sheets within a workbook.
Sheet_Names
## Gets a list of named ranges within a workbook.
Range_Names
## Gets the data from a specific sheet. Column names are the Excel column
names.
Arguments:
- sheet: The sheet to use, given either as an `Integer` index or as a
`Text` name. Defaults to `1`.
- skip_rows: Number of rows to skip. Defaults to `0`.
- row_limit: Maximum number of rows, or `Nothing` (the default) for no
limit.
Worksheet (sheet:(Integer|Text)=1) (skip_rows:Integer=0) (row_limit:(Integer|Nothing)=Nothing)
## Gets a specific range (taking either a defined name or external style
address) from the workbook.
If it is a single cell, it will be treated as the top left cell and will
expand right and down to cover the connected cells.
Arguments:
- address: Either a `Text` or an `Excel_Range` identifying the range.
- skip_rows: Number of rows to skip. Defaults to `0`.
- row_limit: Maximum number of rows, or `Nothing` (the default) for no
limit.
Cell_Range (address:(Text|Excel_Range)) (skip_rows:Integer=0) (row_limit:(Integer|Nothing)=Nothing)

View File

@ -0,0 +1,39 @@
from Standard.Base import Any, Problem_Behavior, Nothing, Error, Panic, Meta, File, File_Format, Plain_Text_Format, Bytes
from Standard.Base.Error.Common import Unsupported_File_Type, Unsupported_File_Type_Data, No_Such_Method_Error_Data, Illegal_Argument_Error_Data
from Standard.Table.Delimited.Delimited_Format import Delimited_Format
from Standard.Table.Excel.Excel_Format import Excel_Format
## PRIVATE
The `File_Format` types consulted when auto-detecting the format of a file.
`get_format` asks each of them, in this order, via `for_file` and uses the
first one that does not return `Nothing`.
format_types = [Plain_Text_Format, Bytes, Delimited_Format, Excel_Format]
## PRIVATE
   Finds the first entry of `format_types` whose `for_file` yields a
   configured format for `file`.

   Arguments:
   - file: The file to find a format for.

   Returns `Nothing` when no entry of `format_types` accepts the file.
get_format : File -> Any | Nothing
get_format file =
    go index = case index < format_types.length of
        False -> Nothing
        True ->
            candidate = (format_types.at index).for_file file
            if candidate.is_nothing.not then candidate else
                @Tail_Call go (index + 1)
    go 0
type Auto_Detect
    ## PRIVATE
       Implements the `File.read` for this `File_Format`.

       The concrete format is chosen with `get_format`; if none accepts the
       file an `Unsupported_File_Type_Data` error is returned.
    read : File -> Problem_Behavior -> Any ! Unsupported_File_Type
    read self file on_problems =
        detected = get_format file
        if detected.is_nothing then Error.throw (Unsupported_File_Type_Data ("No File_Format supports '" + file.extension + "'")) else
            detected.read file on_problems

    ## PRIVATE
       Implements the `Table.write` for this `File_Format`.

       The concrete format is chosen with `get_format`. If the chosen format
       has no `write_table` method an `Illegal_Argument_Error_Data` error is
       returned.
    write_table : File -> Table -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> Nothing
    write_table self file table on_existing_file match_columns on_problems =
        detected = get_format file
        if detected.is_nothing then Error.throw (Unsupported_File_Type_Data ("No File_Format supports '" + file.extension + "'")) else
            not_writable _ =
                name = Meta.get_constructor_name (Meta.meta detected)
                Error.throw (Illegal_Argument_Error_Data ("Saving a Table as " + name + " is not supported."))
            Panic.catch No_Such_Method_Error_Data (detected.write_table file table on_existing_file match_columns on_problems) not_writable

View File

@ -1,287 +0,0 @@
from Standard.Base import all
import Standard.Base.System.File.Option
import Standard.Base.Error.Common as Errors
import Standard.Table.Data.Table
from Standard.Table.IO.File_Format import Infer
from Standard.Table.Errors import Invalid_Location_Data, Duplicate_Output_Column_Names_Data, Invalid_Output_Column_Names_Data, Range_Exceeded_Data, Existing_Data_Data, Column_Count_Mismatch, Column_Name_Mismatch
import Standard.Table.Data.Match_Columns
polyglot java import org.enso.table.excel.ExcelRange as Java_Range
polyglot java import org.enso.table.excel.ExcelHeaders
polyglot java import org.enso.table.read.ExcelReader
polyglot java import org.enso.table.write.ExcelWriter
polyglot java import org.enso.table.write.ExistingDataMode
polyglot java import org.enso.table.error.ExistingDataException
polyglot java import org.enso.table.error.RangeExceededException
polyglot java import org.enso.table.error.InvalidLocationException
polyglot java import java.lang.IllegalStateException
polyglot java import java.io.IOException
polyglot java import org.apache.poi.UnsupportedFileFormatException
polyglot java import org.enso.table.util.problems.DuplicateNames
polyglot java import org.enso.table.util.problems.InvalidNames
type Excel_Section
## Gets a list of sheets within a workbook.
Sheet_Names
## Gets a list of named ranges within a workbook.
Range_Names
## Gets the data from a specific sheet. Column names are the Excel column
names.
Arguments:
- sheet: The sheet to use, given either as an `Integer` index or as a
`Text` name. Defaults to `1`.
- skip_rows: Number of rows to skip. Defaults to `0`.
- row_limit: Maximum number of rows, or `Nothing` (the default) for no
limit.
Sheet (sheet:(Integer|Text)=1) (skip_rows:Integer=0) (row_limit:(Integer|Nothing)=Nothing)
## Gets a specific range (taking either a defined name or external style
address) from the workbook.
If it is a single cell, it will be treated as the top left cell and will
expand right and down to cover the connected cells.
Arguments:
- address: Either a `Text` or an `Excel_Range` identifying the range.
- skip_rows: Number of rows to skip. Defaults to `0`.
- row_limit: Maximum number of rows, or `Nothing` (the default) for no
limit.
Cell_Range (address:(Text|Excel_Range)) (skip_rows:Integer=0) (row_limit:(Integer|Nothing)=Nothing)
type Excel_Range
## Specifies a range within an Excel Workbook.
Arguments:
- java_range: The underlying Java `ExcelRange` that the accessors below
delegate to.
Excel_Range_Data java_range:Java_Range
## Gets the name of the sheet.
sheet_name : Text
sheet_name self = self.java_range.getSheetName
## Gets the index (1-based) of the top row of the range.
Returns `Nothing` if referring to a complete column.
top_row : Integer | Nothing
top_row self = if self.java_range.isWholeColumn then Nothing else
self.java_range.getTopRow
## Gets the index (1-based) of the bottom row of the range.
Returns `Nothing` if referring to a complete column.
bottom_row : Integer | Nothing
bottom_row self = if self.java_range.isWholeColumn then Nothing else
self.java_range.getBottomRow
## Gets the index (1-based) of the left column of the range.
Returns `Nothing` if referring to a complete row.
left_column : Integer | Nothing
left_column self = if self.java_range.isWholeRow then Nothing else
self.java_range.getLeftColumn
## Gets the index (1-based) of the right column of the range.
Returns `Nothing` if referring to a complete row.
right_column : Integer | Nothing
right_column self = if self.java_range.isWholeRow then Nothing else
self.java_range.getRightColumn
## Checks whether the Excel_Range refers to a single cell.
is_single_cell : Boolean
is_single_cell self = self.java_range.isSingleCell
## Gets the address of this range in A1 format.
address : Text
address self = self.java_range.getAddress
## Displays the Excel_Range as the text `Excel_Range ` followed by its
address.
to_text : Text
to_text self = "Excel_Range " + self.address
## Validates if a column index (1-based) is within the valid range for
Excel.
Arguments:
- column: 1-based index to check.
is_valid_column : Integer -> Boolean
is_valid_column self column =
excel_2007_column_limit = 16384
(column > 0) && (column <= excel_2007_column_limit)
## Validates if a row index (1-based) is within the valid range for Excel.
Arguments:
- row: 1-based index to check.
is_valid_row : Integer -> Boolean
is_valid_row self row =
excel_2007_row_limit = 1048576
(row > 0) && (row <= excel_2007_row_limit)
## Given a column name, parses to the index (1-based) or return index
unchanged.
Arguments:
- column: Either an `Integer`, returned as is, or a column name that is
parsed with the Java `parseA1Column`.
column_index : (Text|Integer) -> Integer
column_index self column =
if column.is_an Integer then column else Java_Range.parseA1Column column
## Creates a Range from an address.
Arguments:
- address: The address text handed to the Java `ExcelRange` constructor.
from_address : Text -> Excel_Range
from_address address =
Illegal_Argument_Error.handle_java_exception <|
Excel_Range_Data (Java_Range.new address)
## Create a Range for a single cell.
Arguments:
- sheet: The sheet passed to the Java range.
- column: Column name or 1-based column index.
- row: 1-based row index.
Returns an `Illegal_Argument_Error_Data` error if the column or the row
fails `is_valid_column` / `is_valid_row`.
for_cell : Text -> (Text|Integer) -> Integer -> Excel_Range
for_cell self sheet column row =
col_index = Excel_Range.column_index column
col_valid = validate (Excel_Range.is_valid_column col_index) ("Invalid column for Excel: " + column.to_text + ".")
row_valid = validate (Excel_Range.is_valid_row row) ("Invalid row for Excel: " + row.to_text + ".")
col_valid <| row_valid <|
Excel_Range_Data (Java_Range.new sheet col_index row)
## Create an Excel_Range for a range of cells.
Arguments:
- sheet: The sheet passed to the Java range.
- left: Column name or 1-based index of the left column.
- top: 1-based index of the top row.
- right: Column name or 1-based index of the right column.
- bottom: 1-based index of the bottom row.
Returns an `Illegal_Argument_Error_Data` error if any of the four bounds
fails validation.
for_range : Text -> (Text|Integer) -> Integer -> (Text|Integer) -> Integer -> Excel_Range
for_range self sheet left top right bottom =
left_index = Excel_Range.column_index left
right_index = Excel_Range.column_index right
left_valid = validate (Excel_Range.is_valid_column left_index) ("Invalid left column for Excel: " + left.to_text + ".")
right_valid = validate (Excel_Range.is_valid_column right_index) ("Invalid right column for Excel: " + right.to_text + ".")
top_valid = validate (Excel_Range.is_valid_row top) ("Invalid top row for Excel: " + top.to_text + ".")
bottom_valid = validate (Excel_Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom.to_text + ".")
left_valid <| right_valid <| top_valid <| bottom_valid <|
Excel_Range_Data (Java_Range.new sheet left_index top right_index bottom)
## Create an Excel_Range for a set of columns.
Arguments:
- sheet: The sheet passed to the Java range.
- left: Column name or 1-based index of the left column.
- right: Column name or 1-based index of the right column. Defaults to
`left`.
for_columns : Text -> (Text|Integer) -> (Text|Integer) -> Excel_Range
for_columns self sheet left (right=left) =
left_index = Excel_Range.column_index left
right_index = Excel_Range.column_index right
left_valid = validate (Excel_Range.is_valid_column left_index) ("Invalid left column for Excel: " + left.to_text + ".")
right_valid = validate (Excel_Range.is_valid_column right_index) ("Invalid right column for Excel: " + right.to_text + ".")
left_valid <| right_valid <|
Excel_Range_Data (Java_Range.forColumns sheet left_index right_index)
## Create an Excel_Range for a set of rows.
Arguments:
- sheet: The sheet passed to the Java range.
- top: 1-based index of the top row.
- bottom: 1-based index of the bottom row. Defaults to `top`.
for_rows : Text -> Integer -> Integer -> Excel_Range
for_rows self sheet top (bottom=top) =
top_valid = validate (Excel_Range.is_valid_row top) ("Invalid top row for Excel: " + top.to_text + ".")
bottom_valid = validate (Excel_Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom.to_text + ".")
top_valid <| bottom_valid <|
Excel_Range_Data (Java_Range.forRows sheet top bottom)
## PRIVATE
   Guards a suspended computation behind a validation check.

   Arguments:
   - validation: Whether the check passed.
   - error_message: Suspended message, only evaluated when the check failed.
   - wrapped: Suspended computation, only evaluated when the check passed.

   Returns the value of `wrapped` when `validation` is `True`, otherwise an
   `Illegal_Argument_Error_Data` dataflow error carrying `error_message`.
validate : Boolean -> Text -> Any
validate validation ~error_message ~wrapped = case validation of
    True -> wrapped
    False -> Error.throw (Illegal_Argument_Error_Data error_message)
## PRIVATE
Reads an input Excel file according to the provided section.
`Sheet_Names` and `Range_Names` produce a `Vector`; `Sheet` and
`Cell_Range` produce a `Table` built by `prepare_reader_table`.
Arguments:
- file: The File object to read.
- section: The part of the Excel document to read.
- headers: `True`, `False` or `Infer`. Converted with `make_java_headers`
and passed to the Java reader when a `Sheet` or `Cell_Range` is read.
It is not used for `Sheet_Names` or `Range_Names`.
- on_problems: Specifies the behavior when a problem occurs during the
operation. By default, a warning is issued, but the operation proceeds.
If set to `Report_Error`, the operation fails with a dataflow error.
If set to `Ignore`, the operation proceeds without errors or warnings.
- xls_format: If `True` then the file is read in using Excel 95-2003 format
otherwise reads in Excel 2007+ format. Defaults to `False`.
read_excel : File -> Excel_Section -> (Boolean|Infer) -> Problem_Behavior -> Boolean -> (Table | Vector)
read_excel file section headers on_problems xls_format=False =
reader stream = case section of
Sheet_Names -> Vector.from_polyglot_array (ExcelReader.readSheetNames stream xls_format)
Range_Names -> Vector.from_polyglot_array (ExcelReader.readRangeNames stream xls_format)
Sheet sheet skip_rows row_limit ->
prepare_reader_table on_problems <| case sheet of
Integer -> ExcelReader.readSheetByIndex stream sheet (make_java_headers headers) skip_rows row_limit xls_format
Text -> ExcelReader.readSheetByName stream sheet (make_java_headers headers) skip_rows row_limit xls_format
Cell_Range address skip_rows row_limit ->
prepare_reader_table on_problems <| case address of
Excel_Range_Data _ -> ExcelReader.readRange stream address.java_range (make_java_headers headers) skip_rows row_limit xls_format
Text -> ExcelReader.readRangeByName stream address (make_java_headers headers) skip_rows row_limit xls_format
handle_reader file reader
## PRIVATE
Writes a Table to an Excel file.
If writing into the workbook results in an error, that error is returned
and `file` is not written.
Arguments:
- file: The file to write to. If it does not exist a new workbook is
created, otherwise the existing workbook is loaded and modified.
- table: The `Table` whose data is written.
- on_existing_file: Translated by `make_java_existing_data_mode` to decide
how existing data in the target is treated, and also used to write the
resulting workbook to `file`. For `Append` the file is written using the
`Backup` behavior.
- section: The `Sheet` or `Cell_Range` to write the table to.
- headers: `True`, `False` or `Infer`; converted with `make_java_headers`.
- match_columns: Passed to `make_java_existing_data_mode`, which only
inspects it when appending.
- on_problems: Currently ignored by this function (the parameter is `_`).
- xls_format: If `True` the workbook is created or read in Excel 95-2003
format, otherwise in Excel 2007+ format. Defaults to `False`.
write_excel : File -> Table -> Existing_File_Behavior -> (Sheet | Cell_Range) -> (Boolean|Infer) -> Match_Columns -> Problem_Behavior -> Boolean
write_excel file table on_existing_file section headers match_columns _ xls_format=False =
workbook = if file.exists.not then ExcelWriter.createWorkbook xls_format else
handle_reader file stream->(ExcelReader.getWorkbook stream xls_format)
existing_data_mode = make_java_existing_data_mode on_existing_file match_columns
java_headers = make_java_headers headers
ExcelWriter.setEnsoToTextCallbackIfUnset (.to_text)
result = handle_writer <| case section of
Sheet sheet skip_rows row_limit ->
ExcelWriter.writeTableToSheet workbook sheet existing_data_mode skip_rows table.java_table row_limit java_headers
Cell_Range address skip_rows row_limit -> case address of
Excel_Range_Data java_range -> ExcelWriter.writeTableToRange workbook java_range existing_data_mode skip_rows table.java_table row_limit java_headers
Text -> ExcelWriter.writeTableToRange workbook address existing_data_mode skip_rows table.java_table row_limit java_headers
if result.is_error then result else
write_stream stream = stream.with_java_stream java_stream->
workbook.write java_stream
case on_existing_file of
Existing_File_Behavior.Append ->
## Special handling - have successfully added the extra sheet/range so now overwrite file with backup.
Existing_File_Behavior.Backup.write file write_stream
_ -> on_existing_file.write file write_stream
## PRIVATE
   Wraps the value produced by the Java Excel reader in a `Table` and attaches
   the problems reported alongside it according to `on_problems`.

   Arguments:
   - on_problems: The `Problem_Behavior` used to attach the problems.
   - result_with_problems: The Java result exposing `value` and `problems`.
prepare_reader_table : Problem_Behavior -> Any -> Table
prepare_reader_table on_problems result_with_problems =
    # Known Java problem classes are translated to their Enso counterparts;
    # anything else is passed through untouched.
    translate problem = case Java.is_instance problem DuplicateNames of
        True -> Duplicate_Output_Column_Names_Data (Vector.from_polyglot_array problem.duplicatedNames)
        False -> case Java.is_instance problem InvalidNames of
            True -> Invalid_Output_Column_Names_Data (Vector.from_polyglot_array problem.invalidNames)
            False -> problem
    java_problems = Vector.from_polyglot_array result_with_problems.problems
    translated = java_problems.map translate
    on_problems.attach_problems_after (Table.Table_Data result_with_problems.value) translated
## PRIVATE
   Translates the Enso `headers` setting into the matching Java
   `ExcelHeaders.HeaderBehavior` value.

   Arguments:
   - headers: `True`, `False` or `Infer`.
make_java_headers : (Boolean|Infer) -> ExcelHeaders.HeaderBehavior
make_java_headers headers =
    case headers of
        Infer -> ExcelHeaders.HeaderBehavior.INFER
        False -> ExcelHeaders.HeaderBehavior.EXCEL_COLUMN_NAMES
        True -> ExcelHeaders.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS
## PRIVATE
   Selects the Java `ExistingDataMode` matching the requested
   `Existing_File_Behavior`.

   Arguments:
   - on_existing_file: The behavior requested for an existing file.
   - match_columns: Only inspected when `on_existing_file` is `Append`, where
     it chooses between appending by name and by position.
make_java_existing_data_mode : Existing_File_Behavior -> Match_Columns -> ExcelWriter.ExistingDataMode
make_java_existing_data_mode on_existing_file match_columns =
    case on_existing_file of
        Existing_File_Behavior.Append -> case match_columns of
            Match_Columns.By_Position -> ExistingDataMode.APPEND_BY_INDEX
            Match_Columns.By_Name -> ExistingDataMode.APPEND_BY_NAME
        Existing_File_Behavior.Backup -> ExistingDataMode.REPLACE
        Existing_File_Behavior.Overwrite -> ExistingDataMode.REPLACE
        Existing_File_Behavior.Error -> ExistingDataMode.ERROR
## PRIVATE
   Opens `file` for reading, runs `reader` on its Java stream and maps the
   Java exceptions raised while doing so.

   Arguments:
   - file: The file to open for reading.
   - reader: Function applied to the Java input stream of `file`.
handle_reader : File -> (Input_Stream -> (Table | Vector)) -> (Table | Vector)
handle_reader file reader =
    on_bad_format caught = Error.throw (File.IO_Error file caught.payload.cause.getMessage)
    on_bad_location caught = Error.throw (Invalid_Location_Data caught.payload.cause.getCause)
    # The read itself stays the innermost (suspended) action so that every
    # handler below is installed before the stream is opened.
    File.handle_java_exceptions file <|
        Panic.catch InvalidLocationException handler=on_bad_location <|
            Panic.catch UnsupportedFileFormatException handler=on_bad_format <|
                file.with_input_stream [File.Option.Read] stream->
                    stream.with_java_stream reader
## PRIVATE
Handle and map the Java errors when writing an Excel file.
Arguments:
- writer: The suspended write action that is run inside the handlers.
Panics caused by `InvalidLocationException`, `RangeExceededException` and
`ExistingDataException` are turned into `Invalid_Location_Data`,
`Range_Exceeded_Data` and `Existing_Data_Data` dataflow errors respectively.
An `IllegalStateException` is re-thrown as an `Illegal_State_Error_Data`
panic. `Column_Name_Mismatch`, `Column_Count_Mismatch` and
`Illegal_Argument_Error` install their own `handle_java_exception` handlers.
handle_writer ~writer =
bad_location caught_panic = Error.throw (Invalid_Location_Data caught_panic.payload.cause.getCause)
handle_bad_location = Panic.catch InvalidLocationException handler=bad_location
throw_range_exceeded caught_panic = Error.throw (Range_Exceeded_Data caught_panic.payload.cause.getMessage)
handle_range_exceeded = Panic.catch RangeExceededException handler=throw_range_exceeded
throw_existing_data caught_panic = Error.throw (Existing_Data_Data caught_panic.payload.cause.getMessage)
handle_existing_data = Panic.catch ExistingDataException handler=throw_existing_data
## Should be impossible - occurs if no fallback serializer is provided.
throw_illegal_state caught_panic = Panic.throw (Illegal_State_Error_Data caught_panic.payload.cause.getMessage)
handle_illegal_state = Panic.catch IllegalStateException handler=throw_illegal_state
handle_illegal_state <| Column_Name_Mismatch.handle_java_exception <|
Column_Count_Mismatch.handle_java_exception <| handle_bad_location <|
Illegal_Argument_Error.handle_java_exception <| handle_range_exceeded <| handle_existing_data <|
writer

View File

@ -1,229 +0,0 @@
from Standard.Base import all
import Standard.Base.System
import Standard.Base.Runtime.Ref
import Standard.Base.Error.Common as Errors
import Standard.Table.Data.Table
import Standard.Table.Data.Match_Columns
import Standard.Table.Internal.Delimited_Reader
import Standard.Table.Internal.Delimited_Writer
from Standard.Table.Errors import Unsupported_File_Type_Data
from Standard.Table.Data.Data_Formatter import Data_Formatter, Data_Formatter_Data
import Standard.Table.IO.Excel as Excel_Module
import Standard.Table.IO.Quote_Style
## This type needs to be here to allow for the usage of Standard.Table
functions. Ideally, it would be an interface within Standard.Base and
expanded by additional implementations in Standard.Table.
## Determines the format of file to use based on the file extension.
type Auto
    ## ADVANCED
       Resolves the concrete `File_Format` for `file` from its extension,
       compared case-insensitively:
       - `.txt`, `.log`: `Plain_Text_Data`.
       - `.csv`: `Delimited_Data` with a `,` delimiter.
       - `.tsv`: `Delimited_Data` with a tab delimiter.
       - `.xlsx`, `.xlsm`, `.xls`, `.xlt`: `Excel_Data`.
       Any other extension yields an `Unsupported_File_Type_Data` error
       carrying the file name.
    materialise : File -> File_Format
    materialise self file =
        extension = file.extension
        has_extension candidate = candidate.equals_ignore_case extension

        is_plain_text = (has_extension ".txt") || (has_extension ".log")
        is_excel = (has_extension ".xlsx") || (has_extension ".xlsm") || (has_extension ".xls") || (has_extension ".xlt")

        if is_plain_text then File_Format.Plain_Text_Data else
            if has_extension ".csv" then File_Format.Delimited_Data ',' else
                if has_extension ".tsv" then File_Format.Delimited_Data '\t' else
                    if is_excel then File_Format.Excel_Data else
                        Error.throw (Unsupported_File_Type_Data file.name)

    ## Implements the `File.read` for this `File_Format` by delegating to the
       format resolved from the file's extension.
    read : File -> Problem_Behavior -> Any
    read self file on_problems =
        (self.materialise file).read file on_problems

    ## Implements the `Table.write` for this `File_Format` by delegating to
       the format resolved from the file's extension.
    write_table : File -> Table -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> Nothing
    write_table self file table on_existing_file match_columns on_problems =
        (self.materialise file).write_table file table on_existing_file match_columns on_problems
## Reads the file to a `Vector` of bytes.
type Bytes
    ## Implements the `File.read` for this `File_Format`.
       The problem behavior argument is ignored; the result is whatever
       `file.read_bytes` returns.
    read : File -> Problem_Behavior -> Any
    read self file _ = file.read_bytes

    ## Implements the `Table.write` for this `File_Format`.
       Always returns an error, as a `Table` cannot be saved as raw bytes.
    write_table : File -> Table -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> Nothing
    write_table self _ _ _ _ _ =
        # NOTE(review): sibling errors are built through `*_Data` constructors
        # (e.g. `Unsupported_File_Type_Data`) - confirm that
        # `Illegal_Argument_Error` is callable as a constructor here.
        message = "Saving a Table as Bytes is not supported."
        Error.throw (Illegal_Argument_Error message)
## Reads the file to a `Text` with specified encoding.
type Plain_Text
    ## Arguments:
       - encoding: The encoding used to decode the file. Defaults to UTF-8.
    Plain_Text_Data (encoding:Encoding=Encoding.utf_8)

    ## Implements the `File.read` for this `File_Format`.
       Decodes the file with `self.encoding`, forwarding `on_problems` to
       `file.read_text`.
    read : File -> Problem_Behavior -> Any
    read self file on_problems = file.read_text self.encoding on_problems

    ## Implements the `Table.write` for this `File_Format`.
       Always returns an error, as a `Table` cannot be written directly as
       plain text.
    write_table : File -> Table -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> Nothing
    write_table self _ _ _ _ _ =
        # NOTE(review): sibling errors are built through `*_Data` constructors
        # (e.g. `Unsupported_File_Type_Data`) - confirm that
        # `Illegal_Argument_Error` is callable as a constructor here.
        message = "Saving a Table as Plain_Text is not directly supported. You may convert the Table to a Text using `Text.from` and then use `Text.write` to write it."
        Error.throw (Illegal_Argument_Error message)
## Read delimited files such as CSVs into a Table.
type Delimited
    ## Read delimited files such as CSVs into a Table.

       If a row does not match the first row's column count, the function raises
       an `Invalid_Row`. If a quote is opened and never closed, a
       `Mismatched_Quote` warning occurs.

       Arguments:
       - delimiter: The delimiter character to split the file into columns. An
         `Illegal_Argument_Error` error is returned if this is an empty string.
       - encoding: The encoding to use when reading the file.
       - skip_rows: The number of rows to skip from the top of the file.
       - row_limit: The maximum number of rows to read from the file. This count
         does not include the header row (if applicable).
       - quote_style: Specifies the style of quotes used for reading and
         writing.
       - headers: If set to `True`, the first row is used as column names. If
         set to `False`, the column names are generated by adding increasing
         numeric suffixes to the base name `Column` (i.e. `Column_1`,
         `Column_2` etc.). If set to `Infer`, the process tries to infer if
         headers are present on the first row. If the column names are not
         unique, numeric suffixes will be appended to disambiguate them.
       - value_formatter: Formatter to parse text values into numbers, dates,
         times, etc. If `Nothing` values are left as Text.
       - keep_invalid_rows: Specifies whether rows that contain less or more
         columns than expected should be kept (setting the missing columns to
         `Nothing` or dropping the excess columns) or dropped.
       - line_endings: Sets the line ending style to use. Defaults to `Infer` -
         when reading a file or appending to an existing file, the line endings
         are detected from file contents; when writing a new file in `Infer`
         mode the `Unix` line endings are used.
       - comment_character: Sets the character which indicates the start of a
         comment within a delimited file. Any line that begins with the comment
         character is skipped. The comment character is treated as any other
         character if it appears anywhere else than at the beginning of the
         line. This option is only applicable for read mode and does not affect
         writing. It defaults to `Nothing` which means that comments are
         disabled.
    Delimited_Data (delimiter:Text) (encoding:Encoding=Encoding.utf_8) (skip_rows:Integer=0) (row_limit:Integer|Nothing=Nothing) (quote_style:Quote_Style=Quote_Style.With_Quotes) (headers:Boolean|Infer=Infer) (value_formatter:Data_Formatter|Nothing=Data_Formatter_Data) (keep_invalid_rows:Boolean=True) (line_endings:Line_Ending_Style=Infer) (comment_character:Text|Nothing=Nothing)

    ## Implements the `File.read` for this `File_Format`
    read : File -> Problem_Behavior -> Any
    read self file on_problems =
        Delimited_Reader.read_file self file on_problems

    ## Implements the `Table.write` for this `File_Format`.
    write_table : File -> Table -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> Nothing
    write_table self file table on_existing_file match_columns on_problems =
        Delimited_Writer.write_file table self file on_existing_file match_columns on_problems

    ## PRIVATE
       Clone the instance with some properties overridden.

       The `delimiter`, `encoding`, `skip_rows` and `row_limit` fields are
       always copied from `self`; every other field defaults to the current
       value and can be overridden by name.

       Note: This function is internal until such time as Atom cloning with
       modification is built into Enso.
    clone : Quote_Style -> (Boolean|Infer) -> (Data_Formatter|Nothing) -> Boolean -> Line_Ending_Style -> (Text|Nothing) -> Delimited
    clone self (quote_style=self.quote_style) (headers=self.headers) (value_formatter=self.value_formatter) (keep_invalid_rows=self.keep_invalid_rows) (line_endings=self.line_endings) (comment_character=self.comment_character) =
        Delimited_Data self.delimiter self.encoding self.skip_rows self.row_limit quote_style headers value_formatter keep_invalid_rows line_endings comment_character

    ## Create a clone of this with specified quoting settings.

       Arguments:
       - quote: The quote character. Defaults to `"`.
       - quote_escape: The character escaping a quote inside a quoted value.
         Defaults to the value of `quote`.
       - always_quote: Passed to `Quote_Style.With_Quotes`; defaults to
         `False`.
    with_quotes : Text -> Text -> Boolean -> Delimited
    with_quotes self quote='"' quote_escape=quote always_quote=False =
        self.clone quote_style=(Quote_Style.With_Quotes always_quote=always_quote quote=quote quote_escape=quote_escape)

    ## Create a clone of this with the quote style set to
       `Quote_Style.No_Quotes`.
    without_quotes : Delimited
    without_quotes self =
        self.clone quote_style=Quote_Style.No_Quotes

    ## Create a clone of this with first row treated as header.
    with_headers : Delimited
    with_headers self = self.clone headers=True

    ## Create a clone of this where the first row is treated as data, not a
       header.
    without_headers : Delimited
    without_headers self = self.clone headers=False

    ## Create a clone of this with value parsing.

       A custom `Data_Formatter` can be provided to customize parser options.
    with_parsing : Data_Formatter -> Delimited
    with_parsing self (value_formatter=Data_Formatter_Data) =
        self.clone value_formatter=value_formatter

    ## Create a clone of this without value parsing.
    without_parsing : Delimited
    without_parsing self =
        self.clone value_formatter=Nothing

    ## Creates a clone of this with a changed line ending style.
    with_line_endings : Line_Ending_Style -> Delimited
    with_line_endings self line_endings=Infer =
        self.clone line_endings=line_endings

    ## Creates a clone of this with comment parsing enabled.
    with_comments : Text -> Delimited
    with_comments self comment_character='#' =
        self.clone comment_character=comment_character

    ## Creates a clone of this with comment parsing disabled.
    without_comments : Delimited
    without_comments self =
        self.clone comment_character=Nothing
## A setting to infer the default behaviour of some option.
   A marker type without any fields. In this file it is the default for the
   `headers` and `line_endings` fields of `Delimited_Data` and for the
   `headers` and `xls_format` fields of `Excel_Data`.
type Infer
## Read the file to a `Table` from an Excel file
type Excel
    ## Read Excel files into a Table or Vector.

       Arguments:
       - section: The `Excel_Section` to read from the workbook.
         This can be one of:
         - `Sheet_Names` - outputs a `Vector` of sheet names.
         - `Range_Names` - outputs a `Vector` of range names.
         - `Sheet` - outputs a `Table` containing the specified sheet.
         - `Cell_Range` - outputs a `Table` containing the specified range.
       - headers: If set to `True`, the first row is used as column names. If
         set to `False`, the column names are Excel column names. If set to
         `Infer`, the process tries to infer if headers are present on the first
         row. If the column names are not unique, numeric suffixes will be
         appended to disambiguate them.
       - xls_format:
         If set to `True`, the file is read as an Excel 95-2003 format.
         If set to `False`, the file is read as an Excel 2007+ format.
         `Infer` will attempt to deduce this from the extension of the filename.
    Excel_Data (section:Excel_Section=Excel_Module.Sheet) (headers:(True|False|Infer)=Infer) (xls_format:(True|False|Infer)=Infer)

    ## Implements the `File.read` for this `File_Format`.
       Resolves the `xls_format` setting against the file and delegates to
       `Excel_Module.read_excel`.
    read : File -> Problem_Behavior -> Any
    read self file on_problems =
        is_xls = Excel.is_xls_format self.xls_format file
        Excel_Module.read_excel file self.section self.headers on_problems is_xls

    ## Implements the `Table.write` for this `File_Format`.
       The `Sheet_Names` and `Range_Names` sections cannot be written to and
       yield an error; any other section is passed to
       `Excel_Module.write_excel`.
    write_table : File -> Table -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> Nothing
    write_table self file table on_existing_file match_columns on_problems =
        is_xls = Excel.is_xls_format self.xls_format file
        # NOTE(review): sibling errors are built through `*_Data` constructors
        # (e.g. `Unsupported_File_Type_Data`) - confirm that
        # `Illegal_Argument_Error` is callable as a constructor here.
        case self.section of
            Excel_Module.Sheet_Names ->
                Error.throw (Illegal_Argument_Error "Sheet_Names cannot be used for `write`.")
            Excel_Module.Range_Names ->
                Error.throw (Illegal_Argument_Error "Range_Names cannot be used for `write`.")
            _ ->
                Excel_Module.write_excel file table on_existing_file self.section self.headers match_columns on_problems is_xls

    ## PRIVATE
       Resolve the xls_format setting to a boolean.

       An explicit setting is returned unchanged. `Infer` resolves to `True`
       when the file extension is `.xls` or `.xlt` (ignoring case).
    is_xls_format : (Boolean|Infer) -> File -> Boolean
    is_xls_format xls_format file =
        case xls_format == Infer of
            False -> xls_format
            True ->
                ext = file.extension
                (ext.equals_ignore_case ".xls") || (ext.equals_ignore_case ".xlt")

View File

@ -1,7 +1,8 @@
from Standard.Base import all
from Standard.Base import Any, Text, Problem_Behavior, Nothing, Error, Panic, Meta, File, File_Format
from Standard.Base.Error.Problem_Behavior import Report_Warning
from Standard.Base.Error.Common import Unsupported_File_Type_Data, No_Such_Method_Error_Data, Illegal_Argument_Error_Data
import Standard.Table.IO.File_Format
from Standard.Table.IO.Auto_Detect import Auto_Detect
## ALIAS Read Text File, Read File
@ -10,7 +11,10 @@ import Standard.Table.IO.File_Format
Arguments:
- path: The path of the file to open and read the contents of. It will
accept a textual path or a file.
- format: The text encoding to decode the file with. Defaults to UTF-8.
- format: A File_Format object used to read file into memory.
If `Auto_Detect` is specified, the provided file determines the specific
type and configures it appropriately. If there is no matching type then
an `Unsupported_File_Type` error is returned.
- on_problems: Specifies the behavior when a problem occurs during the
function.
By default, a warning is issued, but the operation proceeds.
@ -21,15 +25,18 @@ import Standard.Table.IO.File_Format
If you have a variable `file` of type `File`, we recommend calling the
`.read` method on it directly, rather than using `File.read file`. The
latter, however, will still work.
File.read : (Text | File) -> File_Format -> Problem_Behavior -> Any ! File_Error
File.read path (format=File_Format.Auto) (on_problems=Report_Warning) =
File.read : (Text | File.File) -> File_Format -> Problem_Behavior -> Any ! File.File_Error | Unsupported_File_Type
File.read path (format=Auto_Detect) (on_problems=Report_Warning) =
file = File.new path
file.read format on_problems
## Read a file using the specified file format
Arguments:
- format: The text encoding to decode the file with. Defaults to UTF-8.
- format: A File_Format object used to read file into memory.
If `Auto_Detect` is specified, the provided file determines the specific
type and configures it appropriately. If there is no matching type then
an `Unsupported_File_Type` error is returned.
- on_problems: Specifies the behavior when a problem occurs during the
function.
By default, a warning is issued, but the operation proceeds.
@ -48,12 +55,10 @@ File.read path (format=File_Format.Auto) (on_problems=Report_Warning) =
Read the sheet named `Dates` from an XLS and convert it to a table.
import Standard.Table
from Standard.Table.IO.File_Format import Excel
from Standard.Table.IO.Excel import Excel_Section
from Standard.Table import Excel, Worksheet
import Standard.Examples
example_xls_to_table = Examples.xls.read (Excel (Sheet 'Dates'))
File.File.read : File_Format -> Problem_Behavior -> Any ! File_Error
File.File.read self (format=File_Format.Auto) (on_problems=Report_Warning) =
example_xls_to_table = Examples.xls.read (Excel (Worksheet 'Dates'))
File.File.read : File_Format -> Problem_Behavior -> Any ! File.File_Error | Unsupported_File_Type
File.File.read self format=Auto_Detect (on_problems=Report_Warning) =
format.read self on_problems

View File

@ -1,6 +1,6 @@
from Standard.Base import all
from Standard.Table.Errors as Table_Errors import Invalid_Format_Data, Leading_Zeros_Data
from Standard.Table.Errors import Invalid_Format_Data, Leading_Zeros_Data
polyglot java import org.enso.table.parsing.problems.InvalidFormat
polyglot java import org.enso.table.parsing.problems.LeadingZeros

View File

@ -1,4 +1,5 @@
from Standard.Base import all
import Standard.Base.System.File_Format
import project.Data.Table
import project.Data.Column
@ -8,11 +9,17 @@ import project.Data.Sort_Column_Selector
import project.Data.Column_Name_Mapping
import project.Data.Data_Formatter
import project.Data.Match_Columns
import project.Data.Aggregate_Column
import project.IO.File_Read
import project.IO.File_Format
import project.IO.Excel
import project.IO.Quote_Style
import project.IO.Auto_Detect
import project.Delimited.Quote_Style
import project.Delimited.Delimited_Format
import project.Excel.Section
import project.Excel.Range
import project.Excel.Excel_Format
import project.Errors
@ -23,12 +30,18 @@ export project.Data.Sort_Column
export project.Data.Sort_Column_Selector
export project.Data.Column_Name_Mapping
export project.Data.Match_Columns
export project.Data.Aggregate_Column
export project.IO.File_Read
export project.IO.File_Format
export project.IO.Quote_Style
from project.IO.Auto_Detect export Auto_Detect
export project.Delimited.Quote_Style
from project.Delimited.Delimited_Format export Delimited_Format, Delimited
from project.Excel.Excel_Format export Excel_Format, Excel
from project.Excel.Section export Excel_Section, Sheet_Names, Range_Names, Worksheet, Cell_Range
from project.Excel.Range export Excel_Range
from project.IO.Excel export Excel_Section, Excel_Range
from project.Data.Data_Formatter export Data_Formatter, Data_Formatter_Data
import Standard.Geo.Geo_Json
@ -110,3 +123,4 @@ type Invalid_Format_Error
to_display_text : Text
to_display_text self =
"The input " + self.input.to_text + " had an invalid format due to: " + self.message.to_text + "."

View File

@ -1,8 +1,7 @@
from Standard.Base import all
import Standard.Table.Data.Column
from Standard.Table import Table, Column
import Standard.Table.Data.Storage
import Standard.Table.Data.Table
## PRIVATE

View File

@ -1,7 +1,6 @@
from Standard.Base import all
import Standard.Table.Data.Column
import Standard.Table.Data.Table
from Standard.Table import Table, Column
import Standard.Visualization.Helpers
## PRIVATE

View File

@ -1,7 +1,6 @@
from Standard.Base import all
import Standard.Table.Data.Column
import Standard.Table.Data.Table
from Standard.Table import Table, Column
import Standard.Visualization.Helpers
from Standard.Base.Data.Index_Sub_Range import Sample

View File

@ -3,7 +3,7 @@ from Standard.Base.Data.Index_Sub_Range import While, Sample, Every
import Standard.Base.Data.Index_Sub_Range
from Standard.Table import Column_Name_Mapping, Sort_Column, Sort_Column_Selector
from Standard.Table.Errors as Table_Errors import all
from Standard.Table.Errors import all
from Standard.Table.Data.Column_Selector import all
from Standard.Table.Data.Position import all

View File

@ -1,7 +1,7 @@
from Standard.Base import all
import Standard.Table
from Standard.Table import Column, File_Format
from Standard.Table import Column, Delimited
from Standard.Table.Data.Column_Selector import By_Index
import Standard.Test
@ -17,7 +17,7 @@ spec =
Test.group "Table.from Text" <|
Test.specify "should create a table from a textual CSV" <|
file_contents = (enso_project.data / "simple_empty.csv") . read_text
table = Table.Table.from file_contents (format = File_Format.Delimited_Data ",")
table = Table.Table.from file_contents (format = Delimited ",")
table.should_equal expected_table
Test.group "File.read (Delimited)" <|
@ -43,13 +43,13 @@ spec =
csv = """
name,x,y,x,y
foo,10,20,30,20
t = Table.Table.from csv (format = File_Format.Delimited_Data ",")
t = Table.Table.from csv (format = Delimited ",")
t.columns.map .name . should_equal ['name', 'x', 'y', 'x_1', 'y_1']
Test.group 'Writing' <|
Test.specify 'should properly serialize simple tables' <|
varied_column = (enso_project.data / "varied_column.csv") . read
res = Text.from varied_column format=(File_Format.Delimited_Data ",")
res = Text.from varied_column format=(Delimited ",")
exp = normalize_lines <| '''
Column_1,Column_2,Column_3,Column_4,Column_5,Column_6
2005-02-25,2005-02-25,1,1,1.0,1
@ -73,7 +73,7 @@ spec =
"This;Name;;Is""""Strange";20
Marcin,,;"hello;world"
res = Text.from t format=(File_Format.Delimited_Data ";")
res = Text.from t format=(Delimited ";")
res.should_equal expected
Test.specify 'should allow forced quoting of records'
@ -87,7 +87,7 @@ spec =
"This;Name;;Is""""Strange",20
"Marcin,,","hello;world"
res = Text.from t format=(File_Format.Delimited_Data "," . with_quotes always_quote=True)
res = Text.from t format=(Delimited "," . with_quotes always_quote=True)
res.should_equal expected

View File

@ -2,9 +2,9 @@ from Standard.Base import all
import Standard.Table as Materialized_Table
from Standard.Table import Sort_Column, Sort_Column_Selector
from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Name
from Standard.Table.Data.Column_Selector import By_Name
from Standard.Table.Data.Aggregate_Column import all
from Standard.Table.Errors as Table_Errors import No_Input_Columns_Selected, Missing_Input_Columns_Data
from Standard.Table.Errors import No_Input_Columns_Selected, Missing_Input_Columns_Data
from Standard.Database import all

View File

@ -1,8 +1,7 @@
from Standard.Base import all
import Standard.Table
from Standard.Table import Column, File_Format, Data_Formatter_Data, Quote_Style
from Standard.Table.IO.File_Format import Delimited_Data
from Standard.Table import Column, Data_Formatter_Data, Quote_Style, Delimited
from Standard.Table.Errors import all
import Standard.Test
@ -17,7 +16,7 @@ spec =
c_2 = ["b", ['2', Nothing, '8', '11']]
c_3 = ["c", [Nothing, '6', '9', '12']]
expected_table = Table.new [c_1, c_2, c_3]
simple_empty = File.read (enso_project.data / "simple_empty.csv") (Delimited_Data "," headers=True value_formatter=Nothing)
simple_empty = File.read (enso_project.data / "simple_empty.csv") (Delimited "," headers=True value_formatter=Nothing)
simple_empty.should_equal expected_table
Test.specify "should load a simple table without headers" <|
@ -25,11 +24,11 @@ spec =
c_2 = ["Column_2", ['b', '2', Nothing, '8', '11']]
c_3 = ["Column_3", ['c', Nothing, '6', '9', '12']]
expected_table = Table.new [c_1, c_2, c_3]
simple_empty = File.read (enso_project.data / "simple_empty.csv") (Delimited_Data "," headers=False value_formatter=Nothing)
simple_empty = File.read (enso_project.data / "simple_empty.csv") (Delimited "," headers=False value_formatter=Nothing)
simple_empty.should_equal expected_table
Test.specify "should work in presence of missing headers" <|
action on_problems = File.read (enso_project.data / "missing_header.csv") (Delimited_Data "," headers=True value_formatter=Nothing) on_problems
action on_problems = File.read (enso_project.data / "missing_header.csv") (Delimited "," headers=True value_formatter=Nothing) on_problems
tester table =
table.columns.map .name . should_equal ["a", "Column_1", "c", "Column_2", "d"]
table.at "a" . to_vector . should_equal ["1"]
@ -41,61 +40,61 @@ spec =
Problems.test_problem_handling action problems tester
Test.specify "should infer headers based on the first two rows" <|
t1 = File.read (enso_project.data / "data_small.csv") (Delimited_Data "," headers=File_Format.Infer)
t1 = File.read (enso_project.data / "data_small.csv") (Delimited ",")
t1.columns.map .name . should_equal ["Code", "Index", "Flag", "Value", "ValueWithNothing", "TextWithNothing", "Hexadecimal", "Leading0s", "QuotedNumbers", "Mixed Types"]
t2 = File.read (enso_project.data / "all_text.csv") (Delimited_Data "," headers=File_Format.Infer)
t2 = File.read (enso_project.data / "all_text.csv") (Delimited ",")
t2.columns.map .name . should_equal ["Column_1", "Column_2"]
t2.at "Column_1" . to_vector . should_equal ["a", "c", "e", "g"]
t2.at "Column_2" . to_vector . should_equal ["b", "d", "f", "h"]
t3 = File.read (enso_project.data / "two_rows1.csv") (Delimited_Data "," headers=File_Format.Infer)
t3 = File.read (enso_project.data / "two_rows1.csv") (Delimited ",")
t3.columns.map .name . should_equal ["a", "b", "c"]
t3.at "a" . to_vector . should_equal ["x"]
t3.at "b" . to_vector . should_equal [Nothing]
t3.at "c" . to_vector . should_equal [Nothing]
t4 = File.read (enso_project.data / "two_rows2.csv") (Delimited_Data "," headers=File_Format.Infer)
t4 = File.read (enso_project.data / "two_rows2.csv") (Delimited ",")
t4.columns.map .name . should_equal ["Column_1", "Column_2", "Column_3"]
t4.at "Column_1" . to_vector . should_equal ["a", "d"]
t4.at "Column_2" . to_vector . should_equal ["b", "e"]
t4.at "Column_3" . to_vector . should_equal ["c", "f"]
t5 = File.read (enso_project.data / "numbers_in_header.csv") (Delimited_Data "," headers=File_Format.Infer)
t5 = File.read (enso_project.data / "numbers_in_header.csv") (Delimited ",")
t5.columns.map .name . should_equal ["Column_1", "Column_2", "Column_3"]
t5.at "Column_1" . to_vector . should_equal ["a", "1"]
t5.at "Column_2" . to_vector . should_equal ["b", "2"]
t5.at "Column_3" . to_vector . should_equal [0, 3]
t6 = File.read (enso_project.data / "quoted_numbers_in_header.csv") (Delimited_Data "," headers=File_Format.Infer)
t6 = File.read (enso_project.data / "quoted_numbers_in_header.csv") (Delimited ",")
t6.columns.map .name . should_equal ["1", "x"]
t6.at "1" . to_vector . should_equal ["y"]
t6.at "x" . to_vector . should_equal [2]
Test.specify "should not use the first row as headers if it is the only row, unless specifically asked to" <|
t1 = File.read (enso_project.data / "one_row.csv") (Delimited_Data "," headers=File_Format.Infer)
t1 = File.read (enso_project.data / "one_row.csv") (Delimited ",")
t1.columns.map .name . should_equal ["Column_1", "Column_2", "Column_3"]
t1.at "Column_1" . to_vector . should_equal ["x"]
t1.at "Column_2" . to_vector . should_equal ["y"]
t1.at "Column_3" . to_vector . should_equal ["z"]
t2 = File.read (enso_project.data / "one_row.csv") (Delimited_Data "," headers=True)
t2 = File.read (enso_project.data / "one_row.csv") (Delimited "," headers=True)
t2.columns.map .name . should_equal ["x", "y", "z"]
t2.row_count . should_equal 0
t2.at "x" . to_vector . should_equal []
Test.specify "should be able to load even an empty file" <|
table = File.read (enso_project.data / "empty.txt") (Delimited_Data "," headers=True value_formatter=Nothing)
table = File.read (enso_project.data / "empty.txt") (Delimited "," headers=True value_formatter=Nothing)
table.columns.map .name . should_equal []
table.row_count . should_equal 0
Test.specify "should correctly handle file opening issues" <|
nonexistent_file = enso_project.data / "a_filename_that_does_not_exist.foobar"
r1 = File.read nonexistent_file (Delimited_Data "," headers=True value_formatter=Nothing)
r1 = File.read nonexistent_file (Delimited "," headers=True value_formatter=Nothing)
r1.should_fail_with File.File_Not_Found
directory = enso_project.data
r2 = File.read directory (Delimited_Data "," headers=True value_formatter=Nothing) Problem_Behavior.Report_Error
r2 = File.read directory (Delimited "," headers=True value_formatter=Nothing) Problem_Behavior.Report_Error
r2.should_fail_with File.IO_Error
Test.specify "should work with all kinds of line endings" <|
@ -106,7 +105,7 @@ spec =
text.write (path name)
test_file name =
table = File.read (path name) (Delimited_Data "," headers=True value_formatter=Nothing) Problem_Behavior.Report_Error
table = File.read (path name) (Delimited "," headers=True value_formatter=Nothing) Problem_Behavior.Report_Error
table.columns.map .name . should_equal ['a', 'b', 'c']
table.at 'a' . to_vector . should_equal ['d', '1']
table.at 'b' . to_vector . should_equal ['e', '2']
@ -121,7 +120,7 @@ spec =
# Currently mixed line endings are not supported.
'a,b,c\nd,e,f\r1,2,3'.write (path 'mixed.csv')
File.read (path 'mixed.csv') (Delimited_Data "," headers=True value_formatter=Nothing) Problem_Behavior.Report_Error . should_fail_with Invalid_Row_Data
File.read (path 'mixed.csv') (Delimited "," headers=True value_formatter=Nothing) Problem_Behavior.Report_Error . should_fail_with Invalid_Row_Data
['crlf.csv', 'lf.csv', 'cr.csv', 'mixed.csv'].each (path >> .delete)
@ -131,7 +130,7 @@ spec =
text = lines.join '\n'
text.write file
format = Delimited_Data ',' headers=False value_formatter=(Data_Formatter_Data trim_values=False)
format = Delimited ',' headers=False value_formatter=(Data_Formatter_Data trim_values=False)
reference_table = Table.new [["Column_1", ["a", "d", "1"]], ["Column_2", ["b", "e", "2"]], ["Column_3", ["c", "f", "3"]]]
collapsed_table = Table.new <|
@ -153,14 +152,14 @@ spec =
file_2.delete
Test.specify "should work with Windows-1252 encoding" <|
table = File.read (enso_project.data / "windows.csv") (Delimited_Data "," headers=True encoding=Encoding.windows_1252) Problem_Behavior.Report_Error
table = File.read (enso_project.data / "windows.csv") (Delimited "," headers=True encoding=Encoding.windows_1252) Problem_Behavior.Report_Error
table.columns.map .name . should_equal ['a', 'b', 'c']
table.at 'a' . to_vector . should_equal ['$¢']
table.at 'b' . to_vector . should_equal ['¤']
table.at 'c' . to_vector . should_equal ['¥']
Test.specify "should work with UTF-16 encoding" <|
table = File.read (enso_project.data / "utf16.csv") (Delimited_Data "," headers=True encoding=Encoding.utf_16_be) Problem_Behavior.Report_Error
table = File.read (enso_project.data / "utf16.csv") (Delimited "," headers=True encoding=Encoding.utf_16_be) Problem_Behavior.Report_Error
table.columns.map .name . should_equal ['ą', '🚀b', 'ć😎']
table.at 'ą' . to_vector . should_equal ['ą']
table.at '🚀b' . to_vector . should_equal ['✨🚀🚧😍😃😍😎😙😉☺']
@ -171,7 +170,7 @@ spec =
utf8_bytes = [97, 44, 98, 44, 99, 10, -60, -123, 44, -17, -65, -65, 44, -61, 40, -61, 40, 10]
utf8_bytes.write_bytes utf8_file
action_1 on_problems =
utf8_file.read (Delimited_Data "," headers=True) on_problems
utf8_file.read (Delimited "," headers=True) on_problems
tester_1 table =
table.columns.map .name . should_equal ['a', 'b', 'c']
table.at 'a' . to_vector . should_equal ['ą']
@ -182,7 +181,7 @@ spec =
utf8_file.delete
action_2 on_problems =
(enso_project.data / "utf16_invalid.csv").read (Delimited_Data "," headers=True encoding=Encoding.utf_16_be) on_problems
(enso_project.data / "utf16_invalid.csv").read (Delimited "," headers=True encoding=Encoding.utf_16_be) on_problems
tester_2 table =
table.columns.map .name . should_equal ['a', 'b', 'c']
# This column does not raise a problem - the '\uFFFD' is simply present in the input file.
@ -194,7 +193,7 @@ spec =
Problems.test_problem_handling action_2 problems_2 tester_2
Test.specify "should handle duplicated columns" <|
action on_problems = File.read (enso_project.data / "duplicated_columns.csv") (Delimited_Data "," headers=True value_formatter=Nothing) on_problems
action on_problems = File.read (enso_project.data / "duplicated_columns.csv") (Delimited "," headers=True value_formatter=Nothing) on_problems
tester table =
table.columns.map .name . should_equal ['a', 'b', 'c', 'a_1']
table.at 'a' . to_vector . should_equal ['1']
@ -203,27 +202,27 @@ spec =
Problems.test_problem_handling action problems tester
Test.specify "should handle quotes" <|
t1 = File.read (enso_project.data / "double_quoted.csv") (Delimited_Data "," headers=True value_formatter=Nothing)
t1 = File.read (enso_project.data / "double_quoted.csv") (Delimited "," headers=True value_formatter=Nothing)
t1.at 'a' . to_vector . should_equal ['a, x', '"a']
t1.at 'c' . to_vector . should_equal ['3', '"']
t2 = File.read (enso_project.data / "escape_quoted.csv") (Delimited_Data "," headers=True value_formatter=Nothing . with_quotes quote_escape="\")
t2 = File.read (enso_project.data / "escape_quoted.csv") (Delimited "," headers=True value_formatter=Nothing . with_quotes quote_escape="\")
t2.at 'a' . to_vector . should_equal ['a"b', 'a\\\"z']
t3 = File.read (enso_project.data / "no_quoting.csv") (Delimited_Data "," headers=True value_formatter=Nothing . without_quotes)
t3 = File.read (enso_project.data / "no_quoting.csv") (Delimited "," headers=True value_formatter=Nothing . without_quotes)
t3.at 'a' . to_vector . should_equal ['"y']
t3.at 'b' . to_vector . should_equal ['z"']
t3.at 'c' . to_vector . should_equal ['a']
Test.specify "should support rows spanning multiple lines if quoted" <|
t1 = File.read (enso_project.data / "multiline_quoted.csv") (Delimited_Data "," headers=True value_formatter=Nothing)
t1 = File.read (enso_project.data / "multiline_quoted.csv") (Delimited "," headers=True value_formatter=Nothing)
t1.at 'a' . to_vector . should_equal ['1', '4']
t1.at 'b' . to_vector . should_equal ['start\n\ncontinue', '5']
t1.at 'c' . to_vector . should_equal ['3', '6']
Test.specify "should behave correctly in presence of a mismatched quote" <|
action_1 on_problems =
File.read (enso_project.data / "mismatched_quote.csv") (Delimited_Data "," headers=True value_formatter=Nothing) on_problems
File.read (enso_project.data / "mismatched_quote.csv") (Delimited "," headers=True value_formatter=Nothing) on_problems
tester_1 table =
table.columns.map .name . should_equal ['a', 'b', 'c']
@ -234,7 +233,7 @@ spec =
Problems.test_problem_handling action_1 problems_1 tester_1
action_2 on_problems =
File.read (enso_project.data / "mismatched_quote2.csv") (Delimited_Data "," headers=True value_formatter=Nothing) on_problems
File.read (enso_project.data / "mismatched_quote2.csv") (Delimited "," headers=True value_formatter=Nothing) on_problems
tester_2 table =
table.columns.map .name . should_equal ['a', 'b', 'c']
@ -246,7 +245,7 @@ spec =
Test.specify "should handle too long and too short rows" <|
action keep_invalid_rows on_problems =
File.read (enso_project.data / "varying_rows.csv") (Delimited_Data "," headers=True keep_invalid_rows=keep_invalid_rows value_formatter=Nothing) on_problems
File.read (enso_project.data / "varying_rows.csv") (Delimited "," headers=True keep_invalid_rows=keep_invalid_rows value_formatter=Nothing) on_problems
tester_kept table =
table.columns.map .name . should_equal ['a', 'b', 'c']
@ -266,7 +265,7 @@ spec =
Test.specify "should aggregate invalid rows over some limit" <|
action on_problems =
File.read (enso_project.data / "many_invalid_rows.csv") (Delimited_Data "," headers=True keep_invalid_rows=False value_formatter=Nothing) on_problems
File.read (enso_project.data / "many_invalid_rows.csv") (Delimited "," headers=True keep_invalid_rows=False value_formatter=Nothing) on_problems
tester table =
table.columns.map .name . should_equal ['a', 'b', 'c']
@ -277,45 +276,45 @@ spec =
Problems.test_problem_handling action problems tester
Test.specify "should allow to skip rows" <|
t1 = File.read (enso_project.data / "simple_empty.csv") (Delimited_Data "," headers=False skip_rows=3 value_formatter=Nothing)
t1 = File.read (enso_project.data / "simple_empty.csv") (Delimited "," headers=False skip_rows=3 value_formatter=Nothing)
t1.at "Column_1" . to_vector . should_equal ['7', '10']
t2 = File.read (enso_project.data / "simple_empty.csv") (Delimited_Data "," headers=True skip_rows=3 value_formatter=Nothing)
t2 = File.read (enso_project.data / "simple_empty.csv") (Delimited "," headers=True skip_rows=3 value_formatter=Nothing)
t2.columns.map .name . should_equal ['7', '8', '9']
t2.at "7" . to_vector . should_equal ['10']
Test.specify "should allow to set a limit of rows to read" <|
t1 = File.read (enso_project.data / "simple_empty.csv") (Delimited_Data "," headers=False row_limit=2 value_formatter=Nothing)
t1 = File.read (enso_project.data / "simple_empty.csv") (Delimited "," headers=False row_limit=2 value_formatter=Nothing)
t1.at "Column_1" . to_vector . should_equal ['a', '1']
t2 = File.read (enso_project.data / "simple_empty.csv") (Delimited_Data "," headers=True row_limit=2 value_formatter=Nothing)
t2 = File.read (enso_project.data / "simple_empty.csv") (Delimited "," headers=True row_limit=2 value_formatter=Nothing)
t2.at "a" . to_vector . should_equal ['1', '4']
t3 = File.read (enso_project.data / "simple_empty.csv") (Delimited_Data "," headers=False skip_rows=3 row_limit=1 value_formatter=Nothing)
t3 = File.read (enso_project.data / "simple_empty.csv") (Delimited "," headers=False skip_rows=3 row_limit=1 value_formatter=Nothing)
t3.at "Column_1" . to_vector . should_equal ['7']
t4 = File.read (enso_project.data / "simple_empty.csv") (Delimited_Data "," headers=False row_limit=0 value_formatter=Nothing)
t4 = File.read (enso_project.data / "simple_empty.csv") (Delimited "," headers=False row_limit=0 value_formatter=Nothing)
t4.columns.map .name . should_equal ['Column_1', 'Column_2', 'Column_3']
t4.row_count . should_equal 0
t5 = File.read (enso_project.data / "simple_empty.csv") (Delimited_Data "," headers=True row_limit=0 value_formatter=Nothing)
t5 = File.read (enso_project.data / "simple_empty.csv") (Delimited "," headers=True row_limit=0 value_formatter=Nothing)
t5.columns.map .name . should_equal ['a', 'b', 'c']
t5.at 'a' . to_vector . should_equal []
t5.row_count . should_equal 0
t6 = File.read (enso_project.data / "simple_empty.csv") (Delimited_Data "," headers=False skip_rows=3 row_limit=1000 value_formatter=Nothing)
t6 = File.read (enso_project.data / "simple_empty.csv") (Delimited "," headers=False skip_rows=3 row_limit=1000 value_formatter=Nothing)
t6.at "Column_1" . to_vector . should_equal ['7', '10']
Test.specify "should check arguments" <|
path = (enso_project.data / "simple_empty.csv")
pb = Problem_Behavior.Report_Error
path.read (Delimited_Data "," headers=False . with_quotes quote='abc') pb . should_fail_with Illegal_Argument_Error_Data
path.read (Delimited_Data "," headers=False . with_quotes quote='🚧') pb . should_fail_with Illegal_Argument_Error_Data
path.read (Delimited_Data "," headers=False . with_quotes quote_escape='//') pb . should_fail_with Illegal_Argument_Error_Data
path.read (Delimited_Data 'a\u{301}' headers=False) pb . should_fail_with Illegal_Argument_Error_Data
path.read (Delimited "," headers=False . with_quotes quote='abc') pb . should_fail_with Illegal_Argument_Error_Data
path.read (Delimited "," headers=False . with_quotes quote='🚧') pb . should_fail_with Illegal_Argument_Error_Data
path.read (Delimited "," headers=False . with_quotes quote_escape='//') pb . should_fail_with Illegal_Argument_Error_Data
path.read (Delimited 'a\u{301}' headers=False) pb . should_fail_with Illegal_Argument_Error_Data
Test.specify "should correctly guess column types" <|
t = (enso_project.data / "data_small.csv") . read (Delimited_Data "," headers=True)
t = (enso_project.data / "data_small.csv") . read (Delimited "," headers=True)
t.at "Code" . to_vector . should_equal ["gxl", "wca", "nfw", "der"]
t.at "Index" . to_vector . should_equal [7, 0, 1, 7]
t.at "Flag" . to_vector . should_equal [True, False, True, True]
@ -327,7 +326,7 @@ spec =
t.at "QuotedNumbers" . to_vector . should_equal ["1", "2", Nothing, "34"]
t.at "Mixed Types" . to_vector . should_equal ["33", Nothing, "45", "True"]
t2 = (enso_project.data / "data_small.csv") . read (Delimited_Data "," headers=True value_formatter=(Data_Formatter_Data allow_leading_zeros=True))
t2 = (enso_project.data / "data_small.csv") . read (Delimited "," headers=True value_formatter=(Data_Formatter_Data allow_leading_zeros=True))
t2.at "Leading0s" . to_vector . should_equal [1, 2, 123, Nothing]
Test.specify "should be able to detect types automatically" <|
@ -346,7 +345,7 @@ spec =
a,b,c
1,2,3
4,5,6
t1 = Table.Table.from text1 (format = Delimited_Data ",")
t1 = Table.Table.from text1 (format = Delimited ",")
t1.columns.map .name . should_equal ["a", "b", "c"]
t1.at "a" . to_vector . should_equal [1, 4]
t1.at "b" . to_vector . should_equal [2, 5]
@ -367,32 +366,32 @@ spec =
table_hash = Table.new [["a", [";1", "5"]], ["42", [2, 6]], ["c # comment??", ["3", "7;comment?"]]]
table_semicolon = Table.new [["#", ["a", "5"]], ["x", [42, 6]], ["y", ["c # comment??", "7;comment?"]]]
File.read (enso_project.data / "comments.csv") (Delimited_Data ',' . with_comments . with_headers) . should_equal table_hash
File.read (enso_project.data / "comments.csv") (Delimited_Data ',' . with_comments ';' . with_headers) . should_equal table_semicolon
File.read (enso_project.data / "comments.csv") (Delimited ',' . with_comments . with_headers) . should_equal table_hash
File.read (enso_project.data / "comments.csv") (Delimited ',' . with_comments ';' . with_headers) . should_equal table_semicolon
Test.specify "should allow to build the Delimited configuration using builders" <|
Delimited_Data "," . clone . should_equal (Delimited_Data ",")
Delimited_Data "," encoding=Encoding.ascii skip_rows=123 row_limit=100 headers=False value_formatter=Nothing . clone . should_equal (Delimited_Data "," headers=False value_formatter=Nothing skip_rows=123 row_limit=100 encoding=Encoding.ascii)
Delimited_Data "," . clone quote_style=Quote_Style.No_Quotes headers=False value_formatter=Nothing . should_equal (Delimited_Data "," headers=False value_formatter=Nothing quote_style=Quote_Style.No_Quotes)
Delimited "," . clone . should_equal (Delimited ",")
Delimited "," encoding=Encoding.ascii skip_rows=123 row_limit=100 headers=False value_formatter=Nothing . clone . should_equal (Delimited "," headers=False value_formatter=Nothing skip_rows=123 row_limit=100 encoding=Encoding.ascii)
Delimited "," . clone quote_style=Quote_Style.No_Quotes headers=False value_formatter=Nothing . should_equal (Delimited "," headers=False value_formatter=Nothing quote_style=Quote_Style.No_Quotes)
Delimited_Data '\t' . with_quotes "|" . should_equal (Delimited_Data '\t' quote_style=(Quote_Style.With_Quotes quote='|' quote_escape='|'))
Delimited_Data '\t' . with_quotes "-" '\\' True . should_equal (Delimited_Data '\t' quote_style=(Quote_Style.With_Quotes always_quote=True quote='-' quote_escape='\\'))
Delimited_Data '\t' . without_quotes . should_equal (Delimited_Data '\t' quote_style=Quote_Style.No_Quotes)
Delimited '\t' . with_quotes "|" . should_equal (Delimited '\t' quote_style=(Quote_Style.With_Quotes quote='|' quote_escape='|'))
Delimited '\t' . with_quotes "-" '\\' True . should_equal (Delimited '\t' quote_style=(Quote_Style.With_Quotes always_quote=True quote='-' quote_escape='\\'))
Delimited '\t' . without_quotes . should_equal (Delimited '\t' quote_style=Quote_Style.No_Quotes)
Delimited_Data ',' . with_headers . should_equal (Delimited_Data ',' headers=True)
Delimited_Data ',' . without_headers . should_equal (Delimited_Data ',' headers=False)
Delimited_Data "," skip_rows=123 headers=False value_formatter=Nothing quote_style=Quote_Style.No_Quotes . with_headers . should_equal (Delimited_Data "," skip_rows=123 value_formatter=Nothing quote_style=Quote_Style.No_Quotes headers=True)
Delimited_Data "," skip_rows=123 headers=True value_formatter=Nothing quote_style=Quote_Style.No_Quotes . without_headers . should_equal (Delimited_Data "," skip_rows=123 value_formatter=Nothing quote_style=Quote_Style.No_Quotes headers=False)
Delimited ',' . with_headers . should_equal (Delimited ',' headers=True)
Delimited ',' . without_headers . should_equal (Delimited ',' headers=False)
Delimited "," skip_rows=123 headers=False value_formatter=Nothing quote_style=Quote_Style.No_Quotes . with_headers . should_equal (Delimited "," skip_rows=123 value_formatter=Nothing quote_style=Quote_Style.No_Quotes headers=True)
Delimited "," skip_rows=123 headers=True value_formatter=Nothing quote_style=Quote_Style.No_Quotes . without_headers . should_equal (Delimited "," skip_rows=123 value_formatter=Nothing quote_style=Quote_Style.No_Quotes headers=False)
Delimited_Data ',' . with_parsing . should_equal (Delimited_Data ',')
Delimited_Data ',' . without_parsing . should_equal (Delimited_Data ',' value_formatter=Nothing)
Delimited ',' . with_parsing . should_equal (Delimited ',')
Delimited ',' . without_parsing . should_equal (Delimited ',' value_formatter=Nothing)
custom_formatter = Data_Formatter_Data true_values=["A", "B", "C"] false_values=["D", "E", "F"]
Delimited_Data ',' . with_parsing custom_formatter . should_equal (Delimited_Data ',' value_formatter=custom_formatter)
Delimited_Data ',' row_limit=456 . without_parsing . should_equal (Delimited_Data ',' value_formatter=Nothing row_limit=456)
Delimited ',' . with_parsing custom_formatter . should_equal (Delimited ',' value_formatter=custom_formatter)
Delimited ',' row_limit=456 . without_parsing . should_equal (Delimited ',' value_formatter=Nothing row_limit=456)
Delimited_Data ',' . with_comments . should_equal (Delimited_Data ',' comment_character='#')
Delimited_Data ',' . with_comments ';' . should_equal (Delimited_Data ',' comment_character=';')
Delimited_Data ',' comment_character='#' . without_comments . should_equal (Delimited_Data ',' comment_character=Nothing)
Delimited_Data ',' . with_line_endings Line_Ending_Style.Unix . should_equal (Delimited_Data ',' line_endings=Line_Ending_Style.Unix)
Delimited ',' . with_comments . should_equal (Delimited ',' comment_character='#')
Delimited ',' . with_comments ';' . should_equal (Delimited ',' comment_character=';')
Delimited ',' comment_character='#' . without_comments . should_equal (Delimited ',' comment_character=Nothing)
Delimited ',' . with_line_endings Line_Ending_Style.Unix . should_equal (Delimited ',' line_endings=Line_Ending_Style.Unix)
main = Test.Suite.run_main spec

View File

@ -3,11 +3,10 @@ import Standard.Base.System
from Standard.Base.Error.Problem_Behavior import all
import Standard.Table
from Standard.Table import Column, Data_Formatter, Data_Formatter_Data, Quote_Style, Column_Name_Mapping, Match_Columns
from Standard.Table import Column, Data_Formatter, Data_Formatter_Data, Quote_Style, Column_Name_Mapping, Match_Columns, Delimited
from Standard.Table.Errors import all
from Standard.Table.IO.File_Format import Delimited_Data
from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Name
from Standard.Table.Errors as Table_Errors import Column_Count_Mismatch_Data, Column_Name_Mismatch_Data
from Standard.Table.Data.Column_Selector import By_Name
from Standard.Table.Errors import Column_Count_Mismatch_Data, Column_Name_Mismatch_Data
import Standard.Test
import Standard.Test.Problems
@ -49,7 +48,7 @@ spec =
style=setting.first
separator=setting.second
file = (enso_project.data / "transient" / "endings.csv")
table.write file (Delimited_Data ',' line_endings=style) on_problems=Report_Error . should_succeed
table.write file (Delimited ',' line_endings=style) on_problems=Report_Error . should_succeed
text = File.read_text file
text.should_equal (lines.join separator suffix=separator)
file.delete
@ -68,7 +67,7 @@ spec =
table = Table.new [['The Column "Name"', ["foo","'bar'",'"baz"', 'one, two, three']], ["Hello, Column?", [1.0, 1000000.5, 2.2, -1.5]]]
file = (enso_project.data / "transient" / "quotes1.csv")
file.delete_if_exists
table.write file (Delimited_Data "," value_formatter=data_formatter) on_problems=Report_Error . should_succeed
table.write file (Delimited "," value_formatter=data_formatter) on_problems=Report_Error . should_succeed
expected_text = normalize_lines <| """
"The Column ""Name""","Hello, Column?"
foo,"1,0"
@ -84,7 +83,7 @@ spec =
table = Table.new [['"A"', ["foo",'!"baz" ', 'one, two, three', "a;b; c ", "a\b"]], ["B", [1000000.5, 1000.0, 0.0, -1.2, Nothing]]]
file = (enso_project.data / "transient" / "quotes2.csv")
file.delete_if_exists
table.write file (Delimited_Data ";" value_formatter=data_formatter . with_quotes quote='"' quote_escape='\\') on_problems=Report_Error . should_succeed
table.write file (Delimited ";" value_formatter=data_formatter . with_quotes quote='"' quote_escape='\\') on_problems=Report_Error . should_succeed
expected_text = normalize_lines <| """
"\"A\"";B
foo;1'000'000.5
@ -101,7 +100,7 @@ spec =
table = Table.new [['"A"', [Nothing,"The 'thing'.", 'one, "two", three', 'a\tb']], ["B\C", [1000000.5, 1000.0, Nothing, -1.2]]]
file = (enso_project.data / "transient" / "quotes3.csv")
file.delete_if_exists
table.write file (Delimited_Data '\t' value_formatter=data_formatter . with_quotes quote='\'' quote_escape='\'') on_problems=Report_Error . should_succeed
table.write file (Delimited '\t' value_formatter=data_formatter . with_quotes quote='\'' quote_escape='\'') on_problems=Report_Error . should_succeed
expected_text = normalize_lines <| '''
"A"\tB\\C
\t'1''000''000.5'
@ -136,7 +135,7 @@ spec =
text.should_equal expected_text
file.delete
format = Delimited_Data ',' . with_comments
format = Delimited ',' . with_comments
table.write file format on_problems=Report_Error . should_succeed
expected_text_2 = normalize_lines <| """
"#",B
@ -148,7 +147,7 @@ spec =
file.delete
Test.specify 'should not quote values if quoting is disabled' <|
format = Delimited_Data "," value_formatter=(Data_Formatter_Data decimal_point=",") . without_quotes
format = Delimited "," value_formatter=(Data_Formatter_Data decimal_point=",") . without_quotes
table = Table.new [['The Column "Name"', ["foo","'bar'",'"baz"', 'one, two, three']], ["Hello, Column?", [1.0, 1000000.5, 2.2, -1.5]]]
file = (enso_project.data / "transient" / "quote_disabled.csv")
file.delete_if_exists
@ -164,7 +163,7 @@ spec =
file.delete
Test.specify 'should allow to always quote text and custom values, but for non-text primitves only if absolutely necessary' <|
format = Delimited_Data "," value_formatter=(Data_Formatter_Data thousand_separator='"' date_formats=["E, d MMM y"]) . with_quotes always_quote=True quote_escape='\\'
format = Delimited "," value_formatter=(Data_Formatter_Data thousand_separator='"' date_formats=["E, d MMM y"]) . with_quotes always_quote=True quote_escape='\\'
table = Table.new [['The Column "Name"', ["foo","'bar'",'"baz"', 'one, two, three']], ["B", [1.0, 1000000.5, 2.2, -1.5]], ["C", ["foo", My_Type_Data 44, (Date.new 2022 06 21), 42]], ["D", [1,2,3,4000]], ["E", [Nothing, (Time_Of_Day.new 13 55), Nothing, Nothing]]]
file = (enso_project.data / "transient" / "quote_always.csv")
file.delete_if_exists
@ -183,7 +182,7 @@ spec =
table = Table.new [["ąęćś", [0]], ["ß", ["żółw 🐢"]]]
file = (enso_project.data / "transient" / "utf16.csv")
file.delete_if_exists
table.write file (Delimited_Data "," encoding=Encoding.utf_16_be) on_problems=Report_Error . should_succeed
table.write file (Delimited "," encoding=Encoding.utf_16_be) on_problems=Report_Error . should_succeed
expected_text = normalize_lines <| """
ąęćś,ß
0,żółw 🐢
@ -195,7 +194,7 @@ spec =
table = Table.new [["A", [0, 1]], ["B", ["słówka", "🐢"]]]
file = (enso_project.data / "transient" / "ascii.csv")
file.delete_if_exists
result = table.write file (Delimited_Data "," encoding=Encoding.ascii)
result = table.write file (Delimited "," encoding=Encoding.ascii)
expected_text = normalize_lines <| """
A,B
0,s??wka
@ -210,7 +209,7 @@ spec =
file.delete
Test.specify "should allow only text columns if no formatter is specified" <|
format = Delimited_Data "," value_formatter=Nothing
format = Delimited "," value_formatter=Nothing
table_1 = Table.new [["A", ["x", "y"]], ["B", ["z", "w"]]]
file_1 = (enso_project.data / "transient" / "textonly.csv")
file_1.delete_if_exists
@ -282,7 +281,7 @@ spec =
file = (enso_project.data / "transient" / "append_by_name_2.csv")
file.delete_if_exists
existing_table.write file on_existing_file=Existing_File_Behavior.Overwrite on_problems=Report_Error . should_succeed
format = Delimited_Data "," . with_headers
format = Delimited "," . with_headers
appending_table.write file format on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
got_table = file.read format
expected_table = Table.new [["0", [1,2,Nothing,0]], ["B1", [1.0,1.5,33,44]], ["C", ["x","y","a","BB"]]]
@ -294,7 +293,7 @@ spec =
appending_table = Table.new [["B", [33,44]], ["A", [Nothing, 0]], ["C", ["a","BB"]]]
file = (enso_project.data / "transient" / "append_no_header.csv")
file.delete_if_exists
no_header_format = Delimited_Data "," . without_headers
no_header_format = Delimited "," . without_headers
existing_table.write file no_header_format on_existing_file=Existing_File_Behavior.Overwrite
appending_table.write file on_existing_file=Existing_File_Behavior.Append . should_fail_with Illegal_Argument_Error_Data
file.delete
@ -304,7 +303,7 @@ spec =
appending_table = Table.new [["B", [33,44]], ["A", [Nothing, 0]], ["C", ["a","BB"]]]
file = (enso_project.data / "transient" / "append_no_header.csv")
file.delete_if_exists
no_header_format = Delimited_Data "," . without_headers
no_header_format = Delimited "," . without_headers
existing_table.write file on_existing_file=Existing_File_Behavior.Overwrite
appending_table.write file no_header_format on_existing_file=Existing_File_Behavior.Append . should_fail_with Illegal_Argument_Error_Data
file.delete
@ -336,7 +335,7 @@ spec =
got_table.should_equal expected_table
file.delete
base_format = Delimited_Data ","
base_format = Delimited ","
no_headers = base_format . without_headers
with_headers = base_format . with_headers
@ -378,7 +377,7 @@ spec =
style=setting.first
separator=setting.second
file = (enso_project.data / "transient" / "endings.csv")
initial_table.write file (Delimited_Data ',' line_endings=style) on_problems=Report_Error . should_succeed
initial_table.write file (Delimited ',' line_endings=style) on_problems=Report_Error . should_succeed
table_to_append.write file on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
text = File.read_text file
text.should_equal (expected_lines.join separator suffix=separator)
@ -408,7 +407,7 @@ spec =
separator=setting.second
file.delete_if_exists
(initial_lines.join separator suffix=separator).write file
format = Delimited_Data ',' . with_comments
format = Delimited ',' . with_comments
table_to_append.write file format on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
text = File.read_text file
expected_text = expected_lines.join separator suffix=separator
@ -424,7 +423,7 @@ spec =
separator=setting.second
file.delete_if_exists
(initial_lines.join separator).write file
format = Delimited_Data ',' . with_comments
format = Delimited ',' . with_comments
table_to_append.write file format on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
text = File.read_text file
expected_text = expected_lines.join separator suffix=separator
@ -436,7 +435,7 @@ spec =
file = (enso_project.data / "transient" / "append_edge_cases.csv")
file.delete_if_exists
format = Delimited_Data ',' . without_headers
format = Delimited ',' . without_headers
# A long line but without a trailing newline
base_line = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-ABC"
@ -511,7 +510,7 @@ spec =
separator=setting.second
file.delete_if_exists
(initial_line+separator).write file
format = Delimited_Data ',' . with_comments
format = Delimited ',' . with_comments
table_to_append.write file format on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
text = File.read_text file
expected_text = expected_lines.join separator suffix=separator
@ -525,7 +524,7 @@ spec =
file = (enso_project.data / "transient" / "endings_comments_only.csv")
file.delete_if_exists
(join_lines initial_lines trailing_newline=False).write file
format = Delimited_Data ',' . with_comments
format = Delimited ',' . with_comments
table_to_append.write file format on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
text = File.read_text file
expected_text = join_lines expected_lines
@ -537,8 +536,8 @@ spec =
table_to_append = Table.new [["a", ["x", "y"]]]
file = (enso_project.data / "transient" / "endings_mismatch.csv")
file.delete_if_exists
initial_table.write file (Delimited_Data ',' line_endings=Line_Ending_Style.Mac_Legacy)
result = table_to_append.write file (Delimited_Data ',' line_endings=Line_Ending_Style.Unix) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position
initial_table.write file (Delimited ',' line_endings=Line_Ending_Style.Mac_Legacy)
result = table_to_append.write file (Delimited ',' line_endings=Line_Ending_Style.Unix) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position
result . should_fail_with Illegal_Argument_Error_Data
result.catch.message . should_equal "The explicitly provided line endings ('\n') do not match the line endings in the file ('\r')."
file.delete

View File

@ -4,11 +4,10 @@ from Standard.Base.System.File import File_Already_Exists_Error
from Standard.Base.Error.Problem_Behavior import all
import Standard.Table
from Standard.Table import File_Format, Match_Columns, Column_Name_Mapping, Excel_Range, Data_Formatter_Data
from Standard.Table import Match_Columns, Column_Name_Mapping, Excel, Excel_Range, Data_Formatter_Data, Sheet_Names, Range_Names, Worksheet, Cell_Range, Delimited
from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Index
from Standard.Table.IO.Excel import Sheet_Names, Range_Names, Sheet, Cell_Range
from Standard.Table.Errors as Table_Errors import Invalid_Output_Column_Names_Data, Duplicate_Output_Column_Names_Data, Invalid_Location_Data, Range_Exceeded_Data, Existing_Data_Data, Column_Count_Mismatch_Data, Column_Name_Mismatch_Data
from Standard.Table.Data.Column_Selector import By_Index
from Standard.Table.Errors import Invalid_Output_Column_Names_Data, Duplicate_Output_Column_Names_Data, Invalid_Location_Data, Range_Exceeded_Data, Existing_Data_Data, Column_Count_Mismatch_Data, Column_Name_Mismatch_Data
import Standard.Test
import Standard.Test.Problems
@ -27,7 +26,7 @@ spec_fmt header file read_method =
t.at 'Price' . to_vector . should_equal [22.3, 32, 43.2, 54, 31, Nothing]
Test.specify "should read the specified sheet by index and properly format a table" <|
t = read_method file (File_Format.Excel_Data (Sheet 2) headers=False)
t = read_method file (Excel (Worksheet 2) headers=False)
t.columns.map .name . should_equal ['A', 'B', 'C', 'D', 'E']
t.at 'A' . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing]
t.at 'B' . to_vector . should_equal [Nothing, Nothing, 10, Nothing, Nothing, Nothing, Nothing]
@ -36,32 +35,32 @@ spec_fmt header file read_method =
t.at 'E' . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing, Nothing, 'foo', Nothing]
Test.specify "should read the specified sheet by name and properly handle dates" <|
t = read_method file (File_Format.Excel_Data (Sheet 'Dates'))
t = read_method file (Excel (Worksheet 'Dates'))
t.columns.map .name . should_equal ['Student Name', 'Enrolment Date']
t.at 'Enrolment Date' . map .day . to_vector . should_equal [2, 26, 4, 24, 31, 7]
Test.specify "should read an empty table" <|
t = read_method file (File_Format.Excel_Data (Sheet "Empty"))
t = read_method file (Excel (Worksheet "Empty"))
t.column_count.should_equal 0
Test.specify "should gracefully handle duplicate column names and formulas" <|
t = read_method file (File_Format.Excel_Data (Sheet "Duplicate Columns"))
t = read_method file (Excel (Worksheet "Duplicate Columns"))
t.columns.map .name . should_equal ['Item', 'Price', 'Quantity', 'Price_1']
t.at 'Price_1' . to_vector . should_equal [20, 40, 0, 60, 0, 10]
Test.specify "should allow reading with cell range specified" <|
t_1 = read_method file (File_Format.Excel_Data (Cell_Range "Simple!B:C"))
t_1 = read_method file (Excel (Cell_Range "Simple!B:C"))
t_1.columns.map .name . should_equal ['Quantity', 'Price']
t_1.at 'Quantity' . to_vector . should_equal [10, 20, Nothing, 30, Nothing, 5]
t_1.at 'Price' . to_vector . should_equal [22.3, 32, 43.2, 54, 31, Nothing]
t_2 = read_method file (File_Format.Excel_Data (Cell_Range "Simple!3:5") headers=False)
t_2 = read_method file (Excel (Cell_Range "Simple!3:5") headers=False)
t_2.column_count.should_equal 3
t_2.at 'A' . to_vector . should_equal ['t-shirt', 'trousers', 'shoes']
t_2.at 'B' . to_vector . should_equal [20, Nothing, 30]
t_2.at 'C' . to_vector . should_equal [32, 43.2, 54]
t_3 = read_method file (File_Format.Excel_Data (Cell_Range "Simple!B4:C5") headers=False)
t_3 = read_method file (Excel (Cell_Range "Simple!B4:C5") headers=False)
t_3.column_count.should_equal 2
t_3.at 'B' . to_vector . should_equal [Nothing, 30]
t_3.at 'C' . to_vector . should_equal [43.2, 54]
@ -72,7 +71,7 @@ spec_write suffix test_sheet_name =
for these tests. This should ideally be re-enabled with the
completion of the following story:
https://www.pivotaltracker.com/story/show/181755990
no_dates = File_Format.Delimited_Data "," value_formatter=(Data_Formatter_Data date_formats=[] time_formats=[] datetime_formats=[])
no_dates = Delimited "," value_formatter=(Data_Formatter_Data date_formats=[] time_formats=[] datetime_formats=[])
out = enso_project.data / ('out.' + suffix)
out_bak = enso_project.data / ('out.' + suffix + '.bak')
table = enso_project.data/'varied_column.csv' . read (format = no_dates)
@ -96,77 +95,77 @@ spec_write suffix test_sheet_name =
Test.specify 'should write a table to existing file in overwrite mode as a new sheet with headers' <|
out.delete_if_exists
(enso_project.data / test_sheet_name) . copy_to out
table.write out (File_Format.Excel_Data (Sheet "Another")) on_existing_file=Existing_File_Behavior.Overwrite on_problems=Report_Error . should_succeed
written = out.read (File_Format.Excel_Data (Sheet "Another"))
table.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Overwrite on_problems=Report_Error . should_succeed
written = out.read (Excel (Worksheet "Another"))
written.should_equal table
out.delete_if_exists
Test.specify 'should write a table to existing file in overwrite mode as a new sheet without headers' <|
out.delete_if_exists
(enso_project.data / test_sheet_name) . copy_to out
table.write out (File_Format.Excel_Data (Sheet "NoHeaders")) on_existing_file=Existing_File_Behavior.Overwrite on_problems=Report_Error . should_succeed
written = out.read (File_Format.Excel_Data (Sheet "NoHeaders"))
table.write out (Excel (Worksheet "NoHeaders")) on_existing_file=Existing_File_Behavior.Overwrite on_problems=Report_Error . should_succeed
written = out.read (Excel (Worksheet "NoHeaders"))
written.should_equal (table.rename_columns (Column_Name_Mapping.By_Position ['A', 'B', 'C', 'D', 'E', 'F']))
out.delete_if_exists
Test.specify 'should create new sheets at the start if index is 0' <|
out.delete_if_exists
table.write out (File_Format.Excel_Data (Sheet 0)) on_problems=Report_Error . should_succeed
clothes.write out (File_Format.Excel_Data (Sheet 0)) on_problems=Report_Error . should_succeed
read_1 = out.read (File_Format.Excel_Data (Sheet "Sheet1"))
table.write out (Excel (Worksheet 0)) on_problems=Report_Error . should_succeed
clothes.write out (Excel (Worksheet 0)) on_problems=Report_Error . should_succeed
read_1 = out.read (Excel (Worksheet "Sheet1"))
read_1 . should_equal table
read_2 = out.read (File_Format.Excel_Data (Sheet "Sheet2"))
read_2 = out.read (Excel (Worksheet "Sheet2"))
read_2 . should_equal clothes
read_3 = out.read (File_Format.Excel_Data (Sheet_Names))
read_3 = out.read (Excel (Sheet_Names))
read_3 . should_equal ["Sheet2", "Sheet1"]
out.delete_if_exists
Test.specify 'should write a table to specific single cell location of an existing sheet' <|
out.delete_if_exists
(enso_project.data / test_sheet_name) . copy_to out
table.write out (File_Format.Excel_Data (Cell_Range "Another!G1")) on_problems=Report_Error . should_succeed
written = out.read (File_Format.Excel_Data (Cell_Range "Another!G1"))
table.write out (Excel (Cell_Range "Another!G1")) on_problems=Report_Error . should_succeed
written = out.read (Excel (Cell_Range "Another!G1"))
written.should_equal table
out.delete_if_exists
Test.specify 'should clear out an existing fixed range and replace' <|
out.delete_if_exists
(enso_project.data / test_sheet_name) . copy_to out
sub_clothes.write out (File_Format.Excel_Data (Cell_Range "Another!A1:D20")) on_problems=Report_Error . should_succeed
written = out.read (File_Format.Excel_Data (Cell_Range "Another!A1"))
sub_clothes.write out (Excel (Cell_Range "Another!A1:D20")) on_problems=Report_Error . should_succeed
written = out.read (Excel (Cell_Range "Another!A1"))
written.should_equal sub_clothes
out.delete_if_exists
Test.specify 'should clear out an existing range and replace' <|
out.delete_if_exists
(enso_project.data / test_sheet_name) . copy_to out
sub_clothes.write out (File_Format.Excel_Data (Cell_Range "Another!A1")) on_problems=Report_Error . should_succeed
written = out.read (File_Format.Excel_Data (Cell_Range "Another!A1"))
sub_clothes.write out (Excel (Cell_Range "Another!A1")) on_problems=Report_Error . should_succeed
written = out.read (Excel (Cell_Range "Another!A1"))
written.should_equal sub_clothes
out.delete_if_exists
Test.specify 'should result in Invalid_Location error if trying to write in a bad location' <|
out.delete_if_exists
(enso_project.data / test_sheet_name) . copy_to out
sub_clothes.write out (File_Format.Excel_Data (Cell_Range "DoesNotExist!A1")) . should_fail_with Invalid_Location_Data
sub_clothes.write out (File_Format.Excel_Data (Cell_Range "DoesNotExist!A1:B2")) . should_fail_with Invalid_Location_Data
sub_clothes.write out (File_Format.Excel_Data (Cell_Range "SillyRangeName")) . should_fail_with Invalid_Location_Data
sub_clothes.write out (Excel (Cell_Range "DoesNotExist!A1")) . should_fail_with Invalid_Location_Data
sub_clothes.write out (Excel (Cell_Range "DoesNotExist!A1:B2")) . should_fail_with Invalid_Location_Data
sub_clothes.write out (Excel (Cell_Range "SillyRangeName")) . should_fail_with Invalid_Location_Data
out.delete_if_exists
Test.specify 'should result in Range_Exceeded error if trying to write in too small a range' <|
out.delete_if_exists
(enso_project.data / test_sheet_name) . copy_to out
sub_clothes.write out (File_Format.Excel_Data (Cell_Range "Another!A1:B2")) . should_fail_with Range_Exceeded_Data
sub_clothes.write out (Excel (Cell_Range "Another!A1:B2")) . should_fail_with Range_Exceeded_Data
out.delete_if_exists
Test.specify 'should result in Existing_Data error if in Error mode and trying to replace' <|
out.delete_if_exists
(enso_project.data / test_sheet_name) . copy_to out
lmd = out.last_modified_time
sub_clothes.write out (File_Format.Excel_Data (Sheet 1)) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data_Data
sub_clothes.write out (File_Format.Excel_Data (Sheet "Another")) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data_Data
sub_clothes.write out (File_Format.Excel_Data (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data_Data
sub_clothes.write out (File_Format.Excel_Data (Cell_Range "Sheet1!A9")) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data_Data
sub_clothes.write out (Excel (Worksheet 1)) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data_Data
sub_clothes.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data_Data
sub_clothes.write out (Excel (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data_Data
sub_clothes.write out (Excel (Cell_Range "Sheet1!A9")) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data_Data
out.last_modified_time.should_equal lmd
out.delete_if_exists
@ -174,13 +173,13 @@ spec_write suffix test_sheet_name =
out.delete_if_exists
(enso_project.data / test_sheet_name) . copy_to out
lmd = out.last_modified_time
sub_clothes.write out (File_Format.Excel_Data (Sheet "Testing")) on_existing_file=Existing_File_Behavior.Error . should_fail_with File_Already_Exists_Error
sub_clothes.write out (Excel (Worksheet "Testing")) on_existing_file=Existing_File_Behavior.Error . should_fail_with File_Already_Exists_Error
out.last_modified_time.should_equal lmd
out.delete_if_exists
Test.specify 'should write a table to non-existent file as a new sheet without headers' <|
out.delete_if_exists
table.write out (File_Format.Excel_Data (Sheet "Sheet1") headers=False) on_problems=Report_Error . should_succeed
table.write out (Excel (Worksheet "Sheet1") headers=False) on_problems=Report_Error . should_succeed
written = out.read
written.should_equal (table.rename_columns (Column_Name_Mapping.By_Position ['A', 'B', 'C', 'D', 'E', 'F']))
out.delete_if_exists
@ -190,8 +189,8 @@ spec_write suffix test_sheet_name =
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['CC',[True, False]], ['DD', ['2022-01-20', '2022-01-21']]]
expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]]
extra_another.write out (File_Format.Excel_Data (Sheet "Another")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
written = out.read (File_Format.Excel_Data (Sheet "Another")) . select_columns (By_Index [0, 1, 2])
extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
written = out.read (Excel (Worksheet "Another")) . select_columns (By_Index [0, 1, 2])
written.should_equal expected
out.delete_if_exists
@ -200,8 +199,8 @@ spec_write suffix test_sheet_name =
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['A', ['d', 'e']], ['B',[4, 5]], ['C',[True, False]], ['D', ['2022-01-20', '2022-01-21']]]
expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]]
extra_another.write out (File_Format.Excel_Data (Sheet "Another")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed
written = out.read (File_Format.Excel_Data (Sheet "Another")) . select_columns (By_Index [0, 1, 2])
extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed
written = out.read (Excel (Worksheet "Another")) . select_columns (By_Index [0, 1, 2])
written.should_equal expected
out.delete_if_exists
@ -210,8 +209,8 @@ spec_write suffix test_sheet_name =
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']]]
expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]]
extra_another.write out (File_Format.Excel_Data (Sheet "Another")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
written = out.read (File_Format.Excel_Data (Sheet "Another")) . select_columns (By_Index [0, 1, 2])
extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
written = out.read (Excel (Worksheet "Another")) . select_columns (By_Index [0, 1, 2])
written.should_equal expected
out.delete_if_exists
@ -220,8 +219,8 @@ spec_write suffix test_sheet_name =
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['CC',[True, False]], ['DD', ['2022-01-20', '2022-01-21']]]
expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]]
extra_another.write out (File_Format.Excel_Data (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
written = out.read (File_Format.Excel_Data (Sheet "Another")) . select_columns (By_Index [0, 1, 2])
extra_another.write out (Excel (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
written = out.read (Excel (Worksheet "Another")) . select_columns (By_Index [0, 1, 2])
written.should_equal expected
out.delete_if_exists
@ -230,8 +229,8 @@ spec_write suffix test_sheet_name =
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['A', ['d', 'e']], ['B',[4, 5]], ['C',[True, False]], ['D', ['2022-01-20', '2022-01-21']]]
expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]]
extra_another.write out (File_Format.Excel_Data (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed
written = out.read (File_Format.Excel_Data (Sheet "Another")) . select_columns (By_Index [0, 1, 2])
extra_another.write out (Excel (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed
written = out.read (Excel (Worksheet "Another")) . select_columns (By_Index [0, 1, 2])
written.should_equal expected
out.delete_if_exists
@ -240,8 +239,8 @@ spec_write suffix test_sheet_name =
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']]]
expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]]
extra_another.write out (File_Format.Excel_Data (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
written = out.read (File_Format.Excel_Data (Sheet "Another")) . select_columns (By_Index [0, 1, 2])
extra_another.write out (Excel (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
written = out.read (Excel (Worksheet "Another")) . select_columns (By_Index [0, 1, 2])
written.should_equal expected
out.delete_if_exists
@ -250,8 +249,8 @@ spec_write suffix test_sheet_name =
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['AA', ['d', 'e']], ['BB', [4, 5]], ['CC', [True, False]], ['DD', ['2022-01-20', '2022-01-21']]]
expected = Table.new [['AA', ['a', 'b', 'c', 'd', 'e']], ['BB', [1, 2, 3, 4, 5]], ['CC', [True, False, False, True, False]]]
extra_another.write out (File_Format.Excel_Data (Cell_Range "Another!A1:D6")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
written = out.read (File_Format.Excel_Data (Sheet "Another")) . select_columns (By_Index [0, 1, 2])
extra_another.write out (Excel (Cell_Range "Another!A1:D6")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
written = out.read (Excel (Worksheet "Another")) . select_columns (By_Index [0, 1, 2])
written.should_equal expected
out.delete_if_exists
@ -260,8 +259,8 @@ spec_write suffix test_sheet_name =
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['A', ['d', 'e']], ['B',[4, 5]], ['C',[True, False]], ['D', ['2022-01-20', '2022-01-21']]]
expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]]
extra_another.write out (File_Format.Excel_Data (Cell_Range "Another!A1:D6")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed
written = out.read (File_Format.Excel_Data (Sheet "Another")) . select_columns (By_Index [0, 1, 2])
extra_another.write out (Excel (Cell_Range "Another!A1:D6")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed
written = out.read (Excel (Worksheet "Another")) . select_columns (By_Index [0, 1, 2])
written.should_equal expected
out.delete_if_exists
@ -270,8 +269,8 @@ spec_write suffix test_sheet_name =
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['CC',[True, False]], ['DD', ['2022-01-20', '2022-01-21']]]
expected = Table.new [['AA', ['f', 'g', 'h', 'd', 'e']], ['BB',[1, 2, 3, 4, 5]], ['CC',[True, False, False, True, False]]]
extra_another.write out (File_Format.Excel_Data (Cell_Range "Random!K9")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
written = out.read (File_Format.Excel_Data (Cell_Range "Random!K9")) . select_columns (By_Index [0, 1, 2])
extra_another.write out (Excel (Cell_Range "Random!K9")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
written = out.read (Excel (Cell_Range "Random!K9")) . select_columns (By_Index [0, 1, 2])
written.should_equal expected
out.delete_if_exists
@ -280,8 +279,8 @@ spec_write suffix test_sheet_name =
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['AA_1',[True, False]], ['BB_1', ['2022-01-20', '2022-01-21']]]
expected = Table.new [['AA', ['f', 'g', 'h', 'd', 'e']], ['BB',[1, 2, 3, 4, 5]], ['AA_1',[True, False, False, True, False]]]
extra_another.write out (File_Format.Excel_Data (Cell_Range "Random!S3")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
written = out.read (File_Format.Excel_Data (Cell_Range "Random!S3")) . select_columns (By_Index [0, 1, 2])
extra_another.write out (Excel (Cell_Range "Random!S3")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
written = out.read (Excel (Cell_Range "Random!S3")) . select_columns (By_Index [0, 1, 2])
written.should_equal expected
out.delete_if_exists
@ -290,8 +289,8 @@ spec_write suffix test_sheet_name =
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['A', ['d', 'e']], ['B',[4, 5]], ['C',[True, False]], ['D', ['2022-01-20', '2022-01-21']]]
expected = Table.new [['AA', ['f', 'g', 'h', 'd', 'e']], ['BB',[1, 2, 3, 4, 5]], ['CC',[True, False, False, True, False]]]
extra_another.write out (File_Format.Excel_Data (Cell_Range "Random!K9")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed
written = out.read (File_Format.Excel_Data (Cell_Range "Random!K9")) . select_columns (By_Index [0, 1, 2])
extra_another.write out (Excel (Cell_Range "Random!K9")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed
written = out.read (Excel (Cell_Range "Random!K9")) . select_columns (By_Index [0, 1, 2])
written.should_equal expected
out.delete_if_exists
@ -300,8 +299,8 @@ spec_write suffix test_sheet_name =
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']]]
expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]]
extra_another.write out (File_Format.Excel_Data (Cell_Range "Another!A1:D6")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
written = out.read (File_Format.Excel_Data (Sheet "Another")) . select_columns (By_Index [0, 1, 2])
extra_another.write out (Excel (Cell_Range "Another!A1:D6")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
written = out.read (Excel (Worksheet "Another")) . select_columns (By_Index [0, 1, 2])
written.should_equal expected
out.delete_if_exists
@ -309,7 +308,7 @@ spec_write suffix test_sheet_name =
out.delete_if_exists
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']]]
extra_another.write out (File_Format.Excel_Data (Sheet "Another")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Column_Name_Mismatch_Data
extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Column_Name_Mismatch_Data
out.delete_if_exists
Test.specify 'should fail to append to a sheet by name if extra columns' <|
@ -317,7 +316,7 @@ spec_write suffix test_sheet_name =
(enso_project.data / test_sheet_name) . copy_to out
lmd = out.last_modified_time
extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']], ['EE', ['2022-01-20', '2022-01-21']]]
extra_another.write out (File_Format.Excel_Data (Sheet "Another")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Column_Name_Mismatch_Data
extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Column_Name_Mismatch_Data
out.last_modified_time.should_equal lmd
out.delete_if_exists
@ -326,8 +325,8 @@ spec_write suffix test_sheet_name =
(enso_project.data / test_sheet_name) . copy_to out
lmd = out.last_modified_time
extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']], ['EE', ['2022-01-20', '2022-01-21']]]
extra_another.write out (File_Format.Excel_Data (Sheet "NoHeaders")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Illegal_Argument_Error_Data
extra_another.write out (File_Format.Excel_Data (Sheet "Another") False) on_existing_file=Existing_File_Behavior.Append . should_fail_with Illegal_Argument_Error_Data
extra_another.write out (Excel (Worksheet "NoHeaders")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Illegal_Argument_Error_Data
extra_another.write out (Excel (Worksheet "Another") False) on_existing_file=Existing_File_Behavior.Append . should_fail_with Illegal_Argument_Error_Data
out.last_modified_time.should_equal lmd
out.delete_if_exists
@ -336,7 +335,7 @@ spec_write suffix test_sheet_name =
(enso_project.data / test_sheet_name) . copy_to out
lmd = out.last_modified_time
extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']]]
extra_another.write out (File_Format.Excel_Data (Sheet "Another")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position . should_fail_with Column_Count_Mismatch_Data
extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position . should_fail_with Column_Count_Mismatch_Data
out.last_modified_time.should_equal lmd
out.delete_if_exists
@ -345,7 +344,7 @@ spec_write suffix test_sheet_name =
(enso_project.data / test_sheet_name) . copy_to out
lmd = out.last_modified_time
extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']], ['EE', ['2022-01-20', '2022-01-21']]]
extra_another.write out (File_Format.Excel_Data (Sheet "Another")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position . should_fail_with Column_Count_Mismatch_Data
extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position . should_fail_with Column_Count_Mismatch_Data
out.last_modified_time.should_equal lmd
out.delete_if_exists
@ -354,7 +353,7 @@ spec_write suffix test_sheet_name =
(enso_project.data / test_sheet_name) . copy_to out
lmd = out.last_modified_time
extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['CC',[True, False]], ['DD', ['2022-01-20', '2022-01-21']]]
extra_another.write out (File_Format.Excel_Data (Cell_Range "Another!A1:D5")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Range_Exceeded_Data
extra_another.write out (Excel (Cell_Range "Another!A1:D5")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Range_Exceeded_Data
out.last_modified_time.should_equal lmd
out.delete_if_exists
@ -363,7 +362,7 @@ spec_write suffix test_sheet_name =
(enso_project.data / test_sheet_name) . copy_to out
lmd = out.last_modified_time
extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['CC',[True, False]], ['DD', ['2022-01-20', '2022-01-21']]]
extra_another.write out (File_Format.Excel_Data (Cell_Range "Random!B3")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Existing_Data_Data
extra_another.write out (Excel (Cell_Range "Random!B3")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Existing_Data_Data
out.last_modified_time.should_equal lmd
out.delete_if_exists
@ -485,7 +484,7 @@ spec =
# check_column (table.at "C") col_c
Test.group "Read XLSX / XLS Files" <|
Test.specify "should let you read the first sheet with File_Format.Auto" <|
Test.specify "should let you read the first sheet with Auto_Detect" <|
check_table <| xlsx_sheet.read
check_table <| File.read xlsx_sheet
check_table <| File.read xlsx_path
@ -493,90 +492,90 @@ spec =
check_table <| File.read xls_sheet
check_table <| File.read xls_path
Test.specify "should let you read the first sheet with File_Format.Excel_Data" <|
check_table <| xlsx_sheet.read File_Format.Excel_Data
check_table <| File.read xlsx_sheet File_Format.Excel_Data
check_table <| File.read xlsx_path File_Format.Excel_Data
check_table <| xls_sheet.read File_Format.Excel_Data
check_table <| File.read xls_sheet File_Format.Excel_Data
check_table <| File.read xls_path File_Format.Excel_Data
Test.specify "should let you read the first sheet with Excel" <|
check_table <| xlsx_sheet.read Excel
check_table <| File.read xlsx_sheet Excel
check_table <| File.read xlsx_path Excel
check_table <| xls_sheet.read Excel
check_table <| File.read xls_sheet Excel
check_table <| File.read xls_path Excel
Test.specify "should let you read the sheet names" <|
sheet_names = ["Sheet1", "Another", "NoHeaders", "Random"]
xlsx_sheet.read (File_Format.Excel_Data Sheet_Names) . should_equal sheet_names
xls_sheet.read (File_Format.Excel_Data Sheet_Names) . should_equal sheet_names
xlsx_sheet.read (Excel Sheet_Names) . should_equal sheet_names
xls_sheet.read (Excel Sheet_Names) . should_equal sheet_names
Test.specify "should let you read the range names" <|
range_names = ["myData"]
xlsx_sheet.read (File_Format.Excel_Data Range_Names) . should_equal range_names
xls_sheet.read (File_Format.Excel_Data Range_Names) . should_equal range_names
xlsx_sheet.read (Excel Range_Names) . should_equal range_names
xls_sheet.read (Excel Range_Names) . should_equal range_names
Test.specify "should let you read by sheet index" <|
table = xlsx_sheet.read (File_Format.Excel_Data (Sheet 1))
table = xlsx_sheet.read (Excel (Worksheet 1))
check_table table
table_2 = xlsx_sheet.read (File_Format.Excel_Data (Sheet 1 (table.row_count - col_a.length)))
table_2 = xlsx_sheet.read (Excel (Worksheet 1 (table.row_count - col_a.length)))
table_2.row_count . should_equal col_a.length
check_table <| table_2
Test.specify "should let you read by sheet name" <|
table = xlsx_sheet.read (File_Format.Excel_Data (Sheet "Sheet1"))
table = xlsx_sheet.read (Excel (Worksheet "Sheet1"))
check_table table
table_2 = xlsx_sheet.read (File_Format.Excel_Data (Sheet "Sheet1" (table.row_count - col_a.length)))
table_2 = xlsx_sheet.read (Excel (Worksheet "Sheet1" (table.row_count - col_a.length)))
table_2.row_count . should_equal col_a.length
check_table <| table_2
Test.specify "should let you read XLS by sheet index" <|
table = xls_sheet.read (File_Format.Excel_Data (Sheet 1))
table = xls_sheet.read (Excel (Worksheet 1))
check_table table
table_2 = xls_sheet.read (File_Format.Excel_Data (Sheet 1 (table.row_count - col_a.length)))
table_2 = xls_sheet.read (Excel (Worksheet 1 (table.row_count - col_a.length)))
table_2.row_count . should_equal col_a.length
check_table <| table_2
Test.specify "should let you read XLS by sheet name" <|
table = xls_sheet.read (File_Format.Excel_Data (Sheet "Sheet1"))
table = xls_sheet.read (Excel (Worksheet "Sheet1"))
check_table table
Test.specify "should let you read by range" <|
table = xlsx_sheet.read (File_Format.Excel_Data (Cell_Range "Sheet1!A:C"))
table = xlsx_sheet.read (Excel (Cell_Range "Sheet1!A:C"))
check_table table
table_2 = xlsx_sheet.read (File_Format.Excel_Data (Cell_Range "Sheet1!A:C" (table.row_count - col_a.length)))
table_2 = xlsx_sheet.read (Excel (Cell_Range "Sheet1!A:C" (table.row_count - col_a.length)))
table_2.row_count . should_equal col_a.length
check_table <| table_2
check_table <| xlsx_sheet.read (File_Format.Excel_Data (Cell_Range "Sheet1!10:13"))
check_table <| xlsx_sheet.read (File_Format.Excel_Data (Cell_Range "Sheet1!A10:C13"))
check_table <| xlsx_sheet.read (Excel (Cell_Range "Sheet1!10:13"))
check_table <| xlsx_sheet.read (Excel (Cell_Range "Sheet1!A10:C13"))
Test.specify "should let you read by range name" <|
table = xlsx_sheet.read (File_Format.Excel_Data (Cell_Range "myData"))
table = xlsx_sheet.read (Excel (Cell_Range "myData"))
table.row_count . should_equal col_a.length
check_table <| table
Test.specify "should let you restrict number of rows read and skip rows" <|
table = xlsx_sheet.read (File_Format.Excel_Data (Sheet "Sheet1"))
table = xlsx_sheet.read (Excel (Worksheet "Sheet1"))
check_table table
table_2 = xlsx_sheet.read (File_Format.Excel_Data (Sheet "Sheet1" (table.row_count - col_a.length)))
table_2 = xlsx_sheet.read (Excel (Worksheet "Sheet1" (table.row_count - col_a.length)))
table_2.row_count . should_equal col_a.length
check_table <| table_2
table_3 = xlsx_sheet.read (File_Format.Excel_Data (Sheet "Sheet1" (table.row_count - col_a.length) 2))
table_3 = xlsx_sheet.read (Excel (Worksheet "Sheet1" (table.row_count - col_a.length) 2))
table_3.row_count . should_equal 2
table_4 = xlsx_sheet.read (File_Format.Excel_Data (Sheet "Sheet1" row_limit=6))
table_4 = xlsx_sheet.read (Excel (Worksheet "Sheet1" row_limit=6))
table_4.row_count . should_equal 6
Test.group "Problems" <|
Test.specify "should handle non-existing file gracefully" <|
bad_file = enso_project.data / "DoesNotExists.xlsx"
bad_file.read (File_Format.Excel_Data (Cell_Range "Sheet1!A:C")) . should_fail_with File.File_Not_Found
bad_file.read (Excel (Cell_Range "Sheet1!A:C")) . should_fail_with File.File_Not_Found
Test.specify "should handle wrong xls_format gracefully" <|
xlsx_sheet.read (File_Format.Excel_Data (Cell_Range "Sheet1!A:C") xls_format=True) . should_fail_with File.IO_Error
xls_sheet.read (File_Format.Excel_Data (Cell_Range "Sheet1!A:C") xls_format=False) . should_fail_with File.IO_Error
xlsx_sheet.read (Excel (Cell_Range "Sheet1!A:C") xls_format=True) . should_fail_with File.IO_Error
xls_sheet.read (Excel (Cell_Range "Sheet1!A:C") xls_format=False) . should_fail_with File.IO_Error
spec_fmt 'XLSX reading' Examples.xlsx .read
@ -592,40 +591,40 @@ spec =
table.at (col_names.at idx) . to_vector . should_equal values
Test.specify "Simple table" <|
check_table (file.read (File_Format.Excel_Data (Cell_Range "Sheet1!A1"))) ["AA", "BB"] [[1,2,3,4,5,6], ["A","B","C","D","E","F"]]
check_table (file.read (File_Format.Excel_Data (Cell_Range "Sheet1!A2"))) ["A", "B"] [[1,2,3,4,5,6], ["A","B","C","D","E","F"]]
check_table (file.read (File_Format.Excel_Data (Cell_Range "Sheet1!A1:A1"))) ["A"] [["AA"]]
check_table (file.read (File_Format.Excel_Data (Cell_Range "Sheet1!B1"))) ["B"] [["BB", "A","B","C","D","E","F"]]
check_table (file.read (File_Format.Excel_Data (Cell_Range "Sheet1!B1") headers=True)) ["BB"] [["A","B","C","D","E","F"]]
check_table (file.read (File_Format.Excel_Data (Cell_Range "Sheet1!B2"))) ["B"] [["A","B","C","D","E","F"]]
check_table (file.read (Excel (Cell_Range "Sheet1!A1"))) ["AA", "BB"] [[1,2,3,4,5,6], ["A","B","C","D","E","F"]]
check_table (file.read (Excel (Cell_Range "Sheet1!A2"))) ["A", "B"] [[1,2,3,4,5,6], ["A","B","C","D","E","F"]]
check_table (file.read (Excel (Cell_Range "Sheet1!A1:A1"))) ["A"] [["AA"]]
check_table (file.read (Excel (Cell_Range "Sheet1!B1"))) ["B"] [["BB", "A","B","C","D","E","F"]]
check_table (file.read (Excel (Cell_Range "Sheet1!B1") headers=True)) ["BB"] [["A","B","C","D","E","F"]]
check_table (file.read (Excel (Cell_Range "Sheet1!B2"))) ["B"] [["A","B","C","D","E","F"]]
Test.specify "Patchy table" <|
check_table (file.read (File_Format.Excel_Data (Cell_Range "Sheet1!D1"))) ["A", "B", "Column_1"] [[1,2,4], [4,4,Nothing], [6,Nothing,6]]
check_table (file.read (File_Format.Excel_Data (Cell_Range "Sheet1!D2"))) ["D", "E", "F"] [[1,2,4], [4,4,Nothing], [6,Nothing,6]]
check_table (file.read (File_Format.Excel_Data (Cell_Range "Sheet1!E"))) ["B"] [[4,4,Nothing,Nothing,Nothing,Nothing]]
check_table (file.read (File_Format.Excel_Data (Cell_Range "Sheet1!E1"))) ["B", "Column_1"] [[4,4,Nothing], [6,Nothing,6]]
check_table (file.read (File_Format.Excel_Data (Cell_Range "Sheet1!E2"))) ["E", "F"] [[4,4,Nothing], [6,Nothing,6]]
check_table (file.read (Excel (Cell_Range "Sheet1!D1"))) ["A", "B", "Column_1"] [[1,2,4], [4,4,Nothing], [6,Nothing,6]]
check_table (file.read (Excel (Cell_Range "Sheet1!D2"))) ["D", "E", "F"] [[1,2,4], [4,4,Nothing], [6,Nothing,6]]
check_table (file.read (Excel (Cell_Range "Sheet1!E"))) ["B"] [[4,4,Nothing,Nothing,Nothing,Nothing]]
check_table (file.read (Excel (Cell_Range "Sheet1!E1"))) ["B", "Column_1"] [[4,4,Nothing], [6,Nothing,6]]
check_table (file.read (Excel (Cell_Range "Sheet1!E2"))) ["E", "F"] [[4,4,Nothing], [6,Nothing,6]]
Test.specify "Single cell" <|
check_table (file.read (File_Format.Excel_Data (Cell_Range "Sheet1!H1"))) ["H"] [["Single Cell"]]
check_table (file.read (File_Format.Excel_Data (Cell_Range "Sheet1!H2"))) ["H"] [[]]
check_table (file.read (Excel (Cell_Range "Sheet1!H1"))) ["H"] [["Single Cell"]]
check_table (file.read (Excel (Cell_Range "Sheet1!H2"))) ["H"] [[]]
Test.specify "Single line" <|
check_table (file.read (File_Format.Excel_Data (Cell_Range "Sheet1!J1"))) ["J", "K", "L"] [["Just"],["Some"],["Headers"]]
check_table (file.read (Excel (Cell_Range "Sheet1!J1"))) ["J", "K", "L"] [["Just"],["Some"],["Headers"]]
Test.specify "Growing table" <|
check_table (file.read (File_Format.Excel_Data (Cell_Range "Sheet1!N1"))) ["A", "Full", "Table", "Column_1"] [["Hello","World",Nothing,"Extend"],[1,Nothing,"Gap",3],[2,2,"Here",5],[Nothing,Nothing,"To","Hello"]]
check_table (file.read (File_Format.Excel_Data (Cell_Range "Sheet1!O1"))) ["Full", "Table", "Column_1"] [[1,Nothing,"Gap",3],[2,2,"Here",5],[Nothing,Nothing,"To","Hello"]]
check_table (file.read (File_Format.Excel_Data (Cell_Range "Sheet1!O2"))) ["O", "P", "Q"] [[1,Nothing,"Gap",3],[2,2,"Here",5],[Nothing,Nothing,"To","Hello"]]
check_table (file.read (Excel (Cell_Range "Sheet1!N1"))) ["A", "Full", "Table", "Column_1"] [["Hello","World",Nothing,"Extend"],[1,Nothing,"Gap",3],[2,2,"Here",5],[Nothing,Nothing,"To","Hello"]]
check_table (file.read (Excel (Cell_Range "Sheet1!O1"))) ["Full", "Table", "Column_1"] [[1,Nothing,"Gap",3],[2,2,"Here",5],[Nothing,Nothing,"To","Hello"]]
check_table (file.read (Excel (Cell_Range "Sheet1!O2"))) ["O", "P", "Q"] [[1,Nothing,"Gap",3],[2,2,"Here",5],[Nothing,Nothing,"To","Hello"]]
Test.specify "Should handle invalid headers with warnings" <|
action = file.read (File_Format.Excel_Data (Cell_Range "Sheet1!D1")) on_problems=_
action = file.read (Excel (Cell_Range "Sheet1!D1")) on_problems=_
tester = check_table _ ["A", "B", "Column_1"] [[1,2,4], [4,4,Nothing], [6,Nothing,6]]
problems = [Invalid_Output_Column_Names_Data [""]]
Problems.test_problem_handling action problems tester
Test.specify "Should handle duplicate headers with warnings" <|
action = file.read (File_Format.Excel_Data (Cell_Range "Sheet1!S1")) on_problems=_
action = file.read (Excel (Cell_Range "Sheet1!S1")) on_problems=_
tester = check_table _ ["DD", "DD_1"] [[1,3], [2,4]]
problems = [Duplicate_Output_Column_Names_Data ["DD"]]
Problems.test_problem_handling action problems tester

View File

@ -1,9 +1,7 @@
from Standard.Base import all
from Standard.Base.System.File_Format import Unsupported_File_Type_Data
import Standard.Table.IO.File_Read
import Standard.Table.IO.File_Format
from Standard.Table.Errors import Unsupported_File_Type_Data
import Standard.Test
import Standard.Test.Problems
@ -12,20 +10,7 @@ spec =
sample_txt = enso_project.data / "sample.txt"
windows_log = enso_project.data / "windows.log"
Test.group "File_Format.Auto materialise" <|
Test.specify "should be Bytes for unknown file" <|
File_Format.Auto . materialise sample_xxx . should_fail_with Unsupported_File_Type_Data
Test.specify "should be Text for text file" <|
File_Format.Auto . materialise sample_txt . should_be_a File_Format.Plain_Text_Data
Test.specify "should be Text for log file" <|
File_Format.Auto . materialise windows_log . should_be_a File_Format.Plain_Text_Data
Test.specify "should detect CSV files" <|
File_Format.Auto . materialise (enso_project.data / "data.csv") . should_equal (File_Format.Delimited_Data ",")
Test.group "File_Format.Auto" <|
Test.group "Auto_Detect" <|
Test.specify "should raise an error when reading an unknown file" <|
bytes = sample_xxx.read
bytes.should_fail_with Unsupported_File_Type_Data
@ -34,31 +19,31 @@ spec =
content = sample_txt.read
content.should_equal "Hello World!"
Test.group "File_Format.Bytes" <|
Test.group "Bytes" <|
Test.specify "should be able to read a file as Bytes" <|
bytes = sample_xxx.read File_Format.Bytes
bytes = sample_xxx.read Bytes
bytes.should_equal [72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33]
Test.specify "should be able to read a file as Bytes by method" <|
bytes = File.read (sample_xxx) File_Format.Bytes
bytes = File.read (sample_xxx) Bytes
bytes.should_equal [72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33]
Test.specify "should be able to read a file as Bytes by path" <|
path = sample_xxx.path
bytes = File.read path File_Format.Bytes
bytes = File.read path Bytes
bytes.should_equal [72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33]
Test.group "File_Format.Plain_Text" <|
Test.group "Plain_Text" <|
Test.specify "should be able to read a file as Text" <|
text = sample_xxx.read File_Format.Plain_Text_Data
text = sample_xxx.read Plain_Text
text.should_equal "Hello World!"
Test.specify "should be able to read a file as Text with Encoding" <|
text = windows_log.read (File_Format.Plain_Text_Data Encoding.windows_1252)
text = windows_log.read (Plain_Text Encoding.windows_1252)
text.should_equal "Hello World! $¢¤¥"
Test.specify "should raise a warning when invalid encoding in a Text file" <|
action = windows_log.read (File_Format.Plain_Text_Data Encoding.ascii) on_problems=_
action = windows_log.read (Plain_Text Encoding.ascii) on_problems=_
tester result = result . should_equal 'Hello World! $\uFFFD\uFFFD\uFFFD'
problems = [Encoding_Error_Data "Encoding issues at 14, 15, 16."]
Problems.test_problem_handling action problems tester

View File

@ -1,7 +1,7 @@
from Standard.Base import all
import Standard.Table
from Standard.Table import Column, File_Format, Data_Formatter_Data
from Standard.Table import Column, Delimited, Data_Formatter_Data
import Standard.Table.Data.Storage
import Standard.Test
@ -40,14 +40,14 @@ spec =
Test.specify "should serialise back to input" <|
expected_text = normalize_lines <|
(enso_project.data / "prime_ministers.csv").read_text
delimited = Text.from expected format=(File_Format.Delimited_Data "," line_endings=Line_Ending_Style.Unix)
delimited = Text.from expected format=(Delimited "," line_endings=Line_Ending_Style.Unix)
delimited.should_equal expected_text
Test.specify "should serialise dates with format" <|
test_table = Table.new [c_from]
expected_text = 'From\n04.05.1979\n28.11.1990\n02.05.1997\n27.06.2007\n11.05.2010\n13.07.2016\n24.07.2019\n'
data_formatter = Data_Formatter_Data . with_datetime_formats date_formats=["dd.MM.yyyy"]
delimited = Text.from test_table format=(File_Format.Delimited_Data "," value_formatter=data_formatter line_endings=Line_Ending_Style.Unix)
delimited = Text.from test_table format=(Delimited "," value_formatter=data_formatter line_endings=Line_Ending_Style.Unix)
delimited.should_equal expected_text
main = Test.Suite.run_main spec

View File

@ -2,12 +2,11 @@ from Standard.Base import all
from Standard.Base.Error.Problem_Behavior import Report_Error
import Standard.Table
from Standard.Table import Column, Sort_Column, Sort_Column_Selector
from Standard.Table.Data.Table import Empty_Error
from Standard.Table.Errors as Table_Errors import Invalid_Output_Column_Names_Data, Duplicate_Output_Column_Names_Data, No_Input_Columns_Selected, Missing_Input_Columns_Data
import Standard.Table.Data.Storage
import Standard.Table.Data.Aggregate_Column
from Standard.Table import Column, Sort_Column, Sort_Column_Selector, Aggregate_Column
from Standard.Table.Data.Aggregate_Column import all hiding First, Last
from Standard.Table.Data.Table import Empty_Error
from Standard.Table.Errors import Invalid_Output_Column_Names_Data, Duplicate_Output_Column_Names_Data, No_Input_Columns_Selected, Missing_Input_Columns_Data
import Standard.Table.Data.Storage
from Standard.Table.Data.Column_Selector import By_Name
from Standard.Table.Errors import Floating_Point_Grouping_Data

View File

@ -1,10 +1,8 @@
from Standard.Base import all
import Standard.Table
import Standard.Table.Data.Column
from Standard.Table import Delimited, Column, Data_Formatter
import Standard.Table.Data.Storage
import Standard.Table.IO.File_Format
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
import Standard.Test
@ -40,14 +38,14 @@ spec =
Test.specify "should serialise back to input" <|
expected_text = normalize_lines <|
(enso_project.data / "time_of_day_sample.csv").read_text
delimited = Text.from expected format=(File_Format.Delimited "," line_endings=Line_Ending_Style.Unix)
delimited = Text.from expected format=(Delimited "," line_endings=Line_Ending_Style.Unix)
delimited.should_equal expected_text
Test.specify "should serialise dates with format" <|
test_table = Table.new [c_time]
expected_text = 'Posting time\n09-00-00\n14-00-12\n09-00-00\n17-30-00\n09-00-04\n15-30-00\n'
data_formatter = Data_Formatter . with_datetime_formats time_formats=["HH-mm-ss"]
delimited = Text.from test_table format=(File_Format.Delimited "," value_formatter=data_formatter line_endings=Line_Ending_Style.Unix)
delimited = Text.from test_table format=(Delimited "," value_formatter=data_formatter line_endings=Line_Ending_Style.Unix)
delimited.should_equal expected_text
main = Test.Suite.run_main spec

View File

@ -1,10 +1,8 @@
from Standard.Base import all
import Standard.Table
import Standard.Table.Data.Column
from Standard.Table import Delimited, Column, Data_Formatter
import Standard.Table.Data.Storage
import Standard.Table.IO.File_Format
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
import Standard.Test
@ -39,14 +37,14 @@ spec =
Test.specify "should serialise back to input" <|
expected_text = normalize_lines <|
(enso_project.data / "datetime_sample.csv").read_text
delimited = Text.from expected format=(File_Format.Delimited "," line_endings=Line_Ending_Style.Unix)
delimited = Text.from expected format=(Delimited "," line_endings=Line_Ending_Style.Unix)
delimited.should_equal expected_text
Test.specify "should serialise dates with format" <|
test_table = Table.new [c_date]
expected_text = 'Posting date\n05.01.2015 09-00\n05.01.2015 14-00\n06.01.2015 09-00\n07.01.2015 17-30\n05.01.2011 09-00\n09.01.2011 15-30\n'
data_formatter = Data_Formatter . with_datetime_formats datetime_formats=["dd.MM.yyyy HH-mm"]
delimited = Text.from test_table format=(File_Format.Delimited "," value_formatter=data_formatter line_endings=Line_Ending_Style.Unix)
delimited = Text.from test_table format=(Delimited "," value_formatter=data_formatter line_endings=Line_Ending_Style.Unix)
delimited.should_equal expected_text
main = Test.Suite.run_main spec

View File

@ -4,7 +4,7 @@ from Standard.Base.System.File import File_Already_Exists_Error
from Standard.Base.Error.Problem_Behavior import all
import Standard.Base.System.Platform
import Standard.Base.System.Process
from Standard.Base.System.File.File_Permissions as File_Permissions_Module import all
from Standard.Base.System.File.File_Permissions import all
from Standard.Base.System.Process.Exit_Code import Exit_Success
import Standard.Test

View File

@ -1,6 +1,6 @@
from Standard.Base import all
import Standard.Table.Data.Table
from Standard.Table import Table
import Standard.Visualization.Geo_Map

View File

@ -1,6 +1,6 @@
from Standard.Base import all
import Standard.Table.Data.Column
from Standard.Table import Column
import Standard.Test

View File

@ -1,6 +1,6 @@
from Standard.Base import all
import Standard.Table.Data.Table
from Standard.Table import Table
import Standard.Visualization.Helpers

View File

@ -1,7 +1,6 @@
from Standard.Base import all
import Standard.Table.Data.Column
import Standard.Table.Data.Table
from Standard.Table import Table, Column
import Standard.Visualization.Histogram

View File

@ -1,7 +1,6 @@
from Standard.Base import all
import Standard.Table.Data.Column
import Standard.Table.Data.Table
from Standard.Table import Table, Column
import Standard.Visualization.Scatter_Plot

View File

@ -1,7 +1,6 @@
from Standard.Base import all
import Standard.Table.Data.Table as Dataframe_Table
from Standard.Table.Data.Aggregate_Column import Group_By, Average
from Standard.Table import Table, Aggregate_Column
from Standard.Database import all
import Standard.Database.Data.Table as Database_Table
@ -19,7 +18,7 @@ type Foo
to_json self = Json.from_pairs [["x", self.x]]
visualization_spec connection =
t = connection.upload_table "T" <| Dataframe_Table.new [["A", ['a', 'a', 'a']], ["B", [2, 2, 3]], ["C", [3, 5, 6]]]
t = connection.upload_table "T" <| Table.new [["A", ['a', 'a', 'a']], ["B", [2, 2, 3]], ["C", [3, 5, 6]]]
make_json header data all_rows ixes_header ixes =
p_header = ["header", header]
@ -52,12 +51,12 @@ visualization_spec connection =
json = make_json header=["A"] data=[['a', 'a']] all_rows=3 ixes_header=[] ixes=[]
vis . should_equal json
g = t.aggregate [Group_By "A", Group_By "B", Average "C"] . at "Average C"
g = t.aggregate [Aggregate_Column.Group_By "A", Aggregate_Column.Group_By "B", Aggregate_Column.Average "C"] . at "Average C"
vis2 = Visualization.prepare_visualization g 1
json2 = make_json header=["Average C"] data=[[4.0]] all_rows=2 ixes_header=[] ixes=[]
vis2 . should_equal json2
t2 = Dataframe_Table.new [["A", [1, 2, 3]], ["B", [4, 5, 6]], ["C", [7, 8, 9]]]
t2 = Table.new [["A", [1, 2, 3]], ["B", [4, 5, 6]], ["C", [7, 8, 9]]]
Test.specify "should visualize dataframe tables" <|
vis = Visualization.prepare_visualization t2 1
json = make_json header=["A", "B", "C"] data=[[1], [4], [7]] all_rows=3 ixes_header=[""] ixes=[[0]]