Basic changes to File_Format (#3516)

Implements https://www.pivotaltracker.com/story/show/182308987
This commit is contained in:
Radosław Waśko 2022-06-08 11:53:18 +02:00 committed by GitHub
parent b1db359f19
commit 2af970fe52
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 169 additions and 81 deletions

View File

@ -134,6 +134,8 @@
- [Added rank data, correlation and covariance statistics for `Vector`][3484] - [Added rank data, correlation and covariance statistics for `Vector`][3484]
- [Implemented `Table.order_by` for the SQLite backend.][3502] - [Implemented `Table.order_by` for the SQLite backend.][3502]
- [Implemented `Table.order_by` for the PostgreSQL backend.][3514] - [Implemented `Table.order_by` for the PostgreSQL backend.][3514]
- [Renamed `File_Format.Text` to `Plain_Text`, updated `File_Format.Delimited`
API and added builders for customizing less common settings.][3516]
[debug-shortcuts]: [debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -210,6 +212,7 @@
[3484]: https://github.com/enso-org/enso/pull/3484 [3484]: https://github.com/enso-org/enso/pull/3484
[3502]: https://github.com/enso-org/enso/pull/3502 [3502]: https://github.com/enso-org/enso/pull/3502
[3514]: https://github.com/enso-org/enso/pull/3514 [3514]: https://github.com/enso-org/enso/pull/3514
[3516]: https://github.com/enso-org/enso/pull/3516
#### Enso Compiler #### Enso Compiler

View File

@ -414,6 +414,12 @@ type Locale
if this.variant.is_nothing.not then b.append ["variant", this.variant] if this.variant.is_nothing.not then b.append ["variant", this.variant]
Json.from_pairs b.to_vector Json.from_pairs b.to_vector
## Compares two locales for equality.

   Arguments:
   - other: The value to compare this locale against.

   Two locales are considered equal when their underlying Java locales are
   equal according to the Java `equals` method. Any value that is not a
   `Locale` compares as `False`.
== : Any -> Boolean
== other = case other of
    Locale other_java_locale -> this.java_locale.equals other_java_locale
    _ -> False
## PRIVATE ## PRIVATE
Convert a java locale to an Enso locale. Convert a java locale to an Enso locale.

View File

@ -235,7 +235,7 @@ make_order_descriptor internal_column sort_direction text_ordering =
IR.Order_Descriptor internal_column.expression sort_direction nulls_order=nulls collation=Nothing IR.Order_Descriptor internal_column.expression sort_direction nulls_order=nulls collation=Nothing
True -> True ->
IR.Order_Descriptor internal_column.expression sort_direction nulls_order=nulls collation="ucs_basic" IR.Order_Descriptor internal_column.expression sort_direction nulls_order=nulls collation="ucs_basic"
Case_Insensitive locale -> case Locale.default.java_locale.equals locale.java_locale of Case_Insensitive locale -> case locale == Locale.default of
False -> False ->
Error.throw (Unsupported_Database_Operation_Error "Case insensitive ordering with custom locale is currently not supported. You may need to materialize the Table to perform this operation.") Error.throw (Unsupported_Database_Operation_Error "Case insensitive ordering with custom locale is currently not supported. You may need to materialize the Table to perform this operation.")
True -> True ->

View File

@ -58,7 +58,7 @@ type Sqlite_Dialect
IR.Order_Descriptor internal_column.expression sort_direction collation=Nothing IR.Order_Descriptor internal_column.expression sort_direction collation=Nothing
True -> True ->
IR.Order_Descriptor internal_column.expression sort_direction collation="BINARY" IR.Order_Descriptor internal_column.expression sort_direction collation="BINARY"
Case_Insensitive locale -> case Locale.default.java_locale.equals locale.java_locale of Case_Insensitive locale -> case locale == Locale.default of
False -> False ->
Error.throw (Unsupported_Database_Operation_Error "Case insensitive ordering with custom locale is not supported by the SQLite backend. You may need to materialize the Table to perform this operation.") Error.throw (Unsupported_Database_Operation_Error "Case insensitive ordering with custom locale is not supported by the SQLite backend. You may need to materialize the Table to perform this operation.")
True -> True ->

View File

@ -154,3 +154,10 @@ type Leading_Zeros column:Text (datatype:(Integer|Number|Date|Time|Time_Of_Day|B
a parse is attempted anyway. If mixed types are requested, the column is not a parse is attempted anyway. If mixed types are requested, the column is not
parsed due to ambiguity. parsed due to ambiguity.
type Duplicate_Type_Selector column:Text ambiguous:Boolean type Duplicate_Type_Selector column:Text ambiguous:Boolean
## Indicates that the given file type is not supported by the `Auto` format.

   Arguments:
   - filename: The name of the file whose type is not supported. It is
     concatenated into the display text, so it is presumably a `Text`.
type Unsupported_File_Type filename

## Builds a human-readable description of the error that includes the name
   of the offending file.
Unsupported_File_Type.to_display_text : Text
Unsupported_File_Type.to_display_text =
    "The "+this.filename+" has a type that is not supported by the Auto format."

View File

@ -94,10 +94,6 @@ read_from_reader format java_reader on_problems max_columns=4096 =
True -> DelimitedReader.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS True -> DelimitedReader.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS
Infer -> DelimitedReader.HeaderBehavior.INFER Infer -> DelimitedReader.HeaderBehavior.INFER
False -> DelimitedReader.HeaderBehavior.GENERATE_HEADERS False -> DelimitedReader.HeaderBehavior.GENERATE_HEADERS
skip_rows = case format.skip_rows of
Nothing -> 0
Integer -> format.skip_rows
_ -> Error.throw (Illegal_Argument_Error "`skip_rows` should be Integer or Nothing.")
row_limit = case format.row_limit of row_limit = case format.row_limit of
Nothing -> -1 Nothing -> -1
Integer -> format.row_limit Integer -> format.row_limit
@ -127,7 +123,7 @@ read_from_reader format java_reader on_problems max_columns=4096 =
cell_type_guesser = if format.headers != Infer then Nothing else cell_type_guesser = if format.headers != Infer then Nothing else
formatter = format.value_formatter.if_nothing Data_Formatter formatter = format.value_formatter.if_nothing Data_Formatter
TypeInferringParser.new formatter.get_specific_type_parsers.to_array IdentityParser.new TypeInferringParser.new formatter.get_specific_type_parsers.to_array IdentityParser.new
reader = DelimitedReader.new java_reader format.delimiter format.quote format.quote_escape java_headers skip_rows row_limit max_columns value_parser cell_type_guesser format.keep_invalid_rows warnings_as_errors reader = DelimitedReader.new java_reader format.delimiter format.quote format.quote_escape java_headers format.skip_rows row_limit max_columns value_parser cell_type_guesser format.keep_invalid_rows warnings_as_errors
result_with_problems = reader.read result_with_problems = reader.read
parsing_problems = Vector.Vector (result_with_problems.problems) . map here.translate_reader_problem parsing_problems = Vector.Vector (result_with_problems.problems) . map here.translate_reader_problem
on_problems.attach_problems_after (Table.Table result_with_problems.value) parsing_problems on_problems.attach_problems_after (Table.Table result_with_problems.value) parsing_problems

View File

@ -20,11 +20,11 @@ type Excel_Section
## Gets the data from a specific sheet. Column names are the Excel column ## Gets the data from a specific sheet. Column names are the Excel column
names. names.
type Sheet (sheet:(Integer|Text)) (skip_rows:(Integer|Nothing)=Nothing) (row_limit:(Integer|Nothing)=Nothing) type Sheet (sheet:(Integer|Text)) (skip_rows:Integer=0) (row_limit:(Integer|Nothing)=Nothing)
## Gets a specific range (taking either a defined name or external style ## Gets a specific range (taking either a defined name or external style
address) from the workbook. address) from the workbook.
type Range (address:(Text|Excel_Range)) (skip_rows:(Integer|Nothing)=Nothing) (row_limit:(Integer|Nothing)=Nothing) type Range (address:(Text|Excel_Range)) (skip_rows:Integer=0) (row_limit:(Integer|Nothing)=Nothing)
type Excel_Range type Excel_Range
## Specifies a range within an Excel Workbook. ## Specifies a range within an Excel Workbook.

View File

@ -6,9 +6,11 @@ from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Prob
from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding
import Standard.Base.Runtime.Ref import Standard.Base.Runtime.Ref
import Standard.Table.Internal.Delimited_Reader import Standard.Table.Internal.Delimited_Reader
from Standard.Table.Error as Table_Errors import Unsupported_File_Type
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
import Standard.Table.Io.Excel as Excel_Module import Standard.Table.Io.Excel as Excel_Module
import Standard.Table.Io.Quote_Style
## This type needs to be here to allow for the usage of Standard.Table ## This type needs to be here to allow for the usage of Standard.Table
functions. Ideally, it would be an interface within Standard.Base and functions. Ideally, it would be an interface within Standard.Base and
@ -24,9 +26,9 @@ type Auto
materialise file = materialise file =
extension = file.extension extension = file.extension
output = Ref.new File_Format.Bytes output = Ref.new Nothing
if ".txt".equals_ignore_case extension then output.put File_Format.Text if ".txt".equals_ignore_case extension then output.put File_Format.Plain_Text
if ".log".equals_ignore_case extension then output.put File_Format.Text if ".log".equals_ignore_case extension then output.put File_Format.Plain_Text
if ".csv".equals_ignore_case extension then output.put (File_Format.Delimited ',') if ".csv".equals_ignore_case extension then output.put (File_Format.Delimited ',')
if ".tsv".equals_ignore_case extension then output.put (File_Format.Delimited '\t') if ".tsv".equals_ignore_case extension then output.put (File_Format.Delimited '\t')
if ".xlsx".equals_ignore_case extension then output.put File_Format.Excel if ".xlsx".equals_ignore_case extension then output.put File_Format.Excel
@ -34,7 +36,8 @@ type Auto
if ".xls".equals_ignore_case extension then output.put File_Format.Excel if ".xls".equals_ignore_case extension then output.put File_Format.Excel
if ".xlt".equals_ignore_case extension then output.put File_Format.Excel if ".xlt".equals_ignore_case extension then output.put File_Format.Excel
output.get output.get.if_nothing <|
Error.throw (Unsupported_File_Type file.name)
## Implements the `File.read` for this `File_Format` ## Implements the `File.read` for this `File_Format`
read : File -> Problem_Behavior -> Any read : File -> Problem_Behavior -> Any
@ -52,8 +55,8 @@ type Bytes
file.read_bytes file.read_bytes
## Reads the file to a `Text` with specified encoding. ## Reads the file to a `Text` with specified encoding.
type Text type Plain_Text
type Text (encoding:Encoding=Encoding.utf_8) type Plain_Text (encoding:Encoding=Encoding.utf_8)
## Implements the `File.read` for this `File_Format` ## Implements the `File.read` for this `File_Format`
read : File -> Problem_Behavior -> Any read : File -> Problem_Behavior -> Any
@ -72,6 +75,9 @@ type Delimited
- delimiter: The delimiter character to split the file into columns. An - delimiter: The delimiter character to split the file into columns. An
`Illegal_Argument_Error` error is returned if this is an empty string. `Illegal_Argument_Error` error is returned if this is an empty string.
- encoding: The encoding to use when reading the file. - encoding: The encoding to use when reading the file.
- skip_rows: The number of rows to skip from the top of the file.
- row_limit: The maximum number of rows to read from the file. This count
does not include the header row (if applicable).
- quote: The quote character denotes the start and end of a quoted value. - quote: The quote character denotes the start and end of a quoted value.
No quote character is used if set to `Nothing`. Quoted items are not No quote character is used if set to `Nothing`. Quoted items are not
split on the delimiter and can also contain newlines. Within a quoted split on the delimiter and can also contain newlines. Within a quoted
@ -83,27 +89,58 @@ type Delimited
then escaping quotes is done by double quotes: `"ab""cd"` will yield then escaping quotes is done by double quotes: `"ab""cd"` will yield
the text `ab"cd"`. Another popular choice for `quote_escape` is the `\` the text `ab"cd"`. Another popular choice for `quote_escape` is the `\`
character. Then `"ab\"cd"` will yield the same text. character. Then `"ab\"cd"` will yield the same text.
- quote_style: The style of quoting to use when writing the file.
- headers: If set to `True`, the first row is used as column names. If - headers: If set to `True`, the first row is used as column names. If
set to `False`, the column names are generated by adding increasing set to `False`, the column names are generated by adding increasing
numeric suffixes to the base name `Column` (i.e. `Column_1`, numeric suffixes to the base name `Column` (i.e. `Column_1`,
`Column_2` etc.). If set to `Infer`, the process tries to infer if `Column_2` etc.). If set to `Infer`, the process tries to infer if
headers are present on the first row. If the column names are not headers are present on the first row. If the column names are not
unique, numeric suffixes will be appended to disambiguate them. unique, numeric suffixes will be appended to disambiguate them.
- skip_rows: The number of rows to skip from the top of the file.
- row_limit: The maximum number of rows to read from the file. This count
does not include the header row (if applicable).
- value_formatter: Formatter to parse text values into numbers, dates, - value_formatter: Formatter to parse text values into numbers, dates,
times, etc. If `Nothing` values are left as Text. times, etc. If `Nothing` values are left as Text.
- keep_invalid_rows: Specifies whether rows that contain less or more - keep_invalid_rows: Specifies whether rows that contain less or more
columns than expected should be kept (setting the missing columns to columns than expected should be kept (setting the missing columns to
`Nothing` or dropping the excess columns) or dropped. `Nothing` or dropping the excess columns) or dropped.
type Delimited (delimiter:Text) (encoding:Encoding=Encoding.utf_8) (quote:Text|Nothing='"') (quote_escape:Text|Nothing='"') (headers:True|False|Infer=Infer) (skip_rows:Integer|Nothing=Nothing) (row_limit:Integer|Nothing=Nothing) (value_formatter:Data_Formatter|Nothing=Data_Formatter) (keep_invalid_rows:Boolean=True) type Delimited (delimiter:Text) (encoding:Encoding=Encoding.utf_8) (skip_rows:Integer=0) (row_limit:Integer|Nothing=Nothing) (quote:Text|Nothing='"') (quote_escape:Text|Nothing='"') (quote_style:Quote_Style=Quote_Style.Necessary) (headers:True|False|Infer=Infer) (value_formatter:Data_Formatter|Nothing=Data_Formatter) (keep_invalid_rows:Boolean=True)
## Implements the `File.read` for this `File_Format` ## Implements the `File.read` for this `File_Format`
read : File -> Problem_Behavior -> Any read : File -> Problem_Behavior -> Any
read file on_problems = read file on_problems =
Delimited_Reader.read_file this file on_problems Delimited_Reader.read_file this file on_problems
## PRIVATE
   Clone the instance with some properties overridden.

   Arguments:
   - quote: The quote character, or `Nothing` for no quoting.
   - quote_escape: The character used to escape `quote` inside a quoted
     value.
   - quote_style: The style of quoting to use when writing the file.
   - headers: `True`, `False` or `Infer`, as described on `Delimited`.
   - value_formatter: The formatter used to parse values, or `Nothing` to
     leave values as text.
   - keep_invalid_rows: Whether rows with an unexpected number of columns
     are kept.

   Every argument defaults to the value currently held by this instance.
   The `delimiter`, `encoding`, `skip_rows` and `row_limit` settings cannot
   be overridden here and are always copied unchanged.

   Note: This function is internal until such time as Atom cloning with
   modification is built into Enso.
clone : (Text|Nothing)->(Text|Nothing)->Quote_Style->(Boolean|Infer)->(Data_Formatter|Nothing)->Boolean->Delimited
clone (quote=this.quote) (quote_escape=this.quote_escape) (quote_style=this.quote_style) (headers=this.headers) (value_formatter=this.value_formatter) (keep_invalid_rows=this.keep_invalid_rows) =
    Delimited this.delimiter this.encoding this.skip_rows this.row_limit quote quote_escape quote_style headers value_formatter keep_invalid_rows
## Create a clone of this with specified `quote` and `quote_escape`.

   Arguments:
   - quote: The quote character to use in the clone.
   - quote_escape: The character used to escape the quote character inside
     a quoted value. Defaults to the value passed as `quote`.
   - quote_style: The style of quoting to use when writing the file.
     Defaults to the style already set on this instance.
with_quotes : Text->Text->Quote_Style->Delimited
with_quotes quote quote_escape=quote quote_style=this.quote_style =
    this.clone quote=quote quote_escape=quote_escape quote_style=quote_style
## Create a clone of this with the first row treated as a header.

   All other settings are copied unchanged; only `headers` is set to `True`.
with_headers : Delimited
with_headers = this.clone headers=True
## Create a clone of this where the first row is treated as data, not a
   header.

   All other settings are copied unchanged; only `headers` is set to
   `False`.
without_headers : Delimited
without_headers = this.clone headers=False
## Create a clone of this with value parsing.

   A custom `Data_Formatter` can be provided to customize parser options.

   Arguments:
   - value_formatter: The formatter used to parse text values. Defaults to
     `Data_Formatter`.
with_parsing : Data_Formatter -> Delimited
with_parsing (value_formatter=Data_Formatter) =
    this.clone value_formatter=value_formatter
## Create a clone of this without value parsing.

   The clone has its `value_formatter` set to `Nothing`; all other settings
   are copied unchanged.
without_parsing : Delimited
without_parsing =
    this.clone value_formatter=Nothing
## A setting to infer the default behaviour of some option. ## A setting to infer the default behaviour of some option.
type Infer type Infer

View File

@ -0,0 +1,9 @@
## Specifies which values are wrapped in quotes; each option is described
   on its constructor below.
type Quote_Style
    ## Do not quote any values even if this will result in an invalid file.
    type Never

    ## Quote text values which are empty or contain the delimiter or new lines.
    type Necessary

    ## Quote all text values.
    type Always

View File

@ -352,7 +352,7 @@ public class Reader {
public static Table readSheetByName( public static Table readSheetByName(
InputStream stream, InputStream stream,
String sheetName, String sheetName,
Integer skip_rows, int skip_rows,
Integer row_limit, Integer row_limit,
boolean xls_format) boolean xls_format)
throws IOException, IllegalArgumentException { throws IOException, IllegalArgumentException {
@ -367,7 +367,7 @@ public class Reader {
workbook, workbook,
sheetIndex, sheetIndex,
null, null,
skip_rows == null ? 0 : skip_rows, skip_rows,
row_limit == null ? Integer.MAX_VALUE : row_limit); row_limit == null ? Integer.MAX_VALUE : row_limit);
} }
@ -383,7 +383,7 @@ public class Reader {
* @throws IOException when the input stream cannot be read. * @throws IOException when the input stream cannot be read.
*/ */
public static Table readSheetByIndex( public static Table readSheetByIndex(
InputStream stream, int index, Integer skip_rows, Integer row_limit, boolean xls_format) InputStream stream, int index, int skip_rows, Integer row_limit, boolean xls_format)
throws IOException, IllegalArgumentException { throws IOException, IllegalArgumentException {
Workbook workbook = getWorkbook(stream, xls_format); Workbook workbook = getWorkbook(stream, xls_format);
@ -397,7 +397,7 @@ public class Reader {
workbook, workbook,
index - 1, index - 1,
null, null,
skip_rows == null ? 0 : skip_rows, skip_rows,
row_limit == null ? Integer.MAX_VALUE : row_limit); row_limit == null ? Integer.MAX_VALUE : row_limit);
} }
@ -415,7 +415,7 @@ public class Reader {
public static Table readRangeByName( public static Table readRangeByName(
InputStream stream, InputStream stream,
String rangeNameOrAddress, String rangeNameOrAddress,
Integer skip_rows, int skip_rows,
Integer row_limit, Integer row_limit,
boolean xls_format) boolean xls_format)
throws IOException { throws IOException {
@ -438,7 +438,7 @@ public class Reader {
* @throws IOException when the input stream cannot be read. * @throws IOException when the input stream cannot be read.
*/ */
public static Table readRange( public static Table readRange(
InputStream stream, Range range, Integer skip_rows, Integer row_limit, boolean xls_format) InputStream stream, Range range, int skip_rows, Integer row_limit, boolean xls_format)
throws IOException { throws IOException {
return readRange(getWorkbook(stream, xls_format), range, skip_rows, row_limit); return readRange(getWorkbook(stream, xls_format), range, skip_rows, row_limit);
} }
@ -448,7 +448,7 @@ public class Reader {
} }
private static Table readRange( private static Table readRange(
Workbook workbook, Range range, Integer skip_rows, Integer row_limit) { Workbook workbook, Range range, int skip_rows, Integer row_limit) {
int sheetIndex = getSheetIndex(workbook, range.getSheetName()); int sheetIndex = getSheetIndex(workbook, range.getSheetName());
if (sheetIndex == -1) { if (sheetIndex == -1) {
throw new IllegalArgumentException("Unknown sheet '" + range.getSheetName() + "'."); throw new IllegalArgumentException("Unknown sheet '" + range.getSheetName() + "'.");
@ -458,7 +458,7 @@ public class Reader {
workbook, workbook,
sheetIndex, sheetIndex,
range, range,
skip_rows == null ? 0 : skip_rows, skip_rows,
row_limit == null ? Integer.MAX_VALUE : row_limit); row_limit == null ? Integer.MAX_VALUE : row_limit);
} }
} }

View File

@ -7,8 +7,9 @@ import Standard.Table.Data.Column
from Standard.Table.Error import all from Standard.Table.Error import all
import Standard.Table.Io.File_Read import Standard.Table.Io.File_Read
import Standard.Table.Io.File_Format from Standard.Table.Io.File_Format import Delimited
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
import Standard.Table.Io.Quote_Style
import Standard.Test import Standard.Test
import Standard.Test.Problems import Standard.Test.Problems
@ -22,7 +23,7 @@ spec =
c_2 = ["b", ['2', Nothing, '8', '11']] c_2 = ["b", ['2', Nothing, '8', '11']]
c_3 = ["c", [Nothing, '6', '9', '12']] c_3 = ["c", [Nothing, '6', '9', '12']]
expected_table = Table.new [c_1, c_2, c_3] expected_table = Table.new [c_1, c_2, c_3]
simple_empty = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=True value_formatter=Nothing) simple_empty = File.read (Enso_Project.data / "simple_empty.csv") (Delimited "," headers=True value_formatter=Nothing)
simple_empty.should_equal expected_table simple_empty.should_equal expected_table
Test.specify "should load a simple table without headers" <| Test.specify "should load a simple table without headers" <|
@ -30,11 +31,11 @@ spec =
c_2 = ["Column_2", ['b', '2', Nothing, '8', '11']] c_2 = ["Column_2", ['b', '2', Nothing, '8', '11']]
c_3 = ["Column_3", ['c', Nothing, '6', '9', '12']] c_3 = ["Column_3", ['c', Nothing, '6', '9', '12']]
expected_table = Table.new [c_1, c_2, c_3] expected_table = Table.new [c_1, c_2, c_3]
simple_empty = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=False value_formatter=Nothing) simple_empty = File.read (Enso_Project.data / "simple_empty.csv") (Delimited "," headers=False value_formatter=Nothing)
simple_empty.should_equal expected_table simple_empty.should_equal expected_table
Test.specify "should work in presence of missing headers" <| Test.specify "should work in presence of missing headers" <|
action on_problems = File.read (Enso_Project.data / "missing_header.csv") (File_Format.Delimited "," headers=True value_formatter=Nothing) on_problems action on_problems = File.read (Enso_Project.data / "missing_header.csv") (Delimited "," headers=True value_formatter=Nothing) on_problems
tester table = tester table =
table.columns.map .name . should_equal ["a", "Column_1", "c", "Column_2", "d"] table.columns.map .name . should_equal ["a", "Column_1", "c", "Column_2", "d"]
table.at "a" . to_vector . should_equal ["1"] table.at "a" . to_vector . should_equal ["1"]
@ -46,61 +47,61 @@ spec =
Problems.test_problem_handling action problems tester Problems.test_problem_handling action problems tester
Test.specify "should infer headers based on the first two rows" <| Test.specify "should infer headers based on the first two rows" <|
t1 = File.read (Enso_Project.data / "data_small.csv") (File_Format.Delimited "," headers=File_Format.Infer) t1 = File.read (Enso_Project.data / "data_small.csv") (Delimited "," headers=File_Format.Infer)
t1.columns.map .name . should_equal ["Code", "Index", "Flag", "Value", "ValueWithNothing", "TextWithNothing", "Hexadecimal", "Leading0s", "QuotedNumbers", "Mixed Types"] t1.columns.map .name . should_equal ["Code", "Index", "Flag", "Value", "ValueWithNothing", "TextWithNothing", "Hexadecimal", "Leading0s", "QuotedNumbers", "Mixed Types"]
t2 = File.read (Enso_Project.data / "all_text.csv") (File_Format.Delimited "," headers=File_Format.Infer) t2 = File.read (Enso_Project.data / "all_text.csv") (Delimited "," headers=File_Format.Infer)
t2.columns.map .name . should_equal ["Column_1", "Column_2"] t2.columns.map .name . should_equal ["Column_1", "Column_2"]
t2.at "Column_1" . to_vector . should_equal ["a", "c", "e", "g"] t2.at "Column_1" . to_vector . should_equal ["a", "c", "e", "g"]
t2.at "Column_2" . to_vector . should_equal ["b", "d", "f", "h"] t2.at "Column_2" . to_vector . should_equal ["b", "d", "f", "h"]
t3 = File.read (Enso_Project.data / "two_rows1.csv") (File_Format.Delimited "," headers=File_Format.Infer) t3 = File.read (Enso_Project.data / "two_rows1.csv") (Delimited "," headers=File_Format.Infer)
t3.columns.map .name . should_equal ["a", "b", "c"] t3.columns.map .name . should_equal ["a", "b", "c"]
t3.at "a" . to_vector . should_equal ["x"] t3.at "a" . to_vector . should_equal ["x"]
t3.at "b" . to_vector . should_equal [Nothing] t3.at "b" . to_vector . should_equal [Nothing]
t3.at "c" . to_vector . should_equal [Nothing] t3.at "c" . to_vector . should_equal [Nothing]
t4 = File.read (Enso_Project.data / "two_rows2.csv") (File_Format.Delimited "," headers=File_Format.Infer) t4 = File.read (Enso_Project.data / "two_rows2.csv") (Delimited "," headers=File_Format.Infer)
t4.columns.map .name . should_equal ["Column_1", "Column_2", "Column_3"] t4.columns.map .name . should_equal ["Column_1", "Column_2", "Column_3"]
t4.at "Column_1" . to_vector . should_equal ["a", "d"] t4.at "Column_1" . to_vector . should_equal ["a", "d"]
t4.at "Column_2" . to_vector . should_equal ["b", "e"] t4.at "Column_2" . to_vector . should_equal ["b", "e"]
t4.at "Column_3" . to_vector . should_equal ["c", "f"] t4.at "Column_3" . to_vector . should_equal ["c", "f"]
t5 = File.read (Enso_Project.data / "numbers_in_header.csv") (File_Format.Delimited "," headers=File_Format.Infer) t5 = File.read (Enso_Project.data / "numbers_in_header.csv") (Delimited "," headers=File_Format.Infer)
t5.columns.map .name . should_equal ["Column_1", "Column_2", "Column_3"] t5.columns.map .name . should_equal ["Column_1", "Column_2", "Column_3"]
t5.at "Column_1" . to_vector . should_equal ["a", "1"] t5.at "Column_1" . to_vector . should_equal ["a", "1"]
t5.at "Column_2" . to_vector . should_equal ["b", "2"] t5.at "Column_2" . to_vector . should_equal ["b", "2"]
t5.at "Column_3" . to_vector . should_equal [0, 3] t5.at "Column_3" . to_vector . should_equal [0, 3]
t6 = File.read (Enso_Project.data / "quoted_numbers_in_header.csv") (File_Format.Delimited "," headers=File_Format.Infer) t6 = File.read (Enso_Project.data / "quoted_numbers_in_header.csv") (Delimited "," headers=File_Format.Infer)
t6.columns.map .name . should_equal ["1", "x"] t6.columns.map .name . should_equal ["1", "x"]
t6.at "1" . to_vector . should_equal ["y"] t6.at "1" . to_vector . should_equal ["y"]
t6.at "x" . to_vector . should_equal [2] t6.at "x" . to_vector . should_equal [2]
Test.specify "should not use the first row as headers if it is the only row, unless specifically asked to" <| Test.specify "should not use the first row as headers if it is the only row, unless specifically asked to" <|
t1 = File.read (Enso_Project.data / "one_row.csv") (File_Format.Delimited "," headers=File_Format.Infer) t1 = File.read (Enso_Project.data / "one_row.csv") (Delimited "," headers=File_Format.Infer)
t1.columns.map .name . should_equal ["Column_1", "Column_2", "Column_3"] t1.columns.map .name . should_equal ["Column_1", "Column_2", "Column_3"]
t1.at "Column_1" . to_vector . should_equal ["x"] t1.at "Column_1" . to_vector . should_equal ["x"]
t1.at "Column_2" . to_vector . should_equal ["y"] t1.at "Column_2" . to_vector . should_equal ["y"]
t1.at "Column_3" . to_vector . should_equal ["z"] t1.at "Column_3" . to_vector . should_equal ["z"]
t2 = File.read (Enso_Project.data / "one_row.csv") (File_Format.Delimited "," headers=True) t2 = File.read (Enso_Project.data / "one_row.csv") (Delimited "," headers=True)
t2.columns.map .name . should_equal ["x", "y", "z"] t2.columns.map .name . should_equal ["x", "y", "z"]
t2.row_count . should_equal 0 t2.row_count . should_equal 0
t2.at "x" . to_vector . should_equal [] t2.at "x" . to_vector . should_equal []
Test.specify "should be able to load even an empty file" <| Test.specify "should be able to load even an empty file" <|
table = File.read (Enso_Project.data / "empty.txt") (File_Format.Delimited "," headers=True value_formatter=Nothing) table = File.read (Enso_Project.data / "empty.txt") (Delimited "," headers=True value_formatter=Nothing)
table.columns.map .name . should_equal [] table.columns.map .name . should_equal []
table.row_count . should_equal 0 table.row_count . should_equal 0
Test.specify "should correctly handle file opening issues" <| Test.specify "should correctly handle file opening issues" <|
nonexistent_file = Enso_Project.data / "a_filename_that_does_not_exist.foobar" nonexistent_file = Enso_Project.data / "a_filename_that_does_not_exist.foobar"
r1 = File.read nonexistent_file (File_Format.Delimited "," headers=True value_formatter=Nothing) r1 = File.read nonexistent_file (Delimited "," headers=True value_formatter=Nothing)
r1.should_fail_with File.File_Not_Found r1.should_fail_with File.File_Not_Found
directory = Enso_Project.data directory = Enso_Project.data
r2 = File.read directory (File_Format.Delimited "," headers=True value_formatter=Nothing) Problem_Behavior.Report_Error r2 = File.read directory (Delimited "," headers=True value_formatter=Nothing) Problem_Behavior.Report_Error
r2.should_fail_with File.Io_Error r2.should_fail_with File.Io_Error
Test.specify "should work with all kinds of line endings" <| Test.specify "should work with all kinds of line endings" <|
@ -111,7 +112,7 @@ spec =
(path name).write_text text Encoding.utf_8 (path name).write_text text Encoding.utf_8
test_file name = test_file name =
table = File.read (path name) (File_Format.Delimited "," headers=True value_formatter=Nothing) Problem_Behavior.Report_Error table = File.read (path name) (Delimited "," headers=True value_formatter=Nothing) Problem_Behavior.Report_Error
table.columns.map .name . should_equal ['a', 'b', 'c'] table.columns.map .name . should_equal ['a', 'b', 'c']
table.at 'a' . to_vector . should_equal ['d', '1'] table.at 'a' . to_vector . should_equal ['d', '1']
table.at 'b' . to_vector . should_equal ['e', '2'] table.at 'b' . to_vector . should_equal ['e', '2']
@ -126,17 +127,17 @@ spec =
# Currently mixed line endings are not supported. # Currently mixed line endings are not supported.
(path 'mixed.csv').write_text 'a,b,c\nd,e,f\r1,2,3' (path 'mixed.csv').write_text 'a,b,c\nd,e,f\r1,2,3'
File.read (path 'mixed.csv') (File_Format.Delimited "," headers=True value_formatter=Nothing) Problem_Behavior.Report_Error . should_fail_with Invalid_Row File.read (path 'mixed.csv') (Delimited "," headers=True value_formatter=Nothing) Problem_Behavior.Report_Error . should_fail_with Invalid_Row
Test.specify "should work with Windows-1252 encoding" <| Test.specify "should work with Windows-1252 encoding" <|
table = File.read (Enso_Project.data / "windows.csv") (File_Format.Delimited "," headers=True encoding=Encoding.windows_1252) Problem_Behavior.Report_Error table = File.read (Enso_Project.data / "windows.csv") (Delimited "," headers=True encoding=Encoding.windows_1252) Problem_Behavior.Report_Error
table.columns.map .name . should_equal ['a', 'b', 'c'] table.columns.map .name . should_equal ['a', 'b', 'c']
table.at 'a' . to_vector . should_equal ['$¢'] table.at 'a' . to_vector . should_equal ['$¢']
table.at 'b' . to_vector . should_equal ['¤'] table.at 'b' . to_vector . should_equal ['¤']
table.at 'c' . to_vector . should_equal ['¥'] table.at 'c' . to_vector . should_equal ['¥']
Test.specify "should work with UTF-16 encoding" <| Test.specify "should work with UTF-16 encoding" <|
table = File.read (Enso_Project.data / "utf16.csv") (File_Format.Delimited "," headers=True encoding=Encoding.utf_16_be) Problem_Behavior.Report_Error table = File.read (Enso_Project.data / "utf16.csv") (Delimited "," headers=True encoding=Encoding.utf_16_be) Problem_Behavior.Report_Error
table.columns.map .name . should_equal ['ą', '🚀b', 'ć😎'] table.columns.map .name . should_equal ['ą', '🚀b', 'ć😎']
table.at 'ą' . to_vector . should_equal ['ą'] table.at 'ą' . to_vector . should_equal ['ą']
table.at '🚀b' . to_vector . should_equal ['✨🚀🚧😍😃😍😎😙😉☺'] table.at '🚀b' . to_vector . should_equal ['✨🚀🚧😍😃😍😎😙😉☺']
@ -147,7 +148,7 @@ spec =
utf8_bytes = [97, 44, 98, 44, 99, 10, -60, -123, 44, -17, -65, -65, 44, -61, 40, -61, 40, 10] utf8_bytes = [97, 44, 98, 44, 99, 10, -60, -123, 44, -17, -65, -65, 44, -61, 40, -61, 40, 10]
utf8_file.write_bytes utf8_bytes utf8_file.write_bytes utf8_bytes
action_1 on_problems = action_1 on_problems =
utf8_file.read (File_Format.Delimited "," headers=True) on_problems utf8_file.read (Delimited "," headers=True) on_problems
tester_1 table = tester_1 table =
table.columns.map .name . should_equal ['a', 'b', 'c'] table.columns.map .name . should_equal ['a', 'b', 'c']
table.at 'a' . to_vector . should_equal ['ą'] table.at 'a' . to_vector . should_equal ['ą']
@ -157,7 +158,7 @@ spec =
Problems.test_problem_handling action_1 problems_1 tester_1 Problems.test_problem_handling action_1 problems_1 tester_1
action_2 on_problems = action_2 on_problems =
(Enso_Project.data / "utf16_invalid.csv").read (File_Format.Delimited "," headers=True encoding=Encoding.utf_16_be) on_problems (Enso_Project.data / "utf16_invalid.csv").read (Delimited "," headers=True encoding=Encoding.utf_16_be) on_problems
tester_2 table = tester_2 table =
table.columns.map .name . should_equal ['a', 'b', 'c'] table.columns.map .name . should_equal ['a', 'b', 'c']
# This column does not raise a problem - the '\uFFFD' is simply present in the input file. # This column does not raise a problem - the '\uFFFD' is simply present in the input file.
@ -170,7 +171,7 @@ spec =
Test.specify "should handle duplicated columns" <| Test.specify "should handle duplicated columns" <|
action on_problems = File.read (Enso_Project.data / "duplicated_columns.csv") (File_Format.Delimited "," headers=True value_formatter=Nothing) on_problems action on_problems = File.read (Enso_Project.data / "duplicated_columns.csv") (Delimited "," headers=True value_formatter=Nothing) on_problems
tester table = tester table =
table.columns.map .name . should_equal ['a', 'b', 'c', 'a_1'] table.columns.map .name . should_equal ['a', 'b', 'c', 'a_1']
table.at 'a' . to_vector . should_equal ['1'] table.at 'a' . to_vector . should_equal ['1']
@ -179,27 +180,27 @@ spec =
Problems.test_problem_handling action problems tester Problems.test_problem_handling action problems tester
Test.specify "should handle quotes" <| Test.specify "should handle quotes" <|
t1 = File.read (Enso_Project.data / "double_quoted.csv") (File_Format.Delimited "," headers=True value_formatter=Nothing) t1 = File.read (Enso_Project.data / "double_quoted.csv") (Delimited "," headers=True value_formatter=Nothing)
t1.at 'a' . to_vector . should_equal ['a, x', '"a'] t1.at 'a' . to_vector . should_equal ['a, x', '"a']
t1.at 'c' . to_vector . should_equal ['3', '"'] t1.at 'c' . to_vector . should_equal ['3', '"']
t2 = File.read (Enso_Project.data / "escape_quoted.csv") (File_Format.Delimited "," headers=True quote_escape="\" value_formatter=Nothing) t2 = File.read (Enso_Project.data / "escape_quoted.csv") (Delimited "," headers=True quote_escape="\" value_formatter=Nothing)
t2.at 'a' . to_vector . should_equal ['a"b', 'a\\\"z'] t2.at 'a' . to_vector . should_equal ['a"b', 'a\\\"z']
t3 = File.read (Enso_Project.data / "no_quoting.csv") (File_Format.Delimited "," quote=Nothing headers=True value_formatter=Nothing) t3 = File.read (Enso_Project.data / "no_quoting.csv") (Delimited "," quote=Nothing headers=True value_formatter=Nothing)
t3.at 'a' . to_vector . should_equal ['"y'] t3.at 'a' . to_vector . should_equal ['"y']
t3.at 'b' . to_vector . should_equal ['z"'] t3.at 'b' . to_vector . should_equal ['z"']
t3.at 'c' . to_vector . should_equal ['a'] t3.at 'c' . to_vector . should_equal ['a']
Test.specify "should support rows spanning multiple lines if quoted" <| Test.specify "should support rows spanning multiple lines if quoted" <|
t1 = File.read (Enso_Project.data / "multiline_quoted.csv") (File_Format.Delimited "," headers=True value_formatter=Nothing) t1 = File.read (Enso_Project.data / "multiline_quoted.csv") (Delimited "," headers=True value_formatter=Nothing)
t1.at 'a' . to_vector . should_equal ['1', '4'] t1.at 'a' . to_vector . should_equal ['1', '4']
t1.at 'b' . to_vector . should_equal ['start\n\ncontinue', '5'] t1.at 'b' . to_vector . should_equal ['start\n\ncontinue', '5']
t1.at 'c' . to_vector . should_equal ['3', '6'] t1.at 'c' . to_vector . should_equal ['3', '6']
Test.specify "should behave correctly in presence of a mismatched quote" <| Test.specify "should behave correctly in presence of a mismatched quote" <|
action_1 on_problems = action_1 on_problems =
File.read (Enso_Project.data / "mismatched_quote.csv") (File_Format.Delimited "," headers=True value_formatter=Nothing) on_problems File.read (Enso_Project.data / "mismatched_quote.csv") (Delimited "," headers=True value_formatter=Nothing) on_problems
tester_1 table = tester_1 table =
table.columns.map .name . should_equal ['a', 'b', 'c'] table.columns.map .name . should_equal ['a', 'b', 'c']
@ -210,7 +211,7 @@ spec =
Problems.test_problem_handling action_1 problems_1 tester_1 Problems.test_problem_handling action_1 problems_1 tester_1
action_2 on_problems = action_2 on_problems =
File.read (Enso_Project.data / "mismatched_quote2.csv") (File_Format.Delimited "," headers=True value_formatter=Nothing) on_problems File.read (Enso_Project.data / "mismatched_quote2.csv") (Delimited "," headers=True value_formatter=Nothing) on_problems
tester_2 table = tester_2 table =
table.columns.map .name . should_equal ['a', 'b', 'c'] table.columns.map .name . should_equal ['a', 'b', 'c']
@ -222,7 +223,7 @@ spec =
Test.specify "should handle too long and too short rows" <| Test.specify "should handle too long and too short rows" <|
action keep_invalid_rows on_problems = action keep_invalid_rows on_problems =
File.read (Enso_Project.data / "varying_rows.csv") (File_Format.Delimited "," headers=True keep_invalid_rows=keep_invalid_rows value_formatter=Nothing) on_problems File.read (Enso_Project.data / "varying_rows.csv") (Delimited "," headers=True keep_invalid_rows=keep_invalid_rows value_formatter=Nothing) on_problems
tester_kept table = tester_kept table =
table.columns.map .name . should_equal ['a', 'b', 'c'] table.columns.map .name . should_equal ['a', 'b', 'c']
@ -242,7 +243,7 @@ spec =
Test.specify "should aggregate invalid rows over some limit" <| Test.specify "should aggregate invalid rows over some limit" <|
action on_problems = action on_problems =
File.read (Enso_Project.data / "many_invalid_rows.csv") (File_Format.Delimited "," headers=True keep_invalid_rows=False value_formatter=Nothing) on_problems File.read (Enso_Project.data / "many_invalid_rows.csv") (Delimited "," headers=True keep_invalid_rows=False value_formatter=Nothing) on_problems
tester table = tester table =
table.columns.map .name . should_equal ['a', 'b', 'c'] table.columns.map .name . should_equal ['a', 'b', 'c']
@ -253,45 +254,45 @@ spec =
Problems.test_problem_handling action problems tester Problems.test_problem_handling action problems tester
Test.specify "should allow to skip rows" <| Test.specify "should allow to skip rows" <|
t1 = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=False skip_rows=3 value_formatter=Nothing) t1 = File.read (Enso_Project.data / "simple_empty.csv") (Delimited "," headers=False skip_rows=3 value_formatter=Nothing)
t1.at "Column_1" . to_vector . should_equal ['7', '10'] t1.at "Column_1" . to_vector . should_equal ['7', '10']
t2 = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=True skip_rows=3 value_formatter=Nothing) t2 = File.read (Enso_Project.data / "simple_empty.csv") (Delimited "," headers=True skip_rows=3 value_formatter=Nothing)
t2.columns.map .name . should_equal ['7', '8', '9'] t2.columns.map .name . should_equal ['7', '8', '9']
t2.at "7" . to_vector . should_equal ['10'] t2.at "7" . to_vector . should_equal ['10']
Test.specify "should allow to set a limit of rows to read" <| Test.specify "should allow to set a limit of rows to read" <|
t1 = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=False row_limit=2 value_formatter=Nothing) t1 = File.read (Enso_Project.data / "simple_empty.csv") (Delimited "," headers=False row_limit=2 value_formatter=Nothing)
t1.at "Column_1" . to_vector . should_equal ['a', '1'] t1.at "Column_1" . to_vector . should_equal ['a', '1']
t2 = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=True row_limit=2 value_formatter=Nothing) t2 = File.read (Enso_Project.data / "simple_empty.csv") (Delimited "," headers=True row_limit=2 value_formatter=Nothing)
t2.at "a" . to_vector . should_equal ['1', '4'] t2.at "a" . to_vector . should_equal ['1', '4']
t3 = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=False skip_rows=3 row_limit=1 value_formatter=Nothing) t3 = File.read (Enso_Project.data / "simple_empty.csv") (Delimited "," headers=False skip_rows=3 row_limit=1 value_formatter=Nothing)
t3.at "Column_1" . to_vector . should_equal ['7'] t3.at "Column_1" . to_vector . should_equal ['7']
t4 = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=False row_limit=0 value_formatter=Nothing) t4 = File.read (Enso_Project.data / "simple_empty.csv") (Delimited "," headers=False row_limit=0 value_formatter=Nothing)
t4.columns.map .name . should_equal ['Column_1', 'Column_2', 'Column_3'] t4.columns.map .name . should_equal ['Column_1', 'Column_2', 'Column_3']
t4.row_count . should_equal 0 t4.row_count . should_equal 0
t5 = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=True row_limit=0 value_formatter=Nothing) t5 = File.read (Enso_Project.data / "simple_empty.csv") (Delimited "," headers=True row_limit=0 value_formatter=Nothing)
t5.columns.map .name . should_equal ['a', 'b', 'c'] t5.columns.map .name . should_equal ['a', 'b', 'c']
t5.at 'a' . to_vector . should_equal [] t5.at 'a' . to_vector . should_equal []
t5.row_count . should_equal 0 t5.row_count . should_equal 0
t6 = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=False skip_rows=3 row_limit=1000 value_formatter=Nothing) t6 = File.read (Enso_Project.data / "simple_empty.csv") (Delimited "," headers=False skip_rows=3 row_limit=1000 value_formatter=Nothing)
t6.at "Column_1" . to_vector . should_equal ['7', '10'] t6.at "Column_1" . to_vector . should_equal ['7', '10']
Test.specify "should check arguments" <| Test.specify "should check arguments" <|
path = (Enso_Project.data / "simple_empty.csv") path = (Enso_Project.data / "simple_empty.csv")
pb = Problem_Behavior.Report_Error pb = Problem_Behavior.Report_Error
path.read (File_Format.Delimited "," headers=False quote='abc') pb . should_fail_with Illegal_Argument_Error path.read (Delimited "," headers=False quote='abc') pb . should_fail_with Illegal_Argument_Error
path.read (File_Format.Delimited "," headers=False quote='🚧') pb . should_fail_with Illegal_Argument_Error path.read (Delimited "," headers=False quote='🚧') pb . should_fail_with Illegal_Argument_Error
path.read (File_Format.Delimited "," headers=False quote_escape='//') pb . should_fail_with Illegal_Argument_Error path.read (Delimited "," headers=False quote_escape='//') pb . should_fail_with Illegal_Argument_Error
path.read (File_Format.Delimited 'a\u{301}' headers=False) pb . should_fail_with Illegal_Argument_Error path.read (Delimited 'a\u{301}' headers=False) pb . should_fail_with Illegal_Argument_Error
Test.specify "should correctly guess column types" <| Test.specify "should correctly guess column types" <|
t = (Enso_Project.data / "data_small.csv") . read (File_Format.Delimited "," headers=True) t = (Enso_Project.data / "data_small.csv") . read (Delimited "," headers=True)
t.at "Code" . to_vector . should_equal ["gxl", "wca", "nfw", "der"] t.at "Code" . to_vector . should_equal ["gxl", "wca", "nfw", "der"]
t.at "Index" . to_vector . should_equal [7, 0, 1, 7] t.at "Index" . to_vector . should_equal [7, 0, 1, 7]
t.at "Flag" . to_vector . should_equal [True, False, True, True] t.at "Flag" . to_vector . should_equal [True, False, True, True]
@ -303,7 +304,7 @@ spec =
t.at "QuotedNumbers" . to_vector . should_equal ["1", "2", Nothing, "34"] t.at "QuotedNumbers" . to_vector . should_equal ["1", "2", Nothing, "34"]
t.at "Mixed Types" . to_vector . should_equal ["33", Nothing, "45", "True"] t.at "Mixed Types" . to_vector . should_equal ["33", Nothing, "45", "True"]
t2 = (Enso_Project.data / "data_small.csv") . read (File_Format.Delimited "," headers=True value_formatter=(Data_Formatter allow_leading_zeros=True)) t2 = (Enso_Project.data / "data_small.csv") . read (Delimited "," headers=True value_formatter=(Data_Formatter allow_leading_zeros=True))
t2.at "Leading0s" . to_vector . should_equal [1, 2, 123, Nothing] t2.at "Leading0s" . to_vector . should_equal [1, 2, 123, Nothing]
Test.specify "should be able to detect types automatically" <| Test.specify "should be able to detect types automatically" <|
@ -322,7 +323,7 @@ spec =
a,b,c a,b,c
1,2,3 1,2,3
4,5,6 4,5,6
t1 = Table.Table.from text1 (format = File_Format.Delimited ",") t1 = Table.Table.from text1 (format = Delimited ",")
t1.columns.map .name . should_equal ["a", "b", "c"] t1.columns.map .name . should_equal ["a", "b", "c"]
t1.at "a" . to_vector . should_equal [1, 4] t1.at "a" . to_vector . should_equal [1, 4]
t1.at "b" . to_vector . should_equal [2, 5] t1.at "b" . to_vector . should_equal [2, 5]
@ -334,4 +335,24 @@ spec =
t2.at "a" . to_vector . should_equal [1, 3] t2.at "a" . to_vector . should_equal [1, 3]
t2.at "b" . to_vector . should_equal [2, 4] t2.at "b" . to_vector . should_equal [2, 4]
Test.specify "should allow to build the Delimited configuration using builders" <|
Delimited "," . clone . should_equal (Delimited ",")
Delimited "," encoding=Encoding.ascii skip_rows=123 row_limit=100 headers=False value_formatter=Nothing . clone . should_equal (Delimited "," headers=False value_formatter=Nothing skip_rows=123 row_limit=100 encoding=Encoding.ascii)
Delimited "," . clone quote="'" quote_escape='\\' quote_style=Quote_Style.Always headers=False value_formatter=Nothing . should_equal (Delimited "," headers=False value_formatter=Nothing quote="'" quote_escape='\\' quote_style=Quote_Style.Always)
Delimited '\t' . with_quotes "|" . should_equal (Delimited '\t' quote='|' quote_escape='|')
Delimited '\t' quote_style=Quote_Style.Always . with_quotes "-" '\\' . should_equal (Delimited '\t' quote='-' quote_escape='\\' quote_style=Quote_Style.Always)
Delimited '\t' quote_style=Quote_Style.Always . with_quotes "-" '\\' Quote_Style.Never . should_equal (Delimited '\t' quote='-' quote_escape='\\' quote_style=Quote_Style.Never)
Delimited ',' . with_headers . should_equal (Delimited ',' headers=True)
Delimited ',' . without_headers . should_equal (Delimited ',' headers=False)
Delimited "," skip_rows=123 headers=False value_formatter=Nothing quote_style=Quote_Style.Never . with_headers . should_equal (Delimited "," skip_rows=123 value_formatter=Nothing quote_style=Quote_Style.Never headers=True)
Delimited "," skip_rows=123 headers=True value_formatter=Nothing quote_style=Quote_Style.Never . without_headers . should_equal (Delimited "," skip_rows=123 value_formatter=Nothing quote_style=Quote_Style.Never headers=False)
Delimited ',' . with_parsing . should_equal (Delimited ',')
Delimited ',' . without_parsing . should_equal (Delimited ',' value_formatter=Nothing)
custom_formatter = Data_Formatter true_values=["A", "B", "C"] false_values=["D", "E", "F"]
Delimited ',' . with_parsing custom_formatter . should_equal (Delimited ',' value_formatter=custom_formatter)
Delimited ',' row_limit=456 . without_parsing . should_equal (Delimited ',' value_formatter=Nothing row_limit=456)
main = Test.Suite.run_main here.spec main = Test.Suite.run_main here.spec

View File

@ -2,6 +2,7 @@ from Standard.Base import all
from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding, Encoding_Error from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding, Encoding_Error
import Standard.Table.Io.File_Read import Standard.Table.Io.File_Read
import Standard.Table.Io.File_Format import Standard.Table.Io.File_Format
from Standard.Table.Error import Unsupported_File_Type
import Standard.Test import Standard.Test
import Standard.Test.Problems import Standard.Test.Problems
@ -13,18 +14,21 @@ spec =
Test.group "File_Format.Auto materialise" <| Test.group "File_Format.Auto materialise" <|
Test.specify "should be Bytes for unknown file" <| Test.specify "should be Bytes for unknown file" <|
File_Format.Auto . materialise sample_xxx . should_be_a File_Format.Bytes File_Format.Auto . materialise sample_xxx . should_fail_with Unsupported_File_Type
Test.specify "should be Text for text file" <| Test.specify "should be Text for text file" <|
File_Format.Auto . materialise sample_txt . should_be_a File_Format.Text File_Format.Auto . materialise sample_txt . should_be_a File_Format.Plain_Text
Test.specify "should be Text for log file" <| Test.specify "should be Text for log file" <|
File_Format.Auto . materialise windows_log . should_be_a File_Format.Text File_Format.Auto . materialise windows_log . should_be_a File_Format.Plain_Text
Test.specify "should detect CSV files" <|
File_Format.Auto . materialise (Enso_Project.data / "data.csv") . should_equal (File_Format.Delimited ",")
Test.group "File_Format.Auto" <| Test.group "File_Format.Auto" <|
Test.specify "should be able to read an unknown file" <| Test.specify "should raise an error when reading an unknown file" <|
bytes = sample_xxx.read bytes = sample_xxx.read
bytes.should_equal [72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33] bytes.should_fail_with Unsupported_File_Type
Test.specify "should be able to read a text file" <| Test.specify "should be able to read a text file" <|
content = sample_txt.read content = sample_txt.read
@ -44,17 +48,17 @@ spec =
bytes = File.read path File_Format.Bytes bytes = File.read path File_Format.Bytes
bytes.should_equal [72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33] bytes.should_equal [72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33]
Test.group "File_Format.Text" <| Test.group "File_Format.Plain_Text" <|
Test.specify "should be able to read a file as Text" <| Test.specify "should be able to read a file as Text" <|
text = sample_xxx.read File_Format.Text text = sample_xxx.read File_Format.Plain_Text
text.should_equal "Hello World!" text.should_equal "Hello World!"
Test.specify "should be able to read a file as Text with Encoding" <| Test.specify "should be able to read a file as Text with Encoding" <|
text = windows_log.read (File_Format.Text Encoding.windows_1252) text = windows_log.read (File_Format.Plain_Text Encoding.windows_1252)
text.should_equal "Hello World! $¢¤¥" text.should_equal "Hello World! $¢¤¥"
Test.specify "should raise a warning when invalid encoding in a Text file" <| Test.specify "should raise a warning when invalid encoding in a Text file" <|
action = windows_log.read (File_Format.Text Encoding.ascii) on_problems=_ action = windows_log.read (File_Format.Plain_Text Encoding.ascii) on_problems=_
tester result = result . should_equal 'Hello World! $\uFFFD\uFFFD\uFFFD' tester result = result . should_equal 'Hello World! $\uFFFD\uFFFD\uFFFD'
problems = [Encoding_Error "Encoding issues at 14, 15, 16."] problems = [Encoding_Error "Encoding issues at 14, 15, 16."]
Problems.test_problem_handling action problems tester Problems.test_problem_handling action problems tester

View File

@ -71,3 +71,8 @@ spec = Test.group "Locale" <|
Test.specify "should convert to Json" <| Test.specify "should convert to Json" <|
en_gb.to_json.should_equal <| en_gb.to_json.should_equal <|
Json.from_pairs [["type", "Locale"], ["language", "en"], ["country", "GB"]] Json.from_pairs [["type", "Locale"], ["language", "en"], ["country", "GB"]]
Test.specify "should allow equality comparisons" <|
Locale.uk . should_equal Locale.uk
Locale.uk . should_not_equal Locale.us
main = Test.Suite.run_main here.spec