mirror of
https://github.com/enso-org/enso.git
synced 2024-11-23 16:18:23 +03:00
Basic changes to File_Format (#3516)
Implements https://www.pivotaltracker.com/story/show/182308987
This commit is contained in:
parent
b1db359f19
commit
2af970fe52
@ -134,6 +134,8 @@
|
|||||||
- [Added rank data, correlation and covariance statistics for `Vector`][3484]
|
- [Added rank data, correlation and covariance statistics for `Vector`][3484]
|
||||||
- [Implemented `Table.order_by` for the SQLite backend.][3502]
|
- [Implemented `Table.order_by` for the SQLite backend.][3502]
|
||||||
- [Implemented `Table.order_by` for the PostgreSQL backend.][3514]
|
- [Implemented `Table.order_by` for the PostgreSQL backend.][3514]
|
||||||
|
- [Renamed `File_Format.Text` to `Plain_Text`, updated `File_Format.Delimited`
|
||||||
|
API and added builders for customizing less common settings.][3516]
|
||||||
|
|
||||||
[debug-shortcuts]:
|
[debug-shortcuts]:
|
||||||
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
||||||
@ -210,6 +212,7 @@
|
|||||||
[3484]: https://github.com/enso-org/enso/pull/3484
|
[3484]: https://github.com/enso-org/enso/pull/3484
|
||||||
[3502]: https://github.com/enso-org/enso/pull/3502
|
[3502]: https://github.com/enso-org/enso/pull/3502
|
||||||
[3514]: https://github.com/enso-org/enso/pull/3514
|
[3514]: https://github.com/enso-org/enso/pull/3514
|
||||||
|
[3516]: https://github.com/enso-org/enso/pull/3516
|
||||||
|
|
||||||
#### Enso Compiler
|
#### Enso Compiler
|
||||||
|
|
||||||
|
@ -414,6 +414,12 @@ type Locale
|
|||||||
if this.variant.is_nothing.not then b.append ["variant", this.variant]
|
if this.variant.is_nothing.not then b.append ["variant", this.variant]
|
||||||
Json.from_pairs b.to_vector
|
Json.from_pairs b.to_vector
|
||||||
|
|
||||||
|
## Checks whether this locale and `other` denote the same locale.

   Arguments:
   - other: The value to compare against. Anything that is not a `Locale`
     compares as not equal.
== : Any -> Boolean
== other = case other of
    # Equality is delegated to the wrapped Java locale.
    Locale that_java_locale -> this.java_locale.equals that_java_locale
    _ -> False
|
||||||
|
|
||||||
## PRIVATE
|
## PRIVATE
|
||||||
|
|
||||||
Convert a java locale to an Enso locale.
|
Convert a java locale to an Enso locale.
|
||||||
|
@ -235,7 +235,7 @@ make_order_descriptor internal_column sort_direction text_ordering =
|
|||||||
IR.Order_Descriptor internal_column.expression sort_direction nulls_order=nulls collation=Nothing
|
IR.Order_Descriptor internal_column.expression sort_direction nulls_order=nulls collation=Nothing
|
||||||
True ->
|
True ->
|
||||||
IR.Order_Descriptor internal_column.expression sort_direction nulls_order=nulls collation="ucs_basic"
|
IR.Order_Descriptor internal_column.expression sort_direction nulls_order=nulls collation="ucs_basic"
|
||||||
Case_Insensitive locale -> case Locale.default.java_locale.equals locale.java_locale of
|
Case_Insensitive locale -> case locale == Locale.default of
|
||||||
False ->
|
False ->
|
||||||
Error.throw (Unsupported_Database_Operation_Error "Case insensitive ordering with custom locale is currently not supported. You may need to materialize the Table to perform this operation.")
|
Error.throw (Unsupported_Database_Operation_Error "Case insensitive ordering with custom locale is currently not supported. You may need to materialize the Table to perform this operation.")
|
||||||
True ->
|
True ->
|
||||||
|
@ -58,7 +58,7 @@ type Sqlite_Dialect
|
|||||||
IR.Order_Descriptor internal_column.expression sort_direction collation=Nothing
|
IR.Order_Descriptor internal_column.expression sort_direction collation=Nothing
|
||||||
True ->
|
True ->
|
||||||
IR.Order_Descriptor internal_column.expression sort_direction collation="BINARY"
|
IR.Order_Descriptor internal_column.expression sort_direction collation="BINARY"
|
||||||
Case_Insensitive locale -> case Locale.default.java_locale.equals locale.java_locale of
|
Case_Insensitive locale -> case locale == Locale.default of
|
||||||
False ->
|
False ->
|
||||||
Error.throw (Unsupported_Database_Operation_Error "Case insensitive ordering with custom locale is not supported by the SQLite backend. You may need to materialize the Table to perform this operation.")
|
Error.throw (Unsupported_Database_Operation_Error "Case insensitive ordering with custom locale is not supported by the SQLite backend. You may need to materialize the Table to perform this operation.")
|
||||||
True ->
|
True ->
|
||||||
|
@ -154,3 +154,10 @@ type Leading_Zeros column:Text (datatype:(Integer|Number|Date|Time|Time_Of_Day|B
|
|||||||
a parse is attempted anyway. If mixed types are requested, the column is not
|
a parse is attempted anyway. If mixed types are requested, the column is not
|
||||||
parsed due to ambiguity.
|
parsed due to ambiguity.
|
||||||
type Duplicate_Type_Selector column:Text ambiguous:Boolean
|
type Duplicate_Type_Selector column:Text ambiguous:Boolean
|
||||||
|
|
||||||
|
## An error indicating that the `Auto` format does not support the type of
   the given file.

   Arguments:
   - filename: The name of the file whose type is not supported.
type Unsupported_File_Type filename

## Builds the human-readable message for this error.
Unsupported_File_Type.to_display_text : Text
Unsupported_File_Type.to_display_text =
    suffix = " has a type that is not supported by the Auto format."
    "The " + this.filename + suffix
|
||||||
|
@ -94,10 +94,6 @@ read_from_reader format java_reader on_problems max_columns=4096 =
|
|||||||
True -> DelimitedReader.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS
|
True -> DelimitedReader.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS
|
||||||
Infer -> DelimitedReader.HeaderBehavior.INFER
|
Infer -> DelimitedReader.HeaderBehavior.INFER
|
||||||
False -> DelimitedReader.HeaderBehavior.GENERATE_HEADERS
|
False -> DelimitedReader.HeaderBehavior.GENERATE_HEADERS
|
||||||
skip_rows = case format.skip_rows of
|
|
||||||
Nothing -> 0
|
|
||||||
Integer -> format.skip_rows
|
|
||||||
_ -> Error.throw (Illegal_Argument_Error "`skip_rows` should be Integer or Nothing.")
|
|
||||||
row_limit = case format.row_limit of
|
row_limit = case format.row_limit of
|
||||||
Nothing -> -1
|
Nothing -> -1
|
||||||
Integer -> format.row_limit
|
Integer -> format.row_limit
|
||||||
@ -127,7 +123,7 @@ read_from_reader format java_reader on_problems max_columns=4096 =
|
|||||||
cell_type_guesser = if format.headers != Infer then Nothing else
|
cell_type_guesser = if format.headers != Infer then Nothing else
|
||||||
formatter = format.value_formatter.if_nothing Data_Formatter
|
formatter = format.value_formatter.if_nothing Data_Formatter
|
||||||
TypeInferringParser.new formatter.get_specific_type_parsers.to_array IdentityParser.new
|
TypeInferringParser.new formatter.get_specific_type_parsers.to_array IdentityParser.new
|
||||||
reader = DelimitedReader.new java_reader format.delimiter format.quote format.quote_escape java_headers skip_rows row_limit max_columns value_parser cell_type_guesser format.keep_invalid_rows warnings_as_errors
|
reader = DelimitedReader.new java_reader format.delimiter format.quote format.quote_escape java_headers format.skip_rows row_limit max_columns value_parser cell_type_guesser format.keep_invalid_rows warnings_as_errors
|
||||||
result_with_problems = reader.read
|
result_with_problems = reader.read
|
||||||
parsing_problems = Vector.Vector (result_with_problems.problems) . map here.translate_reader_problem
|
parsing_problems = Vector.Vector (result_with_problems.problems) . map here.translate_reader_problem
|
||||||
on_problems.attach_problems_after (Table.Table result_with_problems.value) parsing_problems
|
on_problems.attach_problems_after (Table.Table result_with_problems.value) parsing_problems
|
||||||
|
@ -20,11 +20,11 @@ type Excel_Section
|
|||||||
|
|
||||||
## Gets the data from a specific sheet. Column names are the Excel column
|
## Gets the data from a specific sheet. Column names are the Excel column
|
||||||
names.
|
names.
|
||||||
type Sheet (sheet:(Integer|Text)) (skip_rows:(Integer|Nothing)=Nothing) (row_limit:(Integer|Nothing)=Nothing)
|
type Sheet (sheet:(Integer|Text)) (skip_rows:Integer=0) (row_limit:(Integer|Nothing)=Nothing)
|
||||||
|
|
||||||
## Gets a specific range (taking either a defined name or external style
|
## Gets a specific range (taking either a defined name or external style
|
||||||
address) from the workbook.
|
address) from the workbook.
|
||||||
type Range (address:(Text|Excel_Range)) (skip_rows:(Integer|Nothing)=Nothing) (row_limit:(Integer|Nothing)=Nothing)
|
type Range (address:(Text|Excel_Range)) (skip_rows:Integer=0) (row_limit:(Integer|Nothing)=Nothing)
|
||||||
|
|
||||||
type Excel_Range
|
type Excel_Range
|
||||||
## Specifies a range within an Excel Workbook.
|
## Specifies a range within an Excel Workbook.
|
||||||
|
@ -6,9 +6,11 @@ from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Prob
|
|||||||
from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding
|
from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding
|
||||||
import Standard.Base.Runtime.Ref
|
import Standard.Base.Runtime.Ref
|
||||||
import Standard.Table.Internal.Delimited_Reader
|
import Standard.Table.Internal.Delimited_Reader
|
||||||
|
from Standard.Table.Error as Table_Errors import Unsupported_File_Type
|
||||||
|
|
||||||
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
|
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
|
||||||
import Standard.Table.Io.Excel as Excel_Module
|
import Standard.Table.Io.Excel as Excel_Module
|
||||||
|
import Standard.Table.Io.Quote_Style
|
||||||
|
|
||||||
## This type needs to be here to allow for the usage of Standard.Table
|
## This type needs to be here to allow for the usage of Standard.Table
|
||||||
functions. Ideally, it would be an interface within Standard.Base and
|
functions. Ideally, it would be an interface within Standard.Base and
|
||||||
@ -24,9 +26,9 @@ type Auto
|
|||||||
materialise file =
|
materialise file =
|
||||||
extension = file.extension
|
extension = file.extension
|
||||||
|
|
||||||
output = Ref.new File_Format.Bytes
|
output = Ref.new Nothing
|
||||||
if ".txt".equals_ignore_case extension then output.put File_Format.Text
|
if ".txt".equals_ignore_case extension then output.put File_Format.Plain_Text
|
||||||
if ".log".equals_ignore_case extension then output.put File_Format.Text
|
if ".log".equals_ignore_case extension then output.put File_Format.Plain_Text
|
||||||
if ".csv".equals_ignore_case extension then output.put (File_Format.Delimited ',')
|
if ".csv".equals_ignore_case extension then output.put (File_Format.Delimited ',')
|
||||||
if ".tsv".equals_ignore_case extension then output.put (File_Format.Delimited '\t')
|
if ".tsv".equals_ignore_case extension then output.put (File_Format.Delimited '\t')
|
||||||
if ".xlsx".equals_ignore_case extension then output.put File_Format.Excel
|
if ".xlsx".equals_ignore_case extension then output.put File_Format.Excel
|
||||||
@ -34,7 +36,8 @@ type Auto
|
|||||||
if ".xls".equals_ignore_case extension then output.put File_Format.Excel
|
if ".xls".equals_ignore_case extension then output.put File_Format.Excel
|
||||||
if ".xlt".equals_ignore_case extension then output.put File_Format.Excel
|
if ".xlt".equals_ignore_case extension then output.put File_Format.Excel
|
||||||
|
|
||||||
output.get
|
output.get.if_nothing <|
|
||||||
|
Error.throw (Unsupported_File_Type file.name)
|
||||||
|
|
||||||
## Implements the `File.read` for this `File_Format`
|
## Implements the `File.read` for this `File_Format`
|
||||||
read : File -> Problem_Behavior -> Any
|
read : File -> Problem_Behavior -> Any
|
||||||
@ -52,8 +55,8 @@ type Bytes
|
|||||||
file.read_bytes
|
file.read_bytes
|
||||||
|
|
||||||
## Reads the file to a `Text` with specified encoding.
|
## Reads the file to a `Text` with specified encoding.
|
||||||
type Text
|
type Plain_Text
|
||||||
type Text (encoding:Encoding=Encoding.utf_8)
|
type Plain_Text (encoding:Encoding=Encoding.utf_8)
|
||||||
|
|
||||||
## Implements the `File.read` for this `File_Format`
|
## Implements the `File.read` for this `File_Format`
|
||||||
read : File -> Problem_Behavior -> Any
|
read : File -> Problem_Behavior -> Any
|
||||||
@ -72,6 +75,9 @@ type Delimited
|
|||||||
- delimiter: The delimiter character to split the file into columns. An
|
- delimiter: The delimiter character to split the file into columns. An
|
||||||
`Illegal_Argument_Error` error is returned if this is an empty string.
|
`Illegal_Argument_Error` error is returned if this is an empty string.
|
||||||
- encoding: The encoding to use when reading the file.
|
- encoding: The encoding to use when reading the file.
|
||||||
|
- skip_rows: The number of rows to skip from the top of the file.
|
||||||
|
- row_limit: The maximum number of rows to read from the file. This count
|
||||||
|
does not include the header row (if applicable).
|
||||||
- quote: The quote character denotes the start and end of a quoted value.
|
- quote: The quote character denotes the start and end of a quoted value.
|
||||||
No quote character is used if set to `Nothing`. Quoted items are not
|
No quote character is used if set to `Nothing`. Quoted items are not
|
||||||
split on the delimiter and can also contain newlines. Within a quoted
|
split on the delimiter and can also contain newlines. Within a quoted
|
||||||
@ -83,27 +89,58 @@ type Delimited
|
|||||||
then escaping quotes is done by double quotes: `"ab""cd"` will yield
|
then escaping quotes is done by double quotes: `"ab""cd"` will yield
|
||||||
the text `ab"cd"`. Another popular choice for `quote_escape` is the `\`
|
the text `ab"cd"`. Another popular choice for `quote_escape` is the `\`
|
||||||
character. Then `"ab\"cd"` will yield the same text.
|
character. Then `"ab\"cd"` will yield the same text.
|
||||||
|
- quote_style: The style of quoting to use when writing the file.
|
||||||
- headers: If set to `True`, the first row is used as column names. If
|
- headers: If set to `True`, the first row is used as column names. If
|
||||||
set to `False`, the column names are generated by adding increasing
|
set to `False`, the column names are generated by adding increasing
|
||||||
numeric suffixes to the base name `Column` (i.e. `Column_1`,
|
numeric suffixes to the base name `Column` (i.e. `Column_1`,
|
||||||
`Column_2` etc.). If set to `Infer`, the process tries to infer if
|
`Column_2` etc.). If set to `Infer`, the process tries to infer if
|
||||||
headers are present on the first row. If the column names are not
|
headers are present on the first row. If the column names are not
|
||||||
unique, numeric suffixes will be appended to disambiguate them.
|
unique, numeric suffixes will be appended to disambiguate them.
|
||||||
- skip_rows: The number of rows to skip from the top of the file.
|
|
||||||
- row_limit: The maximum number of rows to read from the file. This count
|
|
||||||
does not include the header row (if applicable).
|
|
||||||
- value_formatter: Formatter to parse text values into numbers, dates,
|
- value_formatter: Formatter to parse text values into numbers, dates,
|
||||||
times, etc. If `Nothing` values are left as Text.
|
times, etc. If `Nothing` values are left as Text.
|
||||||
- keep_invalid_rows: Specifies whether rows that contain fewer or more
|
- keep_invalid_rows: Specifies whether rows that contain fewer or more
|
||||||
columns than expected should be kept (setting the missing columns to
|
columns than expected should be kept (setting the missing columns to
|
||||||
`Nothing` or dropping the excess columns) or dropped.
|
`Nothing` or dropping the excess columns) or dropped.
|
||||||
type Delimited (delimiter:Text) (encoding:Encoding=Encoding.utf_8) (quote:Text|Nothing='"') (quote_escape:Text|Nothing='"') (headers:True|False|Infer=Infer) (skip_rows:Integer|Nothing=Nothing) (row_limit:Integer|Nothing=Nothing) (value_formatter:Data_Formatter|Nothing=Data_Formatter) (keep_invalid_rows:Boolean=True)
|
type Delimited (delimiter:Text) (encoding:Encoding=Encoding.utf_8) (skip_rows:Integer=0) (row_limit:Integer|Nothing=Nothing) (quote:Text|Nothing='"') (quote_escape:Text|Nothing='"') (quote_style:Quote_Style=Quote_Style.Necessary) (headers:True|False|Infer=Infer) (value_formatter:Data_Formatter|Nothing=Data_Formatter) (keep_invalid_rows:Boolean=True)
|
||||||
|
|
||||||
## Implements the `File.read` for this `File_Format`
|
## Implements the `File.read` for this `File_Format`
|
||||||
read : File -> Problem_Behavior -> Any
|
read : File -> Problem_Behavior -> Any
|
||||||
read file on_problems =
|
read file on_problems =
|
||||||
Delimited_Reader.read_file this file on_problems
|
Delimited_Reader.read_file this file on_problems
|
||||||
|
|
||||||
|
## PRIVATE
   Clone the instance with some properties overridden.

   Every argument defaults to the current value of the corresponding field,
   so only the properties passed explicitly are changed. `delimiter`,
   `encoding`, `skip_rows` and `row_limit` are always carried over unchanged.

   Note: This function is internal until such time as Atom cloning with
   modification is built into Enso.

   Arguments:
   - quote: The quote character, or `Nothing` for no quoting.
   - quote_escape: The character used to escape a quote inside a quoted
     value, or `Nothing`.
   - quote_style: The style of quoting to use when writing the file.
   - headers: `True`, `False` or `Infer`.
   - value_formatter: The formatter used to parse values, or `Nothing` to
     leave values as text.
   - keep_invalid_rows: Whether rows with an unexpected number of columns are
     kept.
clone : (Text|Nothing)->(Text|Nothing)->Quote_Style->(Boolean|Infer)->(Data_Formatter|Nothing)->Boolean->Delimited
clone (quote=this.quote) (quote_escape=this.quote_escape) (quote_style=this.quote_style) (headers=this.headers) (value_formatter=this.value_formatter) (keep_invalid_rows=this.keep_invalid_rows) =
    # The positional order must match the field order of the `Delimited` atom.
    Delimited this.delimiter this.encoding this.skip_rows this.row_limit quote quote_escape quote_style headers value_formatter keep_invalid_rows
|
||||||
|
|
||||||
|
## Returns a copy of this format with different quoting settings.

   Arguments:
   - quote: The quote character to use.
   - quote_escape: The character that escapes a quote inside a quoted value.
     Defaults to the `quote` character itself.
   - quote_style: The quoting style. Defaults to the style currently set on
     this format.
with_quotes : Text->Text->Quote_Style->Delimited
with_quotes quote quote_escape=quote quote_style=this.quote_style =
    this.clone quote=quote quote_escape=quote_escape quote_style=quote_style
|
||||||
|
|
||||||
|
## Returns a copy of this format with `headers` set to `True`, so that the
   first row is used as column names.
with_headers : Delimited
with_headers =
    this.clone headers=True
|
||||||
|
|
||||||
|
## Returns a copy of this format with `headers` set to `False`, so that the
   first row is treated as data rather than as a header.
without_headers : Delimited
without_headers =
    this.clone headers=False
|
||||||
|
|
||||||
|
## Returns a copy of this format that parses values.

   Arguments:
   - value_formatter: The `Data_Formatter` that controls parsing. Pass a
     custom instance to customize parser options; defaults to
     `Data_Formatter`.
with_parsing : Data_Formatter -> Delimited
with_parsing (value_formatter=Data_Formatter) =
    this.clone value_formatter=value_formatter
|
||||||
|
|
||||||
|
## Returns a copy of this format with `value_formatter` set to `Nothing`,
   which disables value parsing.
without_parsing : Delimited
without_parsing = this.clone value_formatter=Nothing
|
||||||
|
|
||||||
## A setting to infer the default behaviour of some option.
|
## A setting to infer the default behaviour of some option.
|
||||||
type Infer
|
type Infer
|
||||||
|
|
||||||
|
@ -0,0 +1,9 @@
|
|||||||
|
## The style of quoting to use when writing a delimited file.
type Quote_Style
    ## Never quote a value, even if the resulting file is invalid.
    type Never

    ## Quote only those text values that are empty or that contain the
       delimiter or new lines.
    type Necessary

    ## Quote every text value.
    type Always
|
@ -352,7 +352,7 @@ public class Reader {
|
|||||||
public static Table readSheetByName(
|
public static Table readSheetByName(
|
||||||
InputStream stream,
|
InputStream stream,
|
||||||
String sheetName,
|
String sheetName,
|
||||||
Integer skip_rows,
|
int skip_rows,
|
||||||
Integer row_limit,
|
Integer row_limit,
|
||||||
boolean xls_format)
|
boolean xls_format)
|
||||||
throws IOException, IllegalArgumentException {
|
throws IOException, IllegalArgumentException {
|
||||||
@ -367,7 +367,7 @@ public class Reader {
|
|||||||
workbook,
|
workbook,
|
||||||
sheetIndex,
|
sheetIndex,
|
||||||
null,
|
null,
|
||||||
skip_rows == null ? 0 : skip_rows,
|
skip_rows,
|
||||||
row_limit == null ? Integer.MAX_VALUE : row_limit);
|
row_limit == null ? Integer.MAX_VALUE : row_limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -383,7 +383,7 @@ public class Reader {
|
|||||||
* @throws IOException when the input stream cannot be read.
|
* @throws IOException when the input stream cannot be read.
|
||||||
*/
|
*/
|
||||||
public static Table readSheetByIndex(
|
public static Table readSheetByIndex(
|
||||||
InputStream stream, int index, Integer skip_rows, Integer row_limit, boolean xls_format)
|
InputStream stream, int index, int skip_rows, Integer row_limit, boolean xls_format)
|
||||||
throws IOException, IllegalArgumentException {
|
throws IOException, IllegalArgumentException {
|
||||||
Workbook workbook = getWorkbook(stream, xls_format);
|
Workbook workbook = getWorkbook(stream, xls_format);
|
||||||
|
|
||||||
@ -397,7 +397,7 @@ public class Reader {
|
|||||||
workbook,
|
workbook,
|
||||||
index - 1,
|
index - 1,
|
||||||
null,
|
null,
|
||||||
skip_rows == null ? 0 : skip_rows,
|
skip_rows,
|
||||||
row_limit == null ? Integer.MAX_VALUE : row_limit);
|
row_limit == null ? Integer.MAX_VALUE : row_limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -415,7 +415,7 @@ public class Reader {
|
|||||||
public static Table readRangeByName(
|
public static Table readRangeByName(
|
||||||
InputStream stream,
|
InputStream stream,
|
||||||
String rangeNameOrAddress,
|
String rangeNameOrAddress,
|
||||||
Integer skip_rows,
|
int skip_rows,
|
||||||
Integer row_limit,
|
Integer row_limit,
|
||||||
boolean xls_format)
|
boolean xls_format)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
@ -438,7 +438,7 @@ public class Reader {
|
|||||||
* @throws IOException when the input stream cannot be read.
|
* @throws IOException when the input stream cannot be read.
|
||||||
*/
|
*/
|
||||||
public static Table readRange(
|
public static Table readRange(
|
||||||
InputStream stream, Range range, Integer skip_rows, Integer row_limit, boolean xls_format)
|
InputStream stream, Range range, int skip_rows, Integer row_limit, boolean xls_format)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
return readRange(getWorkbook(stream, xls_format), range, skip_rows, row_limit);
|
return readRange(getWorkbook(stream, xls_format), range, skip_rows, row_limit);
|
||||||
}
|
}
|
||||||
@ -448,7 +448,7 @@ public class Reader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private static Table readRange(
|
private static Table readRange(
|
||||||
Workbook workbook, Range range, Integer skip_rows, Integer row_limit) {
|
Workbook workbook, Range range, int skip_rows, Integer row_limit) {
|
||||||
int sheetIndex = getSheetIndex(workbook, range.getSheetName());
|
int sheetIndex = getSheetIndex(workbook, range.getSheetName());
|
||||||
if (sheetIndex == -1) {
|
if (sheetIndex == -1) {
|
||||||
throw new IllegalArgumentException("Unknown sheet '" + range.getSheetName() + "'.");
|
throw new IllegalArgumentException("Unknown sheet '" + range.getSheetName() + "'.");
|
||||||
@ -458,7 +458,7 @@ public class Reader {
|
|||||||
workbook,
|
workbook,
|
||||||
sheetIndex,
|
sheetIndex,
|
||||||
range,
|
range,
|
||||||
skip_rows == null ? 0 : skip_rows,
|
skip_rows,
|
||||||
row_limit == null ? Integer.MAX_VALUE : row_limit);
|
row_limit == null ? Integer.MAX_VALUE : row_limit);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -7,8 +7,9 @@ import Standard.Table.Data.Column
|
|||||||
from Standard.Table.Error import all
|
from Standard.Table.Error import all
|
||||||
|
|
||||||
import Standard.Table.Io.File_Read
|
import Standard.Table.Io.File_Read
|
||||||
import Standard.Table.Io.File_Format
|
from Standard.Table.Io.File_Format import Delimited
|
||||||
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
|
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
|
||||||
|
import Standard.Table.Io.Quote_Style
|
||||||
|
|
||||||
import Standard.Test
|
import Standard.Test
|
||||||
import Standard.Test.Problems
|
import Standard.Test.Problems
|
||||||
@ -22,7 +23,7 @@ spec =
|
|||||||
c_2 = ["b", ['2', Nothing, '8', '11']]
|
c_2 = ["b", ['2', Nothing, '8', '11']]
|
||||||
c_3 = ["c", [Nothing, '6', '9', '12']]
|
c_3 = ["c", [Nothing, '6', '9', '12']]
|
||||||
expected_table = Table.new [c_1, c_2, c_3]
|
expected_table = Table.new [c_1, c_2, c_3]
|
||||||
simple_empty = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=True value_formatter=Nothing)
|
simple_empty = File.read (Enso_Project.data / "simple_empty.csv") (Delimited "," headers=True value_formatter=Nothing)
|
||||||
simple_empty.should_equal expected_table
|
simple_empty.should_equal expected_table
|
||||||
|
|
||||||
Test.specify "should load a simple table without headers" <|
|
Test.specify "should load a simple table without headers" <|
|
||||||
@ -30,11 +31,11 @@ spec =
|
|||||||
c_2 = ["Column_2", ['b', '2', Nothing, '8', '11']]
|
c_2 = ["Column_2", ['b', '2', Nothing, '8', '11']]
|
||||||
c_3 = ["Column_3", ['c', Nothing, '6', '9', '12']]
|
c_3 = ["Column_3", ['c', Nothing, '6', '9', '12']]
|
||||||
expected_table = Table.new [c_1, c_2, c_3]
|
expected_table = Table.new [c_1, c_2, c_3]
|
||||||
simple_empty = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=False value_formatter=Nothing)
|
simple_empty = File.read (Enso_Project.data / "simple_empty.csv") (Delimited "," headers=False value_formatter=Nothing)
|
||||||
simple_empty.should_equal expected_table
|
simple_empty.should_equal expected_table
|
||||||
|
|
||||||
Test.specify "should work in presence of missing headers" <|
|
Test.specify "should work in presence of missing headers" <|
|
||||||
action on_problems = File.read (Enso_Project.data / "missing_header.csv") (File_Format.Delimited "," headers=True value_formatter=Nothing) on_problems
|
action on_problems = File.read (Enso_Project.data / "missing_header.csv") (Delimited "," headers=True value_formatter=Nothing) on_problems
|
||||||
tester table =
|
tester table =
|
||||||
table.columns.map .name . should_equal ["a", "Column_1", "c", "Column_2", "d"]
|
table.columns.map .name . should_equal ["a", "Column_1", "c", "Column_2", "d"]
|
||||||
table.at "a" . to_vector . should_equal ["1"]
|
table.at "a" . to_vector . should_equal ["1"]
|
||||||
@ -46,61 +47,61 @@ spec =
|
|||||||
Problems.test_problem_handling action problems tester
|
Problems.test_problem_handling action problems tester
|
||||||
|
|
||||||
Test.specify "should infer headers based on the first two rows" <|
|
Test.specify "should infer headers based on the first two rows" <|
|
||||||
t1 = File.read (Enso_Project.data / "data_small.csv") (File_Format.Delimited "," headers=File_Format.Infer)
|
t1 = File.read (Enso_Project.data / "data_small.csv") (Delimited "," headers=File_Format.Infer)
|
||||||
t1.columns.map .name . should_equal ["Code", "Index", "Flag", "Value", "ValueWithNothing", "TextWithNothing", "Hexadecimal", "Leading0s", "QuotedNumbers", "Mixed Types"]
|
t1.columns.map .name . should_equal ["Code", "Index", "Flag", "Value", "ValueWithNothing", "TextWithNothing", "Hexadecimal", "Leading0s", "QuotedNumbers", "Mixed Types"]
|
||||||
|
|
||||||
t2 = File.read (Enso_Project.data / "all_text.csv") (File_Format.Delimited "," headers=File_Format.Infer)
|
t2 = File.read (Enso_Project.data / "all_text.csv") (Delimited "," headers=File_Format.Infer)
|
||||||
t2.columns.map .name . should_equal ["Column_1", "Column_2"]
|
t2.columns.map .name . should_equal ["Column_1", "Column_2"]
|
||||||
t2.at "Column_1" . to_vector . should_equal ["a", "c", "e", "g"]
|
t2.at "Column_1" . to_vector . should_equal ["a", "c", "e", "g"]
|
||||||
t2.at "Column_2" . to_vector . should_equal ["b", "d", "f", "h"]
|
t2.at "Column_2" . to_vector . should_equal ["b", "d", "f", "h"]
|
||||||
|
|
||||||
t3 = File.read (Enso_Project.data / "two_rows1.csv") (File_Format.Delimited "," headers=File_Format.Infer)
|
t3 = File.read (Enso_Project.data / "two_rows1.csv") (Delimited "," headers=File_Format.Infer)
|
||||||
t3.columns.map .name . should_equal ["a", "b", "c"]
|
t3.columns.map .name . should_equal ["a", "b", "c"]
|
||||||
t3.at "a" . to_vector . should_equal ["x"]
|
t3.at "a" . to_vector . should_equal ["x"]
|
||||||
t3.at "b" . to_vector . should_equal [Nothing]
|
t3.at "b" . to_vector . should_equal [Nothing]
|
||||||
t3.at "c" . to_vector . should_equal [Nothing]
|
t3.at "c" . to_vector . should_equal [Nothing]
|
||||||
|
|
||||||
t4 = File.read (Enso_Project.data / "two_rows2.csv") (File_Format.Delimited "," headers=File_Format.Infer)
|
t4 = File.read (Enso_Project.data / "two_rows2.csv") (Delimited "," headers=File_Format.Infer)
|
||||||
t4.columns.map .name . should_equal ["Column_1", "Column_2", "Column_3"]
|
t4.columns.map .name . should_equal ["Column_1", "Column_2", "Column_3"]
|
||||||
t4.at "Column_1" . to_vector . should_equal ["a", "d"]
|
t4.at "Column_1" . to_vector . should_equal ["a", "d"]
|
||||||
t4.at "Column_2" . to_vector . should_equal ["b", "e"]
|
t4.at "Column_2" . to_vector . should_equal ["b", "e"]
|
||||||
t4.at "Column_3" . to_vector . should_equal ["c", "f"]
|
t4.at "Column_3" . to_vector . should_equal ["c", "f"]
|
||||||
|
|
||||||
t5 = File.read (Enso_Project.data / "numbers_in_header.csv") (File_Format.Delimited "," headers=File_Format.Infer)
|
t5 = File.read (Enso_Project.data / "numbers_in_header.csv") (Delimited "," headers=File_Format.Infer)
|
||||||
t5.columns.map .name . should_equal ["Column_1", "Column_2", "Column_3"]
|
t5.columns.map .name . should_equal ["Column_1", "Column_2", "Column_3"]
|
||||||
t5.at "Column_1" . to_vector . should_equal ["a", "1"]
|
t5.at "Column_1" . to_vector . should_equal ["a", "1"]
|
||||||
t5.at "Column_2" . to_vector . should_equal ["b", "2"]
|
t5.at "Column_2" . to_vector . should_equal ["b", "2"]
|
||||||
t5.at "Column_3" . to_vector . should_equal [0, 3]
|
t5.at "Column_3" . to_vector . should_equal [0, 3]
|
||||||
|
|
||||||
t6 = File.read (Enso_Project.data / "quoted_numbers_in_header.csv") (File_Format.Delimited "," headers=File_Format.Infer)
|
t6 = File.read (Enso_Project.data / "quoted_numbers_in_header.csv") (Delimited "," headers=File_Format.Infer)
|
||||||
t6.columns.map .name . should_equal ["1", "x"]
|
t6.columns.map .name . should_equal ["1", "x"]
|
||||||
t6.at "1" . to_vector . should_equal ["y"]
|
t6.at "1" . to_vector . should_equal ["y"]
|
||||||
t6.at "x" . to_vector . should_equal [2]
|
t6.at "x" . to_vector . should_equal [2]
|
||||||
|
|
||||||
Test.specify "should not use the first row as headers if it is the only row, unless specifically asked to" <|
|
Test.specify "should not use the first row as headers if it is the only row, unless specifically asked to" <|
|
||||||
t1 = File.read (Enso_Project.data / "one_row.csv") (File_Format.Delimited "," headers=File_Format.Infer)
|
t1 = File.read (Enso_Project.data / "one_row.csv") (Delimited "," headers=File_Format.Infer)
|
||||||
t1.columns.map .name . should_equal ["Column_1", "Column_2", "Column_3"]
|
t1.columns.map .name . should_equal ["Column_1", "Column_2", "Column_3"]
|
||||||
t1.at "Column_1" . to_vector . should_equal ["x"]
|
t1.at "Column_1" . to_vector . should_equal ["x"]
|
||||||
t1.at "Column_2" . to_vector . should_equal ["y"]
|
t1.at "Column_2" . to_vector . should_equal ["y"]
|
||||||
t1.at "Column_3" . to_vector . should_equal ["z"]
|
t1.at "Column_3" . to_vector . should_equal ["z"]
|
||||||
|
|
||||||
t2 = File.read (Enso_Project.data / "one_row.csv") (File_Format.Delimited "," headers=True)
|
t2 = File.read (Enso_Project.data / "one_row.csv") (Delimited "," headers=True)
|
||||||
t2.columns.map .name . should_equal ["x", "y", "z"]
|
t2.columns.map .name . should_equal ["x", "y", "z"]
|
||||||
t2.row_count . should_equal 0
|
t2.row_count . should_equal 0
|
||||||
t2.at "x" . to_vector . should_equal []
|
t2.at "x" . to_vector . should_equal []
|
||||||
|
|
||||||
Test.specify "should be able to load even an empty file" <|
|
Test.specify "should be able to load even an empty file" <|
|
||||||
table = File.read (Enso_Project.data / "empty.txt") (File_Format.Delimited "," headers=True value_formatter=Nothing)
|
table = File.read (Enso_Project.data / "empty.txt") (Delimited "," headers=True value_formatter=Nothing)
|
||||||
table.columns.map .name . should_equal []
|
table.columns.map .name . should_equal []
|
||||||
table.row_count . should_equal 0
|
table.row_count . should_equal 0
|
||||||
|
|
||||||
Test.specify "should correctly handle file opening issues" <|
|
Test.specify "should correctly handle file opening issues" <|
|
||||||
nonexistent_file = Enso_Project.data / "a_filename_that_does_not_exist.foobar"
|
nonexistent_file = Enso_Project.data / "a_filename_that_does_not_exist.foobar"
|
||||||
r1 = File.read nonexistent_file (File_Format.Delimited "," headers=True value_formatter=Nothing)
|
r1 = File.read nonexistent_file (Delimited "," headers=True value_formatter=Nothing)
|
||||||
r1.should_fail_with File.File_Not_Found
|
r1.should_fail_with File.File_Not_Found
|
||||||
|
|
||||||
directory = Enso_Project.data
|
directory = Enso_Project.data
|
||||||
r2 = File.read directory (File_Format.Delimited "," headers=True value_formatter=Nothing) Problem_Behavior.Report_Error
|
r2 = File.read directory (Delimited "," headers=True value_formatter=Nothing) Problem_Behavior.Report_Error
|
||||||
r2.should_fail_with File.Io_Error
|
r2.should_fail_with File.Io_Error
|
||||||
|
|
||||||
Test.specify "should work with all kinds of line endings" <|
|
Test.specify "should work with all kinds of line endings" <|
|
||||||
@ -111,7 +112,7 @@ spec =
|
|||||||
(path name).write_text text Encoding.utf_8
|
(path name).write_text text Encoding.utf_8
|
||||||
|
|
||||||
test_file name =
|
test_file name =
|
||||||
table = File.read (path name) (File_Format.Delimited "," headers=True value_formatter=Nothing) Problem_Behavior.Report_Error
|
table = File.read (path name) (Delimited "," headers=True value_formatter=Nothing) Problem_Behavior.Report_Error
|
||||||
table.columns.map .name . should_equal ['a', 'b', 'c']
|
table.columns.map .name . should_equal ['a', 'b', 'c']
|
||||||
table.at 'a' . to_vector . should_equal ['d', '1']
|
table.at 'a' . to_vector . should_equal ['d', '1']
|
||||||
table.at 'b' . to_vector . should_equal ['e', '2']
|
table.at 'b' . to_vector . should_equal ['e', '2']
|
||||||
@ -126,17 +127,17 @@ spec =
|
|||||||
|
|
||||||
# Currently mixed line endings are not supported.
|
# Currently mixed line endings are not supported.
|
||||||
(path 'mixed.csv').write_text 'a,b,c\nd,e,f\r1,2,3'
|
(path 'mixed.csv').write_text 'a,b,c\nd,e,f\r1,2,3'
|
||||||
File.read (path 'mixed.csv') (File_Format.Delimited "," headers=True value_formatter=Nothing) Problem_Behavior.Report_Error . should_fail_with Invalid_Row
|
File.read (path 'mixed.csv') (Delimited "," headers=True value_formatter=Nothing) Problem_Behavior.Report_Error . should_fail_with Invalid_Row
|
||||||
|
|
||||||
Test.specify "should work with Windows-1252 encoding" <|
|
Test.specify "should work with Windows-1252 encoding" <|
|
||||||
table = File.read (Enso_Project.data / "windows.csv") (File_Format.Delimited "," headers=True encoding=Encoding.windows_1252) Problem_Behavior.Report_Error
|
table = File.read (Enso_Project.data / "windows.csv") (Delimited "," headers=True encoding=Encoding.windows_1252) Problem_Behavior.Report_Error
|
||||||
table.columns.map .name . should_equal ['a', 'b', 'c']
|
table.columns.map .name . should_equal ['a', 'b', 'c']
|
||||||
table.at 'a' . to_vector . should_equal ['$¢']
|
table.at 'a' . to_vector . should_equal ['$¢']
|
||||||
table.at 'b' . to_vector . should_equal ['¤']
|
table.at 'b' . to_vector . should_equal ['¤']
|
||||||
table.at 'c' . to_vector . should_equal ['¥']
|
table.at 'c' . to_vector . should_equal ['¥']
|
||||||
|
|
||||||
Test.specify "should work with UTF-16 encoding" <|
|
Test.specify "should work with UTF-16 encoding" <|
|
||||||
table = File.read (Enso_Project.data / "utf16.csv") (File_Format.Delimited "," headers=True encoding=Encoding.utf_16_be) Problem_Behavior.Report_Error
|
table = File.read (Enso_Project.data / "utf16.csv") (Delimited "," headers=True encoding=Encoding.utf_16_be) Problem_Behavior.Report_Error
|
||||||
table.columns.map .name . should_equal ['ą', '🚀b', 'ć😎']
|
table.columns.map .name . should_equal ['ą', '🚀b', 'ć😎']
|
||||||
table.at 'ą' . to_vector . should_equal ['ą']
|
table.at 'ą' . to_vector . should_equal ['ą']
|
||||||
table.at '🚀b' . to_vector . should_equal ['✨🚀🚧😍😃😍😎😙😉☺']
|
table.at '🚀b' . to_vector . should_equal ['✨🚀🚧😍😃😍😎😙😉☺']
|
||||||
@ -147,7 +148,7 @@ spec =
|
|||||||
utf8_bytes = [97, 44, 98, 44, 99, 10, -60, -123, 44, -17, -65, -65, 44, -61, 40, -61, 40, 10]
|
utf8_bytes = [97, 44, 98, 44, 99, 10, -60, -123, 44, -17, -65, -65, 44, -61, 40, -61, 40, 10]
|
||||||
utf8_file.write_bytes utf8_bytes
|
utf8_file.write_bytes utf8_bytes
|
||||||
action_1 on_problems =
|
action_1 on_problems =
|
||||||
utf8_file.read (File_Format.Delimited "," headers=True) on_problems
|
utf8_file.read (Delimited "," headers=True) on_problems
|
||||||
tester_1 table =
|
tester_1 table =
|
||||||
table.columns.map .name . should_equal ['a', 'b', 'c']
|
table.columns.map .name . should_equal ['a', 'b', 'c']
|
||||||
table.at 'a' . to_vector . should_equal ['ą']
|
table.at 'a' . to_vector . should_equal ['ą']
|
||||||
@ -157,7 +158,7 @@ spec =
|
|||||||
Problems.test_problem_handling action_1 problems_1 tester_1
|
Problems.test_problem_handling action_1 problems_1 tester_1
|
||||||
|
|
||||||
action_2 on_problems =
|
action_2 on_problems =
|
||||||
(Enso_Project.data / "utf16_invalid.csv").read (File_Format.Delimited "," headers=True encoding=Encoding.utf_16_be) on_problems
|
(Enso_Project.data / "utf16_invalid.csv").read (Delimited "," headers=True encoding=Encoding.utf_16_be) on_problems
|
||||||
tester_2 table =
|
tester_2 table =
|
||||||
table.columns.map .name . should_equal ['a', 'b', 'c']
|
table.columns.map .name . should_equal ['a', 'b', 'c']
|
||||||
# This column does not raise a problem - the '\uFFFD' is simply present in the input file.
|
# This column does not raise a problem - the '\uFFFD' is simply present in the input file.
|
||||||
@ -170,7 +171,7 @@ spec =
|
|||||||
|
|
||||||
|
|
||||||
Test.specify "should handle duplicated columns" <|
|
Test.specify "should handle duplicated columns" <|
|
||||||
action on_problems = File.read (Enso_Project.data / "duplicated_columns.csv") (File_Format.Delimited "," headers=True value_formatter=Nothing) on_problems
|
action on_problems = File.read (Enso_Project.data / "duplicated_columns.csv") (Delimited "," headers=True value_formatter=Nothing) on_problems
|
||||||
tester table =
|
tester table =
|
||||||
table.columns.map .name . should_equal ['a', 'b', 'c', 'a_1']
|
table.columns.map .name . should_equal ['a', 'b', 'c', 'a_1']
|
||||||
table.at 'a' . to_vector . should_equal ['1']
|
table.at 'a' . to_vector . should_equal ['1']
|
||||||
@ -179,27 +180,27 @@ spec =
|
|||||||
Problems.test_problem_handling action problems tester
|
Problems.test_problem_handling action problems tester
|
||||||
|
|
||||||
Test.specify "should handle quotes" <|
|
Test.specify "should handle quotes" <|
|
||||||
t1 = File.read (Enso_Project.data / "double_quoted.csv") (File_Format.Delimited "," headers=True value_formatter=Nothing)
|
t1 = File.read (Enso_Project.data / "double_quoted.csv") (Delimited "," headers=True value_formatter=Nothing)
|
||||||
t1.at 'a' . to_vector . should_equal ['a, x', '"a']
|
t1.at 'a' . to_vector . should_equal ['a, x', '"a']
|
||||||
t1.at 'c' . to_vector . should_equal ['3', '"']
|
t1.at 'c' . to_vector . should_equal ['3', '"']
|
||||||
|
|
||||||
t2 = File.read (Enso_Project.data / "escape_quoted.csv") (File_Format.Delimited "," headers=True quote_escape="\" value_formatter=Nothing)
|
t2 = File.read (Enso_Project.data / "escape_quoted.csv") (Delimited "," headers=True quote_escape="\" value_formatter=Nothing)
|
||||||
t2.at 'a' . to_vector . should_equal ['a"b', 'a\\\"z']
|
t2.at 'a' . to_vector . should_equal ['a"b', 'a\\\"z']
|
||||||
|
|
||||||
t3 = File.read (Enso_Project.data / "no_quoting.csv") (File_Format.Delimited "," quote=Nothing headers=True value_formatter=Nothing)
|
t3 = File.read (Enso_Project.data / "no_quoting.csv") (Delimited "," quote=Nothing headers=True value_formatter=Nothing)
|
||||||
t3.at 'a' . to_vector . should_equal ['"y']
|
t3.at 'a' . to_vector . should_equal ['"y']
|
||||||
t3.at 'b' . to_vector . should_equal ['z"']
|
t3.at 'b' . to_vector . should_equal ['z"']
|
||||||
t3.at 'c' . to_vector . should_equal ['a']
|
t3.at 'c' . to_vector . should_equal ['a']
|
||||||
|
|
||||||
Test.specify "should support rows spanning multiple lines if quoted" <|
|
Test.specify "should support rows spanning multiple lines if quoted" <|
|
||||||
t1 = File.read (Enso_Project.data / "multiline_quoted.csv") (File_Format.Delimited "," headers=True value_formatter=Nothing)
|
t1 = File.read (Enso_Project.data / "multiline_quoted.csv") (Delimited "," headers=True value_formatter=Nothing)
|
||||||
t1.at 'a' . to_vector . should_equal ['1', '4']
|
t1.at 'a' . to_vector . should_equal ['1', '4']
|
||||||
t1.at 'b' . to_vector . should_equal ['start\n\ncontinue', '5']
|
t1.at 'b' . to_vector . should_equal ['start\n\ncontinue', '5']
|
||||||
t1.at 'c' . to_vector . should_equal ['3', '6']
|
t1.at 'c' . to_vector . should_equal ['3', '6']
|
||||||
|
|
||||||
Test.specify "should behave correctly in presence of a mismatched quote" <|
|
Test.specify "should behave correctly in presence of a mismatched quote" <|
|
||||||
action_1 on_problems =
|
action_1 on_problems =
|
||||||
File.read (Enso_Project.data / "mismatched_quote.csv") (File_Format.Delimited "," headers=True value_formatter=Nothing) on_problems
|
File.read (Enso_Project.data / "mismatched_quote.csv") (Delimited "," headers=True value_formatter=Nothing) on_problems
|
||||||
|
|
||||||
tester_1 table =
|
tester_1 table =
|
||||||
table.columns.map .name . should_equal ['a', 'b', 'c']
|
table.columns.map .name . should_equal ['a', 'b', 'c']
|
||||||
@ -210,7 +211,7 @@ spec =
|
|||||||
Problems.test_problem_handling action_1 problems_1 tester_1
|
Problems.test_problem_handling action_1 problems_1 tester_1
|
||||||
|
|
||||||
action_2 on_problems =
|
action_2 on_problems =
|
||||||
File.read (Enso_Project.data / "mismatched_quote2.csv") (File_Format.Delimited "," headers=True value_formatter=Nothing) on_problems
|
File.read (Enso_Project.data / "mismatched_quote2.csv") (Delimited "," headers=True value_formatter=Nothing) on_problems
|
||||||
|
|
||||||
tester_2 table =
|
tester_2 table =
|
||||||
table.columns.map .name . should_equal ['a', 'b', 'c']
|
table.columns.map .name . should_equal ['a', 'b', 'c']
|
||||||
@ -222,7 +223,7 @@ spec =
|
|||||||
|
|
||||||
Test.specify "should handle too long and too short rows" <|
|
Test.specify "should handle too long and too short rows" <|
|
||||||
action keep_invalid_rows on_problems =
|
action keep_invalid_rows on_problems =
|
||||||
File.read (Enso_Project.data / "varying_rows.csv") (File_Format.Delimited "," headers=True keep_invalid_rows=keep_invalid_rows value_formatter=Nothing) on_problems
|
File.read (Enso_Project.data / "varying_rows.csv") (Delimited "," headers=True keep_invalid_rows=keep_invalid_rows value_formatter=Nothing) on_problems
|
||||||
|
|
||||||
tester_kept table =
|
tester_kept table =
|
||||||
table.columns.map .name . should_equal ['a', 'b', 'c']
|
table.columns.map .name . should_equal ['a', 'b', 'c']
|
||||||
@ -242,7 +243,7 @@ spec =
|
|||||||
|
|
||||||
Test.specify "should aggregate invalid rows over some limit" <|
|
Test.specify "should aggregate invalid rows over some limit" <|
|
||||||
action on_problems =
|
action on_problems =
|
||||||
File.read (Enso_Project.data / "many_invalid_rows.csv") (File_Format.Delimited "," headers=True keep_invalid_rows=False value_formatter=Nothing) on_problems
|
File.read (Enso_Project.data / "many_invalid_rows.csv") (Delimited "," headers=True keep_invalid_rows=False value_formatter=Nothing) on_problems
|
||||||
|
|
||||||
tester table =
|
tester table =
|
||||||
table.columns.map .name . should_equal ['a', 'b', 'c']
|
table.columns.map .name . should_equal ['a', 'b', 'c']
|
||||||
@ -253,45 +254,45 @@ spec =
|
|||||||
Problems.test_problem_handling action problems tester
|
Problems.test_problem_handling action problems tester
|
||||||
|
|
||||||
Test.specify "should allow to skip rows" <|
|
Test.specify "should allow to skip rows" <|
|
||||||
t1 = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=False skip_rows=3 value_formatter=Nothing)
|
t1 = File.read (Enso_Project.data / "simple_empty.csv") (Delimited "," headers=False skip_rows=3 value_formatter=Nothing)
|
||||||
t1.at "Column_1" . to_vector . should_equal ['7', '10']
|
t1.at "Column_1" . to_vector . should_equal ['7', '10']
|
||||||
|
|
||||||
t2 = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=True skip_rows=3 value_formatter=Nothing)
|
t2 = File.read (Enso_Project.data / "simple_empty.csv") (Delimited "," headers=True skip_rows=3 value_formatter=Nothing)
|
||||||
t2.columns.map .name . should_equal ['7', '8', '9']
|
t2.columns.map .name . should_equal ['7', '8', '9']
|
||||||
t2.at "7" . to_vector . should_equal ['10']
|
t2.at "7" . to_vector . should_equal ['10']
|
||||||
|
|
||||||
Test.specify "should allow to set a limit of rows to read" <|
|
Test.specify "should allow to set a limit of rows to read" <|
|
||||||
t1 = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=False row_limit=2 value_formatter=Nothing)
|
t1 = File.read (Enso_Project.data / "simple_empty.csv") (Delimited "," headers=False row_limit=2 value_formatter=Nothing)
|
||||||
t1.at "Column_1" . to_vector . should_equal ['a', '1']
|
t1.at "Column_1" . to_vector . should_equal ['a', '1']
|
||||||
|
|
||||||
t2 = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=True row_limit=2 value_formatter=Nothing)
|
t2 = File.read (Enso_Project.data / "simple_empty.csv") (Delimited "," headers=True row_limit=2 value_formatter=Nothing)
|
||||||
t2.at "a" . to_vector . should_equal ['1', '4']
|
t2.at "a" . to_vector . should_equal ['1', '4']
|
||||||
|
|
||||||
t3 = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=False skip_rows=3 row_limit=1 value_formatter=Nothing)
|
t3 = File.read (Enso_Project.data / "simple_empty.csv") (Delimited "," headers=False skip_rows=3 row_limit=1 value_formatter=Nothing)
|
||||||
t3.at "Column_1" . to_vector . should_equal ['7']
|
t3.at "Column_1" . to_vector . should_equal ['7']
|
||||||
|
|
||||||
t4 = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=False row_limit=0 value_formatter=Nothing)
|
t4 = File.read (Enso_Project.data / "simple_empty.csv") (Delimited "," headers=False row_limit=0 value_formatter=Nothing)
|
||||||
t4.columns.map .name . should_equal ['Column_1', 'Column_2', 'Column_3']
|
t4.columns.map .name . should_equal ['Column_1', 'Column_2', 'Column_3']
|
||||||
t4.row_count . should_equal 0
|
t4.row_count . should_equal 0
|
||||||
|
|
||||||
t5 = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=True row_limit=0 value_formatter=Nothing)
|
t5 = File.read (Enso_Project.data / "simple_empty.csv") (Delimited "," headers=True row_limit=0 value_formatter=Nothing)
|
||||||
t5.columns.map .name . should_equal ['a', 'b', 'c']
|
t5.columns.map .name . should_equal ['a', 'b', 'c']
|
||||||
t5.at 'a' . to_vector . should_equal []
|
t5.at 'a' . to_vector . should_equal []
|
||||||
t5.row_count . should_equal 0
|
t5.row_count . should_equal 0
|
||||||
|
|
||||||
t6 = File.read (Enso_Project.data / "simple_empty.csv") (File_Format.Delimited "," headers=False skip_rows=3 row_limit=1000 value_formatter=Nothing)
|
t6 = File.read (Enso_Project.data / "simple_empty.csv") (Delimited "," headers=False skip_rows=3 row_limit=1000 value_formatter=Nothing)
|
||||||
t6.at "Column_1" . to_vector . should_equal ['7', '10']
|
t6.at "Column_1" . to_vector . should_equal ['7', '10']
|
||||||
|
|
||||||
Test.specify "should check arguments" <|
|
Test.specify "should check arguments" <|
|
||||||
path = (Enso_Project.data / "simple_empty.csv")
|
path = (Enso_Project.data / "simple_empty.csv")
|
||||||
pb = Problem_Behavior.Report_Error
|
pb = Problem_Behavior.Report_Error
|
||||||
path.read (File_Format.Delimited "," headers=False quote='abc') pb . should_fail_with Illegal_Argument_Error
|
path.read (Delimited "," headers=False quote='abc') pb . should_fail_with Illegal_Argument_Error
|
||||||
path.read (File_Format.Delimited "," headers=False quote='🚧') pb . should_fail_with Illegal_Argument_Error
|
path.read (Delimited "," headers=False quote='🚧') pb . should_fail_with Illegal_Argument_Error
|
||||||
path.read (File_Format.Delimited "," headers=False quote_escape='//') pb . should_fail_with Illegal_Argument_Error
|
path.read (Delimited "," headers=False quote_escape='//') pb . should_fail_with Illegal_Argument_Error
|
||||||
path.read (File_Format.Delimited 'a\u{301}' headers=False) pb . should_fail_with Illegal_Argument_Error
|
path.read (Delimited 'a\u{301}' headers=False) pb . should_fail_with Illegal_Argument_Error
|
||||||
|
|
||||||
Test.specify "should correctly guess column types" <|
|
Test.specify "should correctly guess column types" <|
|
||||||
t = (Enso_Project.data / "data_small.csv") . read (File_Format.Delimited "," headers=True)
|
t = (Enso_Project.data / "data_small.csv") . read (Delimited "," headers=True)
|
||||||
t.at "Code" . to_vector . should_equal ["gxl", "wca", "nfw", "der"]
|
t.at "Code" . to_vector . should_equal ["gxl", "wca", "nfw", "der"]
|
||||||
t.at "Index" . to_vector . should_equal [7, 0, 1, 7]
|
t.at "Index" . to_vector . should_equal [7, 0, 1, 7]
|
||||||
t.at "Flag" . to_vector . should_equal [True, False, True, True]
|
t.at "Flag" . to_vector . should_equal [True, False, True, True]
|
||||||
@ -303,7 +304,7 @@ spec =
|
|||||||
t.at "QuotedNumbers" . to_vector . should_equal ["1", "2", Nothing, "34"]
|
t.at "QuotedNumbers" . to_vector . should_equal ["1", "2", Nothing, "34"]
|
||||||
t.at "Mixed Types" . to_vector . should_equal ["33", Nothing, "45", "True"]
|
t.at "Mixed Types" . to_vector . should_equal ["33", Nothing, "45", "True"]
|
||||||
|
|
||||||
t2 = (Enso_Project.data / "data_small.csv") . read (File_Format.Delimited "," headers=True value_formatter=(Data_Formatter allow_leading_zeros=True))
|
t2 = (Enso_Project.data / "data_small.csv") . read (Delimited "," headers=True value_formatter=(Data_Formatter allow_leading_zeros=True))
|
||||||
t2.at "Leading0s" . to_vector . should_equal [1, 2, 123, Nothing]
|
t2.at "Leading0s" . to_vector . should_equal [1, 2, 123, Nothing]
|
||||||
|
|
||||||
Test.specify "should be able to detect types automatically" <|
|
Test.specify "should be able to detect types automatically" <|
|
||||||
@ -322,7 +323,7 @@ spec =
|
|||||||
a,b,c
|
a,b,c
|
||||||
1,2,3
|
1,2,3
|
||||||
4,5,6
|
4,5,6
|
||||||
t1 = Table.Table.from text1 (format = File_Format.Delimited ",")
|
t1 = Table.Table.from text1 (format = Delimited ",")
|
||||||
t1.columns.map .name . should_equal ["a", "b", "c"]
|
t1.columns.map .name . should_equal ["a", "b", "c"]
|
||||||
t1.at "a" . to_vector . should_equal [1, 4]
|
t1.at "a" . to_vector . should_equal [1, 4]
|
||||||
t1.at "b" . to_vector . should_equal [2, 5]
|
t1.at "b" . to_vector . should_equal [2, 5]
|
||||||
@ -334,4 +335,24 @@ spec =
|
|||||||
t2.at "a" . to_vector . should_equal [1, 3]
|
t2.at "a" . to_vector . should_equal [1, 3]
|
||||||
t2.at "b" . to_vector . should_equal [2, 4]
|
t2.at "b" . to_vector . should_equal [2, 4]
|
||||||
|
|
||||||
|
Test.specify "should allow to build the Delimited configuration using builders" <|
|
||||||
|
Delimited "," . clone . should_equal (Delimited ",")
|
||||||
|
Delimited "," encoding=Encoding.ascii skip_rows=123 row_limit=100 headers=False value_formatter=Nothing . clone . should_equal (Delimited "," headers=False value_formatter=Nothing skip_rows=123 row_limit=100 encoding=Encoding.ascii)
|
||||||
|
Delimited "," . clone quote="'" quote_escape='\\' quote_style=Quote_Style.Always headers=False value_formatter=Nothing . should_equal (Delimited "," headers=False value_formatter=Nothing quote="'" quote_escape='\\' quote_style=Quote_Style.Always)
|
||||||
|
|
||||||
|
Delimited '\t' . with_quotes "|" . should_equal (Delimited '\t' quote='|' quote_escape='|')
|
||||||
|
Delimited '\t' quote_style=Quote_Style.Always . with_quotes "-" '\\' . should_equal (Delimited '\t' quote='-' quote_escape='\\' quote_style=Quote_Style.Always)
|
||||||
|
Delimited '\t' quote_style=Quote_Style.Always . with_quotes "-" '\\' Quote_Style.Never . should_equal (Delimited '\t' quote='-' quote_escape='\\' quote_style=Quote_Style.Never)
|
||||||
|
|
||||||
|
Delimited ',' . with_headers . should_equal (Delimited ',' headers=True)
|
||||||
|
Delimited ',' . without_headers . should_equal (Delimited ',' headers=False)
|
||||||
|
Delimited "," skip_rows=123 headers=False value_formatter=Nothing quote_style=Quote_Style.Never . with_headers . should_equal (Delimited "," skip_rows=123 value_formatter=Nothing quote_style=Quote_Style.Never headers=True)
|
||||||
|
Delimited "," skip_rows=123 headers=True value_formatter=Nothing quote_style=Quote_Style.Never . without_headers . should_equal (Delimited "," skip_rows=123 value_formatter=Nothing quote_style=Quote_Style.Never headers=False)
|
||||||
|
|
||||||
|
Delimited ',' . with_parsing . should_equal (Delimited ',')
|
||||||
|
Delimited ',' . without_parsing . should_equal (Delimited ',' value_formatter=Nothing)
|
||||||
|
custom_formatter = Data_Formatter true_values=["A", "B", "C"] false_values=["D", "E", "F"]
|
||||||
|
Delimited ',' . with_parsing custom_formatter . should_equal (Delimited ',' value_formatter=custom_formatter)
|
||||||
|
Delimited ',' row_limit=456 . without_parsing . should_equal (Delimited ',' value_formatter=Nothing row_limit=456)
|
||||||
|
|
||||||
main = Test.Suite.run_main here.spec
|
main = Test.Suite.run_main here.spec
|
||||||
|
@ -2,6 +2,7 @@ from Standard.Base import all
|
|||||||
from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding, Encoding_Error
|
from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding, Encoding_Error
|
||||||
import Standard.Table.Io.File_Read
|
import Standard.Table.Io.File_Read
|
||||||
import Standard.Table.Io.File_Format
|
import Standard.Table.Io.File_Format
|
||||||
|
from Standard.Table.Error import Unsupported_File_Type
|
||||||
|
|
||||||
import Standard.Test
|
import Standard.Test
|
||||||
import Standard.Test.Problems
|
import Standard.Test.Problems
|
||||||
@ -13,18 +14,21 @@ spec =
|
|||||||
|
|
||||||
Test.group "File_Format.Auto materialise" <|
|
Test.group "File_Format.Auto materialise" <|
|
||||||
Test.specify "should be Bytes for unknown file" <|
|
Test.specify "should be Bytes for unknown file" <|
|
||||||
File_Format.Auto . materialise sample_xxx . should_be_a File_Format.Bytes
|
File_Format.Auto . materialise sample_xxx . should_fail_with Unsupported_File_Type
|
||||||
|
|
||||||
Test.specify "should be Text for text file" <|
|
Test.specify "should be Text for text file" <|
|
||||||
File_Format.Auto . materialise sample_txt . should_be_a File_Format.Text
|
File_Format.Auto . materialise sample_txt . should_be_a File_Format.Plain_Text
|
||||||
|
|
||||||
Test.specify "should be Text for log file" <|
|
Test.specify "should be Text for log file" <|
|
||||||
File_Format.Auto . materialise windows_log . should_be_a File_Format.Text
|
File_Format.Auto . materialise windows_log . should_be_a File_Format.Plain_Text
|
||||||
|
|
||||||
|
Test.specify "should detect CSV files" <|
|
||||||
|
File_Format.Auto . materialise (Enso_Project.data / "data.csv") . should_equal (File_Format.Delimited ",")
|
||||||
|
|
||||||
Test.group "File_Format.Auto" <|
|
Test.group "File_Format.Auto" <|
|
||||||
Test.specify "should be able to read an unknown file" <|
|
Test.specify "should raise an error when reading an unknown file" <|
|
||||||
bytes = sample_xxx.read
|
bytes = sample_xxx.read
|
||||||
bytes.should_equal [72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33]
|
bytes.should_fail_with Unsupported_File_Type
|
||||||
|
|
||||||
Test.specify "should be able to read a text file" <|
|
Test.specify "should be able to read a text file" <|
|
||||||
content = sample_txt.read
|
content = sample_txt.read
|
||||||
@ -44,17 +48,17 @@ spec =
|
|||||||
bytes = File.read path File_Format.Bytes
|
bytes = File.read path File_Format.Bytes
|
||||||
bytes.should_equal [72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33]
|
bytes.should_equal [72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33]
|
||||||
|
|
||||||
Test.group "File_Format.Text" <|
|
Test.group "File_Format.Plain_Text" <|
|
||||||
Test.specify "should be able to read a file as Text" <|
|
Test.specify "should be able to read a file as Text" <|
|
||||||
text = sample_xxx.read File_Format.Text
|
text = sample_xxx.read File_Format.Plain_Text
|
||||||
text.should_equal "Hello World!"
|
text.should_equal "Hello World!"
|
||||||
|
|
||||||
Test.specify "should be able to read a file as Text with Encoding" <|
|
Test.specify "should be able to read a file as Text with Encoding" <|
|
||||||
text = windows_log.read (File_Format.Text Encoding.windows_1252)
|
text = windows_log.read (File_Format.Plain_Text Encoding.windows_1252)
|
||||||
text.should_equal "Hello World! $¢¤¥"
|
text.should_equal "Hello World! $¢¤¥"
|
||||||
|
|
||||||
Test.specify "should raise a warning when invalid encoding in a Text file" <|
|
Test.specify "should raise a warning when invalid encoding in a Text file" <|
|
||||||
action = windows_log.read (File_Format.Text Encoding.ascii) on_problems=_
|
action = windows_log.read (File_Format.Plain_Text Encoding.ascii) on_problems=_
|
||||||
tester result = result . should_equal 'Hello World! $\uFFFD\uFFFD\uFFFD'
|
tester result = result . should_equal 'Hello World! $\uFFFD\uFFFD\uFFFD'
|
||||||
problems = [Encoding_Error "Encoding issues at 14, 15, 16."]
|
problems = [Encoding_Error "Encoding issues at 14, 15, 16."]
|
||||||
Problems.test_problem_handling action problems tester
|
Problems.test_problem_handling action problems tester
|
||||||
|
@ -71,3 +71,8 @@ spec = Test.group "Locale" <|
|
|||||||
Test.specify "should convert to Json" <|
|
Test.specify "should convert to Json" <|
|
||||||
en_gb.to_json.should_equal <|
|
en_gb.to_json.should_equal <|
|
||||||
Json.from_pairs [["type", "Locale"], ["language", "en"], ["country", "GB"]]
|
Json.from_pairs [["type", "Locale"], ["language", "en"], ["country", "GB"]]
|
||||||
|
Test.specify "should allow equality comparisons" <|
|
||||||
|
Locale.uk . should_equal Locale.uk
|
||||||
|
Locale.uk . should_not_equal Locale.us
|
||||||
|
|
||||||
|
main = Test.Suite.run_main here.spec
|
||||||
|
Loading…
Reference in New Issue
Block a user