mirror of
https://github.com/enso-org/enso.git
synced 2024-11-27 05:15:42 +03:00
Add appending support for Delimited files (#3573)
Implements https://www.pivotaltracker.com/story/show/182309839
This commit is contained in:
parent
b59a496589
commit
df10e4ba7c
@ -151,6 +151,7 @@
|
||||
- [Added append support for `File_Format.Excel`.][3558]
|
||||
- [Added support for custom encodings in `File_Format.Delimited` writing.][3564]
|
||||
- [Allow filtering caught error type in `Error.catch`.][3574]
|
||||
- [Implemented `Append` mode for `File_Format.Delimited`.][3573]
|
||||
|
||||
[debug-shortcuts]:
|
||||
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
||||
@ -241,6 +242,7 @@
|
||||
[3558]: https://github.com/enso-org/enso/pull/3558
|
||||
[3564]: https://github.com/enso-org/enso/pull/3564
|
||||
[3574]: https://github.com/enso-org/enso/pull/3574
|
||||
[3573]: https://github.com/enso-org/enso/pull/3573
|
||||
|
||||
#### Enso Compiler
|
||||
|
||||
|
@ -20,7 +20,7 @@ from Standard.Table.Data.Column_Type_Selection as Column_Type_Selection_Module i
|
||||
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
|
||||
from Standard.Base.Data.Text.Text_Ordering as Text_Ordering_Module import Text_Ordering
|
||||
from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior, Report_Warning
|
||||
from Standard.Table.Error as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, Duplicate_Type_Selector
|
||||
from Standard.Table.Errors as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, Duplicate_Type_Selector
|
||||
import Standard.Table.Data.Match_Columns
|
||||
|
||||
import Standard.Table.Data.Column_Name_Mapping
|
||||
|
@ -1,5 +1,8 @@
|
||||
from Standard.Base import all
|
||||
|
||||
polyglot java import org.enso.table.error.ColumnCountMismatchException
|
||||
polyglot java import org.enso.table.error.ColumnNameMismatchException
|
||||
|
||||
## One or more columns not found in the input table.
|
||||
Can occur when using By_Name or By_Column.
|
||||
type Missing_Input_Columns (criteria : [Text])
|
||||
@ -196,9 +199,23 @@ Column_Count_Mismatch.to_display_text : Text
|
||||
Column_Count_Mismatch.to_display_text =
|
||||
"Expected " + self.expected.to_text + " columns, got " + self.actual.to_text + "."
|
||||
|
||||
## PRIVATE
|
||||
Wraps an action so that a Java `ColumnCountMismatchException` panic raised
|
||||
while running it is caught and rethrown via `Error.throw` as a
|
||||
`Column_Count_Mismatch` carrying the expected and actual column counts
|
||||
reported by the exception.
|
||||
|
||||
It is meant to be applied to the action, e.g.
|
||||
`Column_Count_Mismatch.handle_java_exception <| action`.
|
||||
Column_Count_Mismatch.handle_java_exception =
|
||||
throw_column_count_mismatch caught_panic =
|
||||
cause = caught_panic.payload.cause
|
||||
Error.throw (Column_Count_Mismatch cause.getExpected cause.getActual)
|
||||
Panic.catch ColumnCountMismatchException handler=throw_column_count_mismatch
|
||||
|
||||
## Indicates that the existing table has a different set of column names to the
|
||||
new table.
|
||||
type Column_Name_Mismatch expected actual message
|
||||
type Column_Name_Mismatch missing extras message
|
||||
|
||||
Column_Name_Mismatch.to_display_text : Text
|
||||
Column_Name_Mismatch.to_display_text = self.message
|
||||
|
||||
## PRIVATE
|
||||
Wraps an action so that a Java `ColumnNameMismatchException` panic raised
|
||||
while running it is caught and rethrown via `Error.throw` as a
|
||||
`Column_Name_Mismatch` holding the missing and extra column names
|
||||
(converted to vectors) together with the exception's message.
|
||||
|
||||
It is meant to be applied to the action, e.g.
|
||||
`Column_Name_Mismatch.handle_java_exception <| action`.
|
||||
Column_Name_Mismatch.handle_java_exception =
|
||||
throw_column_name_mismatch caught_panic =
|
||||
cause = caught_panic.payload.cause
|
||||
Error.throw (Column_Name_Mismatch (Vector.Vector cause.getMissing) (Vector.Vector cause.getExtras) cause.getMessage)
|
||||
Panic.catch ColumnNameMismatchException handler=throw_column_name_mismatch
|
@ -12,7 +12,7 @@ import Standard.Table.Data.Sort_Column
|
||||
|
||||
import Standard.Base.Data.Ordering.Comparator
|
||||
|
||||
from Standard.Table.Error as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Aggregation, Floating_Point_Grouping, Unquoted_Delimiter, Additional_Warnings
|
||||
from Standard.Table.Errors as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Aggregation, Floating_Point_Grouping, Unquoted_Delimiter, Additional_Warnings
|
||||
|
||||
polyglot java import org.enso.table.aggregations.Aggregator
|
||||
polyglot java import org.enso.table.aggregations.Concatenate as ConcatenateAggregator
|
||||
|
@ -2,8 +2,8 @@ from Standard.Base import all
|
||||
import Standard.Table
|
||||
|
||||
import Standard.Base.Error.Common as Errors
|
||||
from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior
|
||||
from Standard.Table.Error as Table_Errors import Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Row, Mismatched_Quote, Parser_Error, Additional_Invalid_Rows
|
||||
from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior, Ignore
|
||||
from Standard.Table.Errors as Table_Errors import Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Row, Mismatched_Quote, Parser_Error, Additional_Invalid_Rows
|
||||
from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding, Encoding_Error
|
||||
from Standard.Table.Io.File_Format import Infer
|
||||
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
|
||||
@ -49,7 +49,7 @@ read_file format file on_problems =
|
||||
read_text : Text -> Delimited -> Problem_Behavior -> Table
|
||||
read_text text format on_problems =
|
||||
java_reader = StringReader.new text
|
||||
Delimited_Reader.read_from_reader format java_reader on_problems
|
||||
read_from_reader format java_reader on_problems
|
||||
|
||||
## PRIVATE
|
||||
Reads an input stream according to the provided format.
|
||||
@ -67,11 +67,8 @@ read_text text format on_problems =
|
||||
- related_file: The file related to the provided `java_stream`, if available,
|
||||
or `Nothing`. It is used for more detailed error reporting.
|
||||
read_stream : Delimited -> Input_Stream -> Problem_Behavior -> Integer -> File | Nothing -> Any
|
||||
read_stream format stream on_problems max_columns=4096 related_file=Nothing =
|
||||
handle_io_exception ~action = Panic.catch_java IOException action java_exception->
|
||||
Error.throw (File.wrap_io_exception related_file java_exception)
|
||||
|
||||
handle_io_exception <|
|
||||
read_stream format stream on_problems max_columns=default_max_columns related_file=Nothing =
|
||||
handle_io_exception related_file <|
|
||||
stream.with_stream_decoder format.encoding on_problems reporting_stream_decoder->
|
||||
read_from_reader format reporting_stream_decoder on_problems max_columns
|
||||
|
||||
@ -93,6 +90,14 @@ read_stream format stream on_problems max_columns=4096 related_file=Nothing =
|
||||
integer.
|
||||
read_from_reader : Delimited -> Reader -> Problem_Behavior -> Integer -> Any
|
||||
read_from_reader format java_reader on_problems max_columns=4096 =
|
||||
handle_illegal_arguments <| handle_parsing_failure <| handle_parsing_exception <|
|
||||
reader = prepare_delimited_reader java_reader format max_columns on_problems
|
||||
result_with_problems = reader.read
|
||||
parsing_problems = Vector.Vector (result_with_problems.problems) . map translate_reader_problem
|
||||
on_problems.attach_problems_after (Table.Table result_with_problems.value) parsing_problems
|
||||
|
||||
## PRIVATE
|
||||
prepare_delimited_reader java_reader format max_columns on_problems =
|
||||
java_headers = case format.headers of
|
||||
True -> DelimitedReader.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS
|
||||
Infer -> DelimitedReader.HeaderBehavior.INFER
|
||||
@ -101,40 +106,21 @@ read_from_reader format java_reader on_problems max_columns=4096 =
|
||||
Nothing -> -1
|
||||
Integer -> format.row_limit
|
||||
_ -> Error.throw (Illegal_Argument_Error "`row_limit` should be Integer or Nothing.")
|
||||
|
||||
translate_illegal_argument caught_panic =
|
||||
Error.throw (Illegal_Argument_Error caught_panic.payload.cause.getMessage)
|
||||
handle_illegal_arguments = Panic.catch IllegalArgumentException handler=translate_illegal_argument
|
||||
|
||||
translate_parsing_failure caught_panic =
|
||||
Error.throw (translate_reader_problem caught_panic.payload.cause.problem)
|
||||
handle_parsing_failure = Panic.catch ParsingFailedException handler=translate_parsing_failure
|
||||
|
||||
translate_parsing_exception caught_panic =
|
||||
cause = caught_panic.payload.cause.getCause
|
||||
if Java.is_instance cause IOException then Panic.throw cause else
|
||||
Error.throw (Parser_Error caught_panic.payload)
|
||||
handle_parsing_exception = Panic.catch TextParsingException handler=translate_parsing_exception
|
||||
|
||||
handle_illegal_arguments <| handle_parsing_failure <| handle_parsing_exception <|
|
||||
warnings_as_errors = on_problems == Problem_Behavior_Module.Report_Error
|
||||
quote_characters = case format.quote_style of
|
||||
Quote_Style.No_Quotes -> Pair Nothing Nothing
|
||||
Quote_Style.With_Quotes _ quote quote_escape -> Pair quote quote_escape
|
||||
base_parser = case format.quote_style of
|
||||
Quote_Style.No_Quotes -> IdentityParser.new
|
||||
Quote_Style.With_Quotes _ quote _ ->
|
||||
QuoteStrippingParser.new quote
|
||||
value_parser = if format.value_formatter.is_nothing then base_parser else
|
||||
wrapped = format.value_formatter.wrap_base_parser base_parser
|
||||
TypeInferringParser.new format.value_formatter.get_specific_type_parsers.to_array wrapped
|
||||
cell_type_guesser = if format.headers != Infer then Nothing else
|
||||
formatter = format.value_formatter.if_nothing Data_Formatter
|
||||
TypeInferringParser.new formatter.get_specific_type_parsers.to_array IdentityParser.new
|
||||
reader = DelimitedReader.new java_reader format.delimiter quote_characters.first quote_characters.second java_headers format.skip_rows row_limit max_columns value_parser cell_type_guesser format.keep_invalid_rows warnings_as_errors
|
||||
result_with_problems = reader.read
|
||||
parsing_problems = Vector.Vector (result_with_problems.problems) . map translate_reader_problem
|
||||
on_problems.attach_problems_after (Table.Table result_with_problems.value) parsing_problems
|
||||
warnings_as_errors = on_problems == Problem_Behavior_Module.Report_Error
|
||||
quote_characters = case format.quote_style of
|
||||
Quote_Style.No_Quotes -> Pair Nothing Nothing
|
||||
Quote_Style.With_Quotes _ quote quote_escape -> Pair quote quote_escape
|
||||
base_parser = case format.quote_style of
|
||||
Quote_Style.No_Quotes -> IdentityParser.new
|
||||
Quote_Style.With_Quotes _ quote _ ->
|
||||
QuoteStrippingParser.new quote
|
||||
value_parser = if format.value_formatter.is_nothing then base_parser else
|
||||
wrapped = format.value_formatter.wrap_base_parser base_parser
|
||||
TypeInferringParser.new format.value_formatter.get_specific_type_parsers.to_array wrapped
|
||||
cell_type_guesser = if format.headers != Infer then Nothing else
|
||||
formatter = format.value_formatter.if_nothing Data_Formatter
|
||||
TypeInferringParser.new formatter.get_specific_type_parsers.to_array IdentityParser.new
|
||||
DelimitedReader.new java_reader format.delimiter quote_characters.first quote_characters.second java_headers format.skip_rows row_limit max_columns value_parser cell_type_guesser format.keep_invalid_rows warnings_as_errors
|
||||
|
||||
translate_reader_problem problem =
|
||||
invalid_row = [InvalidRow, (java_problem-> Invalid_Row java_problem.source_row java_problem.table_index (Vector.Vector java_problem.row))]
|
||||
@ -147,3 +133,65 @@ translate_reader_problem problem =
|
||||
found = translations.find t-> Java.is_instance problem t.first
|
||||
if found.is_error then problem else
|
||||
found.second problem
|
||||
|
||||
## PRIVATE
|
||||
An internal type representing columns deduced from an existing file.
|
||||
type Detected_Headers
|
||||
## Indicates that the file did not exist or was empty.
|
||||
Nothing
|
||||
|
||||
## Represents the headers found in the file.
|
||||
type Existing_Headers (column_names : Vector Text)
|
||||
|
||||
## Indicates that the file exists but no headers have been found, so only positional column matching is possible.
|
||||
type No_Headers (column_count : Integer)
|
||||
|
||||
## PRIVATE
|
||||
Reads the beginning of the file to detect the existing headers and column
|
||||
count.
|
||||
detect_headers : File -> File_Format.Delimited -> Detected_Headers
|
||||
detect_headers file format =
|
||||
on_problems = Ignore
|
||||
result = handle_io_exception file <| handle_illegal_arguments <| handle_parsing_failure <| handle_parsing_exception <|
|
||||
file.with_input_stream [File.Option.Read] stream->
|
||||
stream.with_stream_decoder format.encoding on_problems java_reader->
|
||||
## We use the default `max_columns` setting. If we want to be able to
|
||||
read files with an unlimited number of columns (risking OutOfMemory
|
||||
exceptions), we can catch the exception indicating the limit has been
|
||||
reached and restart parsing with an increased limit.
|
||||
reader = prepare_delimited_reader java_reader format max_columns=default_max_columns on_problems
|
||||
defined_columns = reader.getDefinedColumnNames
|
||||
case defined_columns of
|
||||
Nothing ->
|
||||
column_count = reader.getColumnCount
|
||||
if column_count == 0 then Nothing else
|
||||
No_Headers column_count
|
||||
_ -> Existing_Headers (Vector.Vector defined_columns)
|
||||
result.catch File.File_Not_Found (_->Nothing)
|
||||
|
||||
## PRIVATE
|
||||
Wraps an action so that a Java `IllegalArgumentException` panic raised while
|
||||
running it is caught and rethrown via `Error.throw` as an
|
||||
`Illegal_Argument_Error` carrying the exception's message.
|
||||
handle_illegal_arguments =
|
||||
translate_illegal_argument caught_panic =
|
||||
Error.throw (Illegal_Argument_Error caught_panic.payload.cause.getMessage)
|
||||
Panic.catch IllegalArgumentException handler=translate_illegal_argument
|
||||
|
||||
## PRIVATE
|
||||
Wraps an action so that a `ParsingFailedException` panic raised while running
|
||||
it is caught; the problem carried by the exception is translated with
|
||||
`translate_reader_problem` and rethrown via `Error.throw`.
|
||||
handle_parsing_failure =
|
||||
translate_parsing_failure caught_panic =
|
||||
Error.throw (translate_reader_problem caught_panic.payload.cause.problem)
|
||||
Panic.catch ParsingFailedException handler=translate_parsing_failure
|
||||
|
||||
## PRIVATE
|
||||
Wraps an action so that a `TextParsingException` panic raised while running
|
||||
it is caught.
|
||||
|
||||
If the underlying cause of the exception is an `IOException`, that exception
|
||||
is re-thrown as a panic, so that an enclosing handler (such as
|
||||
`handle_io_exception`) can process it. Otherwise a `Parser_Error` wrapping
|
||||
the caught payload is thrown via `Error.throw`.
|
||||
handle_parsing_exception =
|
||||
translate_parsing_exception caught_panic =
|
||||
cause = caught_panic.payload.cause.getCause
|
||||
if Java.is_instance cause IOException then Panic.throw cause else
|
||||
Error.throw (Parser_Error caught_panic.payload)
|
||||
Panic.catch TextParsingException handler=translate_parsing_exception
|
||||
|
||||
## PRIVATE
|
||||
Runs the provided action, catching any Java `IOException` and rethrowing it
|
||||
via `Error.throw` after wrapping it with `File.wrap_io_exception`.
|
||||
|
||||
Arguments:
|
||||
- related_file: The file handed to `File.wrap_io_exception` together with
|
||||
the caught exception; callers pass `Nothing` when no file is available.
|
||||
- action: The suspended computation to run.
|
||||
handle_io_exception related_file ~action = Panic.catch_java IOException action java_exception->
|
||||
Error.throw (File.wrap_io_exception related_file java_exception)
|
||||
|
||||
## PRIVATE
|
||||
The default limit on the number of columns; it is the default value of
|
||||
`max_columns` in `read_stream` and the limit used by `detect_headers`.
|
||||
default_max_columns = 4096
|
||||
|
@ -4,19 +4,22 @@ import Standard.Table
|
||||
import Standard.Base.Error.Common as Errors
|
||||
from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior
|
||||
import Standard.Base.System.File.Existing_File_Behavior
|
||||
from Standard.Table.Error as Table_Errors import Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Row, Mismatched_Quote, Parser_Error, Additional_Invalid_Rows
|
||||
from Standard.Table.Errors as Table_Errors import Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Row, Mismatched_Quote, Parser_Error, Additional_Invalid_Rows, Column_Count_Mismatch, Column_Name_Mismatch
|
||||
from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding, Encoding_Error
|
||||
from Standard.Table.Io.File_Format import Infer
|
||||
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
|
||||
import Standard.Table.Data.Storage
|
||||
import Standard.Table.Io.Quote_Style
|
||||
from Standard.Table.Internal.Delimited_Reader import Existing_Headers, No_Headers
|
||||
import Standard.Table.Data.Match_Columns
|
||||
|
||||
polyglot java import org.enso.table.write.DelimitedWriter
|
||||
polyglot java import org.enso.table.write.WriteQuoteBehavior
|
||||
polyglot java import java.io.PrintWriter
|
||||
polyglot java import java.io.IOException
|
||||
polyglot java import org.enso.table.formatting.TextFormatter
|
||||
polyglot java import org.enso.table.util.ColumnMapper
|
||||
polyglot java import java.io.PrintWriter
|
||||
polyglot java import java.io.StringWriter
|
||||
polyglot java import java.io.IOException
|
||||
|
||||
## Writes a delimited file according to the provided format.
|
||||
|
||||
@ -29,15 +32,45 @@ polyglot java import java.io.StringWriter
|
||||
operation. By default, a warning is issued, but the operation proceeds.
|
||||
If set to `Report_Error`, the operation fails with a dataflow error.
|
||||
If set to `Ignore`, the operation proceeds without errors or warnings.
|
||||
write_file : Table -> File_Format.Delimited -> File -> Existing_File_Behavior -> Problem_Behavior -> Any
|
||||
write_file table format file on_existing_file on_problems =
|
||||
write_file : Table -> File_Format.Delimited -> File -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> Any
|
||||
write_file table format file on_existing_file match_columns on_problems =
|
||||
case on_existing_file of
|
||||
Existing_File_Behavior.Append ->
|
||||
Errors.unimplemented "Appending to an existing File_Format.Delimited file is not implemented yet."
|
||||
append_to_file table format file match_columns on_problems
|
||||
_ ->
|
||||
on_existing_file.write file stream->
|
||||
write_to_stream table format stream on_problems related_file=file
|
||||
|
||||
## PRIVATE
|
||||
Handles appending to an existing file, ensuring that the columns are matched
|
||||
against the ones already in the file.
|
||||
|
||||
If the file does not exist or is empty, it acts like a regular overwrite.
|
||||
append_to_file : Table -> File_Format.Delimited -> File -> Match_Columns -> Problem_Behavior -> Any
|
||||
append_to_file table format file match_columns on_problems =
|
||||
Column_Name_Mismatch.handle_java_exception <| Column_Count_Mismatch.handle_java_exception <|
|
||||
preexisting_headers = Delimited_Reader.detect_headers file format
|
||||
reordered_java_table = case preexisting_headers of
|
||||
Nothing -> table.java_table
|
||||
Existing_Headers column_names -> case match_columns of
|
||||
Match_Columns.By_Name ->
|
||||
ColumnMapper.mapColumnsByName table.java_table column_names.to_array
|
||||
Match_Columns.By_Position ->
|
||||
column_count = column_names.length
|
||||
ColumnMapper.mapColumnsByPosition table.java_table column_count
|
||||
No_Headers column_count -> case match_columns of
|
||||
Match_Columns.By_Name ->
|
||||
Error.throw (Illegal_Argument_Error "Cannot append by name when headers are not present in the existing data.")
|
||||
Match_Columns.By_Position ->
|
||||
ColumnMapper.mapColumnsByPosition table.java_table column_count
|
||||
reordered_table = Table.Table reordered_java_table
|
||||
writing_new_file = preexisting_headers == Nothing
|
||||
amended_format = case writing_new_file && (should_write_headers format.headers) of
|
||||
True -> format.with_headers
|
||||
False -> format.without_headers
|
||||
Existing_File_Behavior.Append.write file stream->
|
||||
write_to_stream reordered_table amended_format stream on_problems related_file=file
|
||||
|
||||
## PRIVATE
|
||||
Returns a Text value representing the table in the delimited format.
|
||||
write_text : Table -> File_Format.Delimited -> Text
|
||||
@ -95,10 +128,13 @@ write_to_writer table format java_writer =
|
||||
quote_characters = case format.quote_style of
|
||||
Quote_Style.No_Quotes -> Pair Nothing Nothing
|
||||
Quote_Style.With_Quotes _ quote quote_escape -> Pair quote quote_escape
|
||||
write_headers = case format.headers of
|
||||
True -> True
|
||||
Infer -> True
|
||||
False -> False
|
||||
write_headers = should_write_headers format.headers
|
||||
new_line = '\n'
|
||||
writer = DelimitedWriter.new java_writer column_formatters.to_array format.delimiter new_line quote_characters.first quote_characters.second quote_behavior write_headers
|
||||
writer.write table.java_table
|
||||
|
||||
## PRIVATE
|
||||
Decides whether a header row should be written for the given `headers`
|
||||
setting: both `True` and `Infer` result in headers being written, while
|
||||
`False` disables them.
|
||||
should_write_headers headers = case headers of
|
||||
True -> True
|
||||
Infer -> True
|
||||
False -> False
|
||||
|
@ -1,6 +1,6 @@
|
||||
from Standard.Base import all
|
||||
|
||||
from Standard.Table.Error as Table_Errors import Invalid_Format, Leading_Zeros
|
||||
from Standard.Table.Errors as Table_Errors import Invalid_Format, Leading_Zeros
|
||||
|
||||
polyglot java import org.enso.table.parsing.problems.InvalidFormat
|
||||
polyglot java import org.enso.table.parsing.problems.LeadingZeros
|
||||
|
@ -4,7 +4,7 @@ from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Prob
|
||||
import Standard.Base.Runtime.Ref
|
||||
import Standard.Table.Internal.Vector_Builder
|
||||
|
||||
from Standard.Table.Error as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Column_Selectors, Input_Indices_Already_Matched, Too_Many_Column_Names_Provided, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Column_Matched_By_Multiple_Selectors
|
||||
from Standard.Table.Errors as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Column_Selectors, Input_Indices_Already_Matched, Too_Many_Column_Names_Provided, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Column_Matched_By_Multiple_Selectors
|
||||
|
||||
type Problem_Builder
|
||||
type Problem_Builder oob_indices duplicate_column_selectors input_indices_already_matched missing_input_columns other
|
||||
|
@ -6,7 +6,7 @@ import Standard.Base.Data.Ordering.Vector_Lexicographic_Order
|
||||
from Standard.Base.Data.Text.Text_Ordering as Text_Ordering_Module import Text_Ordering
|
||||
from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior, Report_Warning
|
||||
import Standard.Table.Data.Position
|
||||
from Standard.Table.Error as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Column_Selectors, Input_Indices_Already_Matched, Too_Many_Column_Names_Provided, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, No_Input_Columns_Selected
|
||||
from Standard.Table.Errors as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Column_Selectors, Input_Indices_Already_Matched, Too_Many_Column_Names_Provided, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, No_Input_Columns_Selected
|
||||
from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column_Selector, By_Name, By_Index, By_Column
|
||||
import Standard.Table.Data.Column_Name_Mapping
|
||||
import Standard.Table.Internal.Unique_Name_Strategy
|
||||
|
@ -5,7 +5,7 @@ import Standard.Base.System.File.Option
|
||||
from Standard.Table.Io.File_Format import Infer
|
||||
|
||||
import Standard.Table.Data.Table
|
||||
from Standard.Table.Error as Error_Module import Invalid_Location, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Range_Exceeded, Existing_Data, Column_Count_Mismatch, Column_Name_Mismatch
|
||||
from Standard.Table.Errors as Error_Module import Invalid_Location, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Range_Exceeded, Existing_Data, Column_Count_Mismatch, Column_Name_Mismatch
|
||||
import Standard.Base.Error.Common as Errors
|
||||
import Standard.Table.Data.Match_Columns
|
||||
|
||||
@ -17,8 +17,6 @@ polyglot java import org.enso.table.write.ExistingDataMode
|
||||
polyglot java import org.enso.table.error.ExistingDataException
|
||||
polyglot java import org.enso.table.error.RangeExceededException
|
||||
polyglot java import org.enso.table.error.InvalidLocationException
|
||||
polyglot java import org.enso.table.error.ColumnCountMismatchException
|
||||
polyglot java import org.enso.table.error.ColumnNameMismatchException
|
||||
|
||||
polyglot java import java.lang.IllegalArgumentException
|
||||
polyglot java import java.lang.IllegalStateException
|
||||
@ -283,16 +281,6 @@ handle_writer ~writer =
|
||||
throw_existing_data caught_panic = Error.throw (Existing_Data caught_panic.payload.cause.getMessage)
|
||||
handle_existing_data = Panic.catch ExistingDataException handler=throw_existing_data
|
||||
|
||||
throw_column_count_mismatch caught_panic =
|
||||
cause = caught_panic.payload.cause
|
||||
Error.throw (Column_Count_Mismatch cause.getExpected cause.getActual)
|
||||
handle_column_count_mismatch = Panic.catch ColumnCountMismatchException handler=throw_column_count_mismatch
|
||||
|
||||
throw_column_name_mismatch caught_panic =
|
||||
cause = caught_panic.payload.cause
|
||||
Error.throw (Column_Name_Mismatch (Vector.Vector cause.getMissing) (Vector.Vector cause.getExtras) cause.getMessage)
|
||||
handle_column_name_mismatch = Panic.catch ColumnNameMismatchException handler=throw_column_name_mismatch
|
||||
|
||||
## Illegal argument can occur if appending in an invalid mode
|
||||
illegal_argument caught_panic = Error.throw (Illegal_Argument_Error caught_panic.payload.cause.getMessage caught_panic.payload.cause)
|
||||
handle_illegal_argument = Panic.catch IllegalArgumentException handler=illegal_argument
|
||||
@ -301,5 +289,7 @@ handle_writer ~writer =
|
||||
throw_illegal_state caught_panic = Panic.throw (Illegal_State_Error caught_panic.payload.cause.getMessage)
|
||||
handle_illegal_state = Panic.catch IllegalStateException handler=throw_illegal_state
|
||||
|
||||
handle_illegal_state <| handle_column_name_mismatch <| handle_column_count_mismatch <| handle_bad_location <|
|
||||
handle_illegal_argument <| handle_range_exceeded <| handle_existing_data <| writer
|
||||
handle_illegal_state <| Column_Name_Mismatch.handle_java_exception <|
|
||||
Column_Count_Mismatch.handle_java_exception <| handle_bad_location <|
|
||||
handle_illegal_argument <| handle_range_exceeded <| handle_existing_data <|
|
||||
writer
|
||||
|
@ -8,7 +8,7 @@ from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding
|
||||
import Standard.Base.Runtime.Ref
|
||||
import Standard.Table.Internal.Delimited_Reader
|
||||
import Standard.Table.Internal.Delimited_Writer
|
||||
from Standard.Table.Error as Table_Errors import Unsupported_File_Type
|
||||
from Standard.Table.Errors as Table_Errors import Unsupported_File_Type
|
||||
|
||||
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
|
||||
import Standard.Table.Io.Excel as Excel_Module
|
||||
@ -118,8 +118,8 @@ type Delimited
|
||||
|
||||
## Implements the `Table.write` for this `File_Format`.
|
||||
write_table : File -> Table -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> Nothing
|
||||
write_table file table on_existing_file _ on_problems =
|
||||
Delimited_Writer.write_file table self file on_existing_file on_problems
|
||||
write_table file table on_existing_file match_columns on_problems =
|
||||
Delimited_Writer.write_file table self file on_existing_file match_columns on_problems
|
||||
|
||||
## PRIVATE
|
||||
Clone the instance with some properties overridden.
|
||||
|
@ -3,19 +3,12 @@ package org.enso.table.read;
|
||||
import com.univocity.parsers.csv.CsvFormat;
|
||||
import com.univocity.parsers.csv.CsvParser;
|
||||
import com.univocity.parsers.csv.CsvParserSettings;
|
||||
import java.io.Reader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import org.enso.table.data.column.builder.string.StringStorageBuilder;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
import org.enso.table.data.column.storage.StringStorage;
|
||||
import org.enso.table.data.index.DefaultIndex;
|
||||
import org.enso.table.data.table.Column;
|
||||
import org.enso.table.data.table.Table;
|
||||
import org.enso.table.problems.WithProblems;
|
||||
import org.enso.table.parsing.DatatypeParser;
|
||||
import org.enso.table.parsing.TypeInferringParser;
|
||||
import org.enso.table.parsing.problems.AdditionalInvalidRows;
|
||||
@ -23,8 +16,13 @@ import org.enso.table.parsing.problems.InvalidRow;
|
||||
import org.enso.table.parsing.problems.MismatchedQuote;
|
||||
import org.enso.table.parsing.problems.NoOpProblemAggregator;
|
||||
import org.enso.table.problems.Problem;
|
||||
import org.enso.table.problems.WithProblems;
|
||||
import org.enso.table.util.NameDeduplicator;
|
||||
|
||||
import java.io.Reader;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/** A helper for reading delimited (CSV-like) files. */
|
||||
public class DelimitedReader {
|
||||
|
||||
@ -203,13 +201,38 @@ public class DelimitedReader {
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads the next row and updates the current line accordingly.
|
||||
* Loads the next row from the CSV file.
|
||||
*
|
||||
* <p>This is an internal function that just loads the row; it neither updates the state nor takes
|
||||
* into consideration pending rows. The regular reading process should use {@code readNextRow}
|
||||
* instead.
|
||||
*/
|
||||
private Row loadNextRow() {
|
||||
long line = parser.getContext().currentLine() + 1;
|
||||
String[] cells = parser.parseNext();
|
||||
if (cells == null) return null;
|
||||
return new Row(line, cells);
|
||||
}
|
||||
|
||||
private record Row(long lineNumber, String[] cells) {}
|
||||
|
||||
private final Queue<Row> pendingRows = new ArrayDeque<>(2);
|
||||
|
||||
/**
|
||||
* Reads the next row and updates the current line accordingly. It takes into consideration the
|
||||
* pending rows that have already been loaded when inferring the headers but were still not
|
||||
* processed.
|
||||
*
|
||||
* <p>Will return {@code null} if no more rows are available.
|
||||
*/
|
||||
private String[] readNextRow() {
|
||||
currentLine = parser.getContext().currentLine() + 1;
|
||||
return parser.parseNext();
|
||||
Row row = pendingRows.isEmpty() ? loadNextRow() : pendingRows.remove();
|
||||
if (row == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
currentLine = row.lineNumber;
|
||||
return row.cells;
|
||||
}
|
||||
|
||||
private void appendRow(String[] row) {
|
||||
@ -280,69 +303,111 @@ public class DelimitedReader {
|
||||
return parsed instanceof String;
|
||||
}
|
||||
|
||||
/** Reads the input stream and returns a Table. */
|
||||
public WithProblems<Table> read() {
|
||||
/** The column names as defined in the input (if applicable, otherwise null). */
|
||||
private String[] definedColumnNames = null;
|
||||
|
||||
/** The effective column names.
|
||||
*
|
||||
* If {@code GENERATE_HEADERS} is used or if {@code INFER} is used and no headers are found, this will be populated with automatically generated column names. */
|
||||
private String[] effectiveColumnNames;
|
||||
|
||||
private List<Problem> headerProblems;
|
||||
|
||||
/** Returns the column names that are defined in the input.
|
||||
*
|
||||
* Will return {@code null} if {@code GENERATE_HEADERS} is used or if {@code INFER} is used and no headers were found inside of the file. */
|
||||
public String[] getDefinedColumnNames() {
|
||||
ensureHeadersDetected();
|
||||
return definedColumnNames;
|
||||
}
|
||||
|
||||
/** Returns the number of effective columns, detecting the headers first if that has not been done yet. Will be 0 if no row was available for header detection. */
|
||||
public int getColumnCount() {
|
||||
ensureHeadersDetected();
|
||||
return effectiveColumnNames.length;
|
||||
}
|
||||
|
||||
/** Runs header detection if it has not been run yet, i.e. while {@code effectiveColumnNames} is still {@code null}. */
|
||||
private void ensureHeadersDetected() {
|
||||
if (effectiveColumnNames == null) {
|
||||
detectHeaders();
|
||||
}
|
||||
}
|
||||
|
||||
private void detectHeaders() {
|
||||
skipFirstRows();
|
||||
Row firstRow = loadNextRow();
|
||||
if (firstRow == null) {
|
||||
effectiveColumnNames = new String[0];
|
||||
headerProblems = Collections.emptyList();
|
||||
return;
|
||||
}
|
||||
|
||||
int expectedColumnCount = firstRow.cells.length;
|
||||
boolean wereHeadersDefined = false;
|
||||
WithProblems<List<String>> headerNames;
|
||||
String[] currentRow = readNextRow();
|
||||
|
||||
// Skip the first N rows.
|
||||
for (long i = 0; currentRow != null && i < skipRows; ++i) {
|
||||
currentRow = readNextRow();
|
||||
}
|
||||
|
||||
// If there are no rows to even infer the headers, we return an empty table.
|
||||
if (currentRow == null) {
|
||||
return new WithProblems<>(new Table(new Column[0]), Collections.emptyList());
|
||||
}
|
||||
|
||||
int expectedColumnCount = currentRow.length;
|
||||
initBuilders(expectedColumnCount);
|
||||
|
||||
switch (headerBehavior) {
|
||||
case INFER -> {
|
||||
String[] firstRow = currentRow;
|
||||
String[] secondRow = readNextRow();
|
||||
Row secondRow = loadNextRow();
|
||||
if (secondRow == null) {
|
||||
// If there is only one row in the file, we generate the headers and stop further processing (as nothing more to process).
|
||||
/** If there is only one row in the file, we generate the headers and
|
||||
* stop further processing (as nothing more to process). */
|
||||
headerNames = generateDefaultHeaders(expectedColumnCount);
|
||||
appendRowIfLimitPermits(firstRow);
|
||||
currentRow = null;
|
||||
pendingRows.add(firstRow);
|
||||
} else {
|
||||
assert cellTypeGuesser != null;
|
||||
boolean firstAllText = Arrays.stream(firstRow).allMatch(this::isPlainText);
|
||||
boolean secondAllText = Arrays.stream(secondRow).allMatch(this ::isPlainText);
|
||||
boolean firstAllText = Arrays.stream(firstRow.cells).allMatch(this::isPlainText);
|
||||
boolean secondAllText = Arrays.stream(secondRow.cells).allMatch(this ::isPlainText);
|
||||
boolean useFirstRowAsHeader = firstAllText && !secondAllText;
|
||||
if (useFirstRowAsHeader) {
|
||||
headerNames = headersFromRow(firstRow);
|
||||
appendRowIfLimitPermits(secondRow);
|
||||
headerNames = headersFromRow(firstRow.cells);
|
||||
wereHeadersDefined = true;
|
||||
pendingRows.add(secondRow);
|
||||
} else {
|
||||
headerNames = generateDefaultHeaders(expectedColumnCount);
|
||||
appendRowIfLimitPermits(firstRow);
|
||||
appendRowIfLimitPermits(secondRow);
|
||||
pendingRows.add(firstRow);
|
||||
pendingRows.add(secondRow);
|
||||
}
|
||||
|
||||
currentRow = readNextRow();
|
||||
}
|
||||
}
|
||||
case USE_FIRST_ROW_AS_HEADERS -> {
|
||||
headerNames = headersFromRow(currentRow);
|
||||
// We have 'used up' the first row, so we load a next one.
|
||||
currentRow = readNextRow();
|
||||
headerNames = headersFromRow(firstRow.cells);
|
||||
wereHeadersDefined = true;
|
||||
}
|
||||
case GENERATE_HEADERS -> {
|
||||
headerNames = generateDefaultHeaders(expectedColumnCount);
|
||||
pendingRows.add(firstRow);
|
||||
}
|
||||
case GENERATE_HEADERS -> headerNames = generateDefaultHeaders(expectedColumnCount);
|
||||
default -> throw new IllegalStateException("Impossible branch.");
|
||||
}
|
||||
|
||||
while (currentRow != null && canFitMoreRows()) {
|
||||
headerProblems = headerNames.problems();
|
||||
effectiveColumnNames = headerNames.value().toArray(new String[0]);
|
||||
if (wereHeadersDefined) {
|
||||
definedColumnNames = effectiveColumnNames;
|
||||
}
|
||||
}
|
||||
|
||||
private void skipFirstRows() {
|
||||
for (long i = 0; i < skipRows; ++i) {
|
||||
loadNextRow();
|
||||
}
|
||||
}
|
||||
|
||||
/** Reads the input stream and returns a Table. */
|
||||
public WithProblems<Table> read() {
|
||||
ensureHeadersDetected();
|
||||
initBuilders(getColumnCount());
|
||||
while (canFitMoreRows()) {
|
||||
var currentRow = readNextRow();
|
||||
if (currentRow == null) break;
|
||||
appendRow(currentRow);
|
||||
currentRow = readNextRow();
|
||||
}
|
||||
|
||||
parser.stopParsing();
|
||||
|
||||
Column[] columns = new Column[builders.length];
|
||||
for (int i = 0; i < builders.length; i++) {
|
||||
String columnName = headerNames.value().get(i);
|
||||
String columnName = effectiveColumnNames[i];
|
||||
StringStorage col = builders[i].seal();
|
||||
|
||||
WithProblems<Storage> parseResult = valueParser.parseColumn(columnName, col);
|
||||
@ -353,7 +418,7 @@ public class DelimitedReader {
|
||||
|
||||
columns[i] = new Column(columnName, new DefaultIndex(storage.size()), storage);
|
||||
}
|
||||
return new WithProblems<>(new Table(columns), getReportedProblems(headerNames.problems()));
|
||||
return new WithProblems<>(new Table(columns), getReportedProblems(headerProblems));
|
||||
}
|
||||
|
||||
private void initBuilders(int count) {
|
||||
|
1
test/Table_Tests/data/.gitignore
vendored
Normal file
1
test/Table_Tests/data/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
*.bak
|
@ -5,7 +5,7 @@ from Standard.Table.Data.Column_Selector import By_Name, By_Index
|
||||
import Standard.Table.Data.Sort_Column
|
||||
import Standard.Table.Data.Sort_Column_Selector
|
||||
from Standard.Table.Data.Aggregate_Column import all
|
||||
from Standard.Table.Error as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Aggregation, Floating_Point_Grouping, Unquoted_Delimiter, Additional_Warnings
|
||||
from Standard.Table.Errors as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Aggregation, Floating_Point_Grouping, Unquoted_Delimiter, Additional_Warnings
|
||||
from Standard.Database.Error as Database_Errors import Unsupported_Database_Operation_Error
|
||||
|
||||
import Standard.Test
|
||||
|
@ -4,7 +4,7 @@ import Standard.Test.Problems
|
||||
|
||||
import Standard.Base.Error.Problem_Behavior
|
||||
import Standard.Table.Data.Column_Name_Mapping
|
||||
from Standard.Table.Error as Table_Errors import all
|
||||
from Standard.Table.Errors as Table_Errors import all
|
||||
from Standard.Table.Data.Column_Selector as Column_Selector_Module import all
|
||||
from Standard.Base.Data.Text.Text_Ordering as Text_Ordering_Module import Text_Ordering
|
||||
from Standard.Table.Data.Position as Position_Module import all
|
||||
|
@ -4,7 +4,7 @@ from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding, Encodi
|
||||
|
||||
import Standard.Table
|
||||
import Standard.Table.Data.Column
|
||||
from Standard.Table.Error import all
|
||||
from Standard.Table.Errors import all
|
||||
|
||||
import Standard.Base.Data.Time.Date
|
||||
import Standard.Base.Data.Time
|
||||
|
@ -13,7 +13,7 @@ from Standard.Table.Data.Aggregate_Column import all
|
||||
from Standard.Database import all
|
||||
from Standard.Database.Data.Sql import Sql_Type
|
||||
from Standard.Table import No_Such_Column_Error
|
||||
from Standard.Table.Error as Table_Errors import No_Input_Columns_Selected, Missing_Input_Columns
|
||||
from Standard.Table.Errors as Table_Errors import No_Input_Columns_Selected, Missing_Input_Columns
|
||||
from Standard.Database.Error as Database_Errors import Unsupported_Database_Operation_Error
|
||||
|
||||
spec =
|
||||
|
@ -8,7 +8,7 @@ import Standard.Table.Data.Sort_Column_Selector
|
||||
import Standard.Table.Data.Sort_Column
|
||||
import Standard.Test
|
||||
import Standard.Test.Problems
|
||||
from Standard.Table.Error as Table_Errors import No_Input_Columns_Selected, Missing_Input_Columns
|
||||
from Standard.Table.Errors as Table_Errors import No_Input_Columns_Selected, Missing_Input_Columns
|
||||
import project.Database.Helpers.Name_Generator
|
||||
|
||||
from Standard.Table.Data.Aggregate_Column import all
|
||||
|
@ -4,7 +4,7 @@ from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding, Encodi
|
||||
|
||||
import Standard.Table
|
||||
import Standard.Table.Data.Column
|
||||
from Standard.Table.Error import all
|
||||
from Standard.Table.Errors import all
|
||||
|
||||
import Standard.Table.Io.File_Read
|
||||
from Standard.Table.Io.File_Format import Delimited
|
||||
@ -129,6 +129,8 @@ spec =
|
||||
'a,b,c\nd,e,f\r1,2,3'.write (path 'mixed.csv')
|
||||
File.read (path 'mixed.csv') (Delimited "," headers=True value_formatter=Nothing) Problem_Behavior.Report_Error . should_fail_with Invalid_Row
|
||||
|
||||
['crlf.csv', 'lf.csv', 'cr.csv', 'mixed.csv'].each (path >> .delete)
|
||||
|
||||
Test.specify "should work with Windows-1252 encoding" <|
|
||||
table = File.read (enso_project.data / "windows.csv") (Delimited "," headers=True encoding=Encoding.windows_1252) Problem_Behavior.Report_Error
|
||||
table.columns.map .name . should_equal ['a', 'b', 'c']
|
||||
@ -156,6 +158,7 @@ spec =
|
||||
table.at 'c' . to_vector . should_equal ['\uFFFD(\uFFFD(']
|
||||
problems_1 = [Encoding_Error "Encoding issues at bytes 13, 15."]
|
||||
Problems.test_problem_handling action_1 problems_1 tester_1
|
||||
utf8_file.delete
|
||||
|
||||
action_2 on_problems =
|
||||
(enso_project.data / "utf16_invalid.csv").read (Delimited "," headers=True encoding=Encoding.utf_16_be) on_problems
|
||||
|
@ -1,18 +1,20 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Error.Problem_Behavior
|
||||
import Standard.Base.System.File.Existing_File_Behavior
|
||||
from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding, Encoding_Error
|
||||
|
||||
import Standard.Table
|
||||
import Standard.Table.Data.Column
|
||||
from Standard.Table.Error import all
|
||||
|
||||
import Standard.Base.Data.Time.Date
|
||||
import Standard.Base.Data.Time.Time_Of_Day
|
||||
|
||||
import Standard.Table
|
||||
import Standard.Table.Data.Column
|
||||
from Standard.Table.Errors import all
|
||||
import Standard.Table.Io.File_Read
|
||||
from Standard.Table.Io.File_Format import Delimited
|
||||
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
|
||||
import Standard.Table.Io.Quote_Style
|
||||
import Standard.Table.Data.Match_Columns
|
||||
import Standard.Table.Data.Column_Name_Mapping
|
||||
from Standard.Table.Errors as Table_Errors import Column_Count_Mismatch, Column_Name_Mismatch
|
||||
|
||||
import Standard.Test
|
||||
import Standard.Test.Problems
|
||||
@ -39,6 +41,16 @@ spec =
|
||||
3,2.2,z,[[[My Type :: 10]]]
|
||||
text = File.read_text file
|
||||
text.should_equal expected_text+'\n'
|
||||
file.delete
|
||||
|
||||
Test.specify "should be able to write an empty table" <|
|
||||
table = Table.new []
|
||||
file = (enso_project.data / "transient" / "empty.csv")
|
||||
file.delete_if_exists
|
||||
table.write file
|
||||
text = File.read_text file
|
||||
text.should_equal ''
|
||||
file.delete
|
||||
|
||||
Test.specify 'should quote values that contain the delimiter or quotes, in the [,""] variant' <|
|
||||
data_formatter = Data_Formatter decimal_point=","
|
||||
@ -54,6 +66,7 @@ spec =
|
||||
"one, two, three","-1,5"
|
||||
text = File.read_text file
|
||||
text.should_equal expected_text+'\n'
|
||||
file.delete
|
||||
|
||||
Test.specify 'should quote values that contain the delimiter or quotes, in the [;\\\"] variant' <|
|
||||
data_formatter = Data_Formatter thousand_separator="'"
|
||||
@ -70,6 +83,7 @@ spec =
|
||||
"a\\b";
|
||||
text = File.read_text file
|
||||
text.should_equal expected_text+'\n'
|
||||
file.delete
|
||||
|
||||
Test.specify "should quote values that contain the delimiter or quotes, in the [\t''] variant" <|
|
||||
data_formatter = Data_Formatter thousand_separator="'"
|
||||
@ -85,6 +99,7 @@ spec =
|
||||
'a\tb'\t-1.2
|
||||
text = File.read_text file
|
||||
text.should_equal expected_text+'\n'
|
||||
file.delete
|
||||
|
||||
Test.specify "should correctly distinguish empty text from a missing value" <|
|
||||
table = Table.new [["A", [1,Nothing,3]], ["B", [Nothing,"","abc"]]]
|
||||
@ -98,6 +113,7 @@ spec =
|
||||
3,abc
|
||||
text = File.read_text file
|
||||
text.should_equal expected_text+'\n'
|
||||
file.delete
|
||||
|
||||
Test.specify 'should not quote values if quoting is disabled' <|
|
||||
format = File_Format.Delimited "," value_formatter=(Data_Formatter decimal_point=",") . without_quotes
|
||||
@ -113,6 +129,7 @@ spec =
|
||||
one, two, three,-1,5
|
||||
text = File.read_text file
|
||||
text.should_equal expected_text+'\n'
|
||||
file.delete
|
||||
|
||||
Test.specify 'should allow to always quote text and custom values, but for non-text primitves only if absolutely necessary' <|
|
||||
format = File_Format.Delimited "," value_formatter=(Data_Formatter thousand_separator='"' date_formats=["E, d MMM y"]) . with_quotes always_quote=True quote_escape='\\'
|
||||
@ -128,6 +145,7 @@ spec =
|
||||
"one, two, three",-1.5,42,"4\"000",
|
||||
text = File.read_text file
|
||||
text.should_equal expected_text+'\n'
|
||||
file.delete
|
||||
|
||||
Test.specify "should correctly handle alternative encodings" <|
|
||||
table = Table.new [["ąęćś", [0]], ["ß", ["żółw 🐢"]]]
|
||||
@ -139,6 +157,7 @@ spec =
|
||||
0,żółw 🐢
|
||||
text = File.read_text file encoding=Encoding.utf_16_be
|
||||
text.should_equal expected_text+'\n'
|
||||
file.delete
|
||||
|
||||
Test.specify "should correctly handle encoding errors" <|
|
||||
table = Table.new [["A", [0, 1]], ["B", ["słówka", "🐢"]]]
|
||||
@ -153,6 +172,7 @@ spec =
|
||||
text.should_equal expected_text+'\n'
|
||||
result . should_equal Nothing
|
||||
Warning.get_all result . map .value . should_equal [Encoding_Error "Encoding issues at characters 7, 8, 15."]
|
||||
file.delete
|
||||
|
||||
Test.specify "should allow only text columns if no formatter is specified" <|
|
||||
format = File_Format.Delimited "," value_formatter=Nothing
|
||||
@ -176,4 +196,132 @@ spec =
|
||||
text_2 = File.read_text file_2
|
||||
text_2.should_equal ""
|
||||
|
||||
file_1.delete
|
||||
file_2.delete
|
||||
|
||||
Test.specify "should create a new file in append mode if it didn't exist" <|
|
||||
table = Table.new [["A", [1,2,3]], ["B", [1.0,1.5,2.2]], ["C", ["x","y","z"]]]
|
||||
file = (enso_project.data / "transient" / "append_nonexistent.csv")
|
||||
file.delete_if_exists
|
||||
table.write file on_existing_file=Existing_File_Behavior.Append
|
||||
got_table = file.read
|
||||
got_table.should_equal table
|
||||
file.delete
|
||||
|
||||
Test.specify "should correctly append to an empty file" <|
|
||||
table = Table.new [["A", [1,2,3]], ["B", [1.0,1.5,2.2]], ["C", ["x","y","z"]]]
|
||||
file = (enso_project.data / "transient" / "append_empty.csv")
|
||||
file.delete_if_exists
|
||||
"".write file
|
||||
table.write file on_existing_file=Existing_File_Behavior.Append
|
||||
got_table = file.read
|
||||
got_table.should_equal table
|
||||
file.delete
|
||||
|
||||
Test.specify "should append to a file, matching columns by name (headers=Infer)" <|
|
||||
existing_table = Table.new [["A", [1,2]], ["B", [1.0,1.5]], ["C", ["x","y"]]]
|
||||
appending_table = Table.new [["B", [33,44]], ["A", [Nothing, 0]], ["C", ["a","BB"]]]
|
||||
file = (enso_project.data / "transient" / "append_by_name.csv")
|
||||
file.delete_if_exists
|
||||
existing_table.write file on_existing_file=Existing_File_Behavior.Overwrite
|
||||
appending_table.write file on_existing_file=Existing_File_Behavior.Append
|
||||
got_table = file.read
|
||||
expected_table = Table.new [["A", [1,2,Nothing,0]], ["B", [1.0,1.5,33,44]], ["C", ["x","y","a","BB"]]]
|
||||
got_table.should_equal expected_table
|
||||
file.delete
|
||||
|
||||
Test.specify "should append to a file, matching columns by name (headers=True)" <|
|
||||
existing_table = Table.new [["0", [1,2]], ["B1", [1.0,1.5]], ["C", ["x","y"]]]
|
||||
appending_table = Table.new [["B1", [33,44]], ["0", [Nothing, 0]], ["C", ["a","BB"]]]
|
||||
file = (enso_project.data / "transient" / "append_by_name_2.csv")
|
||||
file.delete_if_exists
|
||||
existing_table.write file on_existing_file=Existing_File_Behavior.Overwrite
|
||||
format = File_Format.Delimited "," . with_headers
|
||||
appending_table.write file format on_existing_file=Existing_File_Behavior.Append
|
||||
got_table = file.read format
|
||||
expected_table = Table.new [["0", [1,2,Nothing,0]], ["B1", [1.0,1.5,33,44]], ["C", ["x","y","a","BB"]]]
|
||||
got_table.should_equal expected_table
|
||||
file.delete
|
||||
|
||||
Test.specify "should fail when appending and matching columns by name but column names are not available in the file (headers=Infer)" <|
|
||||
existing_table = Table.new [["A", [1,2]], ["B", [1.0,1.5]], ["C", ["x","y"]]]
|
||||
appending_table = Table.new [["B", [33,44]], ["A", [Nothing, 0]], ["C", ["a","BB"]]]
|
||||
file = (enso_project.data / "transient" / "append_no_header.csv")
|
||||
file.delete_if_exists
|
||||
no_header_format = File_Format.Delimited "," . without_headers
|
||||
existing_table.write file no_header_format on_existing_file=Existing_File_Behavior.Overwrite
|
||||
appending_table.write file on_existing_file=Existing_File_Behavior.Append . should_fail_with Illegal_Argument_Error
|
||||
file.delete
|
||||
|
||||
Test.specify "should fail when appending and matching columns by name but headers are disabled (headers=False)" <|
|
||||
existing_table = Table.new [["A", [1,2]], ["B", [1.0,1.5]], ["C", ["x","y"]]]
|
||||
appending_table = Table.new [["B", [33,44]], ["A", [Nothing, 0]], ["C", ["a","BB"]]]
|
||||
file = (enso_project.data / "transient" / "append_no_header.csv")
|
||||
file.delete_if_exists
|
||||
no_header_format = File_Format.Delimited "," . without_headers
|
||||
existing_table.write file on_existing_file=Existing_File_Behavior.Overwrite
|
||||
appending_table.write file no_header_format on_existing_file=Existing_File_Behavior.Append . should_fail_with Illegal_Argument_Error
|
||||
file.delete
|
||||
|
||||
Test.specify "should fail on column mismatch when appending to a file by name" <|
|
||||
existing_table = Table.new [["A", [1,2]], ["B", [1.0,1.5]]]
|
||||
appending_table = Table.new [["B", [33,44]], ["X", [Nothing, 0]]]
|
||||
file = (enso_project.data / "transient" / "append_no_header.csv")
|
||||
file.delete_if_exists
|
||||
existing_table.write file on_existing_file=Existing_File_Behavior.Overwrite
|
||||
result = appending_table.write file on_existing_file=Existing_File_Behavior.Append
|
||||
result . should_fail_with Column_Name_Mismatch
|
||||
result.catch.missing . should_equal ["A"]
|
||||
result.catch.extras . should_equal ["X"]
|
||||
result.catch.to_display_text . should_equal "Columns mismatch. Missing from new data: [A] Extras in new data: [X]"
|
||||
file.delete
|
||||
|
||||
Test.specify "should append to a file, matching columns by position" <|
|
||||
existing_table = Table.new [["A", [1,2]], ["B", [1.0,1.5]], ["C", ["x","y"]]]
|
||||
appending_table = Table.new [["AA", [33,44]], ["", [Nothing, 0]], ["hmmm", ["a","BB"]]]
|
||||
|
||||
test_append initial_file_format append_format expected_table =
|
||||
file = (enso_project.data / "transient" / "append_by_position.csv")
|
||||
file.delete_if_exists
|
||||
existing_table.write file initial_file_format on_existing_file=Existing_File_Behavior.Overwrite
|
||||
appending_table.write file append_format match_columns=Match_Columns.By_Position on_existing_file=Existing_File_Behavior.Append
|
||||
read_format = initial_file_format
|
||||
got_table = file.read read_format
|
||||
got_table.should_equal expected_table
|
||||
file.delete
|
||||
|
||||
base_format = File_Format.Delimited ","
|
||||
no_headers = base_format . without_headers
|
||||
with_headers = base_format . with_headers
|
||||
|
||||
expected_table_with_headers = Table.new [["A", [1,2,33,44]], ["B", [1.0,1.5,Nothing,0]], ["C", ["x","y","a","BB"]]]
|
||||
expected_table_without_headers = expected_table_with_headers.rename_columns (Column_Name_Mapping.By_Position ["Column_1", "Column_2", "Column_3"])
|
||||
|
||||
test_append initial_file_format=with_headers append_format=no_headers expected_table_with_headers
|
||||
test_append initial_file_format=with_headers append_format=base_format expected_table_with_headers
|
||||
test_append initial_file_format=no_headers append_format=base_format expected_table_without_headers
|
||||
test_append initial_file_format=no_headers append_format=no_headers expected_table_without_headers
|
||||
|
||||
Test.specify "should fail on column count mismatch when appending to a file by position" <|
|
||||
existing_table = Table.new [["A", [1,2]], ["B", [1.0,1.5]], ["C", ["x","y"]]]
|
||||
appending_table_1 = Table.new [["B", [33,44]], ["X", [Nothing, 0]]]
|
||||
appending_table_2 = Table.new [["B", [33,44]], ["X", [Nothing, 0]], ["Y", ["a","BB"]], ["Z", [Nothing, 0]]]
|
||||
file = (enso_project.data / "transient" / "append_mismatch.csv")
|
||||
file.delete_if_exists
|
||||
existing_table.write file on_existing_file=Existing_File_Behavior.Overwrite
|
||||
|
||||
result_1 = appending_table_1.write file match_columns=Match_Columns.By_Position on_existing_file=Existing_File_Behavior.Append
|
||||
result_1 . should_fail_with Column_Count_Mismatch
|
||||
result_1.catch.expected . should_equal 3
|
||||
result_1.catch.actual . should_equal 2
|
||||
result_1.catch.to_display_text . should_equal "Expected 3 columns, got 2."
|
||||
|
||||
result_2 = appending_table_2.write file match_columns=Match_Columns.By_Position on_existing_file=Existing_File_Behavior.Append
|
||||
result_2 . should_fail_with Column_Count_Mismatch
|
||||
result_2.catch.expected . should_equal 3
|
||||
result_2.catch.actual . should_equal 4
|
||||
result_2.catch.to_display_text . should_equal "Expected 3 columns, got 4."
|
||||
|
||||
file.delete
|
||||
|
||||
main = Test.Suite.run_main spec
|
||||
|
@ -10,7 +10,7 @@ import Standard.Table.Data.Column_Name_Mapping
|
||||
import Standard.Table.Data.Match_Columns
|
||||
from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Index
|
||||
from Standard.Table.Io.Excel import Excel_Range, Sheet_Names, Range_Names, Sheet, Cell_Range
|
||||
from Standard.Table.Error as Table_Errors import Invalid_Output_Column_Names, Duplicate_Output_Column_Names, Invalid_Location, Range_Exceeded, Existing_Data, Column_Count_Mismatch, Column_Name_Mismatch
|
||||
from Standard.Table.Errors as Table_Errors import Invalid_Output_Column_Names, Duplicate_Output_Column_Names, Invalid_Location, Range_Exceeded, Existing_Data, Column_Count_Mismatch, Column_Name_Mismatch
|
||||
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
|
||||
|
||||
import Standard.Test
|
||||
|
@ -2,7 +2,7 @@ from Standard.Base import all
|
||||
from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding, Encoding_Error
|
||||
import Standard.Table.Io.File_Read
|
||||
import Standard.Table.Io.File_Format
|
||||
from Standard.Table.Error import Unsupported_File_Type
|
||||
from Standard.Table.Errors import Unsupported_File_Type
|
||||
|
||||
import Standard.Test
|
||||
import Standard.Test.Problems
|
||||
|
@ -13,7 +13,7 @@ import Standard.Visualization
|
||||
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
|
||||
from Standard.Table.Data.Column_Type_Selection as Column_Type_Selection_Module import Column_Type_Selection, Auto
|
||||
|
||||
from Standard.Table.Error as Table_Errors import Invalid_Format, Leading_Zeros, Missing_Input_Columns, Column_Indexes_Out_Of_Range, Duplicate_Type_Selector
|
||||
from Standard.Table.Errors as Table_Errors import Invalid_Format, Leading_Zeros, Missing_Input_Columns, Column_Indexes_Out_Of_Range, Duplicate_Type_Selector
|
||||
|
||||
spec = Test.group "Table.parse_values" <|
|
||||
Test.specify "should correctly parse integers" <|
|
||||
@ -44,7 +44,7 @@ spec = Test.group "Table.parse_values" <|
|
||||
t3 = t1.parse_values column_types=[Column_Type_Selection 0 Integer]
|
||||
t3.at "ints" . to_vector . should_equal t1_parsed
|
||||
Warning.get_all t3 . map .value . should_equal [Leading_Zeros "ints" Integer t1_zeros]
|
||||
|
||||
|
||||
t4 = t1.parse_values column_types=[Column_Type_Selection 0 Decimal]
|
||||
t4.at "ints" . to_vector . should_equal t1_parsed
|
||||
Warning.get_all t4 . map .value . should_equal [Leading_Zeros "ints" Decimal t1_zeros]
|
||||
|
@ -10,7 +10,7 @@ import Standard.Table.Data.Storage
|
||||
import Standard.Test
|
||||
import Standard.Test.Problems
|
||||
import Standard.Visualization
|
||||
from Standard.Table.Error as Table_Errors import Invalid_Output_Column_Names, Duplicate_Output_Column_Names, No_Input_Columns_Selected, Missing_Input_Columns
|
||||
from Standard.Table.Errors as Table_Errors import Invalid_Output_Column_Names, Duplicate_Output_Column_Names, No_Input_Columns_Selected, Missing_Input_Columns
|
||||
|
||||
import project.Common_Table_Spec
|
||||
|
||||
|
@ -6,5 +6,5 @@ import Standard.Test
|
||||
Table.Table.should_equal expected =
|
||||
self_cols = self.columns
|
||||
that_cols = expected.columns
|
||||
self_cols.map .name . should_equal (that_cols.map .name)
|
||||
self_cols.map .to_vector . should_equal (that_cols.map .to_vector)
|
||||
self_cols.map .name . should_equal (that_cols.map .name) frames_to_skip=1
|
||||
self_cols.map .to_vector . should_equal (that_cols.map .to_vector) frames_to_skip=1
|
||||
|
@ -1,6 +1,6 @@
|
||||
from Standard.Base import all
|
||||
|
||||
from Standard.Table.Error as Error_Module import all
|
||||
from Standard.Table.Errors as Error_Module import all
|
||||
import Standard.Base.Error.Problem_Behavior
|
||||
import Standard.Base.Data.Text.Matching
|
||||
import Standard.Test
|
||||
|
Loading…
Reference in New Issue
Block a user