Add appending support for Delimited files (#3573)

Implements https://www.pivotaltracker.com/story/show/182309839
This commit is contained in:
Radosław Waśko 2022-07-11 14:36:01 +02:00 committed by GitHub
parent b59a496589
commit df10e4ba7c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
26 changed files with 452 additions and 142 deletions

View File

@ -151,6 +151,7 @@
- [Added append support for `File_Format.Excel`.][3558]
- [Added support for custom encodings in `File_Format.Delimited` writing.][3564]
- [Allow filtering caught error type in `Error.catch`.][3574]
- [Implemented `Append` mode for `File_Format.Delimited`.][3573]
[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -241,6 +242,7 @@
[3558]: https://github.com/enso-org/enso/pull/3558
[3564]: https://github.com/enso-org/enso/pull/3564
[3574]: https://github.com/enso-org/enso/pull/3574
[3573]: https://github.com/enso-org/enso/pull/3573
#### Enso Compiler

View File

@ -20,7 +20,7 @@ from Standard.Table.Data.Column_Type_Selection as Column_Type_Selection_Module i
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
from Standard.Base.Data.Text.Text_Ordering as Text_Ordering_Module import Text_Ordering
from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior, Report_Warning
from Standard.Table.Error as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, Duplicate_Type_Selector
from Standard.Table.Errors as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, Duplicate_Type_Selector
import Standard.Table.Data.Match_Columns
import Standard.Table.Data.Column_Name_Mapping

View File

@ -1,5 +1,8 @@
from Standard.Base import all
polyglot java import org.enso.table.error.ColumnCountMismatchException
polyglot java import org.enso.table.error.ColumnNameMismatchException
## One or more columns not found in the input table.
Can occur when using By_Name or By_Column.
type Missing_Input_Columns (criteria : [Text])
@ -196,9 +199,23 @@ Column_Count_Mismatch.to_display_text : Text
Column_Count_Mismatch.to_display_text =
"Expected " + self.expected.to_text + " columns, got " + self.actual.to_text + "."
## PRIVATE
Column_Count_Mismatch.handle_java_exception =
throw_column_count_mismatch caught_panic =
cause = caught_panic.payload.cause
Error.throw (Column_Count_Mismatch cause.getExpected cause.getActual)
Panic.catch ColumnCountMismatchException handler=throw_column_count_mismatch
## Indicates that the existing table has a different set of column names to the
new table.
type Column_Name_Mismatch expected actual message
type Column_Name_Mismatch missing extras message
Column_Name_Mismatch.to_display_text : Text
Column_Name_Mismatch.to_display_text = self.message
## PRIVATE
Column_Name_Mismatch.handle_java_exception =
throw_column_name_mismatch caught_panic =
cause = caught_panic.payload.cause
Error.throw (Column_Name_Mismatch (Vector.Vector cause.getMissing) (Vector.Vector cause.getExtras) cause.getMessage)
Panic.catch ColumnNameMismatchException handler=throw_column_name_mismatch

View File

@ -12,7 +12,7 @@ import Standard.Table.Data.Sort_Column
import Standard.Base.Data.Ordering.Comparator
from Standard.Table.Error as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Aggregation, Floating_Point_Grouping, Unquoted_Delimiter, Additional_Warnings
from Standard.Table.Errors as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Aggregation, Floating_Point_Grouping, Unquoted_Delimiter, Additional_Warnings
polyglot java import org.enso.table.aggregations.Aggregator
polyglot java import org.enso.table.aggregations.Concatenate as ConcatenateAggregator

View File

@ -2,8 +2,8 @@ from Standard.Base import all
import Standard.Table
import Standard.Base.Error.Common as Errors
from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior
from Standard.Table.Error as Table_Errors import Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Row, Mismatched_Quote, Parser_Error, Additional_Invalid_Rows
from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior, Ignore
from Standard.Table.Errors as Table_Errors import Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Row, Mismatched_Quote, Parser_Error, Additional_Invalid_Rows
from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding, Encoding_Error
from Standard.Table.Io.File_Format import Infer
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
@ -49,7 +49,7 @@ read_file format file on_problems =
read_text : Text -> Delimited -> Problem_Behavior -> Table
read_text text format on_problems =
java_reader = StringReader.new text
Delimited_Reader.read_from_reader format java_reader on_problems
read_from_reader format java_reader on_problems
## PRIVATE
Reads an input stream according to the provided format.
@ -67,11 +67,8 @@ read_text text format on_problems =
- related_file: The file related to the provided `java_stream`, if available,
or `Nothing`. It is used for more detailed error reporting.
read_stream : Delimited -> Input_Stream -> Problem_Behavior -> Integer -> File | Nothing -> Any
read_stream format stream on_problems max_columns=4096 related_file=Nothing =
handle_io_exception ~action = Panic.catch_java IOException action java_exception->
Error.throw (File.wrap_io_exception related_file java_exception)
handle_io_exception <|
read_stream format stream on_problems max_columns=default_max_columns related_file=Nothing =
handle_io_exception related_file <|
stream.with_stream_decoder format.encoding on_problems reporting_stream_decoder->
read_from_reader format reporting_stream_decoder on_problems max_columns
@ -93,6 +90,14 @@ read_stream format stream on_problems max_columns=4096 related_file=Nothing =
integer.
read_from_reader : Delimited -> Reader -> Problem_Behavior -> Integer -> Any
read_from_reader format java_reader on_problems max_columns=4096 =
handle_illegal_arguments <| handle_parsing_failure <| handle_parsing_exception <|
reader = prepare_delimited_reader java_reader format max_columns on_problems
result_with_problems = reader.read
parsing_problems = Vector.Vector (result_with_problems.problems) . map translate_reader_problem
on_problems.attach_problems_after (Table.Table result_with_problems.value) parsing_problems
## PRIVATE
prepare_delimited_reader java_reader format max_columns on_problems =
java_headers = case format.headers of
True -> DelimitedReader.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS
Infer -> DelimitedReader.HeaderBehavior.INFER
@ -101,40 +106,21 @@ read_from_reader format java_reader on_problems max_columns=4096 =
Nothing -> -1
Integer -> format.row_limit
_ -> Error.throw (Illegal_Argument_Error "`row_limit` should be Integer or Nothing.")
translate_illegal_argument caught_panic =
Error.throw (Illegal_Argument_Error caught_panic.payload.cause.getMessage)
handle_illegal_arguments = Panic.catch IllegalArgumentException handler=translate_illegal_argument
translate_parsing_failure caught_panic =
Error.throw (translate_reader_problem caught_panic.payload.cause.problem)
handle_parsing_failure = Panic.catch ParsingFailedException handler=translate_parsing_failure
translate_parsing_exception caught_panic =
cause = caught_panic.payload.cause.getCause
if Java.is_instance cause IOException then Panic.throw cause else
Error.throw (Parser_Error caught_panic.payload)
handle_parsing_exception = Panic.catch TextParsingException handler=translate_parsing_exception
handle_illegal_arguments <| handle_parsing_failure <| handle_parsing_exception <|
warnings_as_errors = on_problems == Problem_Behavior_Module.Report_Error
quote_characters = case format.quote_style of
Quote_Style.No_Quotes -> Pair Nothing Nothing
Quote_Style.With_Quotes _ quote quote_escape -> Pair quote quote_escape
base_parser = case format.quote_style of
Quote_Style.No_Quotes -> IdentityParser.new
Quote_Style.With_Quotes _ quote _ ->
QuoteStrippingParser.new quote
value_parser = if format.value_formatter.is_nothing then base_parser else
wrapped = format.value_formatter.wrap_base_parser base_parser
TypeInferringParser.new format.value_formatter.get_specific_type_parsers.to_array wrapped
cell_type_guesser = if format.headers != Infer then Nothing else
formatter = format.value_formatter.if_nothing Data_Formatter
TypeInferringParser.new formatter.get_specific_type_parsers.to_array IdentityParser.new
reader = DelimitedReader.new java_reader format.delimiter quote_characters.first quote_characters.second java_headers format.skip_rows row_limit max_columns value_parser cell_type_guesser format.keep_invalid_rows warnings_as_errors
result_with_problems = reader.read
parsing_problems = Vector.Vector (result_with_problems.problems) . map translate_reader_problem
on_problems.attach_problems_after (Table.Table result_with_problems.value) parsing_problems
warnings_as_errors = on_problems == Problem_Behavior_Module.Report_Error
quote_characters = case format.quote_style of
Quote_Style.No_Quotes -> Pair Nothing Nothing
Quote_Style.With_Quotes _ quote quote_escape -> Pair quote quote_escape
base_parser = case format.quote_style of
Quote_Style.No_Quotes -> IdentityParser.new
Quote_Style.With_Quotes _ quote _ ->
QuoteStrippingParser.new quote
value_parser = if format.value_formatter.is_nothing then base_parser else
wrapped = format.value_formatter.wrap_base_parser base_parser
TypeInferringParser.new format.value_formatter.get_specific_type_parsers.to_array wrapped
cell_type_guesser = if format.headers != Infer then Nothing else
formatter = format.value_formatter.if_nothing Data_Formatter
TypeInferringParser.new formatter.get_specific_type_parsers.to_array IdentityParser.new
DelimitedReader.new java_reader format.delimiter quote_characters.first quote_characters.second java_headers format.skip_rows row_limit max_columns value_parser cell_type_guesser format.keep_invalid_rows warnings_as_errors
translate_reader_problem problem =
invalid_row = [InvalidRow, (java_problem-> Invalid_Row java_problem.source_row java_problem.table_index (Vector.Vector java_problem.row))]
@ -147,3 +133,65 @@ translate_reader_problem problem =
found = translations.find t-> Java.is_instance problem t.first
if found.is_error then problem else
found.second problem
## PRIVATE
An internal type representing columns deduced from an existing file.
Consumed by `Delimited_Writer.append_to_file` to decide how the appended
table's columns are matched against the data already in the file.
type Detected_Headers
## Indicates that the file did not exist or was empty.
Nothing
## Represents the headers found in the file.
type Existing_Headers (column_names : Vector Text)
## Indicates that the file exists but no headers have been found, so only positional column matching is possible.
type No_Headers (column_count : Integer)
## PRIVATE
Reads the beginning of the file to detect the existing headers and column
count.
Arguments:
- file: The file to inspect.
- format: The `Delimited` format settings used to parse the beginning of
the file.
Any parsing problems encountered during detection are ignored (only the
header structure is of interest here). If the file does not exist, the
result is `Nothing`.
detect_headers : File -> File_Format.Delimited -> Detected_Headers
detect_headers file format =
on_problems = Ignore
result = handle_io_exception file <| handle_illegal_arguments <| handle_parsing_failure <| handle_parsing_exception <|
file.with_input_stream [File.Option.Read] stream->
stream.with_stream_decoder format.encoding on_problems java_reader->
## We use the default `max_columns` setting. If we want to be able to
read files with unlimited column limits (risking OutOfMemory
exceptions), we can catch the exception indicating the limit has been
reached and restart parsing with an increased limit.
reader = prepare_delimited_reader java_reader format max_columns=default_max_columns on_problems
defined_columns = reader.getDefinedColumnNames
case defined_columns of
Nothing ->
## No explicit headers in the file - fall back to reporting just
the column count (or `Nothing` for an empty file).
column_count = reader.getColumnCount
if column_count == 0 then Nothing else
No_Headers column_count
_ -> Existing_Headers (Vector.Vector defined_columns)
## A missing file behaves like an empty one for appending purposes.
result.catch File.File_Not_Found (_->Nothing)
## PRIVATE
Translates a Java `IllegalArgumentException` panic raised by the wrapped
action into an `Illegal_Argument_Error` dataflow error carrying the
original exception's message.
handle_illegal_arguments =
translate_illegal_argument caught_panic =
Error.throw (Illegal_Argument_Error caught_panic.payload.cause.getMessage)
Panic.catch IllegalArgumentException handler=translate_illegal_argument
## PRIVATE
Translates a `ParsingFailedException` panic into a dataflow error, mapping
the underlying Java problem object through `translate_reader_problem`.
handle_parsing_failure =
translate_parsing_failure caught_panic =
Error.throw (translate_reader_problem caught_panic.payload.cause.problem)
Panic.catch ParsingFailedException handler=translate_parsing_failure
## PRIVATE
Translates a `TextParsingException` panic into a `Parser_Error` dataflow
error. If the exception was actually caused by an `IOException`, the
panic is re-thrown so it can be handled by the I/O handler instead.
handle_parsing_exception =
translate_parsing_exception caught_panic =
cause = caught_panic.payload.cause.getCause
if Java.is_instance cause IOException then Panic.throw cause else
Error.throw (Parser_Error caught_panic.payload)
Panic.catch TextParsingException handler=translate_parsing_exception
## PRIVATE
Converts a Java `IOException` thrown by `action` into a dataflow error
associated with `related_file` (which may be `Nothing`), for more
precise error reporting.
handle_io_exception related_file ~action = Panic.catch_java IOException action java_exception->
Error.throw (File.wrap_io_exception related_file java_exception)
## PRIVATE
The default limit on the number of columns when reading delimited data;
it guards against memory exhaustion on malformed inputs (see the note in
`detect_headers`).
default_max_columns = 4096

View File

@ -4,19 +4,22 @@ import Standard.Table
import Standard.Base.Error.Common as Errors
from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior
import Standard.Base.System.File.Existing_File_Behavior
from Standard.Table.Error as Table_Errors import Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Row, Mismatched_Quote, Parser_Error, Additional_Invalid_Rows
from Standard.Table.Errors as Table_Errors import Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Row, Mismatched_Quote, Parser_Error, Additional_Invalid_Rows, Column_Count_Mismatch, Column_Name_Mismatch
from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding, Encoding_Error
from Standard.Table.Io.File_Format import Infer
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
import Standard.Table.Data.Storage
import Standard.Table.Io.Quote_Style
from Standard.Table.Internal.Delimited_Reader import Existing_Headers, No_Headers
import Standard.Table.Data.Match_Columns
polyglot java import org.enso.table.write.DelimitedWriter
polyglot java import org.enso.table.write.WriteQuoteBehavior
polyglot java import java.io.PrintWriter
polyglot java import java.io.IOException
polyglot java import org.enso.table.formatting.TextFormatter
polyglot java import org.enso.table.util.ColumnMapper
polyglot java import java.io.PrintWriter
polyglot java import java.io.StringWriter
polyglot java import java.io.IOException
## Writes a delimited file according to the provided format.
@ -29,15 +32,45 @@ polyglot java import java.io.StringWriter
operation. By default, a warning is issued, but the operation proceeds.
If set to `Report_Error`, the operation fails with a dataflow error.
If set to `Ignore`, the operation proceeds without errors or warnings.
write_file : Table -> File_Format.Delimited -> File -> Existing_File_Behavior -> Problem_Behavior -> Any
write_file table format file on_existing_file on_problems =
write_file : Table -> File_Format.Delimited -> File -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> Any
write_file table format file on_existing_file match_columns on_problems =
case on_existing_file of
Existing_File_Behavior.Append ->
Errors.unimplemented "Appending to an existing File_Format.Delimited file is not implemented yet."
append_to_file table format file match_columns on_problems
_ ->
on_existing_file.write file stream->
write_to_stream table format stream on_problems related_file=file
## PRIVATE
Handles appending to an existing file, ensuring that the columns are matched
against the ones already in the file.
If the file does not exist or is empty, it acts like a regular overwrite.
append_to_file : Table -> File_Format.Delimited -> File -> Match_Columns -> Problem_Behavior -> Any
append_to_file table format file match_columns on_problems =
Column_Name_Mismatch.handle_java_exception <| Column_Count_Mismatch.handle_java_exception <|
preexisting_headers = Delimited_Reader.detect_headers file format
reordered_java_table = case preexisting_headers of
Nothing -> table.java_table
Existing_Headers column_names -> case match_columns of
Match_Columns.By_Name ->
ColumnMapper.mapColumnsByName table.java_table column_names.to_array
Match_Columns.By_Position ->
column_count = column_names.length
ColumnMapper.mapColumnsByPosition table.java_table column_count
No_Headers column_count -> case match_columns of
Match_Columns.By_Name ->
Error.throw (Illegal_Argument_Error "Cannot append by name when headers are not present in the existing data.")
Match_Columns.By_Position ->
ColumnMapper.mapColumnsByPosition table.java_table column_count
reordered_table = Table.Table reordered_java_table
writing_new_file = preexisting_headers == Nothing
amended_format = case writing_new_file && (should_write_headers format.headers) of
True -> format.with_headers
False -> format.without_headers
Existing_File_Behavior.Append.write file stream->
write_to_stream reordered_table amended_format stream on_problems related_file=file
## PRIVATE
Returns a Text value representing the table in the delimited format.
write_text : Table -> File_Format.Delimited -> Text
@ -95,10 +128,13 @@ write_to_writer table format java_writer =
quote_characters = case format.quote_style of
Quote_Style.No_Quotes -> Pair Nothing Nothing
Quote_Style.With_Quotes _ quote quote_escape -> Pair quote quote_escape
write_headers = case format.headers of
True -> True
Infer -> True
False -> False
write_headers = should_write_headers format.headers
new_line = '\n'
writer = DelimitedWriter.new java_writer column_formatters.to_array format.delimiter new_line quote_characters.first quote_characters.second quote_behavior write_headers
writer.write table.java_table
## PRIVATE
Decides whether a header row should be written based on the format's
`headers` setting: both `True` and `Infer` result in headers being
written, only an explicit `False` suppresses them.
should_write_headers headers = case headers of
True -> True
Infer -> True
False -> False

View File

@ -1,6 +1,6 @@
from Standard.Base import all
from Standard.Table.Error as Table_Errors import Invalid_Format, Leading_Zeros
from Standard.Table.Errors as Table_Errors import Invalid_Format, Leading_Zeros
polyglot java import org.enso.table.parsing.problems.InvalidFormat
polyglot java import org.enso.table.parsing.problems.LeadingZeros

View File

@ -4,7 +4,7 @@ from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Prob
import Standard.Base.Runtime.Ref
import Standard.Table.Internal.Vector_Builder
from Standard.Table.Error as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Column_Selectors, Input_Indices_Already_Matched, Too_Many_Column_Names_Provided, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Column_Matched_By_Multiple_Selectors
from Standard.Table.Errors as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Column_Selectors, Input_Indices_Already_Matched, Too_Many_Column_Names_Provided, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Column_Matched_By_Multiple_Selectors
type Problem_Builder
type Problem_Builder oob_indices duplicate_column_selectors input_indices_already_matched missing_input_columns other

View File

@ -6,7 +6,7 @@ import Standard.Base.Data.Ordering.Vector_Lexicographic_Order
from Standard.Base.Data.Text.Text_Ordering as Text_Ordering_Module import Text_Ordering
from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior, Report_Warning
import Standard.Table.Data.Position
from Standard.Table.Error as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Column_Selectors, Input_Indices_Already_Matched, Too_Many_Column_Names_Provided, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, No_Input_Columns_Selected
from Standard.Table.Errors as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Column_Selectors, Input_Indices_Already_Matched, Too_Many_Column_Names_Provided, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, No_Input_Columns_Selected
from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column_Selector, By_Name, By_Index, By_Column
import Standard.Table.Data.Column_Name_Mapping
import Standard.Table.Internal.Unique_Name_Strategy

View File

@ -5,7 +5,7 @@ import Standard.Base.System.File.Option
from Standard.Table.Io.File_Format import Infer
import Standard.Table.Data.Table
from Standard.Table.Error as Error_Module import Invalid_Location, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Range_Exceeded, Existing_Data, Column_Count_Mismatch, Column_Name_Mismatch
from Standard.Table.Errors as Error_Module import Invalid_Location, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Range_Exceeded, Existing_Data, Column_Count_Mismatch, Column_Name_Mismatch
import Standard.Base.Error.Common as Errors
import Standard.Table.Data.Match_Columns
@ -17,8 +17,6 @@ polyglot java import org.enso.table.write.ExistingDataMode
polyglot java import org.enso.table.error.ExistingDataException
polyglot java import org.enso.table.error.RangeExceededException
polyglot java import org.enso.table.error.InvalidLocationException
polyglot java import org.enso.table.error.ColumnCountMismatchException
polyglot java import org.enso.table.error.ColumnNameMismatchException
polyglot java import java.lang.IllegalArgumentException
polyglot java import java.lang.IllegalStateException
@ -283,16 +281,6 @@ handle_writer ~writer =
throw_existing_data caught_panic = Error.throw (Existing_Data caught_panic.payload.cause.getMessage)
handle_existing_data = Panic.catch ExistingDataException handler=throw_existing_data
throw_column_count_mismatch caught_panic =
cause = caught_panic.payload.cause
Error.throw (Column_Count_Mismatch cause.getExpected cause.getActual)
handle_column_count_mismatch = Panic.catch ColumnCountMismatchException handler=throw_column_count_mismatch
throw_column_name_mismatch caught_panic =
cause = caught_panic.payload.cause
Error.throw (Column_Name_Mismatch (Vector.Vector cause.getMissing) (Vector.Vector cause.getExtras) cause.getMessage)
handle_column_name_mismatch = Panic.catch ColumnNameMismatchException handler=throw_column_name_mismatch
## Illegal argument can occur if appending in an invalid mode
illegal_argument caught_panic = Error.throw (Illegal_Argument_Error caught_panic.payload.cause.getMessage caught_panic.payload.cause)
handle_illegal_argument = Panic.catch IllegalArgumentException handler=illegal_argument
@ -301,5 +289,7 @@ handle_writer ~writer =
throw_illegal_state caught_panic = Panic.throw (Illegal_State_Error caught_panic.payload.cause.getMessage)
handle_illegal_state = Panic.catch IllegalStateException handler=throw_illegal_state
handle_illegal_state <| handle_column_name_mismatch <| handle_column_count_mismatch <| handle_bad_location <|
handle_illegal_argument <| handle_range_exceeded <| handle_existing_data <| writer
handle_illegal_state <| Column_Name_Mismatch.handle_java_exception <|
Column_Count_Mismatch.handle_java_exception <| handle_bad_location <|
handle_illegal_argument <| handle_range_exceeded <| handle_existing_data <|
writer

View File

@ -8,7 +8,7 @@ from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding
import Standard.Base.Runtime.Ref
import Standard.Table.Internal.Delimited_Reader
import Standard.Table.Internal.Delimited_Writer
from Standard.Table.Error as Table_Errors import Unsupported_File_Type
from Standard.Table.Errors as Table_Errors import Unsupported_File_Type
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
import Standard.Table.Io.Excel as Excel_Module
@ -118,8 +118,8 @@ type Delimited
## Implements the `Table.write` for this `File_Format`.
write_table : File -> Table -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> Nothing
write_table file table on_existing_file _ on_problems =
Delimited_Writer.write_file table self file on_existing_file on_problems
write_table file table on_existing_file match_columns on_problems =
Delimited_Writer.write_file table self file on_existing_file match_columns on_problems
## PRIVATE
Clone the instance with some properties overridden.

View File

@ -3,19 +3,12 @@ package org.enso.table.read;
import com.univocity.parsers.csv.CsvFormat;
import com.univocity.parsers.csv.CsvParser;
import com.univocity.parsers.csv.CsvParserSettings;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import org.enso.table.data.column.builder.string.StringStorageBuilder;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.StringStorage;
import org.enso.table.data.index.DefaultIndex;
import org.enso.table.data.table.Column;
import org.enso.table.data.table.Table;
import org.enso.table.problems.WithProblems;
import org.enso.table.parsing.DatatypeParser;
import org.enso.table.parsing.TypeInferringParser;
import org.enso.table.parsing.problems.AdditionalInvalidRows;
@ -23,8 +16,13 @@ import org.enso.table.parsing.problems.InvalidRow;
import org.enso.table.parsing.problems.MismatchedQuote;
import org.enso.table.parsing.problems.NoOpProblemAggregator;
import org.enso.table.problems.Problem;
import org.enso.table.problems.WithProblems;
import org.enso.table.util.NameDeduplicator;
import java.io.Reader;
import java.util.*;
import java.util.stream.Collectors;
/** A helper for reading delimited (CSV-like) files. */
public class DelimitedReader {
@ -203,13 +201,38 @@ public class DelimitedReader {
}
/**
* Reads the next row and updates the current line accordingly.
* Loads a next row from the CSV file.
*
* <p>This is an internal function that just loads the row but does not update the state nor take
* into consideration pending rows. The regular reading process should use {@code readNextRow}
* instead.
*/
private Row loadNextRow() {
// The parser reports the line of the last consumed row, so +1 points at
// the row we are about to parse.
long line = parser.getContext().currentLine() + 1;
String[] cells = parser.parseNext();
// A null result from the parser signals end of input.
if (cells == null) return null;
return new Row(line, cells);
}
private record Row(long lineNumber, String[] cells) {}
private final Queue<Row> pendingRows = new ArrayDeque<>(2);
/**
* Reads the next row and updates the current line accordingly. It takes into consideration the
* pending rows that have already been loaded when inferring the headers but were still not
* processed.
*
* <p>Will return {@code null} if no more rows are available.
*/
private String[] readNextRow() {
currentLine = parser.getContext().currentLine() + 1;
return parser.parseNext();
Row row = pendingRows.isEmpty() ? loadNextRow() : pendingRows.remove();
if (row == null) {
return null;
}
currentLine = row.lineNumber;
return row.cells;
}
private void appendRow(String[] row) {
@ -280,69 +303,111 @@ public class DelimitedReader {
return parsed instanceof String;
}
/** Reads the input stream and returns a Table. */
public WithProblems<Table> read() {
/** The column names as defined in the input (if applicable, otherwise null). */
private String[] definedColumnNames = null;
/** The effective column names.
*
* If {@code GENERATE_HEADERS} is used or if {@code INFER} is used and no headers are found, this will be populated with automatically generated column names. */
private String[] effectiveColumnNames;
private List<Problem> headerProblems;
/** Returns the column names that are defined in the input.
*
* Will return {@code null} if {@code GENERATE_HEADERS} is used or if {@code INFER} is used and no headers were found inside of the file. */
public String[] getDefinedColumnNames() {
ensureHeadersDetected();
return definedColumnNames;
}
public int getColumnCount() {
ensureHeadersDetected();
return effectiveColumnNames.length;
}
private void ensureHeadersDetected() {
if (effectiveColumnNames == null) {
detectHeaders();
}
}
private void detectHeaders() {
skipFirstRows();
Row firstRow = loadNextRow();
if (firstRow == null) {
effectiveColumnNames = new String[0];
headerProblems = Collections.emptyList();
return;
}
int expectedColumnCount = firstRow.cells.length;
boolean wereHeadersDefined = false;
WithProblems<List<String>> headerNames;
String[] currentRow = readNextRow();
// Skip the first N rows.
for (long i = 0; currentRow != null && i < skipRows; ++i) {
currentRow = readNextRow();
}
// If there are no rows to even infer the headers, we return an empty table.
if (currentRow == null) {
return new WithProblems<>(new Table(new Column[0]), Collections.emptyList());
}
int expectedColumnCount = currentRow.length;
initBuilders(expectedColumnCount);
switch (headerBehavior) {
case INFER -> {
String[] firstRow = currentRow;
String[] secondRow = readNextRow();
Row secondRow = loadNextRow();
if (secondRow == null) {
// If there is only one row in the file, we generate the headers and stop further processing (as nothing more to process).
/** If there is only one row in the file, we generate the headers and
* stop further processing (as nothing more to process). */
headerNames = generateDefaultHeaders(expectedColumnCount);
appendRowIfLimitPermits(firstRow);
currentRow = null;
pendingRows.add(firstRow);
} else {
assert cellTypeGuesser != null;
boolean firstAllText = Arrays.stream(firstRow).allMatch(this::isPlainText);
boolean secondAllText = Arrays.stream(secondRow).allMatch(this ::isPlainText);
boolean firstAllText = Arrays.stream(firstRow.cells).allMatch(this::isPlainText);
boolean secondAllText = Arrays.stream(secondRow.cells).allMatch(this ::isPlainText);
boolean useFirstRowAsHeader = firstAllText && !secondAllText;
if (useFirstRowAsHeader) {
headerNames = headersFromRow(firstRow);
appendRowIfLimitPermits(secondRow);
headerNames = headersFromRow(firstRow.cells);
wereHeadersDefined = true;
pendingRows.add(secondRow);
} else {
headerNames = generateDefaultHeaders(expectedColumnCount);
appendRowIfLimitPermits(firstRow);
appendRowIfLimitPermits(secondRow);
pendingRows.add(firstRow);
pendingRows.add(secondRow);
}
currentRow = readNextRow();
}
}
case USE_FIRST_ROW_AS_HEADERS -> {
headerNames = headersFromRow(currentRow);
// We have 'used up' the first row, so we load a next one.
currentRow = readNextRow();
headerNames = headersFromRow(firstRow.cells);
wereHeadersDefined = true;
}
case GENERATE_HEADERS -> {
headerNames = generateDefaultHeaders(expectedColumnCount);
pendingRows.add(firstRow);
}
case GENERATE_HEADERS -> headerNames = generateDefaultHeaders(expectedColumnCount);
default -> throw new IllegalStateException("Impossible branch.");
}
while (currentRow != null && canFitMoreRows()) {
headerProblems = headerNames.problems();
effectiveColumnNames = headerNames.value().toArray(new String[0]);
if (wereHeadersDefined) {
definedColumnNames = effectiveColumnNames;
}
}
private void skipFirstRows() {
for (long i = 0; i < skipRows; ++i) {
loadNextRow();
}
}
/** Reads the input stream and returns a Table. */
public WithProblems<Table> read() {
ensureHeadersDetected();
initBuilders(getColumnCount());
while (canFitMoreRows()) {
var currentRow = readNextRow();
if (currentRow == null) break;
appendRow(currentRow);
currentRow = readNextRow();
}
parser.stopParsing();
Column[] columns = new Column[builders.length];
for (int i = 0; i < builders.length; i++) {
String columnName = headerNames.value().get(i);
String columnName = effectiveColumnNames[i];
StringStorage col = builders[i].seal();
WithProblems<Storage> parseResult = valueParser.parseColumn(columnName, col);
@ -353,7 +418,7 @@ public class DelimitedReader {
columns[i] = new Column(columnName, new DefaultIndex(storage.size()), storage);
}
return new WithProblems<>(new Table(columns), getReportedProblems(headerNames.problems()));
return new WithProblems<>(new Table(columns), getReportedProblems(headerProblems));
}
private void initBuilders(int count) {

1
test/Table_Tests/data/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
*.bak

View File

@ -5,7 +5,7 @@ from Standard.Table.Data.Column_Selector import By_Name, By_Index
import Standard.Table.Data.Sort_Column
import Standard.Table.Data.Sort_Column_Selector
from Standard.Table.Data.Aggregate_Column import all
from Standard.Table.Error as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Aggregation, Floating_Point_Grouping, Unquoted_Delimiter, Additional_Warnings
from Standard.Table.Errors as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Aggregation, Floating_Point_Grouping, Unquoted_Delimiter, Additional_Warnings
from Standard.Database.Error as Database_Errors import Unsupported_Database_Operation_Error
import Standard.Test

View File

@ -4,7 +4,7 @@ import Standard.Test.Problems
import Standard.Base.Error.Problem_Behavior
import Standard.Table.Data.Column_Name_Mapping
from Standard.Table.Error as Table_Errors import all
from Standard.Table.Errors as Table_Errors import all
from Standard.Table.Data.Column_Selector as Column_Selector_Module import all
from Standard.Base.Data.Text.Text_Ordering as Text_Ordering_Module import Text_Ordering
from Standard.Table.Data.Position as Position_Module import all

View File

@ -4,7 +4,7 @@ from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding, Encodi
import Standard.Table
import Standard.Table.Data.Column
from Standard.Table.Error import all
from Standard.Table.Errors import all
import Standard.Base.Data.Time.Date
import Standard.Base.Data.Time

View File

@ -13,7 +13,7 @@ from Standard.Table.Data.Aggregate_Column import all
from Standard.Database import all
from Standard.Database.Data.Sql import Sql_Type
from Standard.Table import No_Such_Column_Error
from Standard.Table.Error as Table_Errors import No_Input_Columns_Selected, Missing_Input_Columns
from Standard.Table.Errors as Table_Errors import No_Input_Columns_Selected, Missing_Input_Columns
from Standard.Database.Error as Database_Errors import Unsupported_Database_Operation_Error
spec =

View File

@ -8,7 +8,7 @@ import Standard.Table.Data.Sort_Column_Selector
import Standard.Table.Data.Sort_Column
import Standard.Test
import Standard.Test.Problems
from Standard.Table.Error as Table_Errors import No_Input_Columns_Selected, Missing_Input_Columns
from Standard.Table.Errors as Table_Errors import No_Input_Columns_Selected, Missing_Input_Columns
import project.Database.Helpers.Name_Generator
from Standard.Table.Data.Aggregate_Column import all

View File

@ -4,7 +4,7 @@ from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding, Encodi
import Standard.Table
import Standard.Table.Data.Column
from Standard.Table.Error import all
from Standard.Table.Errors import all
import Standard.Table.Io.File_Read
from Standard.Table.Io.File_Format import Delimited
@ -129,6 +129,8 @@ spec =
'a,b,c\nd,e,f\r1,2,3'.write (path 'mixed.csv')
File.read (path 'mixed.csv') (Delimited "," headers=True value_formatter=Nothing) Problem_Behavior.Report_Error . should_fail_with Invalid_Row
['crlf.csv', 'lf.csv', 'cr.csv', 'mixed.csv'].each (path >> .delete)
Test.specify "should work with Windows-1252 encoding" <|
table = File.read (enso_project.data / "windows.csv") (Delimited "," headers=True encoding=Encoding.windows_1252) Problem_Behavior.Report_Error
table.columns.map .name . should_equal ['a', 'b', 'c']
@ -156,6 +158,7 @@ spec =
table.at 'c' . to_vector . should_equal ['\uFFFD(\uFFFD(']
problems_1 = [Encoding_Error "Encoding issues at bytes 13, 15."]
Problems.test_problem_handling action_1 problems_1 tester_1
utf8_file.delete
action_2 on_problems =
(enso_project.data / "utf16_invalid.csv").read (Delimited "," headers=True encoding=Encoding.utf_16_be) on_problems

View File

@ -1,18 +1,20 @@
from Standard.Base import all
import Standard.Base.Error.Problem_Behavior
import Standard.Base.System.File.Existing_File_Behavior
from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding, Encoding_Error
import Standard.Table
import Standard.Table.Data.Column
from Standard.Table.Error import all
import Standard.Base.Data.Time.Date
import Standard.Base.Data.Time.Time_Of_Day
import Standard.Table
import Standard.Table.Data.Column
from Standard.Table.Errors import all
import Standard.Table.Io.File_Read
from Standard.Table.Io.File_Format import Delimited
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
import Standard.Table.Io.Quote_Style
import Standard.Table.Data.Match_Columns
import Standard.Table.Data.Column_Name_Mapping
from Standard.Table.Errors as Table_Errors import Column_Count_Mismatch, Column_Name_Mismatch
import Standard.Test
import Standard.Test.Problems
@ -39,6 +41,16 @@ spec =
3,2.2,z,[[[My Type :: 10]]]
text = File.read_text file
text.should_equal expected_text+'\n'
file.delete
Test.specify "should be able to write an empty table" <|
table = Table.new []
file = (enso_project.data / "transient" / "empty.csv")
file.delete_if_exists
table.write file
text = File.read_text file
text.should_equal ''
file.delete
Test.specify 'should quote values that contain the delimiter or quotes, in the [,""] variant' <|
data_formatter = Data_Formatter decimal_point=","
@ -54,6 +66,7 @@ spec =
"one, two, three","-1,5"
text = File.read_text file
text.should_equal expected_text+'\n'
file.delete
Test.specify 'should quote values that contain the delimiter or quotes, in the [;\\\"] variant' <|
data_formatter = Data_Formatter thousand_separator="'"
@ -70,6 +83,7 @@ spec =
"a\\b";
text = File.read_text file
text.should_equal expected_text+'\n'
file.delete
Test.specify "should quote values that contain the delimiter or quotes, in the [\t''] variant" <|
data_formatter = Data_Formatter thousand_separator="'"
@ -85,6 +99,7 @@ spec =
'a\tb'\t-1.2
text = File.read_text file
text.should_equal expected_text+'\n'
file.delete
Test.specify "should correctly distinguish empty text from a missing value" <|
table = Table.new [["A", [1,Nothing,3]], ["B", [Nothing,"","abc"]]]
@ -98,6 +113,7 @@ spec =
3,abc
text = File.read_text file
text.should_equal expected_text+'\n'
file.delete
Test.specify 'should not quote values if quoting is disabled' <|
format = File_Format.Delimited "," value_formatter=(Data_Formatter decimal_point=",") . without_quotes
@ -113,6 +129,7 @@ spec =
one, two, three,-1,5
text = File.read_text file
text.should_equal expected_text+'\n'
file.delete
Test.specify 'should allow to always quote text and custom values, but for non-text primitves only if absolutely necessary' <|
format = File_Format.Delimited "," value_formatter=(Data_Formatter thousand_separator='"' date_formats=["E, d MMM y"]) . with_quotes always_quote=True quote_escape='\\'
@ -128,6 +145,7 @@ spec =
"one, two, three",-1.5,42,"4\"000",
text = File.read_text file
text.should_equal expected_text+'\n'
file.delete
Test.specify "should correctly handle alternative encodings" <|
table = Table.new [["ąęćś", [0]], ["ß", ["żółw 🐢"]]]
@ -139,6 +157,7 @@ spec =
0,żółw 🐢
text = File.read_text file encoding=Encoding.utf_16_be
text.should_equal expected_text+'\n'
file.delete
Test.specify "should correctly handle encoding errors" <|
table = Table.new [["A", [0, 1]], ["B", ["słówka", "🐢"]]]
@ -153,6 +172,7 @@ spec =
text.should_equal expected_text+'\n'
result . should_equal Nothing
Warning.get_all result . map .value . should_equal [Encoding_Error "Encoding issues at characters 7, 8, 15."]
file.delete
Test.specify "should allow only text columns if no formatter is specified" <|
format = File_Format.Delimited "," value_formatter=Nothing
@ -176,4 +196,132 @@ spec =
text_2 = File.read_text file_2
text_2.should_equal ""
file_1.delete
file_2.delete
# Appending to a missing file should behave like a plain write: the file is
# created and reading it back yields the original table.
Test.specify "should create a new file in append mode if it didn't exist" <|
table = Table.new [["A", [1,2,3]], ["B", [1.0,1.5,2.2]], ["C", ["x","y","z"]]]
file = (enso_project.data / "transient" / "append_nonexistent.csv")
file.delete_if_exists
table.write file on_existing_file=Existing_File_Behavior.Append
got_table = file.read
got_table.should_equal table
file.delete
# An existing but empty file must not confuse header detection when appending.
Test.specify "should correctly append to an empty file" <|
table = Table.new [["A", [1,2,3]], ["B", [1.0,1.5,2.2]], ["C", ["x","y","z"]]]
file = (enso_project.data / "transient" / "append_empty.csv")
file.delete_if_exists
"".write file
table.write file on_existing_file=Existing_File_Behavior.Append
got_table = file.read
got_table.should_equal table
file.delete
# With inferred headers, appended columns are reordered to match the existing
# file's column names (note the appended table lists B before A).
Test.specify "should append to a file, matching columns by name (headers=Infer)" <|
existing_table = Table.new [["A", [1,2]], ["B", [1.0,1.5]], ["C", ["x","y"]]]
appending_table = Table.new [["B", [33,44]], ["A", [Nothing, 0]], ["C", ["a","BB"]]]
file = (enso_project.data / "transient" / "append_by_name.csv")
file.delete_if_exists
existing_table.write file on_existing_file=Existing_File_Behavior.Overwrite
appending_table.write file on_existing_file=Existing_File_Behavior.Append
got_table = file.read
expected_table = Table.new [["A", [1,2,Nothing,0]], ["B", [1.0,1.5,33,44]], ["C", ["x","y","a","BB"]]]
got_table.should_equal expected_table
file.delete
# Same by-name matching, but with headers explicitly enabled; column names that
# look numeric ("0") must still be matched by name, not by position.
Test.specify "should append to a file, matching columns by name (headers=True)" <|
existing_table = Table.new [["0", [1,2]], ["B1", [1.0,1.5]], ["C", ["x","y"]]]
appending_table = Table.new [["B1", [33,44]], ["0", [Nothing, 0]], ["C", ["a","BB"]]]
file = (enso_project.data / "transient" / "append_by_name_2.csv")
file.delete_if_exists
existing_table.write file on_existing_file=Existing_File_Behavior.Overwrite
format = File_Format.Delimited "," . with_headers
appending_table.write file format on_existing_file=Existing_File_Behavior.Append
got_table = file.read format
expected_table = Table.new [["0", [1,2,Nothing,0]], ["B1", [1.0,1.5,33,44]], ["C", ["x","y","a","BB"]]]
got_table.should_equal expected_table
file.delete
# By-name matching needs names: if the file was written without headers,
# appending with inferred headers must fail rather than guess.
Test.specify "should fail when appending and matching columns by name but column names are not available in the file (headers=Infer)" <|
existing_table = Table.new [["A", [1,2]], ["B", [1.0,1.5]], ["C", ["x","y"]]]
appending_table = Table.new [["B", [33,44]], ["A", [Nothing, 0]], ["C", ["a","BB"]]]
file = (enso_project.data / "transient" / "append_no_header.csv")
file.delete_if_exists
no_header_format = File_Format.Delimited "," . without_headers
existing_table.write file no_header_format on_existing_file=Existing_File_Behavior.Overwrite
appending_table.write file on_existing_file=Existing_File_Behavior.Append . should_fail_with Illegal_Argument_Error
file.delete
# Conversely: headers explicitly disabled on the append side is incompatible
# with the default by-name matching, even though the file has headers.
Test.specify "should fail when appending and matching columns by name but headers are disabled (headers=False)" <|
existing_table = Table.new [["A", [1,2]], ["B", [1.0,1.5]], ["C", ["x","y"]]]
appending_table = Table.new [["B", [33,44]], ["A", [Nothing, 0]], ["C", ["a","BB"]]]
file = (enso_project.data / "transient" / "append_no_header.csv")
file.delete_if_exists
no_header_format = File_Format.Delimited "," . without_headers
existing_table.write file on_existing_file=Existing_File_Behavior.Overwrite
appending_table.write file no_header_format on_existing_file=Existing_File_Behavior.Append . should_fail_with Illegal_Argument_Error
file.delete
# A name mismatch reports both directions: columns missing from the new data
# and extra columns not present in the file.
Test.specify "should fail on column mismatch when appending to a file by name" <|
existing_table = Table.new [["A", [1,2]], ["B", [1.0,1.5]]]
appending_table = Table.new [["B", [33,44]], ["X", [Nothing, 0]]]
file = (enso_project.data / "transient" / "append_no_header.csv")
file.delete_if_exists
existing_table.write file on_existing_file=Existing_File_Behavior.Overwrite
result = appending_table.write file on_existing_file=Existing_File_Behavior.Append
result . should_fail_with Column_Name_Mismatch
result.catch.missing . should_equal ["A"]
result.catch.extras . should_equal ["X"]
result.catch.to_display_text . should_equal "Columns mismatch. Missing from new data: [A] Extras in new data: [X]"
file.delete
# Positional matching ignores names entirely; exercised over all four
# combinations of the initial file and the append write having headers or not.
Test.specify "should append to a file, matching columns by position" <|
existing_table = Table.new [["A", [1,2]], ["B", [1.0,1.5]], ["C", ["x","y"]]]
appending_table = Table.new [["AA", [33,44]], ["", [Nothing, 0]], ["hmmm", ["a","BB"]]]
# Helper: writes `existing_table` with `initial_file_format`, appends
# `appending_table` positionally with `append_format`, then checks the result.
test_append initial_file_format append_format expected_table =
file = (enso_project.data / "transient" / "append_by_position.csv")
file.delete_if_exists
existing_table.write file initial_file_format on_existing_file=Existing_File_Behavior.Overwrite
appending_table.write file append_format match_columns=Match_Columns.By_Position on_existing_file=Existing_File_Behavior.Append
read_format = initial_file_format
got_table = file.read read_format
got_table.should_equal expected_table
file.delete
base_format = File_Format.Delimited ","
no_headers = base_format . without_headers
with_headers = base_format . with_headers
expected_table_with_headers = Table.new [["A", [1,2,33,44]], ["B", [1.0,1.5,Nothing,0]], ["C", ["x","y","a","BB"]]]
expected_table_without_headers = expected_table_with_headers.rename_columns (Column_Name_Mapping.By_Position ["Column_1", "Column_2", "Column_3"])
test_append initial_file_format=with_headers append_format=no_headers expected_table_with_headers
test_append initial_file_format=with_headers append_format=base_format expected_table_with_headers
test_append initial_file_format=no_headers append_format=base_format expected_table_without_headers
test_append initial_file_format=no_headers append_format=no_headers expected_table_without_headers
# Positional matching requires an exact column-count match; both too few and
# too many columns are rejected with the expected/actual counts reported.
Test.specify "should fail on column count mismatch when appending to a file by position" <|
existing_table = Table.new [["A", [1,2]], ["B", [1.0,1.5]], ["C", ["x","y"]]]
appending_table_1 = Table.new [["B", [33,44]], ["X", [Nothing, 0]]]
appending_table_2 = Table.new [["B", [33,44]], ["X", [Nothing, 0]], ["Y", ["a","BB"]], ["Z", [Nothing, 0]]]
file = (enso_project.data / "transient" / "append_mismatch.csv")
file.delete_if_exists
existing_table.write file on_existing_file=Existing_File_Behavior.Overwrite
result_1 = appending_table_1.write file match_columns=Match_Columns.By_Position on_existing_file=Existing_File_Behavior.Append
result_1 . should_fail_with Column_Count_Mismatch
result_1.catch.expected . should_equal 3
result_1.catch.actual . should_equal 2
result_1.catch.to_display_text . should_equal "Expected 3 columns, got 2."
result_2 = appending_table_2.write file match_columns=Match_Columns.By_Position on_existing_file=Existing_File_Behavior.Append
result_2 . should_fail_with Column_Count_Mismatch
result_2.catch.expected . should_equal 3
result_2.catch.actual . should_equal 4
result_2.catch.to_display_text . should_equal "Expected 3 columns, got 4."
file.delete
# Entry point: runs this spec as a standalone test suite.
main = Test.Suite.run_main spec

View File

@ -10,7 +10,7 @@ import Standard.Table.Data.Column_Name_Mapping
import Standard.Table.Data.Match_Columns
from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Index
from Standard.Table.Io.Excel import Excel_Range, Sheet_Names, Range_Names, Sheet, Cell_Range
from Standard.Table.Error as Table_Errors import Invalid_Output_Column_Names, Duplicate_Output_Column_Names, Invalid_Location, Range_Exceeded, Existing_Data, Column_Count_Mismatch, Column_Name_Mismatch
from Standard.Table.Errors as Table_Errors import Invalid_Output_Column_Names, Duplicate_Output_Column_Names, Invalid_Location, Range_Exceeded, Existing_Data, Column_Count_Mismatch, Column_Name_Mismatch
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
import Standard.Test

View File

@ -2,7 +2,7 @@ from Standard.Base import all
from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding, Encoding_Error
import Standard.Table.Io.File_Read
import Standard.Table.Io.File_Format
from Standard.Table.Error import Unsupported_File_Type
from Standard.Table.Errors import Unsupported_File_Type
import Standard.Test
import Standard.Test.Problems

View File

@ -13,7 +13,7 @@ import Standard.Visualization
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
from Standard.Table.Data.Column_Type_Selection as Column_Type_Selection_Module import Column_Type_Selection, Auto
from Standard.Table.Error as Table_Errors import Invalid_Format, Leading_Zeros, Missing_Input_Columns, Column_Indexes_Out_Of_Range, Duplicate_Type_Selector
from Standard.Table.Errors as Table_Errors import Invalid_Format, Leading_Zeros, Missing_Input_Columns, Column_Indexes_Out_Of_Range, Duplicate_Type_Selector
spec = Test.group "Table.parse_values" <|
Test.specify "should correctly parse integers" <|
@ -44,7 +44,7 @@ spec = Test.group "Table.parse_values" <|
t3 = t1.parse_values column_types=[Column_Type_Selection 0 Integer]
t3.at "ints" . to_vector . should_equal t1_parsed
Warning.get_all t3 . map .value . should_equal [Leading_Zeros "ints" Integer t1_zeros]
t4 = t1.parse_values column_types=[Column_Type_Selection 0 Decimal]
t4.at "ints" . to_vector . should_equal t1_parsed
Warning.get_all t4 . map .value . should_equal [Leading_Zeros "ints" Decimal t1_zeros]

View File

@ -10,7 +10,7 @@ import Standard.Table.Data.Storage
import Standard.Test
import Standard.Test.Problems
import Standard.Visualization
from Standard.Table.Error as Table_Errors import Invalid_Output_Column_Names, Duplicate_Output_Column_Names, No_Input_Columns_Selected, Missing_Input_Columns
from Standard.Table.Errors as Table_Errors import Invalid_Output_Column_Names, Duplicate_Output_Column_Names, No_Input_Columns_Selected, Missing_Input_Columns
import project.Common_Table_Spec

View File

@ -6,5 +6,5 @@ import Standard.Test
Table.Table.should_equal expected =
self_cols = self.columns
that_cols = expected.columns
self_cols.map .name . should_equal (that_cols.map .name)
self_cols.map .to_vector . should_equal (that_cols.map .to_vector)
self_cols.map .name . should_equal (that_cols.map .name) frames_to_skip=1
self_cols.map .to_vector . should_equal (that_cols.map .to_vector) frames_to_skip=1

View File

@ -1,6 +1,6 @@
from Standard.Base import all
from Standard.Table.Error as Error_Module import all
from Standard.Table.Errors as Error_Module import all
import Standard.Base.Error.Problem_Behavior
import Standard.Base.Data.Text.Matching
import Standard.Test