Remove duplicate Line_Ending_Style and update defaults (#3597)

Implements https://www.pivotaltracker.com/story/show/182749831

parent c6d0843a2c
commit ee91656f30
@@ -644,6 +644,19 @@ type File
resource = Managed_Resource.register stream close_stream
Output_Stream self resource

## PRIVATE

Reads last `n` bytes from the file (or less if the file is too small) and
returns a vector of bytes.
read_last_bytes : Integer -> Vector ! File_Error
read_last_bytes n =
handle_java_exceptions self <|
Vector.Vector (self.read_last_bytes_builtin n)

## PRIVATE
read_last_bytes_builtin : Integer -> Array
read_last_bytes_builtin n = @Builtin_Method "File.read_last_bytes_builtin"

## Lists files contained in the directory denoted by this file.

Arguments:
@@ -112,8 +112,8 @@ type Delimited
`Nothing` or dropping the excess columns) or dropped.
- line_endings: Sets the line ending style to use. Defaults to `Infer` -
when reading a file or appending to an existing file, the line endings
are detected from file contents; when writing a new file, the OS
defaults are used.
are detected from file contents; when writing a new file in `Infer`
mode the `Unix` line endings are used.
- comment_character: Sets the character which indicates the start of a
comment within a delimited file. Any line that begins with the comment
character is skipped. The comment character is treated as any other
@@ -1,29 +0,0 @@
from Standard.Base import Nothing
from Standard.Table.IO.File_Format import Infer

## Specifies what line endings to use in a file format.
type Line_Ending_Style
## The line ending style is chosen automatically.

When reading a file or appending to an existing file, the line endings
are detected from file contents. When writing a new file, the OS defaults
are used.
Infer

## The UNIX line endings.
type Unix_Line_Endings

## The Windows line endings.
type Windows_Line_Endings

## The classic Mac OS line endings. Used for legacy applications, as modern
Mac OS uses the UNIX line endings.
type Classic_Mac_Line_Endings

## PRIVATE
line_separator_sequence : Line_Ending_Style -> Text
line_separator_sequence line_endings = case line_endings of
Unix_Line_Endings -> '\n'
Windows_Line_Endings -> '\r\n'
Classic_Mac_Line_Endings -> '\r'
Infer -> Nothing
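The deleted module above mapped each ending style to its separator text; after this change the same mapping appears to live on `Standard.Base.Data.Text.Line_Ending_Style` via `to_text` (see the `Unix`, `Windows` and `Mac_Legacy` constructors used later in this diff). A minimal Java sketch of that mapping, with hypothetical names and only the '\n', '\r\n', '\r' sequences taken from the code above:

// Illustrative stand-in for the consolidated line ending style type.
// The enum and method names are hypothetical; only the separator
// sequences come from the removed module above.
enum LineEndingStyle {
  UNIX("\n"),
  WINDOWS("\r\n"),
  MAC_LEGACY("\r");

  private final String separator;

  LineEndingStyle(String separator) {
    this.separator = separator;
  }

  String toText() {
    return separator;
  }

  public static void main(String[] args) {
    System.out.println(LineEndingStyle.WINDOWS.toText().equals("\r\n")); // true
  }
}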
@@ -1,15 +1,16 @@
from Standard.Base import all
import Standard.Table

import Standard.Base.Data.Statistics
import Standard.Base.Error.Common as Errors
from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior, Ignore
from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior, Ignore, Report_Error
from Standard.Table.Errors as Table_Errors import Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Row, Mismatched_Quote, Parser_Error, Additional_Invalid_Rows
from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding, Encoding_Error
from Standard.Table.IO.File_Format import Infer
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
import Standard.Table.IO.Quote_Style
from Standard.Table.IO.Line_Ending_Style import line_separator_sequence

polyglot java import org.enso.base.encoding.NewlineDetector
polyglot java import org.enso.table.read.DelimitedReader
polyglot java import org.enso.table.read.ParsingFailedException
polyglot java import org.enso.table.parsing.problems.InvalidRow
@@ -19,7 +20,6 @@ polyglot java import org.enso.table.util.problems.DuplicateNames
polyglot java import org.enso.table.util.problems.InvalidNames
polyglot java import java.io.IOException
polyglot java import com.univocity.parsers.common.TextParsingException
polyglot java import org.enso.base.Encoding_Utils
polyglot java import java.io.InputStream
polyglot java import java.io.Reader
polyglot java import java.io.StringReader
@@ -97,7 +97,7 @@ read_from_reader format java_reader on_problems max_columns=4096 =
on_problems.attach_problems_after (Table.Table result_with_problems.value) parsing_problems

## PRIVATE
prepare_delimited_reader java_reader format max_columns on_problems =
prepare_delimited_reader java_reader format max_columns on_problems newline_override=Nothing =
java_headers = case format.headers of
True -> DelimitedReader.HeaderBehavior.USE_FIRST_ROW_AS_HEADERS
Infer -> DelimitedReader.HeaderBehavior.INFER
@@ -106,7 +106,7 @@ prepare_delimited_reader java_reader format max_columns on_problems =
Nothing -> -1
Integer -> format.row_limit
_ -> Error.throw (Illegal_Argument_Error "`row_limit` should be Integer or Nothing.")
warnings_as_errors = on_problems == Problem_Behavior_Module.Report_Error
warnings_as_errors = on_problems == Report_Error
quote_characters = case format.quote_style of
Quote_Style.No_Quotes -> Pair Nothing Nothing
Quote_Style.With_Quotes _ quote quote_escape -> Pair quote quote_escape
@@ -120,7 +120,9 @@ prepare_delimited_reader java_reader format max_columns on_problems =
cell_type_guesser = if format.headers != Infer then Nothing else
formatter = format.value_formatter.if_nothing Data_Formatter
TypeInferringParser.new formatter.get_specific_type_parsers.to_array IdentityParser.new
newline = line_separator_sequence format.line_endings
newline = newline_override.if_nothing <| case format.line_endings of
Infer -> Nothing
endings -> endings.to_text
DelimitedReader.new java_reader format.delimiter quote_characters.first quote_characters.second java_headers format.skip_rows row_limit max_columns value_parser cell_type_guesser format.keep_invalid_rows newline format.comment_character warnings_as_errors

translate_reader_problem problem =
@@ -147,11 +149,18 @@ type Detected_Headers
## Indicates that the file exists but no headers have been found, so only positional column matching is possible.
type No_Headers (column_count : Integer)

## PRIVATE
An internal type representing metadata describing the format of a specific
Delimited file.
type Detected_File_Metadata
type Detected_File_Metadata (headers : Detected_Headers) (line_separator : Text|Nothing)
## PRIVATE
An internal type representing metadata describing the format of a specific
Delimited file.

Arguments:
- headers: headers present in the file.
- line_separator: line separator detected from file contents, if any.
- ends_with_newline: specifies if the last line ends with a line
separator that is consistent with the detected one.
- has_any_content: specifies if the file contains any content.
type Detected_File_Metadata (headers : Detected_Headers) (line_separator : Text|Nothing) (ends_with_newline : Boolean) (has_any_content : Boolean)

## PRIVATE
Reads the beginning of the file to detect the existing headers and column
@@ -160,13 +169,18 @@ detect_metadata : File -> File_Format.Delimited -> Detected_Headers
detect_metadata file format =
on_problems = Ignore
result = handle_io_exception file <| Illegal_Argument_Error.handle_java_exception <| handle_parsing_failure <| handle_parsing_exception <|
trailing_line_separator = newline_at_eof file format.encoding
has_trailing_line_separator = trailing_line_separator.is_nothing.not
file.with_input_stream [File.Option.Read] stream->
stream.with_stream_decoder format.encoding on_problems java_reader->
## We don't need to close this one, as closing the parent stream
will suffice.
newline_detecting_reader = NewlineDetector.new java_reader
## We use the default `max_columns` setting. If we want to be able to
read files with unlimited column limits (risking OutOfMemory
exceptions), we can catch the exception indicating the limit has been
reached and restart parsing with an increased limit.
reader = prepare_delimited_reader java_reader format max_columns=default_max_columns on_problems
reader = prepare_delimited_reader newline_detecting_reader format max_columns=default_max_columns on_problems newline_override=trailing_line_separator
defined_columns = reader.getDefinedColumnNames
headers = case defined_columns of
Nothing ->
@@ -174,9 +188,34 @@ detect_metadata file format =
if column_count == 0 then Nothing else
No_Headers column_count
_ -> Existing_Headers (Vector.Vector defined_columns)
line_separator = reader.getEffectiveLineSeparator
Detected_File_Metadata headers line_separator
result.catch File.File_Not_Found (_->(Detected_File_Metadata Nothing Nothing))
line_separator_from_parser = reader.getEffectiveLineSeparator
has_seen_newline = newline_detecting_reader.newlineEncountered
## If the parser has seen a newline, we can trust that it
detected the newline correctly. However if it has not, we
cannot trust it as it tends to just fall back to the system
default which is wrong. Thus we return the trailing line
separator (which may be `Nothing`).
effective_line_separator = case has_seen_newline of
True -> line_separator_from_parser
False -> trailing_line_separator
has_any_content = reader.getVisitedCharactersCount > 0
Detected_File_Metadata headers effective_line_separator has_trailing_line_separator has_any_content
result.catch File.File_Not_Found (_->(Detected_File_Metadata Nothing Nothing False False))

## PRIVATE
Checks if the file has a newline at the end.

Returns the newline sequence if found, `Nothing` otherwise.
newline_at_eof : File -> Encoding -> Text|Nothing
newline_at_eof file encoding =
newlines = ['\r\n', '\n', '\r']
newline_bytes = newlines.map (x-> x.bytes encoding Report_Error)
most_bytes = newline_bytes.map .length . compute Statistics.Maximum
file_last_bytes = file.read_last_bytes most_bytes
result = newlines.zip newline_bytes . find pair->
bytes = pair.second
bytes == (file_last_bytes.take_end bytes.length)
result.first . catch Nothing

## PRIVATE
handle_parsing_failure =
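The `newline_at_eof` helper above only reads the last few bytes of the file and compares them against the known separator sequences, longest first. A minimal standalone Java sketch of the same technique, assuming an ASCII-compatible encoding (the Enso version encodes the candidate sequences with the file's actual encoding); the class, file and helper names below are illustrative:

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.SeekableByteChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;

public class TrailingNewlineCheck {
  // Reads at most `n` bytes from the end of the file, mirroring read_last_bytes.
  static byte[] readLastBytes(Path path, int n) throws IOException {
    try (SeekableByteChannel channel = Files.newByteChannel(path)) {
      int bytesToRead = Math.toIntExact(Math.min(channel.size(), n));
      channel.position(channel.size() - bytesToRead);
      ByteBuffer buffer = ByteBuffer.allocate(bytesToRead);
      while (buffer.hasRemaining()) {
        if (channel.read(buffer) < 0) break;
      }
      return buffer.array();
    }
  }

  // Returns the trailing newline sequence ("\r\n", "\n" or "\r"), or null if absent.
  // Checking "\r\n" before "\n" matters, exactly as in the Enso candidate list.
  static String newlineAtEof(Path path) throws IOException {
    String[] newlines = {"\r\n", "\n", "\r"};
    byte[] tail = readLastBytes(path, 2); // the longest candidate is 2 bytes here
    String tailText = new String(tail, StandardCharsets.UTF_8);
    for (String newline : newlines) {
      if (tailText.endsWith(newline)) return newline;
    }
    return null;
  }

  public static void main(String[] args) throws IOException {
    Path file = Files.createTempFile("example", ".csv"); // illustrative temp file
    Files.writeString(file, "a,b\r\n1,2\r\n");
    String detected = newlineAtEof(file);
    System.out.println("\r\n".equals(detected)); // true for the CRLF-terminated sample
  }
}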
@@ -11,7 +11,7 @@ from Standard.Table.IO.File_Format import Infer
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
import Standard.Table.Data.Storage
import Standard.Table.IO.Quote_Style
from Standard.Table.IO.Line_Ending_Style import line_separator_sequence
import Standard.Base.Data.Text.Line_Ending_Style
from Standard.Table.Internal.Delimited_Reader import Existing_Headers, No_Headers
import Standard.Table.Data.Match_Columns
@@ -54,16 +54,17 @@ append_to_file table format file match_columns on_problems =
inferring_format = format.with_line_endings Infer
metadata = Delimited_Reader.detect_metadata file inferring_format
preexisting_headers = metadata.headers
case format.line_endings of
Infer -> Nothing
effective_line_separator = case format.line_endings of
Infer -> metadata.line_separator.if_nothing default_line_separator_for_writing.to_text
other_ending_style ->
selected_separator = line_separator_sequence other_ending_style
selected_separator = other_ending_style.to_text
existing_separator = metadata.line_separator
if selected_separator != existing_separator then
if existing_separator.is_nothing.not && (selected_separator != existing_separator) then
Panic.throw <| Illegal_Argument_Error <|
# Ensure that these are properly escaped once `to_text` meaning is changed.
"The explicitly provided line endings (" + selected_separator.to_text + ") do not match the line endings in the file (" + existing_separator.to_text + ")."
effective_line_separator = metadata.line_separator
other_ending_style.to_text

reordered_java_table = case preexisting_headers of
Nothing -> table.java_table
Existing_Headers column_names -> case match_columns of
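The rewritten branch above resolves which separator to use when appending: with `Infer` it prefers the separator detected in the existing file and otherwise falls back to the new Unix default, while an explicitly chosen style must match whatever the file already uses. A compact Java sketch of that resolution rule (class, method and parameter names are illustrative, not the Enso API):

public class LineEndingResolution {
  // `explicit` is null when the format is set to Infer; `detected` is null when
  // no separator could be found in the existing file.
  static String resolve(String explicit, String detected, String defaultForNewFiles) {
    if (explicit == null) {
      // Infer: prefer whatever the existing file already uses.
      return detected != null ? detected : defaultForNewFiles;
    }
    if (detected != null && !detected.equals(explicit)) {
      throw new IllegalArgumentException(
          "The explicitly provided line endings do not match the line endings in the file.");
    }
    return explicit;
  }

  public static void main(String[] args) {
    System.out.println(resolve(null, "\r\n", "\n").equals("\r\n")); // true: keep the file's CRLF
    System.out.println(resolve(null, null, "\n").equals("\n"));     // true: new file, Unix default
    System.out.println(resolve("\n", null, "\n").equals("\n"));     // true: explicit style, nothing to clash with
  }
}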
@@ -82,8 +83,10 @@ append_to_file table format file match_columns on_problems =
amended_format = case writing_new_file && (should_write_headers format.headers) of
True -> format.with_headers
False -> format.without_headers
needs_leading_newline =
metadata.has_any_content && metadata.ends_with_newline.not
Existing_File_Behavior.Append.write file stream->
write_to_stream reordered_table amended_format stream on_problems related_file=file separator_override=effective_line_separator
write_to_stream reordered_table amended_format stream on_problems related_file=file separator_override=effective_line_separator needs_leading_newline=needs_leading_newline

## PRIVATE
Returns a Text value representing the table in the delimited format.
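`needs_leading_newline` captures the case where the existing file has content but does not end with a separator, so appended rows must start on a fresh line. A small Java sketch of that append step, with illustrative file handling and names:

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.List;

public class AppendWithLeadingNewline {
  // Appends rows to `file`, inserting `separator` first when the existing
  // content is non-empty and does not already end with it.
  static void appendRows(Path file, List<String> rows, String separator) throws IOException {
    String existing = Files.exists(file) ? Files.readString(file) : "";
    boolean needsLeadingNewline = !existing.isEmpty() && !existing.endsWith(separator);
    StringBuilder out = new StringBuilder();
    if (needsLeadingNewline) out.append(separator);
    for (String row : rows) out.append(row).append(separator);
    Files.writeString(file, out.toString(), StandardOpenOption.CREATE, StandardOpenOption.APPEND);
  }

  public static void main(String[] args) throws IOException {
    Path file = Files.createTempFile("append", ".csv"); // illustrative temp file
    Files.writeString(file, "A,B\n1,2"); // existing content without a trailing newline
    appendRows(file, List.of("3,4"), "\n");
    System.out.println(Files.readString(file)); // "A,B\n1,2\n3,4\n"
  }
}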
@@ -108,14 +111,14 @@ write_text table format =
or `Nothing`. It is used for more detailed error reporting.
- separator_override: An optional override for the line separator to use
instead of the one from `format`.
write_to_stream : Table -> File_Format.Delimited -> Output_Stream -> Problem_Behavior -> File | Nothing -> Text | Nothing -> Any
write_to_stream table format stream on_problems related_file=Nothing separator_override=Nothing =
write_to_stream : Table -> File_Format.Delimited -> Output_Stream -> Problem_Behavior -> File | Nothing -> Text | Nothing -> Boolean -> Any
write_to_stream table format stream on_problems related_file=Nothing separator_override=Nothing needs_leading_newline=False =
handle_io_exception ~action = Panic.catch IOException action caught_panic->
Error.throw (File.wrap_io_exception related_file caught_panic.payload.cause)

handle_io_exception <|
stream.with_stream_encoder format.encoding on_problems reporting_stream_encoder->
write_to_writer table format reporting_stream_encoder separator_override=separator_override
write_to_writer table format reporting_stream_encoder separator_override=separator_override needs_leading_newline=needs_leading_newline

## PRIVATE
Writes data to the provided `Writer` according to the provided format.
@@ -129,8 +132,8 @@ write_to_stream table format stream on_problems related_file=Nothing separator_o
- java_writer: A Java `Writer` to which characters will be written.
- separator_override: An optional override for the line separator to use
instead of the one from `format`.
write_to_writer : Table -> File_Format.Delimited -> Writer -> Text | Nothing -> Any
write_to_writer table format java_writer separator_override=Nothing =
write_to_writer : Table -> File_Format.Delimited -> Writer -> Text | Nothing -> Boolean -> Any
write_to_writer table format java_writer separator_override=Nothing needs_leading_newline=False =
column_formatters = Panic.recover Illegal_Argument_Error <| case format.value_formatter of
Nothing -> table.columns.map column-> case column.storage_type of
Storage.Text -> TextFormatter.new
@@ -148,9 +151,12 @@ write_to_writer table format java_writer separator_override=Nothing =
Quote_Style.With_Quotes _ quote quote_escape -> Pair quote quote_escape
write_headers = should_write_headers format.headers
newline = separator_override.if_nothing <|
separator_from_format = line_separator_sequence format.line_endings
separator_from_format.if_nothing System.default_line_separator
writer = DelimitedWriter.new java_writer column_formatters.to_array format.delimiter newline quote_characters.first quote_characters.second quote_behavior write_headers
case format.line_endings of
Infer -> default_line_separator_for_writing.to_text
endings -> endings.to_text
if needs_leading_newline then
java_writer.write newline
writer = DelimitedWriter.new java_writer column_formatters.to_array format.delimiter newline quote_characters.first quote_characters.second format.comment_character quote_behavior write_headers
writer.write table.java_table

## PRIVATE
@@ -158,3 +164,8 @@ should_write_headers headers = case headers of
True -> True
Infer -> True
False -> False

## PRIVATE
The default line separator used for newly written delimited files, if no
specific separator has been set.
default_line_separator_for_writing = Line_Ending_Style.Unix
@@ -733,8 +733,8 @@ run_spec ~behavior =
case ex of
Failure _ -> ex
Finished_With_Error err stack_trace_text ->
Failure ("An unexpected error was returned: " + err.to_text + '\n' + stack_trace_text)
_ -> Failure ("An unexpected panic was thrown: " + ex.to_text + '\n' + maybeExc.get_stack_trace_text)
Failure ("An unexpected error was returned: " + err.to_display_text + '\n' + stack_trace_text)
_ -> Failure ("An unexpected panic was thrown: " + ex.to_display_text + '\n' + maybeExc.get_stack_trace_text)
result

## PRIVATE
@@ -0,0 +1,41 @@
package org.enso.interpreter.runtime.data;

import com.oracle.truffle.api.interop.InteropLibrary;
import com.oracle.truffle.api.interop.TruffleObject;
import com.oracle.truffle.api.library.ExportLibrary;
import com.oracle.truffle.api.library.ExportMessage;

import java.nio.ByteBuffer;

@ExportLibrary(InteropLibrary.class)
public final class ArrayOverBuffer implements TruffleObject {
  private final ByteBuffer buffer;

  private ArrayOverBuffer(ByteBuffer buffer) {
    this.buffer = buffer;
  }

  @ExportMessage
  Object readArrayElement(long index) {
    return (long) buffer.get(buffer.position() + Math.toIntExact(index));
  }

  @ExportMessage
  boolean hasArrayElements() {
    return true;
  }

  @ExportMessage
  boolean isArrayElementReadable(long index) {
    return index >= 0 && index < getArraySize();
  }

  @ExportMessage
  long getArraySize() {
    return buffer.remaining();
  }

  public static ArrayOverBuffer wrapBuffer(ByteBuffer buffer) {
    return new ArrayOverBuffer(buffer);
  }
}
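`ArrayOverBuffer` exposes a `ByteBuffer` slice (from its current position to its limit) to Enso as a read-only array without copying. A plain-Java analogue of the same indexing scheme, stripped of the Truffle interop machinery; the class name below is illustrative:

import java.nio.ByteBuffer;

// Minimal analogue: a read-only array view over a ByteBuffer's remaining bytes.
final class ByteBufferArrayView {
  private final ByteBuffer buffer;

  ByteBufferArrayView(ByteBuffer buffer) {
    this.buffer = buffer;
  }

  long size() {
    return buffer.remaining(); // elements between position and limit
  }

  long get(long index) {
    if (index < 0 || index >= size()) {
      throw new IndexOutOfBoundsException("Index: " + index);
    }
    // Absolute read relative to the buffer's position, so the view never
    // consumes the buffer and repeated reads stay valid.
    return buffer.get(buffer.position() + Math.toIntExact(index));
  }

  public static void main(String[] args) {
    ByteBuffer buffer = ByteBuffer.wrap(new byte[] {1, 0, 2, 100, 20});
    ByteBufferArrayView view = new ByteBufferArrayView(buffer);
    System.out.println(view.size()); // 5
    System.out.println(view.get(4)); // 20
  }
}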
@@ -17,8 +17,11 @@ import org.enso.interpreter.runtime.library.dispatch.MethodDispatchLibrary;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.channels.SeekableByteChannel;
import java.nio.file.CopyOption;
import java.nio.file.OpenOption;
import java.nio.file.StandardOpenOption;
import java.nio.file.attribute.PosixFilePermission;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
@@ -51,6 +54,23 @@ public class EnsoFile implements TruffleObject {
    return this.truffleFile.newInputStream(opts);
  }

  @Builtin.Method(name = "read_last_bytes_builtin")
  @Builtin.WrapException(from = IOException.class, to = PolyglotError.class, propagate = true)
  public ArrayOverBuffer readLastBytes(long n) throws IOException {
    try (SeekableByteChannel channel =
        this.truffleFile.newByteChannel(Set.of(StandardOpenOption.READ))) {
      int bytesToRead = Math.toIntExact(Math.min(channel.size(), n));
      channel.position(channel.size() - bytesToRead);
      ByteBuffer buffer = ByteBuffer.allocate(bytesToRead);
      while (buffer.hasRemaining()) {
        channel.read(buffer);
      }

      buffer.flip();
      return ArrayOverBuffer.wrapBuffer(buffer);
    }
  }

  @Builtin.Method(name = "resolve")
  @Builtin.Specialize
  public EnsoFile resolve(String subPath) {
@@ -41,6 +41,7 @@ import org.enso.polyglot.data.TypeGraph;
UnresolvedConversion.class,
UnresolvedSymbol.class,
Array.class,
ArrayOverBuffer.class,
EnsoBigInteger.class,
ManagedResource.class,
ModuleScope.class,
@@ -128,7 +129,7 @@ public class Types {
return Constants.UNRESOLVED_SYMBOL;
} else if (TypesGen.isManagedResource(value)) {
return ConstantsGen.MANAGED_RESOURCE;
} else if (TypesGen.isArray(value)) {
} else if (TypesGen.isArray(value) || TypesGen.isArrayOverBuffer(value)) {
return ConstantsGen.ARRAY;
} else if (TypesGen.isModuleScope(value)) {
return Constants.MODULE_SCOPE;
@@ -40,6 +40,7 @@ public record TypeWithKind(String baseType, TypeKind kind) {
List.of(
"org.enso.interpreter.runtime.callable.atom.Atom",
"org.enso.interpreter.runtime.data.Array",
"org.enso.interpreter.runtime.data.ArrayOverBuffer",
"org.enso.interpreter.runtime.data.EnsoFile",
"org.enso.interpreter.runtime.data.EnsoDate",
"org.enso.interpreter.runtime.data.ManagedResource",
@@ -0,0 +1,41 @@
package org.enso.base.encoding;

import java.io.IOException;
import java.io.Reader;

/** A reader that wraps another reader and checks if a newline character has been encountered. */
public class NewlineDetector extends Reader {
  private final Reader underlying;
  private boolean newlineEncountered = false;

  public NewlineDetector(Reader underlying) {
    this.underlying = underlying;
  }

  @Override
  public int read(char[] cbuf, int off, int len) throws IOException {
    int read = underlying.read(cbuf, off, len);

    if (!newlineEncountered) {
      for (int i = 0; i < read; ++i) {
        char c = cbuf[off + i];
        if (c == '\n' || c == '\r') {
          newlineEncountered = true;
          break;
        }
      }
    }

    return read;
  }

  @Override
  public void close() throws IOException {
    underlying.close();
  }

  /** Checks if a newline character has been encountered within data that has been read so far. */
  public boolean newlineEncountered() {
    return newlineEncountered;
  }
}
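`NewlineDetector` is what lets `detect_metadata` decide whether the parser's reported separator can be trusted: if no newline ever flowed through the reader, the parser tends to fall back to the system default, so the trailing-bytes detection wins instead. A hedged usage sketch of that decision, assuming the class above is on the classpath; the stand-in values and the example class name are illustrative:

import java.io.IOException;
import java.io.StringReader;
import org.enso.base.encoding.NewlineDetector;

public class NewlineDetectorExample {
  public static void main(String[] args) throws IOException {
    // Wrap the input, then let the consumer (here: a plain read loop) drain it.
    NewlineDetector detector = new NewlineDetector(new StringReader("a,b\r\n1,2\r\n"));
    char[] buffer = new char[64];
    while (detector.read(buffer, 0, buffer.length) != -1) {
      // A real caller would feed these characters to the CSV parser.
    }

    // Mirrors the rule in detect_metadata: trust the parser only if it has
    // actually seen a newline; otherwise fall back to the separator detected
    // from the file's trailing bytes (which may be null).
    String separatorFromParser = "\r\n"; // stand-in for getEffectiveLineSeparator()
    String trailingSeparator = "\r\n";   // stand-in for newline_at_eof
    String effective =
        detector.newlineEncountered() ? separatorFromParser : trailingSeparator;
    System.out.println("\r\n".equals(effective)); // true
    detector.close();
  }
}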
@@ -41,7 +41,7 @@ public class DelimitedReader {
private final DatatypeParser valueParser;
private final TypeInferringParser cellTypeGuesser;
private final boolean keepInvalidRows;
private final String newlineSetting;
private String newlineSetting;
private final boolean warningsAsErrors;
private final NoOpProblemAggregator noOpProblemAggregator = new NoOpProblemAggregator();
private long invalidRowsCount = 0;
@@ -353,18 +353,22 @@ public class DelimitedReader {
return effectiveColumnNames.length;
}

/** Returns the line separator used in the file.
/** Returns the line separator.
*
* If a specific separator is set at construction, it is just returned. If it
* was set to null, the separator inferred from the file contents is returned.
* If it was provided explicitly at construction, the selected separator is used.
* If the initial separator was set to {@code null}, the reader tries to detect
* the separator from file contents.
*/
public String getEffectiveLineSeparator() {
if (newlineSetting != null) {
return newlineSetting;
} else {
if (newlineSetting == null) {
ensureHeadersDetected();
return parser.getDetectedFormat().getLineSeparatorString();
}
return newlineSetting;
}

public long getVisitedCharactersCount() {
ensureHeadersDetected();
return parser.getContext().currentChar();
}

private void ensureHeadersDetected() {
@@ -375,6 +379,12 @@ public class DelimitedReader {

private void detectHeaders() {
Row firstRow = loadNextRow();

// Resolve the newline separator:
if (newlineSetting == null) {
newlineSetting = parser.getDetectedFormat().getLineSeparatorString();
}

if (firstRow == null) {
effectiveColumnNames = new String[0];
headerProblems = Collections.emptyList();
@@ -389,7 +399,7 @@ public class DelimitedReader {
case INFER -> {
Row secondRow = loadNextRow();
if (secondRow == null) {
/** If there is only one row in the file, we generate the headers and
/* If there is only one row in the file, we generate the headers and
* stop further processing (as nothing more to process). */
headerNames = generateDefaultHeaders(expectedColumnCount);
pendingRows.add(firstRow);
@@ -21,6 +21,8 @@ public class DelimitedWriter {
private final char quoteChar;
private final char quoteEscapeChar;

private final char commentChar;

private final String quoteReplacement;

private final String quoteEscapeReplacement;
@@ -35,6 +37,7 @@ public class DelimitedWriter {
String newline,
String quote,
String quoteEscape,
String comment,
WriteQuoteBehavior writeQuoteBehavior,
boolean writeHeaders) {
this.newline = newline;
@@ -97,6 +100,17 @@ public class DelimitedWriter {
quoteEscapeReplacement = null;
}

if (comment != null) {
if (comment.length() != 1) {
throw new IllegalArgumentException(
"The comment character must consist of exactly 1 codepoint.");
}

commentChar = comment.charAt(0);
} else {
commentChar = '\0';
}

this.writeQuoteBehavior = writeQuoteBehavior;
this.writeHeaders = writeHeaders;
emptyValue = this.quote + "" + this.quote;
@@ -172,7 +186,11 @@ public class DelimitedWriter {
boolean containsQuote = value.indexOf(quoteChar) >= 0;
boolean containsQuoteEscape = quoteEscape != null && value.indexOf(quoteEscapeChar) >= 0;
boolean shouldQuote =
wantsQuoting || containsQuote || containsQuoteEscape || value.indexOf(delimiter) >= 0;
wantsQuoting
|| containsQuote
|| containsQuoteEscape
|| value.indexOf(delimiter) >= 0
|| value.indexOf(commentChar) >= 0;
if (!shouldQuote) {
return value;
}
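The extended condition also quotes any cell that contains the configured comment character, so a written file can be re-read with comments enabled without losing rows. A standalone Java sketch of just that quoting decision (the class name is illustrative, and the real writer additionally handles escapes and quote replacement):

public class QuoteDecision {
  // Decides whether a delimited cell must be quoted, mirroring the extended
  // condition above; '\0' stands for "no comment character configured".
  static boolean shouldQuote(String value, char delimiter, char quoteChar, char commentChar) {
    return value.indexOf(quoteChar) >= 0
        || value.indexOf(delimiter) >= 0
        || value.indexOf(commentChar) >= 0;
  }

  public static void main(String[] args) {
    System.out.println(shouldQuote("#", ',', '"', '#'));    // true: would otherwise read back as a comment line
    System.out.println(shouldQuote("abc", ',', '"', '#'));  // false
    System.out.println(shouldQuote("a,b", ',', '"', '\0')); // true: contains the delimiter
  }
}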
@@ -9,7 +9,7 @@ import Standard.Table.IO.File_Read
from Standard.Table.IO.File_Format import Delimited
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
import Standard.Table.IO.Quote_Style
from Standard.Table.IO.Line_Ending_Style import all
import Standard.Base.Data.Text.Line_Ending_Style

import Standard.Test
import Standard.Test.Problems
@@ -144,17 +144,17 @@ spec =
['a', 'b', 'c\nd', 'e', 'f\n1', 2, 3].map_with_index i-> v->
["Column_" + (i+1).to_text, [v]]
File.read file format . should_equal reference_table
File.read file (format.with_line_endings Unix_Line_Endings) . should_equal reference_table
File.read file (format.with_line_endings Classic_Mac_Line_Endings) . should_equal collapsed_table
File.read file (format.with_line_endings Windows_Line_Endings) . should_equal collapsed_table
File.read file (format.with_line_endings Line_Ending_Style.Unix) . should_equal reference_table
File.read file (format.with_line_endings Line_Ending_Style.Mac_Legacy) . should_equal collapsed_table
File.read file (format.with_line_endings Line_Ending_Style.Windows) . should_equal collapsed_table
file.delete

file_2 = enso_project.data / "transient" / "crlf.csv"
lines.join '\r\n' . write file_2
File.read file_2 (format.with_line_endings Windows_Line_Endings) . should_equal reference_table
File.read file_2 (format.with_line_endings Line_Ending_Style.Windows) . should_equal reference_table

# For some reason loading the CRLF file in Unix mode trims the CR characters. We may want to revisit this at some point.
table = File.read file_2 (format.with_line_endings Unix_Line_Endings)
table = File.read file_2 (format.with_line_endings Line_Ending_Style.Unix)
table . should_equal reference_table
file_2.delete
@@ -399,6 +399,6 @@ spec =
Delimited ',' . with_comments . should_equal (Delimited ',' comment_character='#')
Delimited ',' . with_comments ';' . should_equal (Delimited ',' comment_character=';')
Delimited ',' comment_character='#' . without_comments . should_equal (Delimited ',' comment_character=Nothing)
Delimited ',' . with_line_endings Unix_Line_Endings . should_equal (Delimited ',' line_endings=Unix_Line_Endings)
Delimited ',' . with_line_endings Line_Ending_Style.Unix . should_equal (Delimited ',' line_endings=Line_Ending_Style.Unix)

main = Test.Suite.run_main spec
@@ -2,6 +2,7 @@ from Standard.Base import all
from Standard.Base.Error.Problem_Behavior import all
import Standard.Base.System.File.Existing_File_Behavior
from Standard.Base.Data.Text.Encoding as Encoding_Module import Encoding, Encoding_Error
import Standard.Base.Data.Text.Line_Ending_Style
import Standard.Base.Data.Time.Date
import Standard.Base.Data.Time.Time_Of_Day
import Standard.Base.System
@@ -13,9 +14,9 @@ import Standard.Table.IO.File_Read
from Standard.Table.IO.File_Format import Delimited
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
import Standard.Table.IO.Quote_Style
from Standard.Table.IO.Line_Ending_Style import all
import Standard.Table.Data.Match_Columns
import Standard.Table.Data.Column_Name_Mapping
from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Name
from Standard.Table.Errors as Table_Errors import Column_Count_Mismatch, Column_Name_Mismatch

import Standard.Test
@@ -29,9 +30,13 @@ type My_Type
to_text : Text
to_text = "[[[My Type :: " + self.x.to_text + "]]]"

default_line_endings_for_new_files = Line_Ending_Style.Unix
join_lines lines trailing_newline=True =
eol = default_line_endings_for_new_files.to_text
if trailing_newline then lines.join eol suffix=eol else lines.join eol

spec =
line_ending_pairs = [[Unix_Line_Endings, '\n'], [Windows_Line_Endings, '\r\n'], [Classic_Mac_Line_Endings, '\r']]
system_separator = if System.os == "windows" then '\r\n' else '\n'
line_ending_pairs = [[Line_Ending_Style.Unix, '\n'], [Line_Ending_Style.Windows, '\r\n'], [Line_Ending_Style.Mac_Legacy, '\r']]
Test.group "Delimited File Writing" <|
Test.specify "should correctly write a simple table" <|
table = Table.new [["A", [1,2,3]], ["B", [1.0,1.5,2.2]], ["C", ["x","y","z"]], ["D", ["a", 2, My_Type 10]]]
@@ -131,6 +136,27 @@ spec =
text.should_equal expected_text
file.delete

Test.specify "should quote values containing the comment symbol if comments are enabled" <|
table = Table.new [["#", ['b', 'x', '#']], ["B", [Nothing,"#","abc"]]]
file = (enso_project.data / "transient" / "comments.csv")
file.delete_if_exists
table.write file on_problems=Report_Error . should_succeed
expected_text = join_lines ['#,B','b,', 'x,#', '#,abc']
text = File.read_text file
text.should_equal expected_text
file.delete

format = File_Format.Delimited ',' . with_comments
table.write file format on_problems=Report_Error . should_succeed
expected_text_2 = normalize_lines <| """
"#",B
b,
x,"#"
"#",abc
text_2 = File.read_text file
text_2.should_equal expected_text_2
file.delete

Test.specify 'should not quote values if quoting is disabled' <|
format = File_Format.Delimited "," value_formatter=(Data_Formatter decimal_point=",") . without_quotes
table = Table.new [['The Column "Name"', ["foo","'bar'",'"baz"', 'one, two, three']], ["Hello, Column?", [1.0, 1000000.5, 2.2, -1.5]]]
@@ -187,8 +213,7 @@ spec =
text = File.read_text file encoding=Encoding.ascii
text.should_equal expected_text
result . should_equal Nothing
sep_length = System.default_line_separator.codepoints.length
positions = [6 + sep_length, 7 + sep_length, 13 + 2*sep_length]
positions = [7, 8, 15]
msg = "Encoding issues at codepoints " +
positions.map .to_text . join separator=", " suffix="."
Warning.get_all result . map .value . should_equal [Encoding_Error msg]
@@ -238,6 +263,17 @@ spec =
got_table.should_equal table
file.delete

Test.specify "should correctly append to a file with a missing newline at EOF" <|
table = Table.new [["A", [1,2,3]], ["B", [1.0,1.5,2.2]], ["C", ["x","y","z"]]]
file = (enso_project.data / "transient" / "append_missing_newline.csv")
file.delete_if_exists
'A,B,C\r0,0,0'.write file
table.write file on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
text = File.read_text file
expected_lines = ["A,B,C", "0,0,0", "1,1.0,x", "2,1.5,y", "3,2.2,z"]
text.should_equal (expected_lines.join '\r' suffix='\r')
file.delete

Test.specify "should append to a file, matching columns by name (headers=Infer)" <|
existing_table = Table.new [["A", [1,2]], ["B", [1.0,1.5]], ["C", ["x","y"]]]
appending_table = Table.new [["B", [33,44]], ["A", [Nothing, 0]], ["C", ["a","BB"]]]
@@ -358,7 +394,7 @@ spec =
text.should_equal (expected_lines.join separator suffix=separator)
file.delete

Test.specify "should use the system default line ending style when appending to an empty or nonexistent file" <|
Test.specify "should use Unix line ending style when appending to an empty or nonexistent file" <|
empty_file = (enso_project.data / "transient" / "empty.csv")
"".write empty_file
nonexistent_file = (enso_project.data / "transient" / "nonexistent.csv")
@@ -369,7 +405,7 @@ spec =
table_to_append.write empty_file on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed

expected_lines = ["a,d", "x,z", "y,w"]
expected_text = (expected_lines.join system_separator suffix=system_separator)
expected_text = join_lines expected_lines
File.read_text empty_file . should_equal expected_text
File.read_text nonexistent_file . should_equal expected_text
@@ -389,13 +425,130 @@ spec =
text.should_equal expected_text
file.delete

Test.specify "should use the existing line ending style when appending to a file consisting of only comments missing last EOL" <|
initial_lines = ["# comment 1", "# comment 2 without EOL"]
table_to_append = Table.new [["a", ["x", "y"]], ["b", ["z", "w"]]]
expected_lines = initial_lines + ["a,b", "x,z", "y,w"]
file = (enso_project.data / "transient" / "endings_comments_only.csv")
line_ending_pairs.each setting->
separator=setting.second
file.delete_if_exists
(initial_lines.join separator).write file
format = File_Format.Delimited ',' . with_comments
table_to_append.write file format on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
text = File.read_text file
expected_text = expected_lines.join separator suffix=separator
text.should_equal expected_text
file.delete

Test.specify "should correctly handle append edge cases" <|
|
||||
table = Table.new [["a", [1, 2]]]
|
||||
file = (enso_project.data / "transient" / "append_edge_cases.csv")
|
||||
file.delete_if_exists
|
||||
|
||||
format = File_Format.Delimited ',' . without_headers
|
||||
|
||||
# A long line but without a trailing newline
|
||||
base_line = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-ABC"
|
||||
expected_lines_1 = [base_line, "1", "2"]
|
||||
# 1 character with trailing newline
|
||||
line_ending_pairs.each setting->
|
||||
separator=setting.second
|
||||
(base_line+separator).write file
|
||||
table.write file format on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed
|
||||
text = File.read_text file
|
||||
expected_text = expected_lines_1.join separator suffix=separator
|
||||
text.should_equal expected_text
|
||||
file.delete
|
||||
|
||||
base_line.write file
|
||||
table.write file format on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed
|
||||
File.read_text file . should_equal <| normalize_lines base_line+'\n1\n2\n'
|
||||
file.delete
|
||||
|
||||
# 1 character without trailing newline
|
||||
"#".write file
|
||||
table.write file format on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed
|
||||
File.read_text file . should_equal <| normalize_lines '#\n1\n2\n'
|
||||
file.delete
|
||||
|
||||
"#".write file
|
||||
table.write file format.with_comments on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed
|
||||
File.read_text file . should_equal <| normalize_lines '#\n1\n2\n'
|
||||
file.delete
|
||||
|
||||
expected_lines_2 = ["#", "1", "2"]
|
||||
# 1 character with trailing newline
|
||||
line_ending_pairs.each setting->
|
||||
[format.with_comments, format].each format->
|
||||
separator=setting.second
|
||||
("#"+separator).write file
|
||||
table.write file format on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed
|
||||
text = File.read_text file
|
||||
expected_text = expected_lines_2.join separator suffix=separator
|
||||
text.should_equal expected_text
|
||||
file.delete
|
||||
|
||||
["B", "#"].each middle_line->
|
||||
expected_lines_3 = ["A", middle_line, "1", "2"]
|
||||
[format.with_comments, format].each format->
|
||||
# 2 lines without trailing newline
|
||||
line_ending_pairs.each setting->
|
||||
separator=setting.second
|
||||
("A"+separator+middle_line).write file
|
||||
table.write file format on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed
|
||||
text = File.read_text file
|
||||
expected_text = expected_lines_3.join separator suffix=separator
|
||||
text.should_equal expected_text
|
||||
file.delete
|
||||
|
||||
# 2 lines with trailing newline
|
||||
line_ending_pairs.each setting->
|
||||
separator=setting.second
|
||||
("A"+separator+middle_line+separator).write file
|
||||
table.write file format on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed
|
||||
text = File.read_text file
|
||||
expected_text = expected_lines_3.join separator suffix=separator
|
||||
text.should_equal expected_text
|
||||
file.delete
|
||||
|
||||
Test.specify "should use the existing line ending style when appending to a file consisting of only one comment with EOL" <|
|
||||
initial_line = "# comment 1 with EOL"
|
||||
table_to_append = Table.new [["a", ["x", "y"]], ["b", ["z", "w"]]]
|
||||
expected_lines = [initial_line] + ["a,b", "x,z", "y,w"]
|
||||
file = (enso_project.data / "transient" / "endings_comments_only.csv")
|
||||
line_ending_pairs.each setting->
|
||||
separator=setting.second
|
||||
file.delete_if_exists
|
||||
(initial_line+separator).write file
|
||||
format = File_Format.Delimited ',' . with_comments
|
||||
table_to_append.write file format on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
|
||||
text = File.read_text file
|
||||
expected_text = expected_lines.join separator suffix=separator
|
||||
text.should_equal expected_text
|
||||
file.delete
|
||||
|
||||
Test.specify "should use the Unix line ending style when appending to a file consisting of only one comment and missing the EOL" <|
|
||||
initial_lines = ["# comment 1 without EOL"]
|
||||
table_to_append = Table.new [["a", ["x", "y"]], ["b", ["z", "w"]]]
|
||||
expected_lines = initial_lines + ["a,b", "x,z", "y,w"]
|
||||
file = (enso_project.data / "transient" / "endings_comments_only.csv")
|
||||
file.delete_if_exists
|
||||
(join_lines initial_lines trailing_newline=False).write file
|
||||
format = File_Format.Delimited ',' . with_comments
|
||||
table_to_append.write file format on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
|
||||
text = File.read_text file
|
||||
expected_text = join_lines expected_lines
|
||||
text.should_equal expected_text
|
||||
file.delete
|
||||
|
||||
Test.specify "should fail if explicitly provided line endings do not match line endings in the file when appending" <|
|
||||
initial_table = Table.new [["a", [1, 2]]]
|
||||
table_to_append = Table.new [["a", ["x", "y"]]]
|
||||
file = (enso_project.data / "transient" / "endings_mismatch.csv")
|
||||
file.delete_if_exists
|
||||
initial_table.write file (File_Format.Delimited ',' line_endings=Classic_Mac_Line_Endings)
|
||||
result = table_to_append.write file (File_Format.Delimited ',' line_endings=Unix_Line_Endings) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position
|
||||
initial_table.write file (File_Format.Delimited ',' line_endings=Line_Ending_Style.Mac_Legacy)
|
||||
result = table_to_append.write file (File_Format.Delimited ',' line_endings=Line_Ending_Style.Unix) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position
|
||||
result . should_fail_with Illegal_Argument_Error
|
||||
result.catch.message . should_equal "The explicitly provided line endings ('\n') do not match the line endings in the file ('\r')."
|
||||
file.delete
|
||||
|
@@ -9,6 +9,7 @@ import project.Delimited_Write_Spec
import project.Excel_Spec
import project.Json_Spec
import project.Table_Spec
import project.Table_Date_Spec
import project.Aggregate_Column_Spec
import project.Aggregate_Spec
@@ -20,6 +21,7 @@ in_memory_spec =
Excel_Spec.spec
Json_Spec.spec
Table_Spec.spec
Table_Date_Spec.spec
Aggregate_Column_Spec.spec
Aggregate_Spec.spec
@@ -1,14 +1,14 @@
from Standard.Base import all
import Standard.Base.Data.Time.Date
import Standard.Base.Data.Text.Line_Ending_Style

import Standard.Table
import Standard.Table.Data.Column
import Standard.Table.Io.File_Format
import Standard.Table.IO.File_Format
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
from Standard.Table.Io.Line_Ending_Style import Unix_Line_Endings

import Standard.Test
import project.Util
from project.Util import all

spec =
c_number = ["Number", [71, 72, 73, 74, 75, 76, 77]]
@@ -40,15 +40,16 @@ spec =

Test.group "Should be able to serialise a table with Dates to Text" <|
Test.specify "should serialise back to input" <|
expected_text = (enso_project.data / "prime_ministers.csv").read_text
delimited = Text.from expected format=(File_Format.Delimited "," line_endings=Unix_Line_Endings)
expected_text = normalize_lines <|
(enso_project.data / "prime_ministers.csv").read_text
delimited = Text.from expected format=(File_Format.Delimited "," line_endings=Line_Ending_Style.Unix)
delimited.should_equal expected_text

Test.specify "should serialise dates with format" <|
test_table = Table.new [c_from]
expected_text = 'From\n04.05.1979\n28.11.1990\n02.05.1997\n27.06.2007\n11.05.2010\n13.07.2016\n24.07.2019\n'
data_formatter = Data_Formatter . with_datetime_formats date_formats=["dd.MM.yyyy"]
delimited = Text.from test_table format=(File_Format.Delimited "," value_formatter=data_formatter line_endings=Unix_Line_Endings)
delimited = Text.from test_table format=(File_Format.Delimited "," value_formatter=data_formatter line_endings=Line_Ending_Style.Unix)
delimited.should_equal expected_text

main = Test.Suite.run_main spec
@@ -1,6 +1,7 @@
from Standard.Base import all

import Standard.Base.System
import Standard.Base.Data.Text.Line_Ending_Style
import Standard.Table
import Standard.Table.Data.Column
import Standard.Test
@@ -16,7 +17,7 @@ Column.Column.should_equal expected =
self.length.should_equal expected.length
self.to_vector.should_equal expected.to_vector

normalize_lines string line_separator=System.default_line_separator newline_at_end=True =
normalize_lines string line_separator=Line_Ending_Style.Unix.to_text newline_at_end=True =
case newline_at_end of
True -> string.lines.join line_separator suffix=line_separator
False -> string.lines.join line_separator
@@ -93,6 +93,17 @@ spec =
contents = File.read_bytes full_path
contents.take_start 6 . should_equal [67, 117, 112, 99, 97, 107]

Test.specify "should allow to read last n bytes from a file" <|
file = enso_project.data / "transient" / "bytes.txt"
data = [1, 0, 0, 1, 2, 100, 20]
data.write_bytes file
file.read_last_bytes 0 . should_equal []
file.read_last_bytes 1 . should_equal [20]
file.read_last_bytes 2 . should_equal [100, 20]
file.read_last_bytes 5 . should_equal [0, 1, 2, 100, 20]
file.read_last_bytes 1000 . should_equal data
file.delete

Test.specify "should handle exceptions when reading a non-existent file" <|
file = File.new "does_not_exist.txt"
File.read_bytes "does_not_exist.txt" . should_fail_with File.File_Not_Found