mirror of
https://github.com/enso-org/enso.git
synced 2024-10-26 21:19:02 +03:00
Give file read its own helper widget for delimiters. (#8627)
Give file read its own helper widget for delimiters. Remove the newline option and add a none option. The file read delimiter is similar to, but different from, the split one, and so should have its own set of options.
This commit is contained in:
parent
20531d51df
commit
bf8dd1888c
@ -599,6 +599,8 @@
|
||||
- [Support for loading big Excel files.][8403]
|
||||
- [Added new `Filter_Condition`s - `Equal_Ignore_Case`, `Is_Nan`, `Is_Infinite`
|
||||
and `Is_Finite`.][8539]
|
||||
- [Added text_length to Column][8606]
|
||||
- [Added none delimiter option for Data.Read][8627]
|
||||
|
||||
[debug-shortcuts]:
|
||||
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
||||
@ -858,6 +860,8 @@
|
||||
[8403]: https://github.com/enso-org/enso/pull/8403
|
||||
[8539]: https://github.com/enso-org/enso/pull/8539
|
||||
[8564]: https://github.com/enso-org/enso/pull/8564
|
||||
[8606]: https://github.com/enso-org/enso/pull/8606
|
||||
[8627]: https://github.com/enso-org/enso/pull/8627
|
||||
|
||||
#### Enso Compiler
|
||||
|
||||
@ -1011,7 +1015,6 @@
|
||||
- [Upgrade GraalVM to 23.1.0 JDK21][7991]
|
||||
- [Added opt-in type checks of return type][8502]
|
||||
- [DataflowError.withoutTrace doesn't store stacktrace][8608]
|
||||
- [Added text_length to Column][8606]
|
||||
|
||||
[3227]: https://github.com/enso-org/enso/pull/3227
|
||||
[3248]: https://github.com/enso-org/enso/pull/3248
|
||||
@ -1164,7 +1167,6 @@
|
||||
[7991]: https://github.com/enso-org/enso/pull/7991
|
||||
[8502]: https://github.com/enso-org/enso/pull/8502
|
||||
[8608]: https://github.com/enso-org/enso/pull/8608
|
||||
[8606]: https://github.com/enso-org/enso/pull/8606
|
||||
|
||||
# Enso 2.0.0-alpha.18 (2021-10-12)
|
||||
|
||||
|
@ -19,6 +19,12 @@ make_delimiter_selector : Widget
|
||||
make_delimiter_selector =
|
||||
make_single_choice [',', ';', '|', ['{tab}', "'\t'"], ['{space}', "' '"], ['{newline}', "['\n', '\r\n', '\r']"], '_', ['Custom', "'?'"]]
|
||||
|
||||
## PRIVATE
|
||||
Creates a Single_Choice Widget for file read delimiters.
|
||||
make_file_read_delimiter_selector : Widget
|
||||
make_file_read_delimiter_selector =
|
||||
make_single_choice [',', ';', '|', ['{tab}', "'\t'"], ['{space}', "' '"], ['{none}', "''"], '_', ['Custom', "'?'"]]
|
||||
|
||||
## PRIVATE
|
||||
Creates a Single_Choice Widget for parsing dates.
|
||||
make_date_format_selector : Date -> Widget
|
||||
|
@ -2,7 +2,7 @@ from Standard.Base import all
|
||||
import Standard.Base.Network.HTTP.Response.Response
|
||||
import Standard.Base.System.File_Format_Metadata.File_Format_Metadata
|
||||
import Standard.Base.System.Input_Stream.Input_Stream
|
||||
from Standard.Base.Widget_Helpers import make_delimiter_selector
|
||||
from Standard.Base.Widget_Helpers import make_file_read_delimiter_selector
|
||||
|
||||
import project.Data.Data_Formatter.Data_Formatter
|
||||
import project.Data.Match_Columns.Match_Columns
|
||||
@ -52,7 +52,7 @@ type Delimited_Format
|
||||
character if it is anywhere else than at the beginning of the line. This
|
||||
option is only applicable for read mode and does not affect writing. It
|
||||
defaults to `Nothing` which means that comments are disabled.
|
||||
@delimiter make_delimiter_selector
|
||||
@delimiter make_file_read_delimiter_selector
|
||||
@encoding Encoding.default_widget
|
||||
Delimited (delimiter:Text=',') (encoding:Encoding=Encoding.utf_8) (skip_rows:Integer=0) (row_limit:Integer|Nothing=Nothing) (quote_style:Quote_Style=Quote_Style.With_Quotes) (headers:Boolean|Infer=Infer) (value_formatter:Data_Formatter|Nothing=Data_Formatter.Value) (keep_invalid_rows:Boolean=True) (line_endings:Line_Ending_Style|Infer=Infer) (comment_character:Text|Nothing=Nothing)
|
||||
|
||||
|
@ -42,7 +42,9 @@ public class DelimitedReader {
|
||||
* <p>I considered choosing `\u0F8EE`, which comes from the Private Use Area of the Basic
|
||||
* Multilingual Plane. It has no meaning designated by the Unicode standard.
|
||||
*/
|
||||
public static final char UNUSED_CHARACTER = '\0';
|
||||
public static final char COMMENT_CHARACTER = '\0';
|
||||
|
||||
public static final char UNUSED_CHARACTER = '\uF8EE';
|
||||
|
||||
private static final String COLUMN_NAME = "Column";
|
||||
private static final char noQuoteCharacter = '\0';
|
||||
@ -113,15 +115,17 @@ public class DelimitedReader {
|
||||
String commentCharacter,
|
||||
boolean warningsAsErrors,
|
||||
ProblemAggregator problemAggregator) {
|
||||
if (delimiter.isEmpty()) {
|
||||
throw new IllegalArgumentException("Empty delimiters are not supported.");
|
||||
}
|
||||
if (delimiter.length() > 1) {
|
||||
throw new IllegalArgumentException(
|
||||
"Delimiters consisting of multiple characters or code units are not supported.");
|
||||
}
|
||||
|
||||
this.delimiter = delimiter.charAt(0);
|
||||
if (delimiter.isEmpty()) {
|
||||
// User wants to read each row into a single cell. So we delimit on a character that we assume
|
||||
// is not in user data
|
||||
this.delimiter = UNUSED_CHARACTER;
|
||||
} else {
|
||||
this.delimiter = delimiter.charAt(0);
|
||||
}
|
||||
|
||||
if (quote != null) {
|
||||
if (quote.isEmpty()) {
|
||||
@ -198,7 +202,7 @@ public class DelimitedReader {
|
||||
}
|
||||
|
||||
if (commentCharacter == null) {
|
||||
format.setComment(UNUSED_CHARACTER);
|
||||
format.setComment(COMMENT_CHARACTER);
|
||||
} else {
|
||||
if (commentCharacter.length() != 1) {
|
||||
throw new IllegalArgumentException(
|
||||
|
@ -207,7 +207,7 @@ public class DelimitedWriter {
|
||||
* TODO This should be checking if commenting is enabled, but currently
|
||||
* due to limitations of the reader library it is always enabled, just
|
||||
* sometimes the comment char is set to `\0`. See the documentation of
|
||||
* {@link DelimitedReader#UNUSED_CHARACTER}.
|
||||
* {@link DelimitedReader#COMMENT_CHARACTER}.
|
||||
*
|
||||
* See issue https://github.com/enso-org/enso/issues/5655
|
||||
*/
|
||||
|
@ -398,6 +398,15 @@ spec =
|
||||
t2.at "c" . to_vector . should_equal [3, 6]
|
||||
t2.columns.map .name . should_equal ["a", "b", "c"]
|
||||
|
||||
Test.specify "should be able to read in a file without splitting it to columns" <|
|
||||
t1 = (enso_project.data / "data_small.csv") . read (Delimited "" headers=False)
|
||||
expected = ['Code,Index,Flag,Value,ValueWithNothing,TextWithNothing,"Hexadecimal",Leading0s,QuotedNumbers,"Mixed Types"']
|
||||
+ ['gxl,7,True,38.76109,63.13, pq6igd2wyd ,4DD4675B,001,"1","33"']
|
||||
+ ['wca,0,False,-66.77495,31," 2pr4102wc4 ",,002,"2",']
|
||||
+ ['nfw,1, True , 88.65713\t\t\t,-68.71,"",01896EAB,123,,45']
|
||||
+ ['der,7,True,0.86658,,,F32E1EFE,,"34",True']
|
||||
t1.at 0 . to_vector . should_equal expected
|
||||
|
||||
Test.specify "should be able to parse raw text" <|
|
||||
text1 = """
|
||||
a,b,c
|
||||
|
Loading…
Reference in New Issue
Block a user