diff --git a/CHANGELOG.md b/CHANGELOG.md index a4c500fe40..7f559f4976 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -599,6 +599,8 @@ - [Support for loading big Excel files.][8403] - [Added new `Filter_Condition`s - `Equal_Ignore_Case`, `Is_Nan`, `Is_Infinite` and `Is_Finite`.][8539] +- [Added text_length to Column][8606] +- [Added none delimiter option for Data.Read][8627] [debug-shortcuts]: https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug @@ -858,6 +860,8 @@ [8403]: https://github.com/enso-org/enso/pull/8403 [8539]: https://github.com/enso-org/enso/pull/8539 [8564]: https://github.com/enso-org/enso/pull/8564 +[8606]: https://github.com/enso-org/enso/pull/8606 +[8627]: https://github.com/enso-org/enso/pull/8627 #### Enso Compiler @@ -1011,7 +1015,6 @@ - [Upgrade GraalVM to 23.1.0 JDK21][7991] - [Added opt-in type checks of return type][8502] - [DataflowError.withoutTrace doesn't store stacktrace][8608] -- [Added text_length to Column][8606] [3227]: https://github.com/enso-org/enso/pull/3227 [3248]: https://github.com/enso-org/enso/pull/3248 @@ -1164,7 +1167,6 @@ [7991]: https://github.com/enso-org/enso/pull/7991 [8502]: https://github.com/enso-org/enso/pull/8502 [8608]: https://github.com/enso-org/enso/pull/8608 -[8606]: https://github.com/enso-org/enso/pull/8606 # Enso 2.0.0-alpha.18 (2021-10-12) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Widget_Helpers.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Widget_Helpers.enso index 0cca65d7fb..18c2b320f9 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Widget_Helpers.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Widget_Helpers.enso @@ -19,6 +19,12 @@ make_delimiter_selector : Widget make_delimiter_selector = make_single_choice [',', ';', '|', ['{tab}', "'\t'"], ['{space}', "' '"], ['{newline}', "['\n', '\r\n', '\r']"], '_', ['Custom', "'?'"]] + ## PRIVATE + Creates a Single_Choice Widget for file read delimiters. 
+make_file_read_delimiter_selector : Widget +make_file_read_delimiter_selector = + make_single_choice [',', ';', '|', ['{tab}', "'\t'"], ['{space}', "' '"], ['{none}', "''"], '_', ['Custom', "'?'"]] + ## PRIVATE Creates a Single_Choice Widget for parsing dates. make_date_format_selector : Date -> Widget diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Delimited/Delimited_Format.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Delimited/Delimited_Format.enso index 43a565f989..bc457b137c 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Delimited/Delimited_Format.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Delimited/Delimited_Format.enso @@ -2,7 +2,7 @@ from Standard.Base import all import Standard.Base.Network.HTTP.Response.Response import Standard.Base.System.File_Format_Metadata.File_Format_Metadata import Standard.Base.System.Input_Stream.Input_Stream -from Standard.Base.Widget_Helpers import make_delimiter_selector +from Standard.Base.Widget_Helpers import make_file_read_delimiter_selector import project.Data.Data_Formatter.Data_Formatter import project.Data.Match_Columns.Match_Columns @@ -52,7 +52,7 @@ type Delimited_Format character if it anywhere else than at the beginning of the line. This option is only applicable for read mode and does not affect writing. It defaults to `Nothing` which means that comments are disabled. 
- @delimiter make_delimiter_selector + @delimiter make_file_read_delimiter_selector @encoding Encoding.default_widget Delimited (delimiter:Text=',') (encoding:Encoding=Encoding.utf_8) (skip_rows:Integer=0) (row_limit:Integer|Nothing=Nothing) (quote_style:Quote_Style=Quote_Style.With_Quotes) (headers:Boolean|Infer=Infer) (value_formatter:Data_Formatter|Nothing=Data_Formatter.Value) (keep_invalid_rows:Boolean=True) (line_endings:Line_Ending_Style|Infer=Infer) (comment_character:Text|Nothing=Nothing) diff --git a/std-bits/table/src/main/java/org/enso/table/read/DelimitedReader.java b/std-bits/table/src/main/java/org/enso/table/read/DelimitedReader.java index 5f545fe7c5..3c6df5d9bd 100644 --- a/std-bits/table/src/main/java/org/enso/table/read/DelimitedReader.java +++ b/std-bits/table/src/main/java/org/enso/table/read/DelimitedReader.java @@ -42,7 +42,9 @@ public class DelimitedReader { *

I considered choosing `\uF8EE` which comes from the Private Use Area of the Basic * Multilingual Plane. It has no meaning designated by the Unicode standard. */ - public static final char UNUSED_CHARACTER = '\0'; + public static final char COMMENT_CHARACTER = '\0'; + + public static final char UNUSED_CHARACTER = '\uF8EE'; private static final String COLUMN_NAME = "Column"; private static final char noQuoteCharacter = '\0'; @@ -113,15 +115,17 @@ public class DelimitedReader { String commentCharacter, boolean warningsAsErrors, ProblemAggregator problemAggregator) { - if (delimiter.isEmpty()) { - throw new IllegalArgumentException("Empty delimiters are not supported."); - } if (delimiter.length() > 1) { throw new IllegalArgumentException( "Delimiters consisting of multiple characters or code units are not supported."); } - - this.delimiter = delimiter.charAt(0); + if (delimiter.isEmpty()) { + // User wants to read each row into a single cell. So we delimit on a character that we assume + // is not in user data + this.delimiter = UNUSED_CHARACTER; + } else { + this.delimiter = delimiter.charAt(0); + } if (quote != null) { if (quote.isEmpty()) { @@ -198,7 +202,7 @@ public class DelimitedReader { } if (commentCharacter == null) { - format.setComment(UNUSED_CHARACTER); + format.setComment(COMMENT_CHARACTER); } else { if (commentCharacter.length() != 1) { throw new IllegalArgumentException( diff --git a/std-bits/table/src/main/java/org/enso/table/write/DelimitedWriter.java b/std-bits/table/src/main/java/org/enso/table/write/DelimitedWriter.java index 87cdd778d0..aba91c189c 100644 --- a/std-bits/table/src/main/java/org/enso/table/write/DelimitedWriter.java +++ b/std-bits/table/src/main/java/org/enso/table/write/DelimitedWriter.java @@ -207,7 +207,7 @@ public class DelimitedWriter { * TODO This should be checking if commenting is enabled, but currently * due to limitations of the reader library it is always enabled, just * sometimes the comment char is set to `\0`. 
See the documentation of - * {@link DelimitedReader#UNUSED_CHARACTER}. + * {@link DelimitedReader#COMMENT_CHARACTER}. * * See issue https://github.com/enso-org/enso/issues/5655 */ diff --git a/test/Table_Tests/src/IO/Delimited_Read_Spec.enso b/test/Table_Tests/src/IO/Delimited_Read_Spec.enso index 0c15f4333e..590b5bc478 100644 --- a/test/Table_Tests/src/IO/Delimited_Read_Spec.enso +++ b/test/Table_Tests/src/IO/Delimited_Read_Spec.enso @@ -398,6 +398,15 @@ spec = t2.at "c" . to_vector . should_equal [3, 6] t2.columns.map .name . should_equal ["a", "b", "c"] + Test.specify "should be able to read in a file without splitting it to columns" <| + t1 = (enso_project.data / "data_small.csv") . read (Delimited "" headers=False) + expected = ['Code,Index,Flag,Value,ValueWithNothing,TextWithNothing,"Hexadecimal",Leading0s,QuotedNumbers,"Mixed Types"'] + + ['gxl,7,True,38.76109,63.13, pq6igd2wyd ,4DD4675B,001,"1","33"'] + + ['wca,0,False,-66.77495,31," 2pr4102wc4 ",,002,"2",'] + + ['nfw,1, True , 88.65713\t\t\t,-68.71,"",01896EAB,123,,45'] + + ['der,7,True,0.86658,,,F32E1EFE,,"34",True'] + t1.at 0 . to_vector . should_equal expected + Test.specify "should be able to parse raw text" <| text1 = """ a,b,c