mirror of
https://github.com/enso-org/enso.git
synced 2024-12-23 14:52:01 +03:00
parent
6a09f12f3c
commit
f5db35af07
@ -902,7 +902,7 @@ type Column
|
||||
|
||||
## Parsing values is not supported in database columns.
|
||||
@type Widget_Helpers.parse_type_selector
|
||||
parse : (Auto|Integer|Decimal|Date|Date_Time|Time_Of_Day|Boolean) -> Text | Data_Formatter -> Problem_Behavior -> Column
|
||||
parse : Value_Type | Auto -> Text | Data_Formatter -> Problem_Behavior -> Column
|
||||
parse self type=Auto format=Data_Formatter.Value on_problems=Report_Warning =
|
||||
_ = [type, format, on_problems]
|
||||
Error.throw <| Unsupported_Database_Operation.Error "`Column.parse` is not implemented yet for the Database backends."
|
||||
|
@ -1382,8 +1382,8 @@ type Table
|
||||
|
||||
## Parsing values is not supported in database tables, the table has to be
|
||||
loaded into memory first with `read`.
|
||||
parse_values : Text | Integer | Column_Selector | Vector (Text | Integer | Column_Selector) -> (Auto|Integer|Decimal|Date|Date_Time|Time_Of_Day|Boolean) -> Text | Data_Formatter -> Boolean -> Problem_Behavior -> Table
|
||||
parse_values columns=(self.columns . filter (c-> c.value_type.is_text) . map .name) type=Auto format=Data_Formatter.Value error_on_missing_columns=True on_problems=Report_Warning =
|
||||
parse : Text | Integer | Column_Selector | Vector (Text | Integer | Column_Selector) -> Value_Type | Auto -> Text | Data_Formatter -> Boolean -> Problem_Behavior -> Table
|
||||
parse columns=(self.columns . filter (c-> c.value_type.is_text) . map .name) type=Auto format=Data_Formatter.Value error_on_missing_columns=True on_problems=Report_Warning =
|
||||
## Avoid unused arguments warning. We cannot rename arguments to `_`,
|
||||
because we need to keep the API consistent with the in-memory table.
|
||||
_ = [columns, type, format, error_on_missing_columns, on_problems]
|
||||
|
@ -16,7 +16,6 @@ import project.Internal.Widget_Helpers
|
||||
|
||||
from project.Data.Table import print_table
|
||||
from project.Data.Type.Value_Type import Value_Type, Auto
|
||||
from project.Data.Type.Value_Type_Helpers import ensure_valid_parse_target
|
||||
from project.Errors import No_Index_Set_Error, Floating_Point_Equality, Invalid_Value_Type
|
||||
|
||||
polyglot java import org.enso.table.data.column.operation.map.MapOperationProblemBuilder
|
||||
@ -1029,15 +1028,16 @@ type Column
|
||||
|
||||
example_contains = Examples.text_column_1.parse Boolean 'Yes|No'
|
||||
@type Widget_Helpers.parse_type_selector
|
||||
parse : (Auto|Integer|Decimal|Date|Date_Time|Time_Of_Day|Boolean) -> Text | Data_Formatter -> Problem_Behavior -> Column
|
||||
parse : Value_Type | Auto -> Text | Data_Formatter -> Problem_Behavior -> Column
|
||||
parse self type=Auto format=Data_Formatter.Value on_problems=Report_Warning =
|
||||
Value_Type.expect_text self.value_type related_column=self.name <| ensure_valid_parse_target type <|
|
||||
Value_Type.expect_text self.value_type related_column=self.name <|
|
||||
formatter = case format of
|
||||
_ : Text ->
|
||||
Data_Formatter.Value.with_format type format
|
||||
_ -> format
|
||||
_ : Data_Formatter -> format
|
||||
_ -> Error.throw (Illegal_Argument.Error "Invalid format type. Expected Text or Data_Formatter.")
|
||||
|
||||
parser = if type == Auto then formatter.make_auto_parser else formatter.make_datatype_parser type
|
||||
parser = formatter.make_value_type_parser type
|
||||
storage = self.java_column.getStorage
|
||||
new_storage_and_problems = parser.parseColumn self.name storage
|
||||
|
||||
|
@ -2,7 +2,7 @@ from Standard.Base import all
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
|
||||
import project.Internal.Parse_Values_Helper
|
||||
from project.Data.Type.Value_Type import Value_Type, Auto
|
||||
from project.Data.Type.Value_Type import Value_Type, Auto, Bits
|
||||
|
||||
polyglot java import org.enso.table.parsing.IntegerParser
|
||||
polyglot java import org.enso.table.parsing.DecimalParser
|
||||
@ -67,16 +67,15 @@ type Data_Formatter
|
||||
|
||||
Arguments:
|
||||
- text: Text value to parse.
|
||||
- datatype: Text value to parse.
|
||||
- datatype: The expected Enso type to parse the value into. If set to
|
||||
`Auto`, the type will be inferred automatically.
|
||||
- on_problems: Specifies the behavior when a problem occurs.
|
||||
By default, a warning is issued, but the operation proceeds.
|
||||
If set to `Report_Error`, the operation fails with a dataflow error.
|
||||
If set to `Ignore`, the operation proceeds without errors or warnings.
|
||||
parse : Text -> (Auto|Integer|Number|Date|Date_Time|Time_Of_Day|Boolean) -> Problem_Behavior -> Any
|
||||
parse self text datatype=Auto on_problems=Problem_Behavior.Report_Warning =
|
||||
parser = case datatype of
|
||||
Auto -> self.make_auto_parser
|
||||
_ -> self.make_datatype_parser datatype
|
||||
parser = self.make_datatype_parser datatype
|
||||
result = parser.parseIndependentValue text
|
||||
problems = Vector.from_polyglot_array result.problems . map (Parse_Values_Helper.translate_parsing_problem datatype)
|
||||
on_problems.attach_problems_after result.value problems
|
||||
@ -145,23 +144,25 @@ type Data_Formatter
|
||||
It is mostly a convenience function to easily specify a datatype format.
|
||||
|
||||
Arguments:
|
||||
- type: The datatype for which to change the format. The format can be
|
||||
changed only for Date_Time, Date, Time_Of_Day and Boolean types.
|
||||
- type: The value type for which to change the format. The format can be
|
||||
changed only for `Date_Time`, `Date`, `Time` and `Boolean` value types.
|
||||
- format: The new format string to set. For dates, it is the usual date
|
||||
format notation, and for booleans it should be two values that
|
||||
represent true and false, separated by a `|`.
|
||||
with_format : (Auto|Integer|Number|Date|Date_Time|Time_Of_Day|Boolean) -> Text -> Data_Formatter
|
||||
with_format : Value_Type | Auto -> Text -> Data_Formatter
|
||||
with_format self type format = case type of
|
||||
Auto -> Error.throw (Illegal_Argument.Error "Cannot specify a `format` with type `Auto`.")
|
||||
Integer -> Error.throw (Illegal_Argument.Error "Cannot specify a `format` with type `Integer`.")
|
||||
Decimal -> Error.throw (Illegal_Argument.Error "Cannot specify a `format` with type `Decimal`.")
|
||||
Date -> self.with_datetime_formats date_formats=[format]
|
||||
Date_Time -> self.with_datetime_formats datetime_formats=[format]
|
||||
Time_Of_Day -> self.with_datetime_formats time_formats=[format]
|
||||
Boolean ->
|
||||
Value_Type.Date -> self.with_datetime_formats date_formats=[format]
|
||||
Value_Type.Time -> self.with_datetime_formats time_formats=[format]
|
||||
Value_Type.Date_Time _ ->
|
||||
self.with_datetime_formats datetime_formats=[format]
|
||||
Value_Type.Boolean ->
|
||||
formats = format.split "|"
|
||||
if formats.length != 2 then Error.throw (Illegal_Argument.Error "The `format` for Booleans must be a string with two values separated by `|`, for example: 'Yes|No'.") else
|
||||
self.with_boolean_values true_values=[formats.at 0] false_values=[formats.at 1]
|
||||
Auto ->
|
||||
Error.throw (Illegal_Argument.Error "Cannot specify a `format` with type `Auto`.")
|
||||
_ : Value_Type ->
|
||||
Error.throw (Illegal_Argument.Error "Cannot specify a `format` for type `"+type.to_text+"`.")
|
||||
|
||||
## PRIVATE
|
||||
Clone the instance with some properties overridden.
|
||||
@ -216,7 +217,26 @@ type Data_Formatter
|
||||
Date -> self.make_date_parser
|
||||
Date_Time -> self.make_date_time_parser
|
||||
Time_Of_Day -> self.make_time_of_day_parser
|
||||
_ -> Error.throw (Illegal_Argument.Error "Unsupported datatype: "+datatype.to_text)
|
||||
Auto -> self.make_auto_parser
|
||||
_ ->
|
||||
type_name = case datatype.to_text of
|
||||
text : Text -> text
|
||||
_ -> Meta.meta datatype . to_text
|
||||
Error.throw (Illegal_Argument.Error "Unsupported datatype: "+type_name)
|
||||
|
||||
## PRIVATE
|
||||
make_value_type_parser self value_type = case value_type of
|
||||
# TODO once we implement #5159 we will need to add checks for bounds here and support 16/32-bit ints
|
||||
Value_Type.Integer Bits.Bits_64 -> self.make_integer_parser
|
||||
# TODO once we implement #6109 we can support 32-bit floats
|
||||
Value_Type.Float Bits.Bits_64 -> self.make_decimal_parser
|
||||
Value_Type.Boolean -> self.make_boolean_parser
|
||||
Value_Type.Date -> self.make_date_parser
|
||||
Value_Type.Date_Time True -> self.make_date_time_parser
|
||||
Value_Type.Time -> self.make_time_of_day_parser
|
||||
Auto -> self.make_auto_parser
|
||||
_ ->
|
||||
Error.throw (Illegal_Argument.Error "Unsupported value type: "+value_type.to_display_text)
|
||||
|
||||
## PRIVATE
|
||||
get_specific_type_parsers self =
|
||||
|
@ -38,7 +38,6 @@ import project.Data.Expression.Expression_Error
|
||||
import project.Delimited.Delimited_Format.Delimited_Format
|
||||
|
||||
from project.Data.Type.Value_Type import Value_Type, Auto
|
||||
from project.Data.Type.Value_Type_Helpers import ensure_valid_parse_target
|
||||
from project.Internal.Rows_View import Rows_View
|
||||
from project.Errors import all
|
||||
|
||||
@ -796,31 +795,31 @@ type Table
|
||||
> Example
|
||||
Parse the first and last columns containing Yes/No values as booleans.
|
||||
|
||||
table.parse_values columns=[0, -1] type=Boolean format="Yes|No"
|
||||
table.parse columns=[0, -1] type=Boolean format="Yes|No"
|
||||
|
||||
> Example
|
||||
Parse dates in a column in the format `yyyy-MM-dd` (the default format).
|
||||
|
||||
table.parse_values "birthday" Date
|
||||
table.parse "birthday" Date
|
||||
|
||||
> Example
|
||||
Parse dates in a column in the format `dd/MM/yyyy`.
|
||||
|
||||
table.parse_values "birthday" Date 'dd/MM/yyyy'
|
||||
table.parse "birthday" Date 'dd/MM/yyyy'
|
||||
|
||||
> Example
|
||||
Parse all columns inferring their types, using `,` as the decimal point for numbers.
|
||||
|
||||
table.parse_values format=(Data_Formatter.Value.with_number_formatting decimal_point=',')
|
||||
parse_values : Text | Integer | Column_Selector | Vector (Text | Integer | Column_Selector) -> (Auto|Integer|Decimal|Date|Date_Time|Time_Of_Day|Boolean) -> Text | Data_Formatter -> Boolean -> Problem_Behavior -> Table
|
||||
parse_values self columns=(self.columns . filter (c-> c.value_type.is_text) . map .name) type=Auto format=Data_Formatter.Value error_on_missing_columns=True on_problems=Report_Warning = ensure_valid_parse_target type <|
|
||||
table.parse format=(Data_Formatter.Value.with_number_formatting decimal_point=',')
|
||||
parse : Text | Integer | Column_Selector | Vector (Text | Integer | Column_Selector) -> Value_Type | Auto -> Text | Data_Formatter -> Boolean -> Problem_Behavior -> Table
|
||||
parse self columns=(self.columns . filter (c-> c.value_type.is_text) . map .name) type=Auto format=Data_Formatter.Value error_on_missing_columns=True on_problems=Report_Warning =
|
||||
formatter = case format of
|
||||
_ : Text ->
|
||||
Data_Formatter.Value.with_format type format
|
||||
_ -> format
|
||||
_ : Data_Formatter -> format
|
||||
_ -> Error.throw (Illegal_Argument.Error "Invalid format type. Expected Text or Data_Formatter.")
|
||||
|
||||
parser = if type == Auto then formatter.make_auto_parser else
|
||||
formatter.make_datatype_parser type
|
||||
parser = formatter.make_value_type_parser type
|
||||
|
||||
select_problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns
|
||||
selected_columns = self.columns_helper.select_columns_helper columns reorder=True select_problem_builder
|
||||
|
@ -75,11 +75,3 @@ find_common_type types strict =
|
||||
# Double check if Mixed was really allowed to come out.
|
||||
if types.contains Value_Type.Mixed then Value_Type.Mixed else
|
||||
Nothing
|
||||
|
||||
## PRIVATE
|
||||
Checks if the given type is a valid target type for parsing.
|
||||
|
||||
This will be replaced once we change parse to rely on `Value_Type` instead.
|
||||
ensure_valid_parse_target type ~action =
|
||||
expected_types = [Auto, Integer, Decimal, Date, Date_Time, Time_Of_Day, Boolean]
|
||||
if expected_types.contains type . not then Error.throw (Illegal_Argument.Error "Unsupported target type "+type.to_text+".") else action
|
||||
|
@ -1,6 +1,7 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import Standard.Table.Data.Expression.Expression_Error
|
||||
import Standard.Table.Data.Type.Value_Type.Value_Type
|
||||
|
||||
polyglot java import org.enso.table.error.ColumnCountMismatchException
|
||||
polyglot java import org.enso.table.error.ColumnNameMismatchException
|
||||
@ -243,18 +244,18 @@ type Invalid_Location
|
||||
Arguments:
|
||||
- column: the column in which the problematic cells appeared, if applicable.
|
||||
It may be empty if the value is parsed outside of a context of a column.
|
||||
- datatype: The expected datatype.
|
||||
- value_type: The expected value type.
|
||||
- cells: Contents of the cells that did not match the expected datatype
|
||||
format.
|
||||
type Invalid_Format
|
||||
## PRIVATE
|
||||
Error column:(Text|Nothing) (datatype:(Integer|Number|Date|Time|Time_Of_Day|Boolean)) (cells:[Text])
|
||||
Error column:(Text|Nothing) (value_type:Value_Type|Integer|Number|Date|Time|Time_Of_Day|Boolean) (cells:[Text])
|
||||
|
||||
## PRIVATE
|
||||
Pretty print the invalid format error.
|
||||
to_display_text : Text
|
||||
to_display_text self =
|
||||
self.cells.length+" cells in column "+self.column+" had invalid format for datatype "+self.datatype.to_text+"."
|
||||
self.cells.length+" cells in column "+self.column+" had invalid format for type "+self.value_type.to_text+"."
|
||||
|
||||
## Indicates that some values contained leading zeros even though these were not allowed.
|
||||
|
||||
@ -270,7 +271,7 @@ type Leading_Zeros
|
||||
## PRIVATE
|
||||
Pretty print the leading zeros error.
|
||||
to_display_text : Text
|
||||
to_display_text self = "Leading zeros in column "+self.column+" with datatype "+self.datatype.to_text+"."
|
||||
to_display_text self = "Leading zeros in column "+self.column+" with datatype "+self.value_type.to_text+"."
|
||||
|
||||
## Indicates that an empty file was encountered, so no data could be loaded.
|
||||
type Empty_File_Error
|
||||
|
@ -9,10 +9,10 @@ polyglot java import org.enso.table.parsing.problems.LeadingZeros
|
||||
## PRIVATE
|
||||
Translates a parse related problem additionally enriching it with expected
|
||||
datatype information that is not originally present on the Java side.
|
||||
translate_parsing_problem expected_datatype problem = case problem of
|
||||
translate_parsing_problem expected_value_type problem = case problem of
|
||||
java_problem : InvalidFormat ->
|
||||
Invalid_Format.Error java_problem.column expected_datatype (Vector.from_polyglot_array java_problem.cells)
|
||||
Invalid_Format.Error java_problem.column expected_value_type (Vector.from_polyglot_array java_problem.cells)
|
||||
java_problem : LeadingZeros ->
|
||||
Leading_Zeros.Error java_problem.column expected_datatype (Vector.from_polyglot_array java_problem.cells)
|
||||
Leading_Zeros.Error java_problem.column expected_value_type (Vector.from_polyglot_array java_problem.cells)
|
||||
_ ->
|
||||
Panic.throw (Illegal_State.Error "Reported an unknown problem type: "+problem.to_text)
|
||||
|
@ -16,8 +16,10 @@ make_column_name_selector table display=Display.Always =
|
||||
Selector for type argument on `Column.parse`.
|
||||
parse_type_selector : Single_Choice
|
||||
parse_type_selector =
|
||||
choice = ['Auto', 'Integer', 'Decimal', 'Date', 'Date_Time', 'Time_Of_Day', 'Boolean']
|
||||
Single_Choice display=Display.Always values=(choice.map n->(Option n))
|
||||
choice = ['Auto', 'Value_Type.Integer', 'Value_Type.Float', 'Value_Type.Date', 'Value_Type.Date_Time', 'Value_Type.Time', 'Value_Type.Boolean']
|
||||
names = ['Auto', 'Integer', 'Float', 'Date', 'Date_Time', 'Time', 'Boolean']
|
||||
options = names.zip choice . map pair-> Option pair.first pair.second
|
||||
Single_Choice display=Display.Always values=options
|
||||
|
||||
## PRIVATE
|
||||
Selector for type argument on `Column.parse`.
|
||||
|
@ -97,11 +97,14 @@ expect_warning expected_warning result =
|
||||
## UNSTABLE
|
||||
Checks if the provided value has a specific warning attached and if there are
|
||||
no other warnings.
|
||||
|
||||
As a utility, it also returns the found warning.
|
||||
|
||||
Arguments:
|
||||
- expected_warning: The expected warning. It can either be a warning type or
|
||||
a concrete value.
|
||||
- result: The value to check.
|
||||
expect_only_warning : Any -> Any -> Nothing
|
||||
expect_only_warning : Any -> Any -> Any
|
||||
expect_only_warning expected_warning result =
|
||||
warnings = get_attached_warnings result
|
||||
is_expected x =
|
||||
@ -114,6 +117,7 @@ expect_only_warning expected_warning result =
|
||||
if invalid.not_empty then
|
||||
loc = Meta.get_source_location 3
|
||||
Test.fail "Expected the result to contain only the warning: "+found.to_text+", but it also contained: "+invalid.to_text+' (at '+loc+').'
|
||||
found
|
||||
|
||||
|
||||
## UNSTABLE
|
||||
|
@ -40,10 +40,11 @@ public class TypeInferringParser extends DatatypeParser {
|
||||
|
||||
@Override
|
||||
public WithProblems<Storage<?>> parseColumn(String columnName, Storage<String> sourceStorage) {
|
||||
// If there are no rows, the Auto parser would guess some random type (the first one that is
|
||||
// checked). Instead,
|
||||
// we just return the empty column unchanged.
|
||||
if (sourceStorage.size() == 0) {
|
||||
// If there are no values, the Auto parser would guess some random type (the first one that is
|
||||
// checked). Instead, we just return the empty column unchanged.
|
||||
boolean hasNoValues =
|
||||
(sourceStorage.size() == 0) || (sourceStorage.countMissing() == sourceStorage.size());
|
||||
if (hasNoValues) {
|
||||
return fallbackParser.parseColumn(columnName, sourceStorage);
|
||||
}
|
||||
|
||||
|
@ -127,6 +127,27 @@ spec =
|
||||
And newlines toO!
|
||||
formatter.parse complex_text . should_equal complex_text
|
||||
|
||||
Test.specify "should report Invalid_Format errors" <|
|
||||
formatter = Data_Formatter.Value
|
||||
expect_warning r =
|
||||
r.should_equal Nothing
|
||||
Problems.expect_only_warning Invalid_Format r
|
||||
|
||||
r1 = formatter.parse "Text" datatype=Decimal
|
||||
w1 = expect_warning r1
|
||||
w1.value_type . should_equal Decimal
|
||||
w1.column . should_equal Nothing
|
||||
|
||||
expect_warning <| formatter.parse "Text" datatype=Integer
|
||||
expect_warning <| formatter.parse "Text" datatype=Boolean
|
||||
expect_warning <| formatter.parse "Text" datatype=Date
|
||||
expect_warning <| formatter.parse "Text" datatype=Date_Time
|
||||
expect_warning <| formatter.parse "Text" datatype=Time_Of_Day
|
||||
|
||||
Test.specify "should not allow unexpected types" <|
|
||||
formatter = Data_Formatter.Value
|
||||
formatter.parse "Text" datatype=List . should_fail_with Illegal_Argument
|
||||
|
||||
Test.group "DataFormatter.format" <|
|
||||
Test.specify "should handle Nothing" <|
|
||||
Data_Formatter.Value.format Nothing . should_equal Nothing
|
||||
|
@ -11,24 +11,24 @@ import Standard.Test.Extensions
|
||||
import project.Util
|
||||
|
||||
spec =
|
||||
Test.group "Table.parse_values" <|
|
||||
Test.group "Table.parse" <|
|
||||
Test.specify "should correctly parse integers" <|
|
||||
t1 = Table.new [["ints", ["0", "+0", "-0", "+1", "-1", "1", "000", "0010", "12345", Nothing]]]
|
||||
t2 = t1.parse_values type=Integer
|
||||
t2 = t1.parse type=Value_Type.Integer
|
||||
t2.at "ints" . to_vector . should_equal [0, 0, 0, 1, -1, 1, Nothing, Nothing, 12345, Nothing]
|
||||
|
||||
Test.specify "should correctly parse decimals" <|
|
||||
t1 = Table.new [["ints", ["0", "+0", "-0", "+1", "-1", "1", "12345", Nothing]]]
|
||||
t2 = t1.parse_values type=Decimal
|
||||
t2 = t1.parse type=Value_Type.Float
|
||||
t2.at "ints" . to_vector . should_equal [0, 0, 0, 1, -1, 1, 12345, Nothing]
|
||||
t2.at "ints" . to_vector . map .to_text . should_equal ["0.0", "0.0", "-0.0", "1.0", "-1.0", "1.0", "12345.0", "Nothing"]
|
||||
|
||||
t3 = Table.new [["floats", ["0.0", "+0.0", "-0.0", "+1.0", "-1.0", "1.0", "0.0000", "10.", "12345."]]]
|
||||
t4 = t3.parse_values type=Decimal
|
||||
t4 = t3.parse type=Value_Type.Float
|
||||
t4.at "floats" . to_vector . should_equal [0, 0, 0, 1, -1, 1, 0, 10, 12345]
|
||||
|
||||
t5 = Table.new [["floats", [".0", "0.", "1.", ".1", ".123", "-.1", "+.1", "+0.0", "0.1234", Nothing, "11111111.111"]]]
|
||||
t6 = t5.parse_values type=Decimal
|
||||
t6 = t5.parse type=Value_Type.Float
|
||||
t6.at "floats" . to_vector . should_equal [0.0, 0.0, 1.0, 0.1, 0.123, -0.1, 0.1, 0.0, 0.1234, Nothing, 11111111.111]
|
||||
|
||||
Test.specify "should warn on leading zeros in numbers, if asked" <|
|
||||
@ -37,56 +37,56 @@ spec =
|
||||
|
||||
t1_parsed = [0, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, 12345, Nothing]
|
||||
t1_zeros = ["+00", "-00", "+01", "-01", "01", "000", "0010"]
|
||||
t3 = t1.parse_values type=Integer
|
||||
t3 = t1.parse type=Value_Type.Integer
|
||||
t3.at "ints" . to_vector . should_equal t1_parsed
|
||||
Problems.get_attached_warnings t3 . should_equal [Leading_Zeros.Error "ints" Integer t1_zeros]
|
||||
Problems.get_attached_warnings t3 . should_equal [Leading_Zeros.Error "ints" Value_Type.Integer t1_zeros]
|
||||
|
||||
t4 = t1.parse_values type=Decimal
|
||||
t4 = t1.parse type=Value_Type.Float
|
||||
t4.at "ints" . to_vector . should_equal t1_parsed
|
||||
Problems.get_attached_warnings t4 . should_equal [Leading_Zeros.Error "ints" Decimal t1_zeros]
|
||||
Problems.get_attached_warnings t4 . should_equal [Leading_Zeros.Error "ints" Value_Type.Float t1_zeros]
|
||||
|
||||
t5 = t2.parse_values type=Decimal
|
||||
t5 = t2.parse type=Value_Type.Float
|
||||
t5.at "floats" . to_vector . should_equal [0.0, 0.0, Nothing, Nothing, Nothing, 1.0]
|
||||
Problems.get_attached_warnings t5 . should_equal [Leading_Zeros.Error "floats" Decimal ["00.", "01.0", '-0010.0000']]
|
||||
Problems.get_attached_warnings t5 . should_equal [Leading_Zeros.Error "floats" Value_Type.Float ["00.", "01.0", '-0010.0000']]
|
||||
|
||||
opts = Data_Formatter.Value allow_leading_zeros=True
|
||||
t1_parsed_zeros = [0, 0, 0, 1, -1, 1, 0, 10, 12345, Nothing]
|
||||
t6 = t1.parse_values format=opts type=Integer
|
||||
t6 = t1.parse format=opts type=Value_Type.Integer
|
||||
t6.at "ints" . to_vector . should_equal t1_parsed_zeros
|
||||
Problems.assume_no_problems t6
|
||||
|
||||
t7 = t1.parse_values format=opts type=Decimal
|
||||
t7 = t1.parse format=opts type=Value_Type.Float
|
||||
t7.at "ints" . to_vector . should_equal t1_parsed_zeros
|
||||
Problems.assume_no_problems t7
|
||||
|
||||
t8 = t2.parse_values format=opts type=Decimal
|
||||
t8 = t2.parse format=opts type=Value_Type.Float
|
||||
t8.at "floats" . to_vector . should_equal [0.0, 0.0, 0.0, 1.0, -10.0, 1.0]
|
||||
Problems.assume_no_problems t8
|
||||
|
||||
Test.specify "should correctly parse booleans" <|
|
||||
t1 = Table.new [["bools", ["true", "false", "True", "TRUE", "FALSE", Nothing, "False"]]]
|
||||
t2 = t1.parse_values type=Boolean
|
||||
t2 = t1.parse type=Value_Type.Boolean
|
||||
t2.at "bools" . to_vector . should_equal [True, False, True, True, False, Nothing, False]
|
||||
|
||||
t3 = Table.new [["bools", ["1", "0", "true", "yes", "oui", "no", "NO!"]]]
|
||||
t4 = t3.parse_values type=Boolean format="yes|no"
|
||||
t4 = t3.parse type=Value_Type.Boolean format="yes|no"
|
||||
t4.at "bools" . to_vector . should_equal [Nothing, Nothing, Nothing, True, Nothing, False, Nothing]
|
||||
|
||||
Test.specify "should correctly parse date and time" <|
|
||||
t1 = Table.new [["dates", ["2022-05-07", "2000-01-01", "2010-12-31"]]]
|
||||
t2 = t1.parse_values type=Date
|
||||
t2 = t1.parse type=Value_Type.Date
|
||||
t2.at "dates" . to_vector . should_equal [Date.new 2022 5 7, Date.new 2000 1 1, Date.new 2010 12 31]
|
||||
|
||||
t3 = Table.new [["datetimes", ["2022-05-07 23:59:59", "2000-01-01 00:00:00", "2010-12-31 12:34:56"]]]
|
||||
t4 = t3.parse_values type=Date_Time
|
||||
t4 = t3.parse type=Value_Type.Date_Time
|
||||
t4.at "datetimes" . to_vector . should_equal [Date_Time.new 2022 5 7 23 59 59, Date_Time.new 2000 1 1, Date_Time.new 2010 12 31 12 34 56]
|
||||
|
||||
t5 = Table.new [["times", ["23:59:59", "00:00:00", "12:34:56"]]]
|
||||
t6 = t5.parse_values type=Time_Of_Day
|
||||
t6 = t5.parse type=Value_Type.Time
|
||||
t6.at "times" . to_vector . should_equal [Time_Of_Day.new 23 59 59, Time_Of_Day.new, Time_Of_Day.new 12 34 56]
|
||||
|
||||
t7 = Table.new [["dates", ["07/05/2022", "01/01/2001", "31/12/2010"]]]
|
||||
t8 = t7.parse_values type=Date format="dd/MM/yyyy"
|
||||
t8 = t7.parse type=Value_Type.Date format="dd/MM/yyyy"
|
||||
t8.at "dates" . value_type . should_equal Value_Type.Date
|
||||
t8.at "dates" . to_vector . should_equal [Date.new 2022 5 7, Date.new 2001 1 1, Date.new 2010 12 31]
|
||||
|
||||
@ -94,15 +94,15 @@ spec =
|
||||
opts = Data_Formatter.Value date_formats=["d.M.y", "d MMM y[ G]", "E, d MMM y"] datetime_formats=["yyyy-MM-dd'T'HH:mm:ss", "dd/MM/yyyy HH:mm"] time_formats=["H:mm:ss.n", "h:mma"]
|
||||
|
||||
t1 = Table.new [["dates", ["1.2.476", "10 Jan 1900 AD", "Tue, 3 Jun 2008"]]]
|
||||
t2 = t1.parse_values format=opts type=Date
|
||||
t2 = t1.parse format=opts type=Value_Type.Date
|
||||
t2.at "dates" . to_vector . should_equal [Date.new 476 2 1, Date.new 1900 1 10, Date.new 2008 6 3]
|
||||
|
||||
t3 = Table.new [["datetimes", ["2011-12-03T10:15:30", "31/12/2012 22:33"]]]
|
||||
t4 = t3.parse_values format=opts type=Date_Time
|
||||
t4 = t3.parse format=opts type=Value_Type.Date_Time
|
||||
t4.at "datetimes" . to_vector . should_equal [Date_Time.new 2011 12 3 10 15 30, Date_Time.new 2012 12 31 22 33]
|
||||
|
||||
t5 = Table.new [["times", ["1:02:03.987654321", "1:30PM"]]]
|
||||
t6 = t5.parse_values format=opts type=Time_Of_Day
|
||||
t6 = t5.parse format=opts type=Value_Type.Time
|
||||
t6.at "times" . to_vector . should_equal [Time_Of_Day.new 1 2 3 nanosecond=987654321, Time_Of_Day.new 13 30 0 0]
|
||||
|
||||
Test.specify "should warn when cells do not fit the expected format" <|
|
||||
@ -113,51 +113,51 @@ spec =
|
||||
times = ["2001-01-01", "2001-01-01 12:34:56", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]
|
||||
t = Table.new [ints, floats, bools, ["times", times]]
|
||||
|
||||
t0 = t.parse_values type=Boolean
|
||||
t0 = t.parse type=Value_Type.Boolean
|
||||
t0.at "bools" . to_vector . should_equal [True, False, Nothing, Nothing, Nothing, Nothing, Nothing, True, Nothing]
|
||||
t0.at "ints" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing]
|
||||
Problems.expect_warning (Invalid_Format.Error "bools" Boolean ["fAlSE", "foobar", "", "0", "1", "truefalse"]) t0
|
||||
Problems.expect_warning (Invalid_Format.Error "ints" Boolean ["0", "1", "1.0", "foobar", "", "--1", "+-1", "10", "-+1"]) t0
|
||||
Problems.expect_warning (Invalid_Format.Error "bools" Value_Type.Boolean ["fAlSE", "foobar", "", "0", "1", "truefalse"]) t0
|
||||
Problems.expect_warning (Invalid_Format.Error "ints" Value_Type.Boolean ["0", "1", "1.0", "foobar", "", "--1", "+-1", "10", "-+1"]) t0
|
||||
|
||||
a1 = t.parse_values columns=["ints"] type=Integer on_problems=_
|
||||
a1 = t.parse columns=["ints"] type=Value_Type.Integer on_problems=_
|
||||
t1 t =
|
||||
t.at "ints" . to_vector . should_equal [0, 1, Nothing, Nothing, Nothing, Nothing, Nothing, 10, Nothing]
|
||||
p1 = [Invalid_Format.Error "ints" Integer ["1.0", "foobar", "", "--1", "+-1", "-+1"]]
|
||||
p1 = [Invalid_Format.Error "ints" Value_Type.Integer ["1.0", "foobar", "", "--1", "+-1", "-+1"]]
|
||||
Problems.test_problem_handling a1 p1 t1
|
||||
|
||||
a2 = t.parse_values columns=["floats"] type=Decimal on_problems=_
|
||||
a2 = t.parse columns=["floats"] type=Value_Type.Float on_problems=_
|
||||
t2 t =
|
||||
t.at "floats" . to_vector . should_equal [0, 2, Nothing, Nothing, Nothing, Nothing, Nothing, 100, Nothing]
|
||||
p2 = [Invalid_Format.Error "floats" Decimal ["1e6", "foobar", "", "--1", "+-1", "-+1"]]
|
||||
p2 = [Invalid_Format.Error "floats" Value_Type.Float ["1e6", "foobar", "", "--1", "+-1", "-+1"]]
|
||||
Problems.test_problem_handling a2 p2 t2
|
||||
|
||||
a3 = t.parse_values columns=["bools"] type=Boolean on_problems=_
|
||||
a3 = t.parse columns=["bools"] type=Value_Type.Boolean on_problems=_
|
||||
t3 t =
|
||||
t.at "bools" . to_vector . should_equal [True, False, Nothing, Nothing, Nothing, Nothing, Nothing, True, Nothing]
|
||||
p3 = [Invalid_Format.Error "bools" Boolean ["fAlSE", "foobar", "", "0", "1", "truefalse"]]
|
||||
p3 = [Invalid_Format.Error "bools" Value_Type.Boolean ["fAlSE", "foobar", "", "0", "1", "truefalse"]]
|
||||
Problems.test_problem_handling a3 p3 t3
|
||||
|
||||
a4 = t.parse_values columns=["times"] type=Date on_problems=_
|
||||
a4 = t.parse columns=["times"] type=Value_Type.Date on_problems=_
|
||||
t4 t =
|
||||
t.at "times" . to_vector . should_equal [Date.new 2001 1 1, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing]
|
||||
p4 = [Invalid_Format.Error "times" Date ["2001-01-01 12:34:56", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]]
|
||||
p4 = [Invalid_Format.Error "times" Value_Type.Date ["2001-01-01 12:34:56", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]]
|
||||
Problems.test_problem_handling a4 p4 t4
|
||||
|
||||
a5 = t.parse_values columns=["times"] type=Date_Time on_problems=_
|
||||
a5 = t.parse columns=["times"] type=Value_Type.Date_Time on_problems=_
|
||||
t5 t =
|
||||
t.at "times" . to_vector . should_equal [Nothing, Date_Time.new 2001 1 1 12 34 56, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing]
|
||||
p5 = [Invalid_Format.Error "times" Date_Time ["2001-01-01", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]]
|
||||
p5 = [Invalid_Format.Error "times" Value_Type.Date_Time ["2001-01-01", "10:00:10", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]]
|
||||
Problems.test_problem_handling a5 p5 t5
|
||||
|
||||
a6 = t.parse_values columns=["times"] type=Time_Of_Day on_problems=_
|
||||
a6 = t.parse columns=["times"] type=Value_Type.Time on_problems=_
|
||||
t6 t =
|
||||
t.at "times" . to_vector . should_equal [Nothing, Nothing, Time_Of_Day.new 10 0 10 0, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing]
|
||||
p6 = [Invalid_Format.Error "times" Time_Of_Day ["2001-01-01", "2001-01-01 12:34:56", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]]
|
||||
p6 = [Invalid_Format.Error "times" Value_Type.Time ["2001-01-01", "2001-01-01 12:34:56", "Tuesday", "foobar", "", "10:99:99", "1/2/2003", "2001-30-10"]]
|
||||
Problems.test_problem_handling a6 p6 t6
|
||||
|
||||
Test.specify "should leave not selected columns unaffected" <|
|
||||
t1 = Table.new [["A", ["1", "2"]], ["B", ["3", "4"]]]
|
||||
t2 = t1.parse_values columns="B"
|
||||
t2 = t1.parse columns="B"
|
||||
t2.at "A" . to_vector . should_equal ["1", "2"]
|
||||
t2.at "B" . to_vector . should_equal [3, 4]
|
||||
|
||||
@ -174,7 +174,7 @@ spec =
|
||||
c10 = ["mixeddates", ["2022-10-01", "2000-01-01 01:02:03", "01:02:03", Nothing]]
|
||||
c11 = ["text+ints", ["1", "2", " foobar", Nothing]]
|
||||
t = Table.new [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11]
|
||||
t2 = t.parse_values
|
||||
t2 = t.parse
|
||||
|
||||
Problems.assume_no_problems t2
|
||||
t2.at "ints" . to_vector . should_equal [1, 2, -123, Nothing]
|
||||
@ -191,19 +191,19 @@ spec =
|
||||
t2.at "text+ints" . to_vector . should_equal ["1", "2", "foobar", Nothing]
|
||||
|
||||
# In Auto mode, integers take precedence over booleans.
|
||||
t3 = Table.new [["bools", ["1", "0", "True"]], ["ints", ["1", "0", "0"]]] . parse_values format=(Data_Formatter.Value true_values=["1", "True"] false_values=["0", "False"])
|
||||
t3 = Table.new [["bools", ["1", "0", "True"]], ["ints", ["1", "0", "0"]]] . parse format=(Data_Formatter.Value true_values=["1", "True"] false_values=["0", "False"])
|
||||
t3.at "bools" . to_vector . should_equal [True, False, True]
|
||||
t3.at "ints" . to_vector . should_equal [1, 0, 0]
|
||||
|
||||
t4 = Table.new [c2] . parse_values format=(Data_Formatter.Value allow_leading_zeros=True)
|
||||
t4 = Table.new [c2] . parse format=(Data_Formatter.Value allow_leading_zeros=True)
|
||||
t4 . at "ints0" . to_vector . should_equal [1, 2, Nothing, -1]
|
||||
|
||||
t5 = t.parse_values columns="ints" type=Decimal
|
||||
t5 = t.parse columns="ints" type=Value_Type.Float
|
||||
t5.at "ints" . to_vector . should_equal [1.0, 2.0, -123.0, Nothing]
|
||||
# `ints` are requested to be parsed as decimals.
|
||||
t5.at "ints" . to_vector . first . should_be_a Decimal
|
||||
|
||||
t6 = t.parse_values columns=["floats", "text+ints"] type=Auto
|
||||
t6 = t.parse columns=["floats", "text+ints"] type=Auto
|
||||
# `floats` are auto-detected as decimals.
|
||||
t6.at "floats" . to_vector . should_equal [1.0, 2.2, Nothing, -1.0]
|
||||
# `text+ints` is attempted to be parsed (hence whitespace is stripped), but it only fits the text type.
|
||||
@ -214,27 +214,27 @@ spec =
|
||||
Test.specify "should allow to specify a thousands separator and a custom decimal point" <|
|
||||
opts = Data_Formatter.Value decimal_point=',' thousand_separator='_'
|
||||
t1 = Table.new [["floats", ["0,0", "+0,0", "-0,0", "+1,5", "-1,2", "1,0", "0,0000", "10_000,", ",0"]]]
|
||||
t2 = t1.parse_values format=opts
|
||||
t2 = t1.parse format=opts
|
||||
t2.at "floats" . to_vector . should_equal [0.0, 0.0, 0.0, 1.5, -1.2, 1.0, 0.0, 10000.0, 0.0]
|
||||
|
||||
t3 = Table.new [["xs", ["1,2", "1.3", "_0", "0_", "1_0_0"]]]
|
||||
t4 = t3.parse_values format=opts type=Decimal
|
||||
t4 = t3.parse format=opts type=Value_Type.Float
|
||||
t4.at "xs" . to_vector . should_equal [1.2, Nothing, Nothing, Nothing, 100.0]
|
||||
Problems.get_attached_warnings t4 . should_equal [Invalid_Format.Error "xs" Decimal ["1.3", "_0", "0_"]]
|
||||
t5 = t3.parse_values format=opts type=Integer
|
||||
Problems.get_attached_warnings t4 . should_equal [Invalid_Format.Error "xs" Value_Type.Float ["1.3", "_0", "0_"]]
|
||||
t5 = t3.parse format=opts type=Value_Type.Integer
|
||||
t5.at "xs" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing, 100]
|
||||
Problems.get_attached_warnings t5 . should_equal [Invalid_Format.Error "xs" Integer ["1,2", "1.3", "_0", "0_"]]
|
||||
Problems.get_attached_warnings t5 . should_equal [Invalid_Format.Error "xs" Value_Type.Integer ["1,2", "1.3", "_0", "0_"]]
|
||||
|
||||
Test.specify "should allow to specify custom values for booleans" <|
|
||||
opts_1 = Data_Formatter.Value true_values=["1", "YES"] false_values=["0"]
|
||||
t1 = Table.new [["bools", ["1", "0", "YES", "1", "0"]]]
|
||||
t2 = t1.parse_values format=opts_1
|
||||
t2 = t1.parse format=opts_1
|
||||
t2.at "bools" . to_vector . should_equal [True, False, True, True, False]
|
||||
|
||||
t3 = Table.new [["bools", ["1", "NO", "False", "True", "YES", "no", "oui", "0"]]]
|
||||
t4 = t3.parse_values format=opts_1 type=Boolean
|
||||
t4 = t3.parse format=opts_1 type=Value_Type.Boolean
|
||||
t4.at "bools" . to_vector . should_equal [True, Nothing, Nothing, Nothing, True, Nothing, Nothing, False]
|
||||
Problems.get_attached_warnings t4 . should_equal [Invalid_Format.Error "bools" Boolean ["NO", "False", "True", "no", "oui"]]
|
||||
Problems.get_attached_warnings t4 . should_equal [Invalid_Format.Error "bools" Value_Type.Boolean ["NO", "False", "True", "no", "oui"]]
|
||||
|
||||
whitespace_table =
|
||||
ints = ["ints", ["0", "1 ", "0 1", " 2"]]
|
||||
@ -246,62 +246,62 @@ spec =
|
||||
Table.new [ints, floats, bools, dates, datetimes, times]
|
||||
|
||||
Test.specify "should trim input values by default" <|
|
||||
t1 = whitespace_table.parse_values columns="ints" type=Integer
|
||||
t1 = whitespace_table.parse columns="ints" type=Value_Type.Integer
|
||||
t1.at "ints" . to_vector . should_equal [0, 1, Nothing, 2]
|
||||
Problems.expect_only_warning (Invalid_Format.Error "ints" Integer ["0 1"]) t1
|
||||
Problems.expect_only_warning (Invalid_Format.Error "ints" Value_Type.Integer ["0 1"]) t1
|
||||
|
||||
t2 = whitespace_table.parse_values columns="floats" type=Decimal
|
||||
t2 = whitespace_table.parse columns="floats" type=Value_Type.Float
|
||||
t2.at "floats" . to_vector . should_equal [0.0, 2.0, Nothing, 10.0]
|
||||
Problems.expect_only_warning (Invalid_Format.Error "floats" Decimal ["- 1"]) t2
|
||||
Problems.expect_only_warning (Invalid_Format.Error "floats" Value_Type.Float ["- 1"]) t2
|
||||
|
||||
t3 = whitespace_table.parse_values columns="bools" type=Boolean
|
||||
t3 = whitespace_table.parse columns="bools" type=Value_Type.Boolean
|
||||
t3.at "bools" . to_vector . should_equal [True, False, Nothing, False]
|
||||
Problems.expect_only_warning (Invalid_Format.Error "bools" Boolean ["t rue"]) t3
|
||||
Problems.expect_only_warning (Invalid_Format.Error "bools" Value_Type.Boolean ["t rue"]) t3
|
||||
|
||||
t4 = whitespace_table.parse_values columns="dates" type=Date
|
||||
t4 = whitespace_table.parse columns="dates" type=Value_Type.Date
|
||||
t4.at "dates" . to_vector . should_equal [Date.new 2022 1 1, Date.new 2022 7 17, Nothing, Nothing]
|
||||
Problems.expect_only_warning (Invalid_Format.Error "dates" Date ["2022 - 07 - 17", ""]) t4
|
||||
Problems.expect_only_warning (Invalid_Format.Error "dates" Value_Type.Date ["2022 - 07 - 17", ""]) t4
|
||||
|
||||
t5 = whitespace_table.parse_values columns="datetimes" type=Date_Time
|
||||
t5 = whitespace_table.parse columns="datetimes" type=Value_Type.Date_Time
|
||||
t5.at "datetimes" . to_vector . should_equal [Date_Time.new 2022 1 1 11 59, Nothing, Nothing, Nothing]
|
||||
Problems.expect_only_warning (Invalid_Format.Error "datetimes" Date_Time ["2022 - 07 - 17 1:2:3", "2022-01-01 11:59:00"]) t5
|
||||
Problems.expect_only_warning (Invalid_Format.Error "datetimes" Value_Type.Date_Time ["2022 - 07 - 17 1:2:3", "2022-01-01 11:59:00"]) t5
|
||||
|
||||
t6 = whitespace_table.parse_values columns="times" type=Time_Of_Day
|
||||
t6 = whitespace_table.parse columns="times" type=Value_Type.Time
|
||||
t6.at "times" . to_vector . should_equal [Time_Of_Day.new 11 0 0, Time_Of_Day.new, Nothing, Nothing]
|
||||
Problems.expect_only_warning (Invalid_Format.Error "times" Time_Of_Day ["00 : 00 : 00"]) t6
|
||||
Problems.expect_only_warning (Invalid_Format.Error "times" Value_Type.Time ["00 : 00 : 00"]) t6
|
||||
|
||||
Test.specify "should fail to parse if whitespace is present and trimming is turned off" <|
|
||||
opts = Data_Formatter.Value trim_values=False
|
||||
t1 = whitespace_table.parse_values format=opts columns="ints" type=Integer
|
||||
t1 = whitespace_table.parse format=opts columns="ints" type=Value_Type.Integer
|
||||
t1.at "ints" . to_vector . should_equal [0, Nothing, Nothing, Nothing]
|
||||
Problems.expect_only_warning (Invalid_Format.Error "ints" Integer ["1 ", "0 1", " 2"]) t1
|
||||
Problems.expect_only_warning (Invalid_Format.Error "ints" Value_Type.Integer ["1 ", "0 1", " 2"]) t1
|
||||
|
||||
t2 = whitespace_table.parse_values format=opts columns="floats" type=Decimal
|
||||
t2 = whitespace_table.parse format=opts columns="floats" type=Value_Type.Float
|
||||
t2.at "floats" . to_vector . should_equal [Nothing, Nothing, Nothing, 10.0]
|
||||
Problems.expect_only_warning (Invalid_Format.Error "floats" Decimal ["0 ", " 2.0", "- 1"]) t2
|
||||
Problems.expect_only_warning (Invalid_Format.Error "floats" Value_Type.Float ["0 ", " 2.0", "- 1"]) t2
|
||||
|
||||
t3 = whitespace_table.parse_values format=opts columns="bools" type=Boolean
|
||||
t3 = whitespace_table.parse format=opts columns="bools" type=Value_Type.Boolean
|
||||
t3.at "bools" . to_vector . should_equal [Nothing, Nothing, Nothing, False]
|
||||
Problems.expect_only_warning (Invalid_Format.Error "bools" Boolean ["True ", " false", "t rue"]) t3
|
||||
Problems.expect_only_warning (Invalid_Format.Error "bools" Value_Type.Boolean ["True ", " false", "t rue"]) t3
|
||||
|
||||
t4 = whitespace_table.parse_values format=opts columns="dates" type=Date
|
||||
t4 = whitespace_table.parse format=opts columns="dates" type=Value_Type.Date
|
||||
t4.at "dates" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing]
|
||||
Problems.expect_only_warning (Invalid_Format.Error "dates" Date [" 2022-01-01", "2022-07-17 ", "2022 - 07 - 17", ""]) t4
|
||||
Problems.expect_only_warning (Invalid_Format.Error "dates" Value_Type.Date [" 2022-01-01", "2022-07-17 ", "2022 - 07 - 17", ""]) t4
|
||||
|
||||
t5 = whitespace_table.parse_values format=opts columns="datetimes" type=Date_Time
|
||||
t5 = whitespace_table.parse format=opts columns="datetimes" type=Value_Type.Date_Time
|
||||
t5.at "datetimes" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing]
|
||||
Problems.expect_only_warning (Invalid_Format.Error "datetimes" Date_Time [" 2022-01-01 11:59:00 ", "2022 - 07 - 17 1:2:3 ", "2022-01-01 11:59:00"]) t5
|
||||
Problems.expect_only_warning (Invalid_Format.Error "datetimes" Value_Type.Date_Time [" 2022-01-01 11:59:00 ", "2022 - 07 - 17 1:2:3 ", "2022-01-01 11:59:00"]) t5
|
||||
|
||||
t6 = whitespace_table.parse_values format=opts columns="times" type=Time_Of_Day
|
||||
t6 = whitespace_table.parse format=opts columns="times" type=Value_Type.Time
|
||||
t6.at "times" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing]
|
||||
Problems.expect_only_warning (Invalid_Format.Error "times" Time_Of_Day ["11:00:00 ", " 00:00:00", "00 : 00 : 00"]) t6
|
||||
Problems.expect_only_warning (Invalid_Format.Error "times" Value_Type.Time ["11:00:00 ", " 00:00:00", "00 : 00 : 00"]) t6
|
||||
|
||||
Test.specify "should fallback to text if whitespace is present and trimming is turned off" <|
|
||||
c1 = ["1", " +2", "-123", Nothing]
|
||||
c2 = [" 1.0 ", "2.2", Nothing, "-1.0"]
|
||||
c3 = ["true", " False", Nothing, "True"]
|
||||
t = Table.new [["ints", c1], ["floats", c2], ["bools", c3]]
|
||||
t2 = t.parse_values format=(Data_Formatter.Value trim_values=False)
|
||||
t2 = t.parse format=(Data_Formatter.Value trim_values=False)
|
||||
|
||||
Warning.get_all t2 . should_equal []
|
||||
t2.at "ints" . to_vector . should_equal c1
|
||||
@ -310,7 +310,7 @@ spec =
|
||||
|
||||
Test.specify "should allow selecting columns by regex" <|
|
||||
t1 = Table.new [["An", ["1", "2", "3"]], ["Am", ["4", "5", "6"]], ["C", ["7", "8", "9"]], ["D", ["10", "11", "12"]]]
|
||||
r1 = t1.parse_values columns=[Column_Selector.By_Name "A.*" use_regex=True]
|
||||
r1 = t1.parse columns=[Column_Selector.By_Name "A.*" use_regex=True]
|
||||
r1.at "An" . to_vector . should_equal [1, 2, 3]
|
||||
r1.at "Am" . to_vector . should_equal [4, 5, 6]
|
||||
r1.at "C" . to_vector . should_equal ["7", "8", "9"]
|
||||
@ -318,15 +318,15 @@ spec =
|
||||
|
||||
Test.specify "should correctly handle problems: missing input columns" <|
|
||||
t1 = Table.new [["A", ["1", "2", "3"]]]
|
||||
r1 = t1.parse_values columns=["A", "B", "C", "E"] on_problems=Problem_Behavior.Ignore
|
||||
r1 = t1.parse columns=["A", "B", "C", "E"] on_problems=Problem_Behavior.Ignore
|
||||
r1.should_fail_with Missing_Input_Columns
|
||||
r1.catch.criteria . should_equal ["B", "C", "E"]
|
||||
|
||||
r2 = t1.parse_values columns=[Column_Selector.By_Name "A.+" use_regex=True]
|
||||
r2 = t1.parse columns=[Column_Selector.By_Name "A.+" use_regex=True]
|
||||
r2.should_fail_with Missing_Input_Columns
|
||||
r2.catch.criteria . should_equal ["A.+"]
|
||||
|
||||
action = t1.parse_values columns=["A", "B", "C", "E"] error_on_missing_columns=False on_problems=_
|
||||
action = t1.parse columns=["A", "B", "C", "E"] error_on_missing_columns=False on_problems=_
|
||||
tester table =
|
||||
table.at "A" . to_vector . should_equal [1, 2, 3]
|
||||
problems = [Missing_Input_Columns.Error ["B", "C", "E"]]
|
||||
@ -334,11 +334,11 @@ spec =
|
||||
|
||||
Test.specify "should correctly handle problems: out of bounds indices" <|
|
||||
t1 = Table.new [["A", ["1", "2", "3"]]]
|
||||
r1 = t1.parse_values columns=[0, -1, 42, -5]
|
||||
r1 = t1.parse columns=[0, -1, 42, -5]
|
||||
r1.should_fail_with Column_Indexes_Out_Of_Range
|
||||
r1.catch.indexes . should_equal [42, -5]
|
||||
|
||||
action = t1.parse_values columns=[0, -1, 42, -5] error_on_missing_columns=False on_problems=_
|
||||
action = t1.parse columns=[0, -1, 42, -5] error_on_missing_columns=False on_problems=_
|
||||
tester table =
|
||||
table.at "A" . to_vector . should_equal [1, 2, 3]
|
||||
problems = [Column_Indexes_Out_Of_Range.Error [42, -5]]
|
||||
@ -346,7 +346,7 @@ spec =
|
||||
|
||||
Test.specify "should allow mixed column selectors" <|
|
||||
t1 = Table.new [["Am", ["1", "2", "3"]], ["B", ["4", "5", "6"]], ["C", ["7", "8", "9"]], ["D", ["10", "11", "12"]]]
|
||||
r1 = t1.parse_values columns=[(Column_Selector.By_Name "A.*" use_regex=True), -2, "D"]
|
||||
r1 = t1.parse columns=[(Column_Selector.By_Name "A.*" use_regex=True), -2, "D"]
|
||||
r1.at "Am" . to_vector . should_equal [1, 2, 3]
|
||||
r1.at "B" . to_vector . should_equal ["4", "5", "6"]
|
||||
r1.at "C" . to_vector . should_equal [7, 8, 9]
|
||||
@ -354,7 +354,7 @@ spec =
|
||||
|
||||
Test.specify "should handle edge-cases: overlapping selectors" <|
|
||||
t1 = Table.new [["Am", ["1", "2", "3"]], ["B", ["4", "5", "6"]], ["C", ["7", "8", "9"]], ["D", ["10", "11", "12"]]]
|
||||
r1 = t1.parse_values columns=[(Column_Selector.By_Name "A.*" use_regex=True), 0, "D", -1, -1, 0, 3]
|
||||
r1 = t1.parse columns=[(Column_Selector.By_Name "A.*" use_regex=True), 0, "D", -1, -1, 0, 3]
|
||||
r1.at "Am" . to_vector . should_equal [1, 2, 3]
|
||||
r1.at "B" . to_vector . should_equal ["4", "5", "6"]
|
||||
r1.at "C" . to_vector . should_equal ["7", "8", "9"]
|
||||
@ -362,29 +362,29 @@ spec =
|
||||
|
||||
Test.specify "should error if invalid target type is provided" <|
|
||||
t1 = Table.new [["A", ["1", "2", "3"]]]
|
||||
t1.parse_values type=Nothing . should_fail_with Illegal_Argument
|
||||
t1.parse type=Nothing . should_fail_with Illegal_Argument
|
||||
|
||||
Test.specify "should error if the input column is not text" <|
|
||||
t1 = Table.new [["A", [1, 2, 3]], ["B", ["4", "5", "6"]], ["C", [7, 8, 9]], ["D", ["10", "11", "12"]]]
|
||||
r1 = t1.parse_values columns=["A", "B", "C"]
|
||||
r1 = t1.parse columns=["A", "B", "C"]
|
||||
r1.should_fail_with Invalid_Value_Type
|
||||
r1.catch.related_column . should_equal "A"
|
||||
r1.catch.expected.is_text.should_be_true
|
||||
|
||||
Test.specify "should error if no input columns selected, unless error_on_missing_columns=False" <|
|
||||
t1 = Table.new [["A", ["1", "2", "3"]]]
|
||||
r1 = t1.parse_values columns=[]
|
||||
r1 = t1.parse columns=[]
|
||||
r1.should_fail_with No_Input_Columns_Selected
|
||||
|
||||
r2 = t1.parse_values columns=[] error_on_missing_columns=False
|
||||
r2 = t1.parse columns=[] error_on_missing_columns=False
|
||||
r2 . should_equal t1
|
||||
Problems.expect_warning No_Input_Columns_Selected r2
|
||||
|
||||
r3 = t1.parse_values columns=[] error_on_missing_columns=False on_problems=Problem_Behavior.Ignore
|
||||
r3 = t1.parse columns=[] error_on_missing_columns=False on_problems=Problem_Behavior.Ignore
|
||||
r3 . should_equal t1
|
||||
Problems.assume_no_problems r3
|
||||
|
||||
r4 = t1.parse_values columns=["nonexistent column :D", -42] error_on_missing_columns=False on_problems=Problem_Behavior.Report_Warning
|
||||
r4 = t1.parse columns=["nonexistent column :D", -42] error_on_missing_columns=False on_problems=Problem_Behavior.Report_Warning
|
||||
r4 . should_equal t1
|
||||
Problems.expect_warning No_Input_Columns_Selected r4
|
||||
Problems.expect_warning (Missing_Input_Columns.Error ["nonexistent column :D"]) r4
|
||||
@ -393,87 +393,93 @@ spec =
|
||||
Test.group "Column.parse" <|
|
||||
Test.specify "should correctly parse integers" <|
|
||||
c1 = Column.from_vector "ints" ["0", "+0", "-0", "+1", "-1", "1", "000", "0010", "12345", Nothing]
|
||||
c2 = c1.parse Integer
|
||||
c2 = c1.parse type=Value_Type.Integer
|
||||
c2.name.should_equal c1.name
|
||||
c2 . to_vector . should_equal [0, 0, 0, 1, -1, 1, Nothing, Nothing, 12345, Nothing]
|
||||
c2.value_type.should_equal Value_Type.Integer
|
||||
Problems.expect_warning Leading_Zeros c2
|
||||
|
||||
c3 = c1.parse Integer format=(Data_Formatter.Value.with_number_formatting allow_leading_zeros=True)
|
||||
c3 = c1.parse type=Value_Type.Integer format=(Data_Formatter.Value.with_number_formatting allow_leading_zeros=True)
|
||||
c3.to_vector . should_equal [0, 0, 0, 1, -1, 1, 0, 10, 12345, Nothing]
|
||||
Problems.assume_no_problems c3
|
||||
|
||||
Test.specify "should correctly parse decimals" <|
|
||||
c1 = Column.from_vector "ints" ["0", "+0", "-0", "+1", "-1", "1", "000", "0010", "12345", Nothing]
|
||||
c2 = c1.parse Decimal
|
||||
c2 = c1.parse Value_Type.Float
|
||||
c2.name.should_equal c1.name
|
||||
c2 . to_vector . should_equal [0, 0, 0, 1, -1, 1, Nothing, Nothing, 12345, Nothing]
|
||||
c2.to_vector . should_equal [0, 0, 0, 1, -1, 1, Nothing, Nothing, 12345, Nothing]
|
||||
c2.value_type.should_equal Value_Type.Float
|
||||
c2.to_vector . map .to_text . should_equal ["0.0", "0.0", "-0.0", "1.0", "-1.0", "1.0", "Nothing", "Nothing", "12345.0", "Nothing"]
|
||||
Problems.expect_warning Leading_Zeros c2
|
||||
|
||||
c3 = Column.from_vector "floats" ["0.0", "+0.0", "-0.0", "+1.0", "-1.0", "1.0", "0.0000", "10.", "12345."]
|
||||
c4 = c3.parse Decimal
|
||||
c4 = c3.parse Value_Type.Float
|
||||
c4.to_vector . should_equal [0, 0, 0, 1, -1, 1, 0, 10, 12345]
|
||||
c4.value_type.is_floating_point.should_be_true
|
||||
Problems.assume_no_problems c4
|
||||
|
||||
c5 = Column.from_vector "floats" [".0", "0.", "1.", ".1", ".123", "-.1", "+.1", "+0.0", "0.1234", Nothing, "11111111.111"]
|
||||
c6 = c5.parse Decimal
|
||||
c6 = c5.parse Value_Type.Float
|
||||
c6.to_vector . should_equal [0.0, 0.0, 1.0, 0.1, 0.123, -0.1, 0.1, 0.0, 0.1234, Nothing, 11111111.111]
|
||||
Problems.assume_no_problems c6
|
||||
|
||||
Test.specify "should correctly parse booleans" <|
|
||||
c1 = Column.from_vector "bools" ["true", "false", "True", "TRUE", "FALSE", Nothing, "False"]
|
||||
c2 = c1.parse Boolean
|
||||
c2 = c1.parse type=Value_Type.Boolean
|
||||
c2.name.should_equal c1.name
|
||||
c2.to_vector . should_equal [True, False, True, True, False, Nothing, False]
|
||||
c2.value_type.should_equal Value_Type.Boolean
|
||||
c1.parse . to_vector . should_equal [True, False, True, True, False, Nothing, False]
|
||||
|
||||
c3 = Column.from_vector "bools" ["yes", "no", Nothing]
|
||||
c4 = c3.parse Boolean "yes|no"
|
||||
c4 = c3.parse type=Value_Type.Boolean "yes|no"
|
||||
c4.to_vector . should_equal [True, False, Nothing]
|
||||
|
||||
c5 = Column.from_vector "bools" ["true", "yes", "false"]
|
||||
c6 = c5.parse Boolean
|
||||
c6 = c5.parse type=Value_Type.Boolean
|
||||
c6.to_vector . should_equal [True, Nothing, False]
|
||||
w = Problems.get_attached_warnings c6 . find w-> w.is_a Invalid_Format
|
||||
w.column.should_equal "bools"
|
||||
w.datatype . should_equal Boolean
|
||||
w.value_type . should_equal Value_Type.Boolean
|
||||
w.cells . should_equal ["yes"]
|
||||
|
||||
Test.specify "should correctly parse date and time" <|
|
||||
c1 = Column.from_vector "date" ["2022-05-07", "2000-01-01", "2010-12-31"]
|
||||
c2 = c1.parse Date
|
||||
c2 = c1.parse type=Value_Type.Date
|
||||
c2.to_vector . should_equal [Date.new 2022 5 7, Date.new 2000 1 1, Date.new 2010 12 31]
|
||||
c2.value_type.should_equal Value_Type.Date
|
||||
|
||||
c3 = Column.from_vector "datetimes" ["2022-05-07 23:59:59", "2000-01-01 00:00:00", "2010-12-31 12:34:56", "2010-12-31T12:34:56", "2010-12-31 12:34:56.123"]
|
||||
c4 = c3.parse Date_Time
|
||||
c4 = c3.parse type=Value_Type.Date_Time
|
||||
c4.to_vector . should_equal [Date_Time.new 2022 5 7 23 59 59, Date_Time.new 2000 1 1, Date_Time.new 2010 12 31 12 34 56, Date_Time.new 2010 12 31 12 34 56, Date_Time.new 2010 12 31 12 34 56 123]
|
||||
c4.value_type.should_equal Value_Type.Date_Time
|
||||
|
||||
c5 = Column.from_vector "times" ["23:59:59", "00:00:00", "12:34:56"]
|
||||
c6 = c5.parse Time_Of_Day
|
||||
c6 = c5.parse type=Value_Type.Time
|
||||
c6.to_vector . should_equal [Time_Of_Day.new 23 59 59, Time_Of_Day.new, Time_Of_Day.new 12 34 56]
|
||||
c6.value_type.should_equal Value_Type.Time
|
||||
|
||||
c7 = Column.from_vector "foo" ["2022-05-07 23:59:59", "42", "2010-12-31"]
|
||||
c8 = c7.parse Date_Time . to_vector . should_equal [Date_Time.new 2022 5 7 23 59 59, Nothing, Nothing]
|
||||
c8 = c7.parse type=Value_Type.Date_Time . to_vector . should_equal [Date_Time.new 2022 5 7 23 59 59, Nothing, Nothing]
|
||||
w = Problems.get_attached_warnings c8 . find w-> w.is_a Invalid_Format
|
||||
w.column.should_equal "foo"
|
||||
w.datatype . should_equal Date_Time
|
||||
w.value_type . should_equal Value_Type.Date_Time
|
||||
w.cells . should_equal ["42", "2010-12-31"]
|
||||
|
||||
Test.specify "should correctly parse date and time with format" <|
|
||||
c1 = Column.from_vector "date" ["5/7/2022", "1/1/2000", "12/31/2010"]
|
||||
c2 = c1.parse Date "M/d/yyyy"
|
||||
c2 = c1.parse type=Value_Type.Date "M/d/yyyy"
|
||||
c2.to_vector . should_equal [Date.new 2022 5 7, Date.new 2000 1 1, Date.new 2010 12 31]
|
||||
|
||||
c3 = Column.from_vector "datetimes" ["5/7/2022 23:59:59", "1/1/2000 00:00:00", "12/31/2010 12:34:56"]
|
||||
c4 = c3.parse Date_Time "M/d/yyyy HH:mm:ss"
|
||||
c4 = c3.parse type=Value_Type.Date_Time "M/d/yyyy HH:mm:ss"
|
||||
c4.to_vector . should_equal [Date_Time.new 2022 5 7 23 59 59, Date_Time.new 2000 1 1, Date_Time.new 2010 12 31 12 34 56]
|
||||
|
||||
Test.specify "should handle invalid format strings gracefully" <|
|
||||
c1 = Column.from_vector "date" ["5/7/2022", "1/1/2000", "12/31/2010"]
|
||||
c1.parse Date "M/d/fqsrf" . should_fail_with Illegal_Argument
|
||||
c1.parse Time_Of_Day "HH:mm:ss.fff" . should_fail_with Illegal_Argument
|
||||
c1.parse Date_Time "M/d/fqsrf HH:mm:ss.fff" . should_fail_with Illegal_Argument
|
||||
c1.parse type=Value_Type.Date "M/d/fqsrf" . should_fail_with Illegal_Argument
|
||||
c1.parse type=Value_Type.Time "HH:mm:ss.fff" . should_fail_with Illegal_Argument
|
||||
c1.parse type=Value_Type.Date_Time "M/d/fqsrf HH:mm:ss.fff" . should_fail_with Illegal_Argument
|
||||
|
||||
Test.specify "should correctly work in Auto mode" <|
|
||||
c1 = Column.from_vector "A" ["1", "2", "3"]
|
||||
@ -484,21 +490,26 @@ spec =
|
||||
c6 = Column.from_vector "F" ["this is here to ensure the column has type text... can be replaced one we have retyping"]
|
||||
c7 = Column.from_vector "G" ["true", "42"]
|
||||
c8 = Column.from_vector "H" ["text-to-force-value-type-to-be-text", Nothing, Nothing, Nothing]
|
||||
c8.value_type . should_equal Value_Type.Char
|
||||
|
||||
r1 = c1.parse
|
||||
r1.to_vector . should_equal [1, 2, 3]
|
||||
r1.value_type.should_equal Value_Type.Integer
|
||||
Problems.assume_no_problems r1
|
||||
|
||||
r2 = c2.parse
|
||||
r2.to_vector . should_equal [1.0, 2.5, 3.0]
|
||||
r2.value_type.should_equal Value_Type.Float
|
||||
Problems.assume_no_problems r2
|
||||
|
||||
r3 = c3.parse
|
||||
r3.to_vector . should_equal [Date.new 2022 5 7, Date.new 2000 1 1, Date.new 2010 12 31]
|
||||
r3.value_type.should_equal Value_Type.Date
|
||||
Problems.assume_no_problems r3
|
||||
|
||||
r4 = c4.parse
|
||||
r4.to_vector . should_equal [True, False, Nothing]
|
||||
r4.value_type.should_equal Value_Type.Boolean
|
||||
Problems.assume_no_problems r4
|
||||
|
||||
r5 = c5.parse
|
||||
@ -508,25 +519,27 @@ spec =
|
||||
c5.parse format="yes|no" . should_fail_with Illegal_Argument
|
||||
r5_2 = c5.parse format=(Data_Formatter.Value.with_boolean_values ["yes"] ["no"])
|
||||
r5_2.to_vector . should_equal [True, False]
|
||||
r5_2.value_type . should_equal Value_Type.Boolean
|
||||
Problems.assume_no_problems r5_2
|
||||
|
||||
r6 = (c6.drop 1).parse
|
||||
r6.to_vector . should_equal []
|
||||
Test.with_clue "r6.value_type == "+r6.value_type.to_text+"; " <|
|
||||
r6.value_type.is_text . should_be_true
|
||||
r6.value_type . should_equal Value_Type.Char
|
||||
Problems.assume_no_problems r6
|
||||
|
||||
r7 = c7.parse
|
||||
r7.to_vector . should_equal ["true", "42"]
|
||||
r7.value_type . should_equal Value_Type.Char
|
||||
Problems.assume_no_problems r7
|
||||
|
||||
r8 = c8.drop 1 . parse
|
||||
r8.value_type . should_equal Value_Type.Char
|
||||
r8.to_vector . should_equal [Nothing, Nothing, Nothing]
|
||||
Problems.assume_no_problems r8
|
||||
|
||||
Test.specify "should error if invalid target type is provided" <|
|
||||
c1 = Column.from_vector "A" ["1", "2", "3"]
|
||||
c1.parse Nothing . should_fail_with Illegal_Argument
|
||||
c1.parse type=Nothing . should_fail_with Illegal_Argument
|
||||
|
||||
Test.specify "should error if the input column is not text" <|
|
||||
c1 = Column.from_vector "A" [1, 2, 3]
|
||||
|
Loading…
Reference in New Issue
Block a user