From 8da06309e99f24d7062df5638f49e000fb292eff Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Tue, 9 Jul 2024 22:04:08 +0100 Subject: [PATCH] Date Time Pickers, Temporarily Disable Encoding.default (#10493) - Widgets for Date_Time, Time_Of_Day and Time_Zone. - Disable Encoding.default for now as big performance impact on CSVs. ![image](https://github.com/enso-org/enso/assets/4699705/c1b936f0-3ab4-490c-8fe5-2310ef1ed079) ![image](https://github.com/enso-org/enso/assets/4699705/d5e29ec4-cc52-41e5-a532-17cd6dff34b9) ![image](https://github.com/enso-org/enso/assets/4699705/61455519-ea63-4275-9c7a-603714ff9f85) ![image](https://github.com/enso-org/enso/assets/4699705/48ccd3ad-5e15-49f9-87cd-4710ca559843) --- .../Base/0.0.0-dev/src/Data/Locale.enso | 2 +- .../0.0.0-dev/src/Data/Text/Encoding.enso | 3 ++- .../0.0.0-dev/src/Data/Time/Date_Time.enso | 12 ++++++++++ .../0.0.0-dev/src/Data/Time/Time_Of_Day.enso | 7 ++++++ .../0.0.0-dev/src/Data/Time/Time_Zone.enso | 22 +++++++++++++++++-- .../main/java/org/enso/base/Time_Utils.java | 4 ++++ test/Base_Tests/src/Data/Json_Spec.enso | 2 +- .../src/Data/Text/Encoding_Spec.enso | 2 +- .../src/IO/Delimited_Read_Spec.enso | 10 ++++++--- .../src/IO/Delimited_Write_Spec.enso | 2 +- 10 files changed, 56 insertions(+), 10 deletions(-) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Locale.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Locale.enso index 52b5bf0acf..09aea22a60 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Locale.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Locale.enso @@ -460,7 +460,7 @@ type Locale to_display_text self = "Locale(" + self.to_text + ")" ## PRIVATE - Gets the default drop down option for this encoding. + Gets the default drop down option for Locale. 
default_widget : Widget default_widget = Widget.Single_Choice values=Locale.widget_options display=Display.When_Modified diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Encoding.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Encoding.enso index edeb5df713..6d0e62852c 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Encoding.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Encoding.enso @@ -71,7 +71,8 @@ type Encoding default -> Encoding = # This factory method is used to publicly expose the `Default` constructor. # The constructor itself has to be private, because we want to make `Value` constructor private, but all constructors must have the same privacy. - Encoding.Default + # TODO: Temporary workaround - `Encoding.Default` detection has a big performance impact on CSVs, so UTF-8 is returned for now. + Encoding.utf_8 ## PRIVATE A default encoding that will try to guess the encoding based on some heuristics. diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Date_Time.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Date_Time.enso index c232c067fd..fce4ede75b 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Date_Time.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Date_Time.enso @@ -24,7 +24,9 @@ import project.Nothing.Nothing import project.Panic.Panic import project.Warning.Warning from project.Data.Boolean import Boolean, False, True +from project.Data.Time.Date import make_day_picker from project.Data.Text.Extensions import all +from project.Metadata import Display, Widget from project.Widget_Helpers import make_date_time_format_selector polyglot java import java.lang.ArithmeticException @@ -148,6 +150,16 @@ type Date_Time from Standard.Base import Date_Time, Time_Zone example_new = Date_Time.new 1986 8 5 + @year (Widget.Numeric_Input display=Display.Always) + @month (Widget.Numeric_Input minimum=1 maximum=12 display=Display.Always) + @day make_day_picker + @hour (Widget.Numeric_Input minimum=0 
maximum=23 display=Display.Always) + @minute (Widget.Numeric_Input minimum=0 maximum=59 display=Display.Always) + @second (Widget.Numeric_Input minimum=0 maximum=59 display=Display.When_Modified) + @millisecond (Widget.Numeric_Input minimum=0 maximum=999 display=Display.When_Modified) + @microsecond (Widget.Numeric_Input minimum=0 maximum=999 display=Display.When_Modified) + @nanosecond (Widget.Numeric_Input minimum=0 maximum=999 display=Display.When_Modified) + @zone Time_Zone.default_widget new : Integer -> Integer -> Integer -> Integer -> Integer -> Integer -> Integer -> Integer -> Integer -> Time_Zone -> Date_Time ! Time_Error new year (month = 1) (day = 1) (hour = 0) (minute = 0) (second = 0) (millisecond = 0) (microsecond = 0) (nanosecond = 0) (zone = Time_Zone.system) = total_nanoseconds = nanosecond + microsecond * 1000 + millisecond * 1000000 diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Time_Of_Day.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Time_Of_Day.enso index d6eb7110b5..7397e45117 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Time_Of_Day.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Time_Of_Day.enso @@ -20,6 +20,7 @@ import project.Nothing.Nothing import project.Panic.Panic from project.Data.Boolean import Boolean, False, True from project.Data.Text.Extensions import all +from project.Metadata import Display, Widget from project.Widget_Helpers import make_time_format_selector polyglot java import java.lang.Exception as JException @@ -92,6 +93,12 @@ type Time_Of_Day from Standard.Base import Time_Of_Day example_epoch = Time_Of_Day.new hour=9 minute=30 + @hour (Widget.Numeric_Input minimum=0 maximum=23 display=Display.Always) + @minute (Widget.Numeric_Input minimum=0 maximum=59 display=Display.Always) + @second (Widget.Numeric_Input minimum=0 maximum=59 display=Display.When_Modified) + @millisecond (Widget.Numeric_Input minimum=0 maximum=999 
display=Display.When_Modified) + @microsecond (Widget.Numeric_Input minimum=0 maximum=999 display=Display.When_Modified) + @nanosecond (Widget.Numeric_Input minimum=0 maximum=999 display=Display.When_Modified) new : Integer -> Integer -> Integer -> Integer -> Integer -> Integer -> Time_Of_Day ! Time_Error new (hour = 0) (minute = 0) (second = 0) (millisecond = 0) (microsecond = 0) (nanosecond = 0) = total_nanoseconds = nanosecond + microsecond * 1000 + millisecond * 1000000 diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Time_Zone.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Time_Zone.enso index 8d6caf78bd..00f9137cc9 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Time_Zone.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Time_Zone.enso @@ -3,11 +3,14 @@ import project.Data.Json.JS_Object import project.Data.Numbers.Integer import project.Data.Text.Text import project.Data.Time.Date_Time.Date_Time +import project.Data.Vector.Vector import project.Error.Error import project.Errors.Illegal_Argument.Illegal_Argument import project.Errors.Time_Error.Time_Error import project.Panic.Panic from project.Data.Boolean import Boolean, False, True +from project.Metadata import Display, make_single_choice, Widget +from project.Metadata.Choice import Option polyglot java import java.lang.Exception as JException polyglot java import java.time.ZoneId @@ -107,8 +110,11 @@ type Time_Zone from Standard.Base.Time.Time_Zone import Time_Zone example_new = Time_Zone.new 1 1 50 + @hours (Widget.Numeric_Input minimum=-18 maximum=18 display=Display.Always) + @minutes (Widget.Numeric_Input minimum=-59 maximum=59 display=Display.When_Modified) + @seconds (Widget.Numeric_Input minimum=-59 maximum=59 display=Display.When_Modified) new : Integer -> Integer -> Integer -> Time_Zone - new (hours = 0) (minutes = 0) (seconds = 0) = + new (hours:Integer = 0) (minutes:Integer = 0) (seconds:Integer = 0) = new_builtin hours 
minutes seconds ## ALIAS time zone from text @@ -147,8 +153,9 @@ type Time_Zone from Standard.Base import Time_Zone example_parse = Time_Zone.parse "+03:02:01" + @id (make_single_choice Time_Zone.zone_names Display.Always) parse : Text -> Time_Zone ! Time_Error - parse id = + parse id:Text = Panic.catch JException handler=(catch -> Error.throw (Time_Error.Error catch.payload.getMessage)) <| parse_builtin id @@ -194,6 +201,17 @@ type Time_Zone to_display_text : Text to_display_text self = self.to_text + ## PRIVATE + Gets the default drop down option for Time_Zone. + default_widget : Widget + default_widget = + options = [Option "System" "Time_Zone.system", Option "Local" "Time_Zone.local", Option "UTC" "Time_Zone.utc", Option "Named" "(Time_Zone.parse 'UTC')", Option "Custom" "(Time_Zone.new 1 0 0)"] + Widget.Single_Choice values=options display=Display.When_Modified + + ## Gets a list of all the time zone names that are predefined. + zone_names : Vector Text + zone_names = Time_Utils.getZoneNames + ## PRIVATE Time_Zone.from (that:JS_Object) = if that.get "type" == "Time_Zone" && ["id"].all that.contains_key then Time_Zone.parse (that.get "id") else diff --git a/std-bits/base/src/main/java/org/enso/base/Time_Utils.java b/std-bits/base/src/main/java/org/enso/base/Time_Utils.java index 35ddf787fc..053baf0413 100644 --- a/std-bits/base/src/main/java/org/enso/base/Time_Utils.java +++ b/std-bits/base/src/main/java/org/enso/base/Time_Utils.java @@ -246,4 +246,8 @@ public class Time_Utils { LocalDate baseDate = LocalDate.of(minYear, 1, 1); builder.appendValueReduced(yearField, 2, 2, baseDate); } + + public static String[] getZoneNames() { + return ZoneId.getAvailableZoneIds().stream().sorted().toArray(String[]::new); + } } diff --git a/test/Base_Tests/src/Data/Json_Spec.enso b/test/Base_Tests/src/Data/Json_Spec.enso index 2dfd4b1c2b..59f7c2cc96 100644 --- a/test/Base_Tests/src/Data/Json_Spec.enso +++ b/test/Base_Tests/src/Data/Json_Spec.enso @@ -117,7 +117,7 @@ add_specs 
suite_builder = '{"type":"Date_Time","constructor":"new","year":2023,"month":9,"day":29,"hour":11,"second":52}'.should_parse_as (JS_Object.from_pairs [["type", "Date_Time"], ["constructor", "new"], ["year", 2023], ["month", 9], ["day", 29], ["hour", 11], ["second", 52]]) '{"type":"Date_Time","constructor":"new","year":2023,"month":9,"day":29,"hour":11,"minute":52,"nanosecond":572104300}'.should_parse_as (JS_Object.from_pairs [["type", "Date_Time"], ["constructor", "new"], ["year", 2023], ["month", 9], ["day", 29], ["hour", 11], ["minute", 52], ["nanosecond", 572104300]]) - group_builder.specify "should be able to read a JSON file with a BOM indicating UTF-16 encoding" <| + group_builder.specify "should be able to read a JSON file with a BOM indicating UTF-16 encoding" pending="Encoding.default turned off temporarily" <| utf_16_le_bom = [-1, -2] bytes = utf_16_le_bom + ("{}".bytes Encoding.utf_16_le) f = File.create_temporary_file "json-with-bom" ".json" diff --git a/test/Base_Tests/src/Data/Text/Encoding_Spec.enso b/test/Base_Tests/src/Data/Text/Encoding_Spec.enso index 16e14c904d..fc917137f1 100644 --- a/test/Base_Tests/src/Data/Text/Encoding_Spec.enso +++ b/test/Base_Tests/src/Data/Text/Encoding_Spec.enso @@ -68,7 +68,7 @@ add_specs suite_builder = default_warning.should_equal invalid_ascii_out Problems.get_attached_warnings default_warning . 
should_contain_the_same_elements_as problems - suite_builder.group "Default Encoding" group_builder-> + suite_builder.group "Default Encoding" pending="Encoding.default turned off temporarily" group_builder-> group_builder.specify "should try reading as UTF-8 by default" <| bytes = [65, -60, -123, -60, -103] # A ą ę diff --git a/test/Table_Tests/src/IO/Delimited_Read_Spec.enso b/test/Table_Tests/src/IO/Delimited_Read_Spec.enso index e805ffc614..4824a7bc77 100644 --- a/test/Table_Tests/src/IO/Delimited_Read_Spec.enso +++ b/test/Table_Tests/src/IO/Delimited_Read_Spec.enso @@ -475,7 +475,7 @@ add_specs suite_builder = Delimited_Format.Delimited ',' . with_line_endings Line_Ending_Style.Unix . should_equal (Delimited_Format.Delimited ',' line_endings=Line_Ending_Style.Unix) utf_16_le_bom = [-1, -2] - group_builder.specify "(in default mode) should detect UTF-16 encoding if BOM is present" <| + group_builder.specify "(in default mode) should detect UTF-16 encoding if BOM is present" pending="Encoding.default turned off temporarily" <| bytes = utf_16_le_bom + ('a,b\n1,2'.bytes Encoding.utf_16_le) f = File.create_temporary_file "delimited-utf-16-bom" ".csv" bytes.write_bytes f . should_succeed @@ -485,7 +485,7 @@ add_specs suite_builder = # No hidden BOM in the column name table.column_names.first.utf_8 . should_equal [97] - group_builder.specify "(in default mode) should skip UTF-8 BOM if it was present" <| + group_builder.specify "(in default mode) should skip UTF-8 BOM if it was present" pending="Encoding.default turned off temporarily" <| utf_8_bom = [-17, -69, -65] bytes = utf_8_bom + ('a,b\n1,2'.bytes Encoding.utf_8) f = File.create_temporary_file "delimited-utf-8-bom" ".csv" @@ -506,6 +506,10 @@ add_specs suite_builder = # The first column name now contains this invalid character, because it wasn't a BOM r.column_names.first . 
should_equal "￾a" + group_builder.specify "if UTF-16 encoding was selected but an inverted BOM is detected, a warning is issued (pt 2)" pending="Encoding.default turned off temporarily" <| + bytes = utf_16_le_bom + ('a,b\n1,2'.bytes Encoding.utf_16_be) + f = File.create_temporary_file "delimited-utf-16-inverted-bom" ".csv" + bytes.write_bytes f . should_succeed # If we read without specifying the encoding, we will infer UTF-16 LE encoding because of the BOM and get garbage: r2 = f.read r2.column_names . should_equal ["Column 1"] @@ -523,7 +527,7 @@ add_specs suite_builder = r.first_column.to_vector . should_equal ['\uFFFD'] Problems.expect_only_warning Encoding_Error r - group_builder.specify "should fall back to Windows-1252 encoding if invalid UTF-8 characters are encountered in Default encoding" <| + group_builder.specify "should fall back to Windows-1252 encoding if invalid UTF-8 characters are encountered in Default encoding" pending="Encoding.default turned off temporarily" <| f = File.create_temporary_file "delimited-invalid-utf-8" ".csv" # On the simple characters all three encodings (ASCII, UTF-8 and Win-1252) agree, so we can use ASCII bytes. bytes = ('A,B\n1,y'.bytes Encoding.ascii) + [-1] + ('z\n2,-'.bytes Encoding.ascii) diff --git a/test/Table_Tests/src/IO/Delimited_Write_Spec.enso b/test/Table_Tests/src/IO/Delimited_Write_Spec.enso index 855ef66551..41424d0470 100644 --- a/test/Table_Tests/src/IO/Delimited_Write_Spec.enso +++ b/test/Table_Tests/src/IO/Delimited_Write_Spec.enso @@ -569,7 +569,7 @@ add_specs suite_builder = ## If the Delimited config has Encoding.default, the encoding for read will be determined by BOM and Win-1252 fallback heuristics. The same encoding should be used for writing, to ensure that when the resulting file is read, all content is correctly decoded. 
- group_builder.specify "should use the same effective encoding for writing as the one that would be used for reading" <| + group_builder.specify "should use the same effective encoding for writing as the one that would be used for reading" pending="Encoding.default turned off temporarily" <| f = File.create_temporary_file "append-detect" ".csv" Test.with_clue "UTF-16 detected by BOM: " <| bom = [-1, -2]