From 88f32d9b2aebcde940db9e430a6dbf64a80450b8 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Sun, 23 Jul 2023 09:04:11 +0000 Subject: [PATCH] Various small tickets... (#7367) - Added `Text.length` into Text class so CB lists the built in. - Added `File.starts_with` and tests for the built in method. - Add `to_js_object` and `to_display_text` to `Regex`. ![image](https://github.com/enso-org/enso/assets/4699705/3b197c94-9c49-4bc5-a2cc-ce53b917942e) - Add `to_js_object` and `to_display_text` to `Match`. ![image](https://github.com/enso-org/enso/assets/4699705/962ec4f2-324d-4f10-8ec0-932b093c6729) - Remove the `bit_shift_l` alias from the built-ins. - Add test and Enso wrapper for `Text.is_normalized`. --- .../Base/0.0.0-dev/src/Data/Numbers.enso | 2 +- .../Base/0.0.0-dev/src/Data/Text.enso | 25 ++++++++++++++ .../Base/0.0.0-dev/src/Data/Text/Regex.enso | 12 +++++++ .../0.0.0-dev/src/Data/Text/Regex/Match.enso | 12 +++++++ .../Base/0.0.0-dev/src/System/File.enso | 34 ++++++++++++------- .../Database/0.0.0-dev/src/Data/Table.enso | 2 +- .../number/bigInteger/BitShiftNode.java | 6 +--- .../number/smallInteger/BitShiftNode.java | 6 +--- test/Tests/src/Data/Text_Spec.enso | 6 ++++ test/Tests/src/Data/Time/Date_Time_Spec.enso | 5 +-- test/Tests/src/Semantic/Meta_Spec.enso | 4 +-- test/Tests/src/System/File_Spec.enso | 13 +++++++ 12 files changed, 99 insertions(+), 28 deletions(-) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Numbers.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Numbers.enso index fcb59c187f7..1335ad9fa73 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Numbers.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Numbers.enso @@ -1064,7 +1064,7 @@ type Integer 1.bit_shift_l 4 bit_shift_l : Integer -> Integer ! Arithmetic_Error - bit_shift_l self that = @Builtin_Method "Integer.bit_shift_l" + bit_shift_l self that = self.bit_shift that ## Performs a right-wise bit shift on the bits of this. 
diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text.enso index 531fb8b595c..1af454b2156 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text.enso @@ -1,5 +1,6 @@ import project.Any.Any import project.Data.Locale.Locale +import project.Data.Numbers.Integer import project.Data.Ordering.Ordering import project.Error.Error import project.Errors.Common.Type_Error @@ -18,6 +19,20 @@ polyglot java import org.enso.base.Text_Utils users with efficient concatenation operations. @Builtin_Type type Text + ## ALIAS Count + Computes the number of characters in the text. + + ! What is a Character? + A character is defined as an Extended Grapheme Cluster, see Unicode + Standard Annex 29. This is the smallest unit that still has semantic + meaning in most text-processing applications. + + > Example + Getting the length of the string "건반(Korean)". + + "건반(Korean)".length + length : Integer + length self = @Builtin_Method "Text.length" ## ALIAS Concatenate Concatenates the text that to the right side of this. @@ -117,3 +132,13 @@ type Text Conversion to Text that overrides the default `to_text` behavior. to_text : Text to_text self = self + + ## ADVANCED + Checks whether this Text is in FCD normalized form. 
+ + > Example + Check if the string is normalized + + "14.95€".is_normalized + is_normalized : Boolean + is_normalized self = @Builtin_Method "Text.is_normalized" diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex.enso index 51cbe252a0c..8db25d7c478 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex.enso @@ -1,5 +1,6 @@ import project.Any.Any import project.Data.Filter_Condition.Filter_Condition +import project.Data.Json.JS_Object import project.Data.Map.Map import project.Data.Numbers.Integer import project.Data.Range.Range @@ -61,6 +62,17 @@ type Regex (See https://github.com/oracle/graal/blob/master/regex/docs/README.md) Value (case_insensitive : Boolean) (internal_regex_object : Any) + ## PRIVATE + Converts this value to a JSON serializable object. + to_js_object : JS_Object + to_js_object self = + JS_Object.from_pairs [["type", "Regex"], ["case_insensitive", self.case_insensitive], ["pattern", self.internal_regex_object.pattern]] + + ## PRIVATE + Provides a human-readable representation of the `Regex`. + to_display_text : Text + to_display_text self = "Regex /" + self.internal_regex_object.pattern + "/" + (if self.case_insensitive then "i" else "") + ## Returns `True` if the input matches against the pattern described by `self`, otherwise `False`. 
diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex/Match.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex/Match.enso index c0aaab958fa..f5f34b35873 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex/Match.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex/Match.enso @@ -1,4 +1,5 @@ import project.Any.Any +import project.Data.Json.JS_Object import project.Data.Map.Map import project.Data.Numbers.Integer import project.Data.Range.Range @@ -22,6 +23,17 @@ type Match (See https://github.com/oracle/graal/blob/master/regex/docs/README.md) Value (pattern : Regex) (internal_regex_result : Any) (input : Text) + ## PRIVATE + Converts this value to a JSON serializable object. + to_js_object : JS_Object + to_js_object self = + JS_Object.from_pairs [["type", "Match"], ["text", self.text]] + + ## PRIVATE + Provides a human-readable representation of the `Match`. + to_display_text : Text + to_display_text self = "Match {" + self.text + "}" + ## PRIVATE Returns the start UTF16 character index of a group. diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/System/File.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/System/File.enso index c16e34e7983..9f9b7f093ae 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/System/File.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/System/File.enso @@ -359,6 +359,19 @@ type File exists : Boolean exists self = @Builtin_Method "File.exists" + ## Checks whether the file is within another path. + + Arguments: + - parent: The path to check if self is contained within. + + > Example + Check if a file is within another path. + + is_within = File.new "foo/bar" . starts_with (File.new "foo") + is_not_within = File.new "foo/bar" . starts_with (File.new "f") + starts_with : File -> Boolean + starts_with self parent = @Builtin_Method "File.starts_with" + ## Gets the creation time of a file. 
> Example @@ -668,13 +681,9 @@ type File the name `"abc"` or any name starting with `"x"`. The groups cannot be nested. - Keep in mind that if `recursive` is set to True and a `name_filter` is - used, the function will return files from subdirectories only if the set - `name_filter` allows crossing directories. So even with `recursive=True` - a filter `"*.txt"` will only return files that are immediate children of - the listed directory, to list files recursively you need to use a filter - like `"**.txt"` or `"*/*"` (which will match only files that are exactly - one directory down from the listed directory) or no filter at all. + If `recursive` is set to True and a `name_filter` does not contain `**`, + it will be automatically prefixed with `**/` to allow matching files in + subdirectories. > Example List all files with ".md" or ".txt" extension in the example directory @@ -685,19 +694,20 @@ type File example_list_md_files = Examples.data_dir.list name_filter="**.{txt,md}" recursive=True list : Text -> Boolean -> Vector File - list self name_filter=Nothing recursive=False = + list self name_filter:Text="" recursive:Boolean=False = all_files = case recursive of True -> list_descendants self False -> self.list_immediate_children - filtered = case name_filter of - Nothing -> all_files + case name_filter of + "" -> all_files _ -> + used_filter = if recursive.not || name_filter.contains "**" then name_filter else + (if name_filter.starts_with "*" then "*" else "**/") + name_filter fs = FileSystems.getDefault - matcher = fs.getPathMatcher "glob:"+name_filter + matcher = fs.getPathMatcher "glob:"+used_filter all_files.filter file-> pathStr = self.relativize file . path matcher.matches (Path.of pathStr) - filtered ## Checks if `self` is a child path of `other`. 
is_child_of : File -> Boolean diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index a7684a79ea6..9dd1d1cf6f8 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -1084,7 +1084,7 @@ type Table the same name. So `table.join other on=["A", "B"]` is a shorthand for: table.join other on=[Join_Condition.Equals "A" "A", Join_Condition.Equals "B" "B"] @on Widget_Helpers.make_join_condition_selector - join : Table -> Join_Kind -> Join_Condition | Text | Vector (Join_Condition | Text) -> Text -> Text -> Problem_Behavior -> Table + join : Table -> Join_Kind -> Join_Condition | Text | Vector (Join_Condition | Text) -> Text -> Problem_Behavior -> Table join self right (join_kind : Join_Kind = Join_Kind.Left_Outer) (on : Join_Condition | Text | Vector (Join_Condition | Text) = (default_join_condition self join_kind)) (right_prefix:Text="Right ") (on_problems:Problem_Behavior=Report_Warning) = self.join_or_cross_join right join_kind on right_prefix on_problems diff --git a/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/number/bigInteger/BitShiftNode.java b/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/number/bigInteger/BitShiftNode.java index 6a8379f0a85..8b678e47ba5 100644 --- a/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/number/bigInteger/BitShiftNode.java +++ b/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/number/bigInteger/BitShiftNode.java @@ -16,11 +16,7 @@ import org.enso.interpreter.runtime.error.PanicException; import org.enso.interpreter.runtime.number.EnsoBigInteger; @ImportStatic(BigIntegerOps.class) -@BuiltinMethod( - type = "Big_Integer", - name = "bit_shift", - description = "Bitwise shift.", - aliases = "bit_shift_l") +@BuiltinMethod(type = "Big_Integer", name 
= "bit_shift", description = "Bitwise shift.") public abstract class BitShiftNode extends Node { private @Child ToEnsoNumberNode toEnsoNumberNode = ToEnsoNumberNode.build(); private final CountingConditionProfile fitsInIntProfileLeftShift = diff --git a/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/number/smallInteger/BitShiftNode.java b/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/number/smallInteger/BitShiftNode.java index 63da858da7e..cd98b7d9c71 100644 --- a/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/number/smallInteger/BitShiftNode.java +++ b/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/number/smallInteger/BitShiftNode.java @@ -16,11 +16,7 @@ import org.enso.interpreter.runtime.error.PanicException; import org.enso.interpreter.runtime.number.EnsoBigInteger; @ImportStatic(BigIntegerOps.class) -@BuiltinMethod( - type = "Small_Integer", - name = "bit_shift", - description = "Bitwise shift.", - aliases = "bit_shift_l") +@BuiltinMethod(type = "Small_Integer", name = "bit_shift", description = "Bitwise shift.") public abstract class BitShiftNode extends Node { private @Child ToEnsoNumberNode toEnsoNumberNode = ToEnsoNumberNode.build(); private final CountingConditionProfile canShiftLeftInLongProfile = diff --git a/test/Tests/src/Data/Text_Spec.enso b/test/Tests/src/Data/Text_Spec.enso index 15baaeaee16..6cb577e887c 100644 --- a/test/Tests/src/Data/Text_Spec.enso +++ b/test/Tests/src/Data/Text_Spec.enso @@ -77,6 +77,12 @@ spec = kshi.length . should_equal 1 facepalm.length . should_equal 1 + Test.specify "should be able to tell if Text is normalized" <| + 'a'.is_normalized . should_be_true + "14.95€".is_normalized . should_be_true + 'é'.is_normalized . should_be_false + 'e\u{301}'.is_normalized . should_be_true + Test.specify "should compare strings using utf normalization" <| "abc"=="def" . should_be_false 'a'=='b' . 
should_be_false diff --git a/test/Tests/src/Data/Time/Date_Time_Spec.enso b/test/Tests/src/Data/Time/Date_Time_Spec.enso index 15777e4e183..929c5c8d671 100644 --- a/test/Tests/src/Data/Time/Date_Time_Spec.enso +++ b/test/Tests/src/Data/Time/Date_Time_Spec.enso @@ -693,8 +693,9 @@ spec_with name create_new_datetime parse_datetime nanoseconds_loss_in_precision= create_new_datetime 2022 10 30 1 30 . add_work_days 1 . should_equal (Date_Time.new 2022 11 1 1 30) create_new_datetime 2022 10 30 3 30 . add_work_days 1 . should_equal (Date_Time.new 2022 11 1 3 30) - create_new_datetime 2022 3 27 1 30 . add_work_days 0 . should_equal (Date_Time.new 2022 3 28 1 30) - create_new_datetime 2022 3 27 3 30 . add_work_days 1 . should_equal (Date_Time.new 2022 3 29 3 30) + tz = Time_Zone.parse "Europe/Warsaw" + create_new_datetime 2022 3 27 1 30 zone=tz . add_work_days 0 . should_equal (Date_Time.new 2022 3 28 1 30 zone=tz) + create_new_datetime 2022 3 27 3 30 zone=tz . add_work_days 1 . should_equal (Date_Time.new 2022 3 29 3 30 zone=tz) Test.specify "should handle shifting dates around autumn DST edge cases" pending=dst_overlap_message <| d3 = create_new_datetime 2022 10 30 2 30 15 0 tz diff --git a/test/Tests/src/Semantic/Meta_Spec.enso b/test/Tests/src/Semantic/Meta_Spec.enso index 2853f7eb2e2..571530b149a 100644 --- a/test/Tests/src/Semantic/Meta_Spec.enso +++ b/test/Tests/src/Semantic/Meta_Spec.enso @@ -255,10 +255,10 @@ spec = methods.sort . should_equal ['Value', 'create', 'factory', 'first_method', 'my_method', 'other_method', 'second_method'] Test.specify "methods of Integer" <| - Meta.meta Integer . methods . sort . should_equal ['round', 'truncate'] + Meta.meta Integer . methods . sort . should_equal ['bit_shift_l', 'round', 'truncate'] Test.specify "static methods of Integer" <| - Meta.meta (Meta.type_of Integer) . methods . sort . should_equal ['parse', 'parse_builtin', 'round', 'truncate'] + Meta.meta (Meta.type_of Integer) . methods . sort . 
should_equal ['bit_shift_l', 'parse', 'parse_builtin', 'round', 'truncate'] Test.specify "methods of Any" <| Meta.meta Any . methods . should_contain "to_text" diff --git a/test/Tests/src/System/File_Spec.enso b/test/Tests/src/System/File_Spec.enso index 7634fd73ad0..944dd9d4e4c 100644 --- a/test/Tests/src/System/File_Spec.enso +++ b/test/Tests/src/System/File_Spec.enso @@ -67,6 +67,10 @@ spec = (File.new "foo").should_equal (File.new "foo") (File.new "bar").should_not_equal (File.new "foo") + Test.specify "should allow checking in parent path with `starts_with`" <| + (File.new "foo/bar").starts_with (File.new "foo") . should_be_true + (File.new "foo/bar").starts_with (File.new "f") . should_be_false + Test.specify "should allow creating a directory" <| f = enso_project.data / "good_dir" f.delete_if_exists @@ -719,7 +723,16 @@ spec = filtered1 = root.list name_filter="**.txt" recursive=True . map .to_text filtered1.sort.should_equal (resolve ["sample.txt", "subdirectory/a.txt", "subdirectory/nested/b.txt"]) + filtered1b = root.list name_filter="*.txt" recursive=True . map .to_text + filtered1b.sort.should_equal (resolve ["sample.txt", "subdirectory/a.txt", "subdirectory/nested/b.txt"]) + filtered2 = root.list name_filter="*/*/*" recursive=True . map .to_text filtered2.should_equal (resolve ["subdirectory/nested/b.txt"]) + filtered3 = root.list name_filter="a.txt" recursive=True . map .to_text + filtered3.sort.should_equal (resolve ["subdirectory/a.txt"]) + + filtered4 = root.list name_filter="nested/*.txt" recursive=True . map .to_text + filtered4.sort.should_equal (resolve ["subdirectory/nested/b.txt"]) + main = Test_Suite.run_main spec