Data analysts should be able to use Text.location_of to find indexes within a string using various matchers (#3324)
Implements https://www.pivotaltracker.com/n/projects/2539304/stories/181266029
This commit is contained in: parent 3ef18ab5b8, commit 247b284316
@ -63,6 +63,7 @@
|
|||||||
- [Implemented `Bool.compare_to` method][3317]
|
- [Implemented `Bool.compare_to` method][3317]
|
||||||
- [Implemented `Map.first`, `Map.last` functions. Expanded `Table.group_by` to
|
- [Implemented `Map.first`, `Map.last` functions. Expanded `Table.group_by` to
|
||||||
also compute mode, percentile, minimum, maximum.][3318]
|
also compute mode, percentile, minimum, maximum.][3318]
|
||||||
|
- [Implemented `Text.location_of` and `Text.location_of_all` methods.][3324]
|
||||||
|
|
||||||
[debug-shortcuts]:
|
[debug-shortcuts]:
|
||||||
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
||||||
@ -100,7 +101,8 @@
|
|||||||
[3236]: https://github.com/enso-org/enso/pull/3236
|
[3236]: https://github.com/enso-org/enso/pull/3236
|
||||||
[3311]: https://github.com/enso-org/enso/pull/3311
|
[3311]: https://github.com/enso-org/enso/pull/3311
|
||||||
[3317]: https://github.com/enso-org/enso/pull/3317
|
[3317]: https://github.com/enso-org/enso/pull/3317
|
||||||
[3317]: https://github.com/enso-org/enso/pull/3318
|
[3318]: https://github.com/enso-org/enso/pull/3318
|
||||||
|
[3324]: https://github.com/enso-org/enso/pull/3324
|
||||||
|
|
||||||
#### Enso Compiler
|
#### Enso Compiler
|
||||||
|
|
||||||
|
@ -5,9 +5,11 @@ from Standard.Builtins import Text, Prim_Text_Helpers
|
|||||||
|
|
||||||
import Standard.Base.Data.Text.Regex
|
import Standard.Base.Data.Text.Regex
|
||||||
import Standard.Base.Data.Text.Regex.Mode
|
import Standard.Base.Data.Text.Regex.Mode
|
||||||
|
import Standard.Base.Data.Text.Matching_Mode
|
||||||
import Standard.Base.Data.Text.Case
|
import Standard.Base.Data.Text.Case
|
||||||
import Standard.Base.Data.Text.Location
|
import Standard.Base.Data.Text.Location
|
||||||
import Standard.Base.Data.Text.Line_Ending_Style
|
import Standard.Base.Data.Text.Line_Ending_Style
|
||||||
|
from Standard.Base.Data.Text.Span as Span_Module import Span
|
||||||
import Standard.Base.Data.Text.Split_Kind
|
import Standard.Base.Data.Text.Split_Kind
|
||||||
import Standard.Base.Data.Text.Text_Sub_Range
|
import Standard.Base.Data.Text.Text_Sub_Range
|
||||||
import Standard.Base.Data.Locale
|
import Standard.Base.Data.Locale
|
||||||
@ -15,6 +17,7 @@ import Standard.Base.Meta
|
|||||||
|
|
||||||
from Standard.Builtins export Text
|
from Standard.Builtins export Text
|
||||||
|
|
||||||
|
export Standard.Base.Data.Text.Matching_Mode
|
||||||
export Standard.Base.Data.Text.Case
|
export Standard.Base.Data.Text.Case
|
||||||
export Standard.Base.Data.Text.Location
|
export Standard.Base.Data.Text.Location
|
||||||
export Standard.Base.Data.Text.Split_Kind
|
export Standard.Base.Data.Text.Split_Kind
|
||||||
@ -546,7 +549,7 @@ Text.== that = if Meta.is_same_object this Text then Meta.is_same_object that Te
|
|||||||
(('É' . equals_ignore_case 'é') && ('é' . equals_ignore_case 'e\u0301')) == True
|
(('É' . equals_ignore_case 'é') && ('é' . equals_ignore_case 'e\u0301')) == True
|
||||||
Text.equals_ignore_case : Text -> Locale -> Boolean
|
Text.equals_ignore_case : Text -> Locale -> Boolean
|
||||||
Text.equals_ignore_case that locale=Locale.default =
|
Text.equals_ignore_case that locale=Locale.default =
|
||||||
(this.to_case_insensitive_key locale) == (that.to_case_insensitive_key locale)
|
Text_Utils.equals_ignore_case this that locale.java_locale
|
||||||
|
|
||||||
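For context, the new `Text_Utils.equals_ignore_case` helper shown later in this diff is built on ICU case folding. A minimal standalone sketch of the underlying idea, assuming only ICU4J on the classpath (the class and method names below are illustrative, not the library's own):

import com.ibm.icu.text.CaseMap;
import com.ibm.icu.text.Normalizer2;

public class CaseInsensitiveEquals {
    /** Case-insensitive comparison that also ignores canonical-equivalence differences (sketch). */
    public static boolean equalsIgnoreCase(String a, String b) {
        CaseMap.Fold fold = CaseMap.fold();             // Unicode full case folding
        Normalizer2 nfd = Normalizer2.getNFDInstance(); // canonical decomposition
        return nfd.normalize(fold.apply(a)).equals(nfd.normalize(fold.apply(b)));
    }

    public static void main(String[] args) {
        // Composed 'É' compared against 'e' + combining acute accent, ignoring case.
        System.out.println(equalsIgnoreCase("É", "e\u0301")); // expected: true
    }
}

The real helper additionally takes a `Locale` (to handle the Turkish `i`) and accepts a non-String second argument, as its Java source further down shows.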
## ADVANCED
|
## ADVANCED
|
||||||
PRIVATE
|
PRIVATE
|
||||||
@ -555,7 +558,7 @@ Text.equals_ignore_case that locale=Locale.default =
|
|||||||
used to perform case-insensitive comparisons.
|
used to perform case-insensitive comparisons.
|
||||||
Text.to_case_insensitive_key : Locale -> Text
|
Text.to_case_insensitive_key : Locale -> Text
|
||||||
Text.to_case_insensitive_key locale=Locale.default =
|
Text.to_case_insensitive_key locale=Locale.default =
|
||||||
this.to_case Case.Lower locale . to_case Case.Upper locale
|
Text_Utils.case_insensitive_key this locale.java_locale
|
||||||
|
|
||||||
## Compare two texts to discover their ordering.
|
## Compare two texts to discover their ordering.
|
||||||
|
|
||||||
@ -895,7 +898,7 @@ Text.contains term="" matcher=Text_Matcher.new = case matcher of
|
|||||||
Text_Matcher case_sensitivity -> case case_sensitivity of
|
Text_Matcher case_sensitivity -> case case_sensitivity of
|
||||||
True -> Text_Utils.contains this term
|
True -> Text_Utils.contains this term
|
||||||
Case_Insensitive locale ->
|
Case_Insensitive locale ->
|
||||||
Text_Utils.contains (this.to_case_insensitive_key locale) (term.to_case_insensitive_key locale)
|
Text_Utils.contains_case_insensitive this term locale.java_locale
|
||||||
Regex_Matcher _ _ _ _ _ ->
|
Regex_Matcher _ _ _ _ _ ->
|
||||||
compiled_pattern = matcher.compile term
|
compiled_pattern = matcher.compile term
|
||||||
match = compiled_pattern.match this Mode.First
|
match = compiled_pattern.match this Mode.First
|
||||||
@ -952,27 +955,6 @@ Text.repeat count=1 =
|
|||||||
https://www.pivotaltracker.com/story/show/181435598
|
https://www.pivotaltracker.com/story/show/181435598
|
||||||
0.up_to (count.max 0) . fold "" acc-> _-> acc + this
|
0.up_to (count.max 0) . fold "" acc-> _-> acc + this
|
||||||
|
|
||||||
## PRIVATE
|
|
||||||
Utility function taking a range pointing at grapheme clusters and converting to a range on the underlying code points
|
|
||||||
range_to_char_indices : Text -> Range -> Range ! Index_Out_Of_Bounds_Error
|
|
||||||
range_to_char_indices text range =
|
|
||||||
len = text.length
|
|
||||||
start = if range.start < 0 then range.start + len else range.start
|
|
||||||
end = if range.end == Nothing then len else (if range.end < 0 then range.end + len else range.end)
|
|
||||||
is_valid = (Range 0 len+1).contains
|
|
||||||
|
|
||||||
case (Pair (is_valid start) (is_valid end)) of
|
|
||||||
Pair False _ -> Error.throw (Index_Out_Of_Bounds_Error range.start len)
|
|
||||||
Pair True False -> Error.throw (Index_Out_Of_Bounds_Error range.end len)
|
|
||||||
Pair True True ->
|
|
||||||
if start>=end then (Range 0 0) else
|
|
||||||
iterator = BreakIterator.getCharacterInstance
|
|
||||||
iterator.setText text
|
|
||||||
|
|
||||||
start_index = iterator.next start
|
|
||||||
end_index = iterator.next (end - start)
|
|
||||||
Range start_index end_index
|
|
||||||
|
|
||||||
## ALIAS first, last, left, right, mid, substring
|
## ALIAS first, last, left, right, mid, substring
|
||||||
Creates a new Text by selecting the specified range of the input.
|
Creates a new Text by selecting the specified range of the input.
|
||||||
|
|
||||||
@ -1009,7 +991,7 @@ range_to_char_indices text range =
|
|||||||
Text.take : (Text_Sub_Range | Range) -> Text ! Index_Out_Of_Bounds_Error
|
Text.take : (Text_Sub_Range | Range) -> Text ! Index_Out_Of_Bounds_Error
|
||||||
Text.take range =
|
Text.take range =
|
||||||
char_range = case range of
|
char_range = case range of
|
||||||
Range _ _ -> here.range_to_char_indices this range
|
Range _ _ -> Span_Module.range_to_char_indices this range
|
||||||
_ -> range.to_char_range this
|
_ -> range.to_char_range this
|
||||||
Text_Utils.substring this char_range.start char_range.end
|
Text_Utils.substring this char_range.start char_range.end
|
||||||
|
|
||||||
@ -1049,7 +1031,7 @@ Text.take range =
|
|||||||
Text.drop : (Text_Sub_Range | Range) -> Text ! Index_Out_Of_Bounds_Error
|
Text.drop : (Text_Sub_Range | Range) -> Text ! Index_Out_Of_Bounds_Error
|
||||||
Text.drop range =
|
Text.drop range =
|
||||||
char_range = case range of
|
char_range = case range of
|
||||||
Range _ _ -> here.range_to_char_indices this range
|
Range _ _ -> Span_Module.range_to_char_indices this range
|
||||||
_ -> range.to_char_range this
|
_ -> range.to_char_range this
|
||||||
if char_range.start == 0 then Text_Utils.drop_first this char_range.end else
|
if char_range.start == 0 then Text_Utils.drop_first this char_range.end else
|
||||||
prefix = Text_Utils.substring this 0 char_range.start
|
prefix = Text_Utils.substring this 0 char_range.start
|
||||||
@ -1184,3 +1166,204 @@ Text.trim where=Location.Both what=_.is_whitespace =
|
|||||||
loop current break_iterator.previous
|
loop current break_iterator.previous
|
||||||
if start_index >= end_index then "" else
|
if start_index >= end_index then "" else
|
||||||
Text_Utils.substring this start_index end_index
|
Text_Utils.substring this start_index end_index
|
||||||
|
|
||||||
|
## ALIAS find, index_of, position_of, span_of
|
||||||
|
Find the location of the `term` in the input.
|
||||||
|
Returns a Span representing the location at which the term was found, or
|
||||||
|
`Nothing` if the term was not found in the input.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- term: The term to find.
|
||||||
|
- mode: Specifies if the first or last occurrence of the term should be
|
||||||
|
returned if there are multiple occurrences within the input. The first
|
||||||
|
occurrence is returned by default.
|
||||||
|
- matcher: Specifies how the term is matched against the input:
|
||||||
|
- If a `Text_Matcher`, the text is compared using the case-sensitivity rules
|
||||||
|
specified in the matcher.
|
||||||
|
- If a `Regex_Matcher`, the `term` is used as a regular expression and
|
||||||
|
matched using the associated options.
|
||||||
|
|
||||||
|
! What is a Character?
|
||||||
|
A character is defined as an Extended Grapheme Cluster, see Unicode
|
||||||
|
Standard Annex 29. This is the smallest unit that still has semantic
|
||||||
|
meaning in most text-processing applications.
|
||||||
|
|
||||||
|
> Example
|
||||||
|
Finding location of a substring.
|
||||||
|
|
||||||
|
"Hello World!".location_of "J" == Nothing
|
||||||
|
"Hello World!".location_of "o" == Span (Range 4 5) "Hello World!"
|
||||||
|
"Hello World!".location_of "o" mode=Matching_Mode.Last == Span (Range 7 8) "Hello World!"
|
||||||
|
|
||||||
|
! Match Length
|
||||||
|
The function returns not only the index of the match but a `Span` instance
|
||||||
|
which contains both the start and end indices, allowing one to determine the
|
||||||
|
length of the match. This is useful not only with regex matches (where a
|
||||||
|
regular expression can have matches of various lengths) but also for case
|
||||||
|
insensitive matching. In case insensitive mode, a single character can
|
||||||
|
match multiple characters, for example `ß` will match `ss` and `SS`, and
|
||||||
|
the ligature `ﬃ` will match `ffi` or `f` etc. Thus in case insensitive
|
||||||
|
mode, the length of the match can be shorter or longer than the term that
|
||||||
|
was being matched, so it is extremely important to not rely on the length
|
||||||
|
of the matched term when analysing the matches as they may have different
|
||||||
|
lengths.
|
||||||
|
|
||||||
|
> Example
|
||||||
|
Match length differences in case insensitive matching.
|
||||||
|
|
||||||
|
term = "straße"
|
||||||
|
text = "MONUMENTENSTRASSE 42"
|
||||||
|
match = text . location_of term matcher=(Text_Matcher Case_Insensitive.new)
|
||||||
|
term.length == 6
|
||||||
|
match.length == 7
|
||||||
|
|
||||||
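On the Java side, the case-insensitive search path case-folds both the haystack and the needle and runs an ICU `StringSearch` over the folded text; the match length it reports is measured in the text, not in the term, which is exactly why the lengths above differ. A rough standalone sketch of that idea (hypothetical demo code; the library's `span_of_case_insensitive` additionally maps the folded positions back to grapheme indices of the original string):

import com.ibm.icu.text.CaseMap;
import com.ibm.icu.text.StringSearch;

public class FoldedSearchDemo {
    public static void main(String[] args) {
        CaseMap.Fold fold = CaseMap.fold();
        String haystack = fold.apply("MONUMENTENSTRASSE 42"); // "monumentenstrasse 42"
        String needle = fold.apply("straße");                 // "strasse" - ß folds to "ss"
        StringSearch search = new StringSearch(needle, haystack);
        int pos = search.first();
        if (pos != StringSearch.DONE) {
            // Expected to print 10 and 7: the 6-character term matches a 7-character span.
            System.out.println(pos + " " + search.getMatchLength());
        }
    }
}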
|
! Matching Grapheme Clusters
|
||||||
|
In case insensitive mode, a single character can match multiple characters,
|
||||||
|
for example `ß` will match `ss` and `SS`, and the ligature `ﬃ` will match
|
||||||
|
`ffi` or `f` etc. Thus in this mode, it is sometimes possible for a term to
|
||||||
|
match only a part of some single grapheme cluster, for example in the text
|
||||||
|
`ﬃa` the term `ia` will match just one-third of the first grapheme `ﬃ`.
|
||||||
|
Since we do not have the resolution to distinguish such partial matches
|
||||||
|
(as that would require non-integer indices), a match which matched just
|
||||||
|
a part of some grapheme cluster is extended and treated as if it matched
|
||||||
|
the whole grapheme cluster.
|
||||||
|
|
||||||
|
> Example
|
||||||
|
Extending matches to full grapheme clusters.
|
||||||
|
|
||||||
|
ligatures = "ffiffl"
|
||||||
|
ligatures.length == 2
|
||||||
|
term_1 = "IFF"
|
||||||
|
match_1 = ligatures . location_of term_1 matcher=(Text_Matcher Case_Insensitive.new)
|
||||||
|
term_1.length == 3
|
||||||
|
match_1.length == 2
|
||||||
|
term_2 = "ffiffl"
|
||||||
|
match_2 = ligatures . location_of term_2 matcher=(Text_Matcher Case_Insensitive.new)
|
||||||
|
term_2.length == 6
|
||||||
|
match_2.length == 2
|
||||||
|
# After being extended to full grapheme clusters, both terms "IFF" and "ffiffl" match the same span of grapheme clusters.
|
||||||
|
match_1 == match_2
|
||||||
|
Text.location_of : Text -> (Matching_Mode.First | Matching_Mode.Last) -> Matcher -> Span | Nothing
|
||||||
|
Text.location_of term="" mode=Matching_Mode.First matcher=Text_Matcher.new = case matcher of
|
||||||
|
Text_Matcher case_sensitive -> case case_sensitive of
|
||||||
|
True ->
|
||||||
|
codepoint_span = case mode of
|
||||||
|
Matching_Mode.First -> Text_Utils.span_of this term
|
||||||
|
Matching_Mode.Last -> Text_Utils.last_span_of this term
|
||||||
|
if codepoint_span.is_nothing then Nothing else
|
||||||
|
start = Text_Utils.utf16_index_to_grapheme_index this codepoint_span.start
|
||||||
|
## While the codepoint_span may have different code unit length
|
||||||
|
from our term, the `length` counted in grapheme clusters is
|
||||||
|
guaranteed to be the same.
|
||||||
|
end = start + term.length
|
||||||
|
Span (Range start end) this
|
||||||
|
Case_Insensitive locale -> case term.is_empty of
|
||||||
|
True -> case mode of
|
||||||
|
Matching_Mode.First -> Span (Range 0 0) this
|
||||||
|
Matching_Mode.Last ->
|
||||||
|
end = this.length
|
||||||
|
Span (Range end end) this
|
||||||
|
False ->
|
||||||
|
search_for_last = case mode of
|
||||||
|
Matching_Mode.First -> False
|
||||||
|
Matching_Mode.Last -> True
|
||||||
|
case Text_Utils.span_of_case_insensitive this term locale.java_locale search_for_last of
|
||||||
|
Nothing -> Nothing
|
||||||
|
grapheme_span ->
|
||||||
|
Span (Range grapheme_span.start grapheme_span.end) this
|
||||||
|
Regex_Matcher _ _ _ _ _ -> case mode of
|
||||||
|
Matching_Mode.First ->
|
||||||
|
case matcher.compile term . match this Mode.First of
|
||||||
|
Nothing -> Nothing
|
||||||
|
match -> match.span 0 . to_grapheme_span
|
||||||
|
Matching_Mode.Last ->
|
||||||
|
case matcher.compile term . match this Mode.All of
|
||||||
|
Nothing -> Nothing
|
||||||
|
matches -> matches.last.span 0 . to_grapheme_span
|
||||||
|
|
||||||
|
## ALIAS find_all, index_of_all, position_of_all, span_of_all
|
||||||
|
Finds all the locations of the `term` in the input.
|
||||||
|
If not found, the function returns an empty Vector.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- term: The term to find.
|
||||||
|
- matcher: Specifies how the term is matched against the input:
|
||||||
|
- If a `Text_Matcher`, the text is compared using the case-sensitivity rules
|
||||||
|
specified in the matcher.
|
||||||
|
- If a `Regex_Matcher`, the `term` is used as a regular expression and
|
||||||
|
matched using the associated options.
|
||||||
|
|
||||||
|
! What is a Character?
|
||||||
|
A character is defined as an Extended Grapheme Cluster, see Unicode
|
||||||
|
Standard Annex 29. This is the smallest unit that still has semantic
|
||||||
|
meaning in most text-processing applications.
|
||||||
|
|
||||||
|
> Example
|
||||||
|
Finding locations of all occurrences of a substring.
|
||||||
|
|
||||||
|
"Hello World!".location_of_all "J" == []
|
||||||
|
"Hello World!".location_of_all "o" . map .start == [4, 7]
|
||||||
|
|
||||||
|
! Match Length
|
||||||
|
The function returns not only the index of the match but a `Span` instance
|
||||||
|
which contains both the start and end indices, allowing one to determine the
|
||||||
|
length of the match. This is useful not only with regex matches (where a
|
||||||
|
regular expression can have matches of various lengths) but also for case
|
||||||
|
insensitive matching. In case insensitive mode, a single character can
|
||||||
|
match multiple characters, for example `ß` will match `ss` and `SS`, and
|
||||||
|
the ligature `ﬃ` will match `ffi` or `f` etc. Thus in case insensitive
|
||||||
|
mode, the length of the match can be shorter or longer than the term that
|
||||||
|
was being matched, so it is extremely important to not rely on the length
|
||||||
|
of the matched term when analysing the matches as they may have different
|
||||||
|
lengths.
|
||||||
|
|
||||||
|
> Example
|
||||||
|
Match length differences in case insensitive matching.
|
||||||
|
|
||||||
|
term = "strasse"
|
||||||
|
text = "MONUMENTENSTRASSE ist eine große Straße."
|
||||||
|
match = text . location_of_all term matcher=(Text_Matcher Case_Insensitive.new)
|
||||||
|
term.length == 7
|
||||||
|
match . map .length == [7, 6]
|
||||||
|
|
||||||
|
! Matching Grapheme Clusters
|
||||||
|
In case insensitive mode, a single character can match multiple characters,
|
||||||
|
for example `ß` will match `ss` and `SS`, and the ligature `ﬃ` will match
|
||||||
|
`ffi` or `f` etc. Thus in this mode, it is sometimes possible for a term to
|
||||||
|
match only a part of some single grapheme cluster, for example in the text
|
||||||
|
`ﬃa` the term `ia` will match just one-third of the first grapheme `ﬃ`.
|
||||||
|
Since we do not have the resolution to distinguish such partial matches
|
||||||
|
(as that would require non-integer indices), a match which matched just
|
||||||
|
a part of some grapheme cluster is extended and treated as if it matched
|
||||||
|
the whole grapheme cluster.
|
||||||
|
|
||||||
|
> Example
|
||||||
|
Extending matches to full grapheme clusters.
|
||||||
|
|
||||||
|
ligatures = "ffifflFFIFF"
|
||||||
|
ligatures.length == 7
|
||||||
|
match_1 = ligatures . location_of_all "IFF" matcher=(Text_Matcher Case_Insensitive.new)
|
||||||
|
match_1 . map .length == [2, 3]
|
||||||
|
match_2 = ligatures . location_of_all "ffiff" matcher=(Text_Matcher Case_Insensitive.new)
|
||||||
|
match_2 . map .length == [2, 5]
|
||||||
|
Text.location_of_all : Text -> Matcher -> [Span]
|
||||||
|
Text.location_of_all term="" matcher=Text_Matcher.new = case matcher of
|
||||||
|
Text_Matcher case_sensitive -> if term.is_empty then Vector.new (this.length + 1) (ix -> Span (Range ix ix) this) else case case_sensitive of
|
||||||
|
True ->
|
||||||
|
codepoint_spans = Vector.from_array <| Text_Utils.span_of_all this term
|
||||||
|
grapheme_ixes = Vector.from_array <| Text_Utils.utf16_indices_to_grapheme_indices this (codepoint_spans.map .start).to_array
|
||||||
|
## While the codepoint_spans may have different code unit lengths
|
||||||
|
from our term, the `length` counted in grapheme clusters is
|
||||||
|
guaranteed to be the same.
|
||||||
|
offset = term.length
|
||||||
|
grapheme_ixes . map start->
|
||||||
|
end = start+offset
|
||||||
|
Span (Range start end) this
|
||||||
|
Case_Insensitive locale ->
|
||||||
|
grapheme_spans = Vector.from_array <| Text_Utils.span_of_all_case_insensitive this term locale.java_locale
|
||||||
|
grapheme_spans.map grapheme_span->
|
||||||
|
Span (Range grapheme_span.start grapheme_span.end) this
|
||||||
|
Regex_Matcher _ _ _ _ _ ->
|
||||||
|
case matcher.compile term . match this Mode.All of
|
||||||
|
Nothing -> []
|
||||||
|
matches -> matches.map m-> m.span 0 . to_grapheme_span
|
||||||
|
@ -0,0 +1,5 @@
|
|||||||
|
## Matches the first found instance.
|
||||||
|
type First
|
||||||
|
|
||||||
|
## Matches the last found instance.
|
||||||
|
type Last
|
@ -40,7 +40,7 @@ import Standard.Base.Data.Text.Regex.Engine
|
|||||||
import Standard.Base.Data.Text.Regex.Option as Global_Option
|
import Standard.Base.Data.Text.Regex.Option as Global_Option
|
||||||
import Standard.Base.Data.Text.Regex.Mode
|
import Standard.Base.Data.Text.Regex.Mode
|
||||||
import Standard.Base.Polyglot.Java as Java_Ext
|
import Standard.Base.Polyglot.Java as Java_Ext
|
||||||
import Standard.Base.Data.Text.Span
|
from Standard.Base.Data.Text.Span as Span_Module import Utf_16_Span
|
||||||
|
|
||||||
from Standard.Builtins import Java
|
from Standard.Builtins import Java
|
||||||
|
|
||||||
@ -183,8 +183,13 @@ type Pattern
|
|||||||
on the encoding, we normalize all input.
|
on the encoding, we normalize all input.
|
||||||
build_matcher : Text -> Integer -> Integer -> Java_Matcher
|
build_matcher : Text -> Integer -> Integer -> Java_Matcher
|
||||||
build_matcher input start end =
|
build_matcher input start end =
|
||||||
normalized_input = if this.options.contains Global_Option.Ascii_Matching then input else
|
## TODO [RW] Normalization had to be disabled - since start and end are
|
||||||
Text_Utils.normalize input
|
in code unit space, normalization could shift these indices!
|
||||||
|
This should be addressed when reviewing
|
||||||
|
See: https://www.pivotaltracker.com/story/show/181524498
|
||||||
|
#normalized_input = if this.options.contains Global_Option.Ascii_Matching then input else
|
||||||
|
# Text_Utils.normalize input
|
||||||
|
normalized_input = input
|
||||||
internal_matcher = this.internal_pattern.matcher normalized_input . region start end
|
internal_matcher = this.internal_pattern.matcher normalized_input . region start end
|
||||||
|
|
||||||
if this.options.contains No_Anchoring_Bounds then
|
if this.options.contains No_Anchoring_Bounds then
|
||||||
@ -262,7 +267,7 @@ type Pattern
|
|||||||
internal_matcher = this.build_matcher input start end
|
internal_matcher = this.build_matcher input start end
|
||||||
|
|
||||||
if internal_matcher . find start . not then Nothing else
|
if internal_matcher . find start . not then Nothing else
|
||||||
Match internal_matcher start end
|
Match internal_matcher start end input
|
||||||
Integer ->
|
Integer ->
|
||||||
if mode < 0 then Panic.throw <|
|
if mode < 0 then Panic.throw <|
|
||||||
Mode_Error "Cannot match a negative number of times."
|
Mode_Error "Cannot match a negative number of times."
|
||||||
@ -272,13 +277,16 @@ type Pattern
|
|||||||
go : Integer -> Integer -> Nothing
|
go : Integer -> Integer -> Nothing
|
||||||
go offset remaining_count =
|
go offset remaining_count =
|
||||||
should_continue = remaining_count > 0
|
should_continue = remaining_count > 0
|
||||||
if should_continue.not || (offset > end) then Nothing else
|
if should_continue.not || (offset >= end) then Nothing else
|
||||||
internal_matcher = this.build_matcher input start end
|
internal_matcher = this.build_matcher input start end
|
||||||
found = internal_matcher.find offset
|
found = internal_matcher.find offset
|
||||||
|
|
||||||
if found.not then Nothing else
|
if found.not then Nothing else
|
||||||
builder.append (Match internal_matcher start end)
|
builder.append (Match internal_matcher start end input)
|
||||||
@Tail_Call go (internal_matcher.end 0) remaining_count-1
|
match_end = internal_matcher.end 0
|
||||||
|
# Ensure progress even if the match is an empty string.
|
||||||
|
new_offset = if match_end > offset then match_end else offset+1
|
||||||
|
@Tail_Call go new_offset remaining_count-1
|
||||||
|
|
||||||
go start mode
|
go start mode
|
||||||
vector = builder.to_vector
|
vector = builder.to_vector
|
||||||
@ -294,8 +302,11 @@ type Pattern
|
|||||||
found = internal_matcher.find offset
|
found = internal_matcher.find offset
|
||||||
|
|
||||||
if found.not then Nothing else
|
if found.not then Nothing else
|
||||||
builder.append (Match internal_matcher start end)
|
builder.append (Match internal_matcher start end input)
|
||||||
@Tail_Call go (internal_matcher.end 0)
|
match_end = internal_matcher.end 0
|
||||||
|
# Ensure progress even if the match is an empty string.
|
||||||
|
new_offset = if match_end > offset then match_end else offset+1
|
||||||
|
@Tail_Call go new_offset
|
||||||
|
|
||||||
go start
|
go start
|
||||||
vector = builder.to_vector
|
vector = builder.to_vector
|
||||||
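The change above guards against an infinite loop when the compiled pattern can match the empty string: the next search offset is bumped by one code unit whenever the previous match did not advance. The same guard expressed in plain `java.util.regex` terms (a hypothetical standalone sketch, not the library's code):

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class FindAllDemo {
    /** Collects all matches, stepping forward by one position after an empty match. */
    static List<String> findAll(Pattern pattern, String input) {
        Matcher matcher = pattern.matcher(input);
        List<String> results = new ArrayList<>();
        int offset = 0;
        while (offset <= input.length() && matcher.find(offset)) {
            results.add(matcher.group());
            int matchEnd = matcher.end();
            // Ensure progress even if the match is an empty string.
            offset = matchEnd > offset ? matchEnd : offset + 1;
        }
        return results;
    }

    public static void main(String[] args) {
        // Without the offset bump, a pattern like "a*" would loop forever on "baa".
        System.out.println(findAll(Pattern.compile("a*"), "baa")); // prints [, aa, ]
    }
}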
@ -304,7 +315,7 @@ type Pattern
|
|||||||
Mode.Full ->
|
Mode.Full ->
|
||||||
internal_matcher = this.build_matcher input start end
|
internal_matcher = this.build_matcher input start end
|
||||||
if internal_matcher.matches.not then Nothing else
|
if internal_matcher.matches.not then Nothing else
|
||||||
Match internal_matcher start end
|
Match internal_matcher start end input
|
||||||
Mode.Bounded _ _ _ -> Panic.throw <|
|
Mode.Bounded _ _ _ -> Panic.throw <|
|
||||||
Mode_Error "Modes cannot be recursive."
|
Mode_Error "Modes cannot be recursive."
|
||||||
|
|
||||||
@ -312,7 +323,7 @@ type Pattern
|
|||||||
Mode.Bounded start end sub_mode ->
|
Mode.Bounded start end sub_mode ->
|
||||||
if start < end then do_match_mode sub_mode start end else
|
if start < end then do_match_mode sub_mode start end else
|
||||||
Panic.throw Invalid_Bounds_Error
|
Panic.throw Invalid_Bounds_Error
|
||||||
_ -> do_match_mode mode 0 input.length
|
_ -> do_match_mode mode 0 (Text_Utils.char_length input)
|
||||||
|
|
||||||
## ADVANCED
|
## ADVANCED
|
||||||
|
|
||||||
@ -334,7 +345,7 @@ type Pattern
|
|||||||
pattern.matches input
|
pattern.matches input
|
||||||
matches : Text -> Boolean
|
matches : Text -> Boolean
|
||||||
matches input = case this.match input mode=Mode.Full of
|
matches input = case this.match input mode=Mode.Full of
|
||||||
Match _ _ _ -> True
|
Match _ _ _ _ -> True
|
||||||
Vector.Vector _ -> True
|
Vector.Vector _ -> True
|
||||||
_ -> False
|
_ -> False
|
||||||
|
|
||||||
@ -405,7 +416,7 @@ type Pattern
|
|||||||
find input mode=Mode.All =
|
find input mode=Mode.All =
|
||||||
matches = this.match input mode
|
matches = this.match input mode
|
||||||
case matches of
|
case matches of
|
||||||
Match _ _ _ -> matches.group 0
|
Match _ _ _ _ -> matches.group 0
|
||||||
Vector.Vector _ -> matches.map (_.group 0)
|
Vector.Vector _ -> matches.map (_.group 0)
|
||||||
_ -> matches
|
_ -> matches
|
||||||
|
|
||||||
@ -548,7 +559,7 @@ type Pattern
|
|||||||
internal_matcher.replaceAll replacement
|
internal_matcher.replaceAll replacement
|
||||||
Mode.Full ->
|
Mode.Full ->
|
||||||
case this.match input mode=Mode.Full of
|
case this.match input mode=Mode.Full of
|
||||||
Match _ _ _ -> replacement
|
Match _ _ _ _ -> replacement
|
||||||
Nothing -> input
|
Nothing -> input
|
||||||
Mode.Bounded _ _ _ -> Panic.throw <|
|
Mode.Bounded _ _ _ -> Panic.throw <|
|
||||||
Mode_Error "Modes cannot be recursive."
|
Mode_Error "Modes cannot be recursive."
|
||||||
@ -556,7 +567,7 @@ type Pattern
|
|||||||
case mode of
|
case mode of
|
||||||
Mode.Bounded _ _ _ -> Panic.throw <|
|
Mode.Bounded _ _ _ -> Panic.throw <|
|
||||||
Mode_Error "Bounded replacements are not well-formed."
|
Mode_Error "Bounded replacements are not well-formed."
|
||||||
_ -> do_replace_mode mode 0 input.length
|
_ -> do_replace_mode mode 0 (Text_Utils.char_length input)
|
||||||
|
|
||||||
## The default implementation of the `Data.Text.Regex.Engine.Match` interface.
|
## The default implementation of the `Data.Text.Regex.Engine.Match` interface.
|
||||||
type Match
|
type Match
|
||||||
@ -570,7 +581,8 @@ type Match
|
|||||||
match.
|
match.
|
||||||
- region_start: The start of the region over which the match was made.
|
- region_start: The start of the region over which the match was made.
|
||||||
- region_end: The end of the region over which the match was made.
|
- region_end: The end of the region over which the match was made.
|
||||||
type Match (internal_match : Java_Matcher) (region_start : Integer) (region_end : Integer)
|
- input: The input text that was being matched.
|
||||||
|
type Match (internal_match : Java_Matcher) (region_start : Integer) (region_end : Integer) (input : Text)
|
||||||
|
|
||||||
## Gets the text matched by the group with the provided identifier, or
|
## Gets the text matched by the group with the provided identifier, or
|
||||||
`Nothing` if the group did not participate in the match. If no such group
|
`Nothing` if the group did not participate in the match. If no such group
|
||||||
@ -743,10 +755,10 @@ type Match
|
|||||||
example_Span =
|
example_Span =
|
||||||
match = Examples.match
|
match = Examples.match
|
||||||
match.span 0
|
match.span 0
|
||||||
span : Integer | Text -> Span | Nothing ! Regex.No_Such_Group_Error
|
span : Integer | Text -> Utf_16_Span | Nothing ! Regex.No_Such_Group_Error
|
||||||
span id = case this.group id of
|
span id = case this.group id of
|
||||||
Nothing -> Nothing
|
Nothing -> Nothing
|
||||||
_ -> Span.new (this.start id) (this.end id) (this.group 0)
|
_ -> Utf_16_Span (Range (this.start id) (this.end id)) this.input
|
||||||
|
|
||||||
## Returns the start character index of the match's region.
|
## Returns the start character index of the match's region.
|
||||||
|
|
||||||
|
@ -4,11 +4,13 @@
|
|||||||
to matching on the `Full` content of the input text.
|
to matching on the `Full` content of the input text.
|
||||||
|
|
||||||
from Standard.Base import all
|
from Standard.Base import all
|
||||||
|
from Standard.Base.Data.Text.Matching_Mode import First
|
||||||
|
from Standard.Base.Data.Text.Matching_Mode export First
|
||||||
|
|
||||||
type Mode
|
type Mode
|
||||||
|
|
||||||
## The regex will only match the first instance it finds.
|
## The regex will only match the first instance it finds.
|
||||||
type First
|
First
|
||||||
|
|
||||||
## The regex will match up to some `Integer` number of instances.
|
## The regex will match up to some `Integer` number of instances.
|
||||||
Integer
|
Integer
|
||||||
|
@ -7,30 +7,14 @@
|
|||||||
|
|
||||||
example_span =
|
example_span =
|
||||||
text = "Hello!"
|
text = "Hello!"
|
||||||
Span.new 0 3 text
|
Span (0.up_to 3) text
|
||||||
|
|
||||||
from Standard.Base import all
|
from Standard.Base import all
|
||||||
|
|
||||||
import Standard.Base.Data.Range
|
from Standard.Base.Data.Text.Extensions import Index_Out_Of_Bounds_Error
|
||||||
|
|
||||||
## Construct a new `Span`.
|
polyglot java import org.enso.base.Text_Utils
|
||||||
|
polyglot java import com.ibm.icu.text.BreakIterator
|
||||||
Arguments:
|
|
||||||
- start: The index of the first character included in the span.
|
|
||||||
- end: The index of the first character after `start` that is _not_ included
|
|
||||||
in the span.
|
|
||||||
- text: The `Text` over which the span exists. This is _optional_.
|
|
||||||
|
|
||||||
> Example
|
|
||||||
Creating a span over the first three characters of the text "hello!".
|
|
||||||
|
|
||||||
import Standard.Base.Data.Text.Span
|
|
||||||
|
|
||||||
example_span =
|
|
||||||
text = "Hello!"
|
|
||||||
Span.new 0 3 text
|
|
||||||
new : Integer -> Integer -> Text | Nothing -> Span
|
|
||||||
new start end text=Nothing = Span (start.up_to end) text
|
|
||||||
|
|
||||||
type Span
|
type Span
|
||||||
|
|
||||||
@ -38,7 +22,7 @@ type Span
|
|||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
- range: The range of characters over which the span exists.
|
- range: The range of characters over which the span exists.
|
||||||
- text: The text over which the span exists. This is _optional_.
|
- text: The text over which the span exists.
|
||||||
|
|
||||||
! What is a Character?
|
! What is a Character?
|
||||||
A character is defined as an Extended Grapheme Cluster, see Unicode
|
A character is defined as an Extended Grapheme Cluster, see Unicode
|
||||||
@ -54,7 +38,7 @@ type Span
|
|||||||
text = "Hello!"
|
text = "Hello!"
|
||||||
range = 0.up_to 3
|
range = 0.up_to 3
|
||||||
Span.Span range text
|
Span.Span range text
|
||||||
type Span (range : Range.Range) (text : (Text | Nothing) = Nothing)
|
type Span (range : Range.Range) (text : Text)
|
||||||
|
|
||||||
## The index of the first character included in the span.
|
## The index of the first character included in the span.
|
||||||
|
|
||||||
@ -74,3 +58,112 @@ type Span
|
|||||||
meaning in most text-processing applications.
|
meaning in most text-processing applications.
|
||||||
end : Integer
|
end : Integer
|
||||||
end = this.range.end
|
end = this.range.end
|
||||||
|
|
||||||
|
## The length of the span in extended grapheme clusters.
|
||||||
|
|
||||||
|
! What is a Character?
|
||||||
|
A character is defined as an Extended Grapheme Cluster, see Unicode
|
||||||
|
Standard Annex 29. This is the smallest unit that still has semantic
|
||||||
|
meaning in most text-processing applications.
|
||||||
|
length : Integer
|
||||||
|
length = this.range.length
|
||||||
|
|
||||||
|
## Converts the span of extended grapheme clusters to a corresponding span
|
||||||
|
of UTF-16 code units.
|
||||||
|
|
||||||
|
> Example
|
||||||
|
Find the span of code units corresponding to the span of extended grapheme clusters.
|
||||||
|
|
||||||
|
text = 'ae\u{301}fz'
|
||||||
|
(Span (Range 1 3) text).to_utf_16_span == (Utf_16_Span (Range 1 4) text)
|
||||||
|
to_utf_16_span : Utf_16_Span
|
||||||
|
to_utf_16_span =
|
||||||
|
Utf_16_Span (here.range_to_char_indices this.text this.range) this.text
|
||||||
|
|
||||||
|
type Utf_16_Span
|
||||||
|
|
||||||
|
## A representation of a span of UTF-16 code units in Enso's `Text` type.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- range: The range of code units over which the span exists.
|
||||||
|
- text: The text over which the span exists.
|
||||||
|
|
||||||
|
> Example
|
||||||
|
Creating a span over the first three code units of the text 'a\u{301}bc'.
|
||||||
|
|
||||||
|
import Standard.Base.Data.Text.Span
|
||||||
|
|
||||||
|
example_span =
|
||||||
|
text = 'a\u{301}bc'
|
||||||
|
Span.Utf_16_Span (Range 0 3) text
|
||||||
|
type Utf_16_Span (range : Range.Range) (text : Text)
|
||||||
|
|
||||||
|
## The index of the first code unit included in the span.
|
||||||
|
start : Integer
|
||||||
|
start = this.range.start
|
||||||
|
|
||||||
|
## The index of the first code unit after `start` that is _not_ included in
|
||||||
|
the span.
|
||||||
|
end : Integer
|
||||||
|
end = this.range.end
|
||||||
|
|
||||||
|
## The length of the span in UTF-16 code units.
|
||||||
|
length : Integer
|
||||||
|
length = this.range.length
|
||||||
|
|
||||||
|
## Returns a span of extended grapheme clusters which is the closest
|
||||||
|
approximation of this span of code units.
|
||||||
|
|
||||||
|
The resulting span is extended in such a way that every code unit that
|
||||||
|
was contained by the original span is also contained in a new span. Since
|
||||||
|
some grapheme clusters consist of multiple code units, after the span was
|
||||||
|
extended it may also contain code units which were not contained inside
|
||||||
|
of the original span.
|
||||||
|
|
||||||
|
> Example
|
||||||
|
Convert a codepoint span to graphemes and back.
|
||||||
|
|
||||||
|
text = 'a\u{301}e\u{302}o\u{303}'
|
||||||
|
span = Utf_16_Span (Range 1 5) text # The span contains the units [\u{301}, e, \u{302}, o].
|
||||||
|
extended = span.to_grapheme_span
|
||||||
|
extended == Span (Range 0 3) text # The span is extended to the whole string since it contained code units from every grapheme cluster.
|
||||||
|
extended.to_utf_16_span == Utf_16_Span (Range 0 6) text
|
||||||
|
to_grapheme_span : Span
|
||||||
|
to_grapheme_span = if (this.start < 0) || (this.end > Text_Utils.char_length this.text) then Error.throw (Illegal_State_Error "Utf_16_Span indices are out of range of the associated text.") else
|
||||||
|
if this.end < this.start then Error.throw (Illegal_State_Error "Utf_16_Span invariant violation: start <= end") else
|
||||||
|
case this.start == this.end of
|
||||||
|
True ->
|
||||||
|
grapheme_ix = Text_Utils.utf16_index_to_grapheme_index this.text this.start
|
||||||
|
Span (Range grapheme_ix grapheme_ix) this.text
|
||||||
|
False ->
|
||||||
|
grapheme_ixes = Text_Utils.utf16_indices_to_grapheme_indices this.text [this.start, this.end - 1].to_array
|
||||||
|
grapheme_first = grapheme_ixes.at 0
|
||||||
|
grapheme_last = grapheme_ixes.at 1
|
||||||
|
## We find the grapheme index of the last code unit actually contained within our span and set the
|
||||||
|
end grapheme to the first grapheme after that. This ensures that if code units associated with
|
||||||
|
only a part of a grapheme were contained in our original span, the resulting span will be
|
||||||
|
extended to contain this whole grapheme.
|
||||||
|
grapheme_end = grapheme_last + 1
|
||||||
|
Span (Range grapheme_first grapheme_end) this.text
|
||||||
|
|
||||||
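The boundary arithmetic behind `to_grapheme_span` can also be expressed directly with an ICU `BreakIterator`. The sketch below is a hypothetical helper that returns code-unit indices snapped outward to grapheme cluster boundaries (the library goes one step further and converts them to grapheme indices): the start moves back to the boundary of the grapheme containing it, and the end moves forward to the first boundary at or after it.

import com.ibm.icu.text.BreakIterator;

public class GraphemeSnapDemo {
    /** Snaps a UTF-16 span [start, end) of `text` outward to grapheme cluster boundaries. */
    static int[] snapToGraphemes(String text, int start, int end) {
        BreakIterator it = BreakIterator.getCharacterInstance();
        it.setText(text);
        int newStart = it.isBoundary(start) ? start : it.preceding(start);
        int newEnd = end <= newStart ? newStart : (it.isBoundary(end) ? end : it.following(end));
        return new int[] {newStart, newEnd};
    }

    public static void main(String[] args) {
        String text = "a\u0301e\u0302o\u0303"; // three graphemes, six code units
        int[] snapped = snapToGraphemes(text, 1, 5);
        // Expected to print 0 and 6: the span grows to cover every partially-included grapheme,
        // matching the 'a\u{301}e\u{302}o\u{303}' example in the documentation above.
        System.out.println(snapped[0] + " " + snapped[1]);
    }
}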
|
## PRIVATE
|
||||||
|
Utility function taking a range pointing at grapheme clusters and converting
|
||||||
|
to a range on the underlying code units.
|
||||||
|
range_to_char_indices : Text -> Range -> Range ! Index_Out_Of_Bounds_Error
|
||||||
|
range_to_char_indices text range =
|
||||||
|
len = text.length
|
||||||
|
start = if range.start < 0 then range.start + len else range.start
|
||||||
|
end = if range.end == Nothing then len else (if range.end < 0 then range.end + len else range.end)
|
||||||
|
is_valid = (Range 0 len+1).contains
|
||||||
|
|
||||||
|
case (Pair (is_valid start) (is_valid end)) of
|
||||||
|
Pair False _ -> Error.throw (Index_Out_Of_Bounds_Error range.start len)
|
||||||
|
Pair True False -> Error.throw (Index_Out_Of_Bounds_Error range.end len)
|
||||||
|
Pair True True ->
|
||||||
|
if start>=end then (Range 0 0) else
|
||||||
|
iterator = BreakIterator.getCharacterInstance
|
||||||
|
iterator.setText text
|
||||||
|
|
||||||
|
start_index = iterator.next start
|
||||||
|
end_index = iterator.next (end - start)
|
||||||
|
Range start_index end_index
|
||||||
|
@ -79,24 +79,24 @@ type Text_Sub_Range
|
|||||||
Range (if start_index == -1 then 0 else start_index) (Text_Utils.char_length text)
|
Range (if start_index == -1 then 0 else start_index) (Text_Utils.char_length text)
|
||||||
Before delimiter ->
|
Before delimiter ->
|
||||||
if delimiter.is_empty then (Range 0 0) else
|
if delimiter.is_empty then (Range 0 0) else
|
||||||
index = Text_Utils.index_of text delimiter
|
span = Text_Utils.span_of text delimiter
|
||||||
if index == -1 then (Range 0 (Text_Utils.char_length text)) else
|
if span.is_nothing then (Range 0 (Text_Utils.char_length text)) else
|
||||||
(Range 0 index)
|
(Range 0 span.start)
|
||||||
Before_Last delimiter ->
|
Before_Last delimiter ->
|
||||||
if delimiter.is_empty then (Range 0 (Text_Utils.char_length text)) else
|
if delimiter.is_empty then (Range 0 (Text_Utils.char_length text)) else
|
||||||
index = Text_Utils.last_index_of text delimiter
|
span = Text_Utils.last_span_of text delimiter
|
||||||
if index == -1 then (Range 0 (Text_Utils.char_length text)) else
|
if span.is_nothing then (Range 0 (Text_Utils.char_length text)) else
|
||||||
(Range 0 index)
|
(Range 0 span.start)
|
||||||
After delimiter ->
|
After delimiter ->
|
||||||
if delimiter.is_empty then (Range 0 (Text_Utils.char_length text)) else
|
if delimiter.is_empty then (Range 0 (Text_Utils.char_length text)) else
|
||||||
index = Text_Utils.index_of text delimiter
|
span = Text_Utils.span_of text delimiter
|
||||||
if index == -1 then (Range 0 0) else
|
if span.is_nothing then (Range 0 0) else
|
||||||
(Range (index + Text_Utils.char_length delimiter) (Text_Utils.char_length text))
|
(Range span.end (Text_Utils.char_length text))
|
||||||
After_Last delimiter ->
|
After_Last delimiter ->
|
||||||
if delimiter.is_empty then (Range 0 0) else
|
if delimiter.is_empty then (Range 0 0) else
|
||||||
index = Text_Utils.last_index_of text delimiter
|
span = Text_Utils.last_span_of text delimiter
|
||||||
if index == -1 then (Range 0 0) else
|
if span.is_nothing then (Range 0 0) else
|
||||||
(Range (index + Text_Utils.char_length delimiter) (Text_Utils.char_length text))
|
(Range span.end (Text_Utils.char_length text))
|
||||||
While predicate ->
|
While predicate ->
|
||||||
indices = find_sub_range_end text _-> start-> end->
|
indices = find_sub_range_end text _-> start-> end->
|
||||||
predicate (Text_Utils.substring text start end) . not
|
predicate (Text_Utils.substring text start end) . not
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
akka {
|
akka {
|
||||||
loggers = ["akka.event.slf4j.Slf4jLogger"]
|
loggers = ["akka.event.slf4j.Slf4jLogger"]
|
||||||
logging-filter = "akka.event.slf4j.Slf4jLoggingFilter"
|
logging-filter = "akka.event.slf4j.Slf4jLoggingFilter"
|
||||||
version = "2.6.6"
|
version = "2.6.18"
|
||||||
stdout-loglevel = "ERROR"
|
stdout-loglevel = "ERROR"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,11 +1,19 @@
|
|||||||
package org.enso.base;
|
package org.enso.base;
|
||||||
|
|
||||||
import com.ibm.icu.lang.UCharacter;
|
import com.ibm.icu.lang.UCharacter;
|
||||||
|
import com.ibm.icu.text.BreakIterator;
|
||||||
|
import com.ibm.icu.text.CaseMap.Fold;
|
||||||
import com.ibm.icu.text.Normalizer;
|
import com.ibm.icu.text.Normalizer;
|
||||||
import com.ibm.icu.text.Normalizer2;
|
import com.ibm.icu.text.Normalizer2;
|
||||||
import com.ibm.icu.text.StringSearch;
|
import com.ibm.icu.text.StringSearch;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
import org.enso.base.text.CaseFoldedString;
|
||||||
|
import org.enso.base.text.GraphemeSpan;
|
||||||
|
import org.enso.base.text.Utf16Span;
|
||||||
|
|
||||||
/** Utils for standard library operations on Text. */
|
/** Utils for standard library operations on Text. */
|
||||||
public class Text_Utils {
|
public class Text_Utils {
|
||||||
@ -117,6 +125,23 @@ public class Text_Utils {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks whether two strings are equal up to Unicode canonicalization and ignoring case.
|
||||||
|
*
|
||||||
|
* @param str1 the first string
|
||||||
|
* @param str2 the second string
|
||||||
|
* @param locale the locale to use for case folding
|
||||||
|
* @return the result of comparison
|
||||||
|
*/
|
||||||
|
public static boolean equals_ignore_case(String str1, Object str2, Locale locale) {
|
||||||
|
if (str2 instanceof String) {
|
||||||
|
Fold fold = CaseFoldedString.caseFoldAlgorithmForLocale(locale);
|
||||||
|
return compare_normalized(fold.apply(str1), fold.apply((String) str2)) == 0;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts an array of codepoints into a string.
|
* Converts an array of codepoints into a string.
|
||||||
*
|
*
|
||||||
@ -176,6 +201,36 @@ public class Text_Utils {
|
|||||||
return searcher.first() != StringSearch.DONE;
|
return searcher.first() != StringSearch.DONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks if {@code substring} is a substring of {@code string}, ignoring case.
|
||||||
|
*
|
||||||
|
* @param string the containing string.
|
||||||
|
* @param substring the contained string.
* @param locale the locale to use for case folding.
|
||||||
|
* @return whether {@code substring} occurs within {@code string} when compared case-insensitively.
|
||||||
|
*/
|
||||||
|
public static boolean contains_case_insensitive(String string, String substring, Locale locale) {
|
||||||
|
// {@code StringSearch} does not handle empty strings as we would want, so we need these special
|
||||||
|
// cases.
|
||||||
|
if (substring.isEmpty()) return true;
|
||||||
|
if (string.isEmpty()) return false;
|
||||||
|
|
||||||
|
Fold fold = CaseFoldedString.caseFoldAlgorithmForLocale(locale);
|
||||||
|
StringSearch searcher = new StringSearch(fold.apply(substring), fold.apply(string));
|
||||||
|
return searcher.first() != StringSearch.DONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Transforms the provided string into a form which can be used for case insensitive comparisons.
|
||||||
|
*
|
||||||
|
* @param string the string to transform
|
||||||
|
* @param locale the locale to use - needed to distinguish a special case when handling Turkish
|
||||||
|
* 'i' characters
|
||||||
|
* @return a transformed string that can be used for case insensitive comparisons
|
||||||
|
*/
|
||||||
|
public static String case_insensitive_key(String string, Locale locale) {
|
||||||
|
return CaseFoldedString.simpleFold(string, locale);
|
||||||
|
}
|
||||||
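A quick usage sketch of the two helpers above for the German sharp s, which expands under full case folding (expected results are stated in the comments, not copied from the library's tests):

import java.util.Locale;
import org.enso.base.Text_Utils;

public class FoldingDemo {
    public static void main(String[] args) {
        // "ß" case-folds to "ss", so the folded key of "Straße" should be "strasse"...
        System.out.println(Text_Utils.case_insensitive_key("Straße", Locale.ROOT));
        // ...and a case-insensitive containment check against "STRASSE" should succeed.
        System.out.println(Text_Utils.contains_case_insensitive("Straße 42", "STRASSE", Locale.ROOT));
    }
}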
|
|
||||||
/**
|
/**
|
||||||
* Replaces all occurrences of {@code oldSequence} within {@code str} with {@code newSequence}.
|
* Replaces all occurrences of {@code oldSequence} within {@code str} with {@code newSequence}.
|
||||||
*
|
*
|
||||||
@ -200,37 +255,215 @@ public class Text_Utils {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find the first index of needle in the haystack
|
* Find the first occurrence of needle in the haystack
|
||||||
*
|
*
|
||||||
* @param haystack the string to search
|
* @param haystack the string to search
|
||||||
* @param needle the substring that is searched for
|
* @param needle the substring that is searched for
|
||||||
* @return index of the first needle or -1 if not found.
|
* @return a UTF-16 code unit span of the first needle or null if not found.
|
||||||
*/
|
*/
|
||||||
public static long index_of(String haystack, String needle) {
|
public static Utf16Span span_of(String haystack, String needle) {
|
||||||
|
if (needle.isEmpty()) return new Utf16Span(0, 0);
|
||||||
|
if (haystack.isEmpty()) return null;
|
||||||
|
|
||||||
StringSearch search = new StringSearch(needle, haystack);
|
StringSearch search = new StringSearch(needle, haystack);
|
||||||
int pos = search.first();
|
int pos = search.first();
|
||||||
return pos == StringSearch.DONE ? -1 : pos;
|
if (pos == StringSearch.DONE) return null;
|
||||||
|
return new Utf16Span(pos, pos + search.getMatchLength());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find the last index of needle in the haystack
|
* Find the last occurrence of needle in the haystack
|
||||||
*
|
*
|
||||||
* @param haystack the string to search
|
* @param haystack the string to search
|
||||||
* @param needle the substring that is searched for
|
* @param needle the substring that is searched for
|
||||||
* @return index of the last needle or -1 if not found.
|
* @return a UTF-16 code unit span of the last needle or null if not found.
|
||||||
*/
|
*/
|
||||||
public static long last_index_of(String haystack, String needle) {
|
public static Utf16Span last_span_of(String haystack, String needle) {
|
||||||
|
if (needle.isEmpty()) {
|
||||||
|
int afterLast = haystack.length();
|
||||||
|
return new Utf16Span(afterLast, afterLast);
|
||||||
|
}
|
||||||
|
if (haystack.isEmpty()) return null;
|
||||||
|
|
||||||
StringSearch search = new StringSearch(needle, haystack);
|
StringSearch search = new StringSearch(needle, haystack);
|
||||||
int pos = search.first();
|
int pos = search.last();
|
||||||
|
if (pos == StringSearch.DONE) return null;
|
||||||
|
return new Utf16Span(pos, pos + search.getMatchLength());
|
||||||
|
}
|
||||||
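As a usage sketch (assuming `Utf16Span` exposes public `start` and `end` fields, which is how the Enso code earlier in this diff reads it), the two helpers report the first and last occurrence as half-open UTF-16 code unit ranges:

import org.enso.base.Text_Utils;
import org.enso.base.text.Utf16Span;

public class SpanOfDemo {
    public static void main(String[] args) {
        Utf16Span first = Text_Utils.span_of("Hello World!", "o");
        Utf16Span last = Text_Utils.last_span_of("Hello World!", "o");
        // Expected: first covers [4, 5) and last covers [7, 8),
        // mirroring the `location_of` examples in the Enso documentation above.
        System.out.println(first.start + " " + first.end);
        System.out.println(last.start + " " + last.end);
        // A missing needle yields null instead of a span.
        System.out.println(Text_Utils.span_of("Hello World!", "J"));
    }
}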
|
|
||||||
|
/**
|
||||||
|
* Find spans of all occurrences of the needle within the haystack.
|
||||||
|
*
|
||||||
|
* @param haystack the string to search
|
||||||
|
* @param needle the substring that is searched for
|
||||||
|
* @return a list of UTF-16 code unit spans at which the needle occurs in the haystack
|
||||||
|
*/
|
||||||
|
public static List<Utf16Span> span_of_all(String haystack, String needle) {
|
||||||
|
if (needle.isEmpty())
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
"The operation `index_of_all` does not support searching for an empty term.");
|
||||||
|
if (haystack.isEmpty()) return List.of();
|
||||||
|
|
||||||
|
StringSearch search = new StringSearch(needle, haystack);
|
||||||
|
ArrayList<Utf16Span> occurrences = new ArrayList<>();
|
||||||
|
long ix;
|
||||||
|
while ((ix = search.next()) != StringSearch.DONE) {
|
||||||
|
occurrences.add(new Utf16Span(ix, ix + search.getMatchLength()));
|
||||||
|
}
|
||||||
|
return occurrences;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts a UTF-16 code unit index to index of the grapheme that this code unit belongs to.
|
||||||
|
*
|
||||||
|
* @param text the text associated with the index
|
||||||
|
* @param codeunit_index the UTF-16 index
|
||||||
|
* @return an index of an extended grapheme cluster that contains the code unit from the input
|
||||||
|
*/
|
||||||
|
public static long utf16_index_to_grapheme_index(String text, long codeunit_index) {
|
||||||
|
BreakIterator breakIterator = BreakIterator.getCharacterInstance();
|
||||||
|
breakIterator.setText(text);
|
||||||
|
if (codeunit_index < 0 || codeunit_index > text.length()) {
|
||||||
|
throw new IndexOutOfBoundsException(
|
||||||
|
"Index " + codeunit_index + " is outside of the provided text.");
|
||||||
|
}
|
||||||
|
|
||||||
|
int grapheme_end = breakIterator.next();
|
||||||
|
long grapheme_index = 0;
|
||||||
|
|
||||||
|
while (grapheme_end <= codeunit_index && grapheme_end != BreakIterator.DONE) {
|
||||||
|
grapheme_index++;
|
||||||
|
grapheme_end = breakIterator.next();
|
||||||
|
}
|
||||||
|
return grapheme_index;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts a series of UTF-16 code unit indices to indices of graphemes that these code units
|
||||||
|
* belong to.
|
||||||
|
*
|
||||||
|
* <p>For performance, it assumes that the provided indices are sorted in a non-decreasing order
|
||||||
|
* (duplicate entries are permitted). Behaviour is unspecified if an unsorted list is provided.
|
||||||
|
*
|
||||||
|
* <p>The behaviour is unspecified if indices provided on the input are outside of the range [0,
|
||||||
|
* text.length()].
|
||||||
|
*
|
||||||
|
* @param text the text associated with the indices
|
||||||
|
* @param codeunit_indices the array of UTF-16 code unit indices, sorted in non-decreasing order
|
||||||
|
* @return an array of grapheme indices corresponding to the UTF-16 units from the input
|
||||||
|
*/
|
||||||
|
public static long[] utf16_indices_to_grapheme_indices(String text, List<Long> codeunit_indices) {
|
||||||
|
BreakIterator breakIterator = BreakIterator.getCharacterInstance();
|
||||||
|
breakIterator.setText(text);
|
||||||
|
|
||||||
|
int grapheme_end = breakIterator.next();
|
||||||
|
long grapheme_index = 0;
|
||||||
|
|
||||||
|
long[] result = new long[codeunit_indices.size()];
|
||||||
|
int result_ix = 0;
|
||||||
|
|
||||||
|
for (long codeunit_index : codeunit_indices) {
|
||||||
|
while (grapheme_end <= codeunit_index && grapheme_end != BreakIterator.DONE) {
|
||||||
|
grapheme_index++;
|
||||||
|
grapheme_end = breakIterator.next();
|
||||||
|
}
|
||||||
|
result[result_ix++] = grapheme_index;
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find the first or last occurrence of needle in the haystack.
|
||||||
|
*
|
||||||
|
* @param haystack the string to search
|
||||||
|
* @param needle the substring that is searched for
|
||||||
|
* @param locale the locale used for case-insensitive comparisons
|
||||||
|
* @param searchForLast if set to true, will search for the last occurrence; otherwise searches
|
||||||
|
* for the first one
|
||||||
|
* @return an extended-grapheme-cluster span of the first or last needle, or null if none found.
|
||||||
|
*/
|
||||||
|
public static GraphemeSpan span_of_case_insensitive(
|
||||||
|
String haystack, String needle, Locale locale, boolean searchForLast) {
|
||||||
|
if (needle.isEmpty())
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
"The operation `span_of_case_insensitive` does not support searching for an empty term.");
|
||||||
|
if (haystack.isEmpty()) return null;
|
||||||
|
|
||||||
|
CaseFoldedString foldedHaystack = CaseFoldedString.fold(haystack, locale);
|
||||||
|
String foldedNeedle = CaseFoldedString.simpleFold(needle, locale);
|
||||||
|
StringSearch search = new StringSearch(foldedNeedle, foldedHaystack.getFoldedString());
|
||||||
|
int pos;
|
||||||
|
if (searchForLast) {
|
||||||
|
pos = search.last();
|
||||||
|
} else {
|
||||||
|
pos = search.first();
|
||||||
|
}
|
||||||
if (pos == StringSearch.DONE) {
|
if (pos == StringSearch.DONE) {
|
||||||
return -1;
|
return null;
|
||||||
|
} else {
|
||||||
|
return findExtendedSpan(foldedHaystack, pos, search.getMatchLength());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find all occurrences of needle in the haystack
|
||||||
|
*
|
||||||
|
* @param haystack the string to search
|
||||||
|
* @param needle the substring that is searched for
|
||||||
|
* @param locale the locale used for case-insensitive comparisons
|
||||||
|
* @return a list of extended-grapheme-cluster spans at which the needle occurs in the haystack
|
||||||
|
*/
|
||||||
|
public static List<GraphemeSpan> span_of_all_case_insensitive(
|
||||||
|
String haystack, String needle, Locale locale) {
|
||||||
|
if (needle.isEmpty())
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
"The operation `span_of_all_case_insensitive` does not support searching for an empty term.");
|
||||||
|
if (haystack.isEmpty()) return List.of();
|
||||||
|
|
||||||
|
CaseFoldedString foldedHaystack = CaseFoldedString.fold(haystack, locale);
|
||||||
|
String foldedNeedle = CaseFoldedString.simpleFold(needle, locale);
|
||||||
|
|
||||||
|
StringSearch search = new StringSearch(foldedNeedle, foldedHaystack.getFoldedString());
|
||||||
|
ArrayList<GraphemeSpan> result = new ArrayList<>();
|
||||||
|
|
||||||
|
int pos;
|
||||||
|
while ((pos = search.next()) != StringSearch.DONE) {
|
||||||
|
result.add(findExtendedSpan(foldedHaystack, pos, search.getMatchLength()));
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int next = search.next(); next != StringSearch.DONE; next = search.next()) {
|
return result;
|
||||||
pos = next;
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return pos;
|
/**
|
||||||
|
* Finds the grapheme span corresponding to the found match indexed with code units.
|
||||||
|
*
|
||||||
|
* <p>It extends the found span to ensure that graphemes associated with all found code units are
|
||||||
|
* included in the resulting span. Thus, some additional code units which were not present in the
|
||||||
|
* original match may also be present due to the extension.
|
||||||
|
*
|
||||||
|
* <p>The extension to the left is trivial - we just find the grapheme associated with the first
|
||||||
|
* code unit and even if that code unit is not the first one of that grapheme, by returning it we
|
||||||
|
* correctly extend to the left. The extension to the right works by finding the index of the
|
||||||
|
* grapheme associated with the last code unit actually present in the span, then the end of the
|
||||||
|
* returned span is set to the next grapheme after it. This correctly handles the edge case where
|
||||||
|
* only a part of some grapheme was matched.
|
||||||
|
*
|
||||||
|
* @param string the folded string with which the positions are associated, containing a cache of
|
||||||
|
* position mappings
|
||||||
|
* @param position the position of the match (in code units)
|
||||||
|
* @param length the length of the match (in code units)
|
||||||
|
* @return a minimal {@code GraphemeSpan} which contains all code units from the match
|
||||||
|
*/
|
||||||
|
private static GraphemeSpan findExtendedSpan(CaseFoldedString string, int position, int length) {
|
||||||
|
int firstGrapheme = string.codeUnitToGraphemeIndex(position);
|
||||||
|
if (length == 0) {
|
||||||
|
return new GraphemeSpan(firstGrapheme, firstGrapheme);
|
||||||
|
} else {
|
||||||
|
int lastGrapheme = string.codeUnitToGraphemeIndex(position + length - 1);
|
||||||
|
int endGrapheme = lastGrapheme + 1;
|
||||||
|
return new GraphemeSpan(firstGrapheme, endGrapheme);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -0,0 +1,135 @@
package org.enso.base.text;

import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.CaseMap;
import com.ibm.icu.text.CaseMap.Fold;
import java.util.Locale;

/**
 * Represents a string transformed using Unicode Case Folding which can be used for case insensitive
 * comparisons.
 *
 * <p>It contains facilities for converting indices in the transformed string to corresponding
 * indices back in the original string.
 */
public class CaseFoldedString {
  private final String foldedString;

  /**
   * A mapping from code units in the transformed string to their corresponding graphemes in the
   * original string.
   *
   * <p>The mapping must be valid for indices from 0 to {@code foldedString.length()+1}
   * (inclusive).
   */
  private final int[] graphemeIndexMapping;

  /**
   * Constructs a new instance of the folded string.
   *
   * @param foldedString the string after applying the case folding transformation
   * @param graphemeIndexMapping a mapping created during the transformation which maps code units
   *     in the transformed string to their corresponding graphemes in the original string
   */
  private CaseFoldedString(String foldedString, int[] graphemeIndexMapping) {
    this.foldedString = foldedString;
    this.graphemeIndexMapping = graphemeIndexMapping;
  }

  /**
   * Maps a code unit in the folded string to the corresponding grapheme in the original string.
   *
   * @param codeunitIndex the index of the code unit in the folded string; valid indices range from
   *     0 to {@code getFoldedString().length()+1} (inclusive), allowing to also ask for the
   *     position of the end code unit which is located right after the end of the string - which
   *     should always map to the analogous end grapheme.
   * @return the index of the grapheme from the original string that after applying the
   *     transformation contains the requested code unit
   */
  public int codeUnitToGraphemeIndex(int codeunitIndex) {
    if (codeunitIndex < 0 || codeunitIndex > this.foldedString.length()) {
      throw new IndexOutOfBoundsException(codeunitIndex);
    }
    return graphemeIndexMapping[codeunitIndex];
  }

  /** Returns the transformed string. */
  public String getFoldedString() {
    return foldedString;
  }

  /**
   * Folds a string remembering the mapping from code units to its original grapheme cluster
   * indices.
   *
   * @param charSequence a sequence of UTF-16 characters to transform
   * @param locale the locale to use as a reference for case folding; it is needed because Turkish
   *     and Azerbaijani locales handle casing of the letter `i` in a different way than other
   *     locales
   * @return a {@code CaseFoldedString} instance which contains the transformed string and allows
   *     to map its code units to original grapheme clusters
   */
  public static CaseFoldedString fold(CharSequence charSequence, Locale locale) {
    BreakIterator breakIterator = BreakIterator.getCharacterInstance();
    breakIterator.setText(charSequence);
    StringBuilder stringBuilder = new StringBuilder(charSequence.length());
    Fold foldAlgorithm = caseFoldAlgorithmForLocale(locale);
    IntArrayBuilder index_mapping = new IntArrayBuilder(charSequence.length() + 1);

    // We rely on the fact that ICU Case Folding is _not_ context-sensitive, i.e. the mapping of
    // each grapheme cluster is independent of surrounding ones. Regular casing is
    // context-sensitive.
    int current = breakIterator.current();
    int next;
    int grapheme_index = 0;
    while ((next = breakIterator.next()) != BreakIterator.DONE) {
      CharSequence grapheme = new StringSlice(charSequence, current, next);
      String foldedGrapheme = foldAlgorithm.apply(grapheme);
      stringBuilder.append(foldedGrapheme);
      for (int i = 0; i < foldedGrapheme.length(); ++i) {
        index_mapping.add(grapheme_index);
      }

      grapheme_index++;
      current = next;
    }

    // The mapping should also be able to handle a {@code str.length()} query, so we add one more
    // element to the mapping pointing to a non-existent grapheme after the end of the text.
    index_mapping.add(grapheme_index);

    return new CaseFoldedString(
        stringBuilder.toString(), index_mapping.unsafeGetStorageAndInvalidateTheBuilder());
  }

  /**
   * A helper function which folds the string without remembering the index mapping.
   *
   * <p>It should be used when the index mapping is not needed, as its implementation is much more
   * efficient.
   *
   * @param string a sequence of UTF-16 characters to transform
   * @param locale the locale to use as a reference for case folding; it is needed because Turkish
   *     and Azerbaijani locales handle casing of the letter `i` in a different way than the others
   * @return the folded string
   */
  public static String simpleFold(CharSequence string, Locale locale) {
    return caseFoldAlgorithmForLocale(locale).apply(string);
  }

  private static final Locale AZ_LOCALE = new Locale("az");
  private static final Locale TR_LOCALE = new Locale("tr");

  /**
   * Returns a case folding algorithm appropriate for the given locale.
   *
   * <p>The algorithm is locale-dependent because Turkish and Azerbaijani locales handle casing of
   * the letter `i` in a different way than other locales.
   */
  public static Fold caseFoldAlgorithmForLocale(Locale locale) {
    if (locale.equals(AZ_LOCALE) || locale.equals(TR_LOCALE)) {
      return CaseMap.fold().turkic();
    }
    return CaseMap.fold();
  }
}
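As a quick sanity check of the index mapping, a hypothetical usage sketch (not part of the commit; it assumes ICU4J and this class are on the classpath): folding 'a' + combining acute accent + 'B' keeps the accented grapheme's two code units and lowercases the B, so code units 0 and 1 map back to grapheme 0, code unit 2 to grapheme 1, and the end position to grapheme 2.

import java.util.Locale;
import org.enso.base.text.CaseFoldedString;

class CaseFoldedStringExample {
  public static void main(String[] args) {
    // "a" + COMBINING ACUTE ACCENT is a single grapheme (two code units); "B" is a second grapheme.
    CaseFoldedString folded = CaseFoldedString.fold("a\u0301B", Locale.ROOT);
    System.out.println(folded.getFoldedString());          // "a\u0301b" - only the B changes
    System.out.println(folded.codeUnitToGraphemeIndex(0));  // 0
    System.out.println(folded.codeUnitToGraphemeIndex(1));  // 0 - still inside the accented grapheme
    System.out.println(folded.codeUnitToGraphemeIndex(2));  // 1 - the "b"
    System.out.println(folded.codeUnitToGraphemeIndex(3));  // 2 - the end-of-string position
  }
}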
@ -0,0 +1,28 @@
package org.enso.base.text;

/**
 * Represents a span of characters (understood as extended grapheme clusters) within a Text.
 *
 * <p>The start index indicates the first grapheme of the span and the end index indicates the
 * first grapheme after the end of the span.
 *
 * <p>Represents an empty span if start and end indices are equal. Such an empty span refers to the
 * space just before the grapheme corresponding to index start.
 */
public class GraphemeSpan {

  public final long start, end;

  /**
   * Constructs a span of characters (understood as extended grapheme clusters).
   *
   * @param start index of the first extended grapheme cluster contained within the span (or
   *     location of the span if it is empty)
   * @param end index of the first extended grapheme cluster after start that is not contained
   *     within the span
   */
  public GraphemeSpan(long start, long end) {
    this.start = start;
    this.end = end;
  }
}
@ -0,0 +1,65 @@
package org.enso.base.text;

/** A helper to efficiently build an array of unboxed integers of arbitrary length. */
public class IntArrayBuilder {
  private int[] storage;
  private int length;

  /**
   * Constructs an empty builder with a given initial capacity.
   *
   * @param initialCapacity the initial capacity of the builder, can be used to avoid expanding the
   *     storage if the amount of elements can be estimated in advance.
   */
  public IntArrayBuilder(int initialCapacity) {
    length = 0;
    storage = new int[initialCapacity];
  }

  /** Adds a new element to the array, expanding it if necessary. */
  public void add(int x) {
    if (length >= storage.length) {
      grow();
    }

    storage[length++] = x;
  }

  /**
   * Expands the storage to fit more elements.
   *
   * <p>The storage grows by 50% and is always increased by at least one. The 50% growth is chosen
   * so that the amortized cost of adding a new element to the array stays constant.
   */
  private void grow() {
    int newCapacity = storage.length + (storage.length / 2);
    if (newCapacity <= storage.length) {
      newCapacity = storage.length + 1;
    }

    int[] newStorage = new int[newCapacity];
    System.arraycopy(this.storage, 0, newStorage, 0, length);
    this.storage = newStorage;
  }

  /** Returns the amount of elements already added to the storage. */
  public int getLength() {
    return length;
  }

  /**
   * Returns the underlying storage of the builder.
   *
   * <p>This method avoids copying for performance so it should be used with care. The storage can
   * actually have more elements than were added, so the user should be careful to only query the
   * first {@code getLength()} elements. Querying other elements results in an unspecified result.
   *
   * <p>After calling this method, the builder is invalidated and cannot be used anymore. Any usage
   * of the builder afterwards will result in a {@code NullPointerException}.
   */
  public int[] unsafeGetStorageAndInvalidateTheBuilder() {
    int[] tmp = storage;
    this.storage = null;
    return tmp;
  }
}
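A brief hypothetical usage sketch (not from the commit) showing the intended lifecycle: add elements, read the length, then take the storage exactly once, after which the builder must not be used again.

import org.enso.base.text.IntArrayBuilder;

class IntArrayBuilderExample {
  public static void main(String[] args) {
    IntArrayBuilder builder = new IntArrayBuilder(2);
    for (int i = 0; i < 5; i++) {
      builder.add(i * 10); // the backing array grows by roughly 50% whenever it is full
    }
    int length = builder.getLength(); // 5
    int[] storage = builder.unsafeGetStorageAndInvalidateTheBuilder();
    // Only the first `length` entries are meaningful; the array itself may be longer.
    for (int i = 0; i < length; i++) {
      System.out.println(storage[i]); // 0, 10, 20, 30, 40
    }
  }
}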
@ -0,0 +1,34 @@
package org.enso.base.text;

/** A char sequence which allows to access a slice of another char sequence without copying. */
class StringSlice implements CharSequence {
  private final CharSequence text;
  private final int subStart, subEnd;

  /** Constructs a slice of the given text. */
  public StringSlice(CharSequence text, int start, int end) {
    this.text = text;
    this.subStart = start;
    this.subEnd = end;
  }

  @Override
  public int length() {
    return subEnd - subStart;
  }

  @Override
  public char charAt(int index) {
    return text.charAt(subStart + index);
  }

  @Override
  public CharSequence subSequence(int start, int end) {
    return new StringSlice(text, subStart + start, subStart + end);
  }

  @Override
  public String toString() {
    return text.subSequence(subStart, subEnd).toString();
  }
}
@ -0,0 +1,18 @@
package org.enso.base.text;

/**
 * Represents a span of UTF-16 code units within a String.
 *
 * <p>The start index indicates the first code unit of the span and the end index indicates the
 * first code unit after the end of the span.
 */
public class Utf16Span {

  public final long start, end;

  /** Constructs a span of UTF-16 code units. */
  public Utf16Span(long start, long end) {
    this.start = start;
    this.end = end;
  }
}
@ -6,7 +6,7 @@ import Standard.Base.Data.Text.Regex
import Standard.Base.Data.Text.Regex.Engine.Default as Default_Engine
import Standard.Base.Data.Text.Regex.Mode
import Standard.Base.Data.Text.Regex.Option as Global_Option
import Standard.Base.Data.Text.Span
from Standard.Base.Data.Text.Span as Span_Module import Utf_16_Span

polyglot java import java.util.regex.Pattern as Java_Pattern

@ -182,6 +182,22 @@ spec =
            match.at 1 . group 0 . should_equal "ef"
            match.at 2 . group 0 . should_equal "gh"

        Test.specify "should correctly handle empty patterns" pending="Figure out how to make Regex correctly handle empty patterns." <|
            pattern = engine.compile "" []
            match_1 = pattern.match "" mode=Mode.All
            match_1.length . should_equal 1
            match_1.at 0 . start 0 . should_equal 0
            match_1.at 0 . end 0 . should_equal 0

            match_2 = pattern.match "ABC" mode=Mode.All
            match_2.length . should_equal 4
            match_2.at 0 . start 0 . should_equal 0
            match_2.at 0 . end 0 . should_equal 0
            match_2.at 1 . start 0 . should_equal 1
            match_2.at 1 . end 0 . should_equal 1
            match_2.at 3 . start 0 . should_equal 3
            match_2.at 3 . end 0 . should_equal 3

    Test.group "The default regex engine's Pattern.find" <|
        engine = Default_Engine.new

@ -261,11 +277,23 @@ spec =
            match.at 1 . should_equal "ef"
            match.at 2 . should_equal "gh"

            match_2 = pattern.find input mode=(Mode.Bounded 2 8 mode=10)
            match_2.length . should_equal 3
            match_2.at 0 . should_equal "cd"
            match_2.at 1 . should_equal "ef"
            match_2.at 2 . should_equal "gh"

            match_3 = pattern.find input mode=(Mode.Bounded 2 8 mode=2)
            match_3.length . should_equal 2
            match_3.at 0 . should_equal "cd"
            match_3.at 1 . should_equal "ef"

        Test.specify "should correctly handle edge cases where one-letter matches happen at the end of the word" <|
            engine.compile "(a+|1+)" [] . find "a1a1" . should_equal ["a", "1", "a", "1"]
            engine.compile "([a]+|[1]+)" [] . find "a1a1" . should_equal ["a", "1", "a", "1"]
            engine.compile "([0-9]+|[^0-9]+)" [] . find "a1b2" . should_equal ["a", "1", "b", "2"]

            engine.compile "([0-9]+|[^0-9]+)" [] . find "a1b2" mode=5 . should_equal ["a", "1", "b", "2"]
            engine.compile "([0-9]+|[^0-9]+)" [] . find "a1b2" mode=4 . should_equal ["a", "1", "b", "2"]
            engine.compile "([0-9]+|[^0-9]+)" [] . find "a1b2" mode=3 . should_equal ["a", "1", "b"]
            engine.compile "([0-9]+|[^0-9]+)" [] . find "a1b2" mode=(Mode.Bounded 1 3) . should_equal ["1", "b"]

@ -501,10 +529,10 @@ spec =
            match . should_be_a Default_Engine.Match

        Test.specify "should get the span of a group by index" <|
            match.span 1 . should_equal (Span.new 0 6 input)
            match.span 1 . should_equal (Utf_16_Span (Range 0 6) input)

        Test.specify "should get the span of a group by name" <|
            match.span "letters" . should_equal (Span.new 6 18 input)
            match.span "letters" . should_equal (Utf_16_Span (Range 6 18) input)

        Test.specify "should return Nothing if the group didn't match" <|
            match.span 3 . should_equal Nothing
@ -26,3 +26,4 @@ spec =
        pattern = "http://example.com"
        Regex.escape pattern . should_equal "\Qhttp://example.com\E"

main = Test.Suite.run_main here.spec
@ -2,20 +2,36 @@
from Standard.Base import all
import Standard.Test

import Standard.Base.Data.Text.Span
from Standard.Base.Data.Text.Span as Span_Module import Span, Utf_16_Span

spec = Test.group "Text.Span" <|

    Test.specify "should be able to be created over a text" <|
        text = "Hello!"
        span = Span.new 0 3 text
        span = Span (Range 0 3) text
        span.start . should_equal 0
        span.end . should_equal 3
        span.text . should_equal text

    Test.specify "should be able to be created without a text" <|
        span = Span.new 5 8
        span.start . should_equal 5
        span.end . should_equal 8
        span.text . should_equal Nothing

    Test.specify "should be able to be converted to code units" <|
        text = 'ae\u{301}fz'
        (Span (Range 1 3) text).to_utf_16_span . should_equal (Utf_16_Span (Range 1 4) text)

    Test.specify "should expand to the associated grapheme clusters" <|
        text = 'a\u{301}e\u{302}o\u{303}'
        span = Utf_16_Span (Range 1 5) text
        extended = span.to_grapheme_span
        extended . should_equal (Span (Range 0 3) text)
        extended.to_utf_16_span . should_equal (Utf_16_Span (Range 0 6) text)

        Utf_16_Span (Range 0 2) text . to_grapheme_span . should_equal (Span (Range 0 1) text)
        Utf_16_Span (Range 0 1) text . to_grapheme_span . should_equal (Span (Range 0 1) text)
        Utf_16_Span (Range 0 0) text . to_grapheme_span . should_equal (Span (Range 0 0) text)
        Utf_16_Span (Range 1 1) text . to_grapheme_span . should_equal (Span (Range 0 0) text)
        Utf_16_Span (Range 2 2) text . to_grapheme_span . should_equal (Span (Range 1 1) text)

        Utf_16_Span (Range 0 4) text . to_grapheme_span . should_equal (Span (Range 0 2) text)
        Utf_16_Span (Range 0 3) text . to_grapheme_span . should_equal (Span (Range 0 2) text)
        Utf_16_Span (Range 0 2) text . to_grapheme_span . should_equal (Span (Range 0 1) text)

main = Test.Suite.run_main here.spec
test/Tests/src/Data/Text/Utils_Spec.enso (new file, 61 lines)
@ -0,0 +1,61 @@
from Standard.Base import all

polyglot java import org.enso.base.Text_Utils
polyglot java import org.enso.base.text.CaseFoldedString

import Standard.Test

polyglot java import com.ibm.icu.text.BreakIterator

spec =
    Test.group "Text_Utils" <|
        kshi = '\u0915\u094D\u0937\u093F'
        facepalm = '\u{1F926}\u{1F3FC}\u200D\u2642\uFE0F'
        text = "a"+kshi+facepalm+'e\u{301}Z'
        codepoints_to_graphemes = _.flatten <| text.characters.map_with_index ix-> grapheme->
            codepoints_count = grapheme.utf_16.length
            Vector.new codepoints_count _->ix

        Test.specify "should correctly translate a codepoint index to a grapheme index" <|
            codepoints_to_graphemes . each_with_index codepoint_ix-> grapheme_ix->
                found_grapheme_ix = Text_Utils.utf16_index_to_grapheme_index text codepoint_ix
                found_grapheme_ix.should_equal grapheme_ix

            Text_Utils.utf16_index_to_grapheme_index text text.utf_16.length . should_equal text.length
            Text_Utils.utf16_index_to_grapheme_index "" 0 . should_equal 0

            Text_Utils.utf16_index_to_grapheme_index 'ą' 0 . should_equal 0
            Text_Utils.utf16_index_to_grapheme_index 'ą' 1 . should_equal 1

            Text_Utils.utf16_index_to_grapheme_index "aB" 0 . should_equal 0
            Text_Utils.utf16_index_to_grapheme_index "aB" 1 . should_equal 1
            Text_Utils.utf16_index_to_grapheme_index "aB" 2 . should_equal 2

            Text_Utils.utf16_index_to_grapheme_index 'a\u{301}' 0 . should_equal 0
            Text_Utils.utf16_index_to_grapheme_index 'a\u{301}' 1 . should_equal 0
            Text_Utils.utf16_index_to_grapheme_index 'a\u{301}' 2 . should_equal 1

        Test.specify "should correctly translate a series of codepoint indices to grapheme indices in a batch" <|
            translate_indices text ixes =
                Vector.from_array <| Text_Utils.utf16_indices_to_grapheme_indices text ixes.to_array
            codepoint_indices = Vector.new text.utf_16.length ix->ix
            translate_indices text codepoint_indices . should_equal codepoints_to_graphemes

            translate_indices "" [0] . should_equal [0]
            translate_indices 'ą' [0, 1] . should_equal [0, 1]
            translate_indices "aB" [0, 1, 2] . should_equal [0, 1, 2]
            translate_indices 'a\u{301}' [0, 1, 2] . should_equal [0, 0, 1]

        Test.specify "should correctly case-fold a string and translate codeunits to graphemes" <|
            text = 'a\u{301}AZßffią'
            folded = CaseFoldedString.fold text Locale.default.java_locale
            folded.getFoldedString . should_equal 'a\u{301}azssffią'

            codeunits = Vector.new folded.getFoldedString.utf_16.length+1 ix->ix
            grapheme_ixes = codeunits.map ix->
                folded.codeUnitToGraphemeIndex ix
            grapheme_ixes . should_equal [0, 0, 1, 2, 3, 3, 4, 4, 4, 5, 6]

            Test.expect_panic_with (folded.codeUnitToGraphemeIndex -1) Polyglot_Error
            Test.expect_panic_with (folded.codeUnitToGraphemeIndex folded.getFoldedString.utf_16.length+1) Polyglot_Error

main = Test.Suite.run_main here.spec
@ -4,7 +4,10 @@ from Standard.Base.Data.Text.Extensions import Index_Out_Of_Bounds_Error
import Standard.Base.Data.Text.Regex.Engine.Default as Default_Engine
import Standard.Base.Data.Locale
import Standard.Base.Data.Text.Split_Kind
from Standard.Base.Data.Text.Span as Span_Module import Span
from Standard.Base.Data.Text.Text_Sub_Range import all
import Standard.Base.Data.Text.Regex.Mode
import Standard.Base.Data.Text.Matching_Mode
import Standard.Test

type Auto a

@ -87,9 +90,8 @@ spec =
        'e\u0301' . equals_ignore_case 'e\u0303' . should_be_false

        "I" . equals_ignore_case "i" . should_be_true
        "I" . equals_ignore_case "ı" . should_be_true
        "İ" . equals_ignore_case "i" . should_be_false
        "İ" . equals_ignore_case "i" (locale = Locale.new "tr") . should_be_true
        "I" . equals_ignore_case "ı" (locale = Locale.new "tr") . should_be_true
        "I" . equals_ignore_case "i" (locale = Locale.new "tr") . should_be_false

        "Kongressstraße"=="Kongressstrasse" . should_be_false

@ -199,15 +201,20 @@ spec =
        'He\u{302}llo\u{308} Wo\u{301}rld!'.take (Last 6) . should_equal 'Wo\u{301}rld!'
        'He\u{302}llo\u{308} Wo\u{301}rld!'.take (Last 5) . should_equal 'o\u{301}rld!'
        'He\u{302}llo\u{308} Wo\u{301}rld!'.take (Before 'e\u{302}') . should_equal 'H'
        'He\u{302}llo\u{308} Wo\u{301}rld!'.take (Before 'ê') . should_equal 'H'
        'He\u{302}llo\u{308} Wo\u{301}rld!'.take (Before 'e') . should_equal 'He\u{302}llo\u{308} Wo\u{301}rld!'
        'He\u{302}llo\u{308} Wo\u{308}rld!'.take (Before_Last 'o\u{308}') . should_equal 'He\u{302}llo\u{308} W'
        'He\u{302}llo\u{308} Wo\u{308}rld!'.take (Before_Last 'ö') . should_equal 'He\u{302}llo\u{308} W'
        'He\u{302}llo\u{308} Wo\u{308}rld!'.take (Before_Last 'o') . should_equal 'He\u{302}llo\u{308} Wo\u{308}rld!'
        'He\u{302}llo\u{308} Wo\u{301}rld!'.take (After 'e\u{302}') . should_equal 'llo\u{308} Wo\u{301}rld!'
        'He\u{302}llo\u{308} Wo\u{301}rld!'.take (After 'ê') . should_equal 'llo\u{308} Wo\u{301}rld!'
        'He\u{302}llo\u{308} Wo\u{301}rld!'.take (After 'e\u{308}') . should_equal ''
        'He\u{302}llo\u{308} Wo\u{301}rld!'.take (After 'e') . should_equal ''
        'He\u{302}llo\u{308} Wo\u{308}rld!'.take (After_Last 'o\u{308}') . should_equal 'rld!'
        'He\u{302}llo\u{308} Wo\u{308}rld!'.take (After_Last 'ö') . should_equal 'rld!'
        'He\u{302}llo\u{308} Wo\u{308}rld!'.take (After_Last 'o') . should_equal ''
        'He\u{302}llo\u{308} Wo\u{308}rld!'.take (While c->c!='e\u{302}') . should_equal 'H'
        'He\u{302}llo\u{308} Wo\u{308}rld!'.take (While c->c!='ê') . should_equal 'H'
        'He\u{302}llo\u{308} Wo\u{308}rld!'.take (While c->c!='e') . should_equal 'He\u{302}llo\u{308} Wo\u{308}rld!'
        'He\u{302}llo\u{308} Wo\u{308}rld!'.take (Range 3 5) . should_equal 'lo\u{308}'
        'He\u{302}llo\u{308} Wo\u{308}rld!'.take (Range -3 -1) . should_equal 'ld'

@ -232,6 +239,30 @@ spec =
        '✨🚀🚧😍😃😍😎😙😉☺'.take (Range -3 Nothing) . should_equal '😙😉☺'
        '✨🚀🚧😍😃😍😎😙😉☺'.take (Range -3 -1) . should_equal '😙😉'

    Test.specify "take should correctly handle edge cases" <|
        "".take First.new . should_equal ""
        "".take Last.new . should_equal ""

        "".take (After "a") . should_equal ""
        "".take (After_Last "a") . should_equal ""
        "".take (Before "a") . should_equal ""
        "".take (Before_Last "a") . should_equal ""

        "".take (After "") . should_equal ""
        "".take (After_Last "") . should_equal ""
        "".take (Before "") . should_equal ""
        "".take (Before_Last "") . should_equal ""

        "".take (While _->True) . should_equal ""

        "".take (Range 0 0) . should_equal ""
        'ABC\u{301}'.take (Range 0 0) . should_equal ""

        'ABC\u{301}'.take (After "") . should_equal 'ABC\u{301}'
        'ABC\u{301}'.take (After_Last "") . should_equal ""
        'ABC\u{301}'.take (Before "") . should_equal ""
        'ABC\u{301}'.take (Before_Last "") . should_equal 'ABC\u{301}'

    Test.specify "drop should work as in the examples" <|
        "Hello World!".drop First.new . should_equal "ello World!"
        "Hello World!".drop (First 5) . should_equal " World!"

@ -269,15 +300,20 @@ spec =
        'He\u{302}llo\u{308} Wo\u{301}rld!'.drop (Last 6) . should_equal 'He\u{302}llo\u{308} '
        'He\u{302}llo\u{308} Wo\u{301}rld!'.drop (Last 5) . should_equal 'He\u{302}llo\u{308} W'
        'He\u{302}llo\u{308} Wo\u{301}rld!'.drop (Before 'e\u{302}') . should_equal 'e\u{302}llo\u{308} Wo\u{301}rld!'
        'He\u{302}llo\u{308} Wo\u{301}rld!'.drop (Before 'ê') . should_equal 'e\u{302}llo\u{308} Wo\u{301}rld!'
        'He\u{302}llo\u{308} Wo\u{301}rld!'.drop (Before 'e') . should_equal ''
        'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (Before_Last 'o\u{308}') . should_equal 'o\u{308}rld!'
        'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (Before_Last 'ö') . should_equal 'o\u{308}rld!'
        'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (Before_Last 'o') . should_equal ''
        'He\u{302}llo\u{308} Wo\u{301}rld!'.drop (After 'e\u{302}') . should_equal 'He\u{302}'
        'He\u{302}llo\u{308} Wo\u{301}rld!'.drop (After 'ê') . should_equal 'He\u{302}'
        'He\u{302}llo\u{308} Wo\u{301}rld!'.drop (After 'e\u{308}') . should_equal 'He\u{302}llo\u{308} Wo\u{301}rld!'
        'He\u{302}llo\u{308} Wo\u{301}rld!'.drop (After 'e') . should_equal 'He\u{302}llo\u{308} Wo\u{301}rld!'
        'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (After_Last 'o\u{308}') . should_equal 'He\u{302}llo\u{308} Wo\u{308}'
        'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (After_Last 'ö') . should_equal 'He\u{302}llo\u{308} Wo\u{308}'
        'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (After_Last 'o') . should_equal 'He\u{302}llo\u{308} Wo\u{308}rld!'
        'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (While c->c!='e\u{302}') . should_equal 'e\u{302}llo\u{308} Wo\u{308}rld!'
        'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (While c->c!='ê') . should_equal 'e\u{302}llo\u{308} Wo\u{308}rld!'
        'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (While c->c!='e') . should_equal ''
        'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (Range 3 5) . should_equal 'He\u{302}l Wo\u{308}rld!'
        'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (Range -3 -1) . should_equal 'He\u{302}llo\u{308} Wo\u{308}r!'

@ -301,6 +337,30 @@ spec =
        '✨🚀🚧😍😃😍😎😙😉☺'.drop (Range -3 Nothing) . should_equal '✨🚀🚧😍😃😍😎'
        '✨🚀🚧😍😃😍😎😙😉☺'.drop (Range -3 -1) . should_equal '✨🚀🚧😍😃😍😎☺'

    Test.specify "drop should correctly handle edge cases" <|
        "".drop First.new . should_equal ""
        "".drop Last.new . should_equal ""

        "".drop (After "a") . should_equal ""
        "".drop (After_Last "a") . should_equal ""
        "".drop (Before "a") . should_equal ""
        "".drop (Before_Last "a") . should_equal ""

        "".drop (After "") . should_equal ""
        "".drop (After_Last "") . should_equal ""
        "".drop (Before "") . should_equal ""
        "".drop (Before_Last "") . should_equal ""

        "".drop (While _->True) . should_equal ""

        "".drop (Range 0 0) . should_equal ""
        'ABC\u{301}'.drop (Range 0 0) . should_equal 'ABC\u{301}'

        'ABC\u{301}'.drop (After "") . should_equal ''
        'ABC\u{301}'.drop (After_Last "") . should_equal 'ABC\u{301}'
        'ABC\u{301}'.drop (Before "") . should_equal 'ABC\u{301}'
        'ABC\u{301}'.drop (Before_Last "") . should_equal ''

    Test.specify "should correctly convert character case" <|
        "FooBar Baz".to_case Case.Lower . should_equal "foobar baz"
        "FooBar Baz".to_case Case.Upper . should_equal "FOOBAR BAZ"

@ -465,10 +525,7 @@ spec =
        ## This shows what regex is doing by default and we cannot easily fix
           that.
        's\u{301}' . contains 's' (Regex_Matcher.new) . should_be_true
        ## This would normally be false, but we perform input normalization
           to get results that are consistent regardless of if the input was
           normalized or not.
        'ś' . contains 's' (Regex_Matcher.new) . should_be_true
        'ś' . contains 's' (Regex_Matcher.new) . should_be_false
        's\u{301}' . contains 'ś' (Regex_Matcher.new) . should_be_true
        'ś' . contains 's\u{301}' (Regex_Matcher.new) . should_be_true
@ -767,6 +824,157 @@ spec =
        '✨🚀🚧'*2 . should_equal '✨🚀🚧✨🚀🚧'

    Test.specify "location_of should work as shown in examples" <|
        example_1 =
            "Hello World!".location_of "J" == Nothing
            "Hello World!".location_of "o" == Span (Range 4 5) "Hello World!"
            "Hello World!".location_of "o" mode=Matching_Mode.Last == Span (Range 4 5) "Hello World!"

        example_2 =
            term = "straße"
            text = "MONUMENTENSTRASSE 42"
            match = text . location_of term matcher=(Text_Matcher Case_Insensitive.new)
            term.length . should_equal 6
            match.length . should_equal 7

        example_3 =
            ligatures = "ffiffl"
            ligatures.length . should_equal 2
            term_1 = "IFF"
            match_1 = ligatures . location_of term_1 matcher=(Text_Matcher Case_Insensitive.new)
            term_1.length . should_equal 3
            match_1.length . should_equal 2
            term_2 = "ffiffl"
            match_2 = ligatures . location_of term_2 matcher=(Text_Matcher Case_Insensitive.new)
            term_2.length . should_equal 6
            match_2.length . should_equal 2
            match_1 . should_equal match_2

        example_4 =
            "Hello World!".location_of_all "J" . should_equal []
            "Hello World!".location_of_all "o" . map .start . should_equal [4, 7]

        example_5 =
            term = "strasse"
            text = "MONUMENTENSTRASSE ist eine große Straße."
            match = text . location_of_all term matcher=(Text_Matcher Case_Insensitive.new)
            term.length . should_equal 7
            match . map .length . should_equal [7, 6]

        example_6 =
            ligatures = "ffifflFFIFF"
            ligatures.length . should_equal 7
            match_1 = ligatures . location_of_all "IFF" matcher=(Text_Matcher Case_Insensitive.new)
            match_1 . map .length . should_equal [2, 3]
            match_2 = ligatures . location_of_all "ffiff" matcher=(Text_Matcher Case_Insensitive.new)
            match_2 . map .length . should_equal [2, 5]

        # Put them in blocks to avoid name clashes.
        example_1
        example_2
        example_3
        example_4
        example_5
        example_6

    Test.specify "should allow to find location_of occurrences within a text" <|
        "Hello World!".location_of_all "J" . should_equal []
        "Hello World!".location_of_all "o" . map .start . should_equal [4, 7]

        accents = 'a\u{301}e\u{301}o\u{301}'
        accents.location_of accent_1 . should_equal (Span (Range 1 2) accents)

        "".location_of "foo" . should_equal Nothing
        "".location_of "foo" mode=Matching_Mode.Last . should_equal Nothing
        "".location_of_all "foo" . should_equal []
        "".location_of "" . should_equal (Span (Range 0 0) "")
        "".location_of "" mode=Matching_Mode.Last . should_equal (Span (Range 0 0) "")
        "".location_of_all "" . should_equal [Span (Range 0 0) ""]
        abc = 'A\u{301}ßC'
        abc.location_of "" . should_equal (Span (Range 0 0) abc)
        abc.location_of "" mode=Matching_Mode.Last . should_equal (Span (Range 3 3) abc)
        abc.location_of_all "" . should_equal [Span (Range 0 0) abc, Span (Range 1 1) abc, Span (Range 2 2) abc, Span (Range 3 3) abc]

    Test.specify "should allow case insensitive matching in location_of" <|
        hello = "Hello WORLD!"
        case_insensitive = Text_Matcher Case_Insensitive.new
        hello.location_of "world" . should_equal Nothing
        hello.location_of "world" matcher=case_insensitive . should_equal (Span (Range 6 11) hello)

        hello.location_of "o" mode=Mode.First matcher=case_insensitive . should_equal (Span (Range 4 5) hello)
        hello.location_of "o" mode=Matching_Mode.Last matcher=case_insensitive . should_equal (Span (Range 7 8) hello)

        accents = 'A\u{301}E\u{301}O\u{301}'
        accents.location_of accent_1 matcher=case_insensitive . should_equal (Span (Range 1 2) accents)

        "Strasse".location_of "ß" matcher=case_insensitive . should_equal (Span (Range 4 6) "Strasse")
        "Monumentenstraße 42".location_of "STRASSE" matcher=case_insensitive . should_equal (Span (Range 10 16) "Monumentenstraße 42")

        '\u0390'.location_of '\u03B9\u0308\u0301' matcher=case_insensitive . should_equal (Span (Range 0 1) '\u0390')
        'ԵՒ'.location_of 'և' . should_equal Nothing
        'ԵՒ'.location_of 'և' matcher=case_insensitive . should_equal (Span (Range 0 2) 'ԵՒ')
        'և'.location_of 'ԵՒ' matcher=case_insensitive . should_equal (Span (Range 0 1) 'և')

        ligatures = 'ffafffiflffifflſtstZ'
        ligatures.location_of 'FFI' matcher=case_insensitive . should_equal (Span (Range 3 5) ligatures)
        ligatures.location_of 'FF' matcher=case_insensitive . should_equal (Span (Range 0 2) ligatures)
        ligatures.location_of 'ff' matcher=case_insensitive mode=Matching_Mode.Last . should_equal (Span (Range 7 8) ligatures)
        ligatures.location_of_all 'ff' . should_equal [Span (Range 0 2) ligatures]
        ligatures.location_of_all 'FF' matcher=case_insensitive . should_equal [Span (Range 0 2) ligatures, Span (Range 3 4) ligatures, Span (Range 6 7) ligatures, Span (Range 7 8) ligatures]
        ligatures.location_of_all 'ffi' matcher=case_insensitive . should_equal [Span (Range 3 5) ligatures, Span (Range 6 7) ligatures]
        'fffi'.location_of_all 'ff' matcher=case_insensitive . should_equal [Span (Range 0 2) 'fffi']
        'fffi'.location_of_all 'ffi' . should_equal []
        'fffi'.location_of_all 'ffi' matcher=case_insensitive . should_equal [Span (Range 1 4) 'fffi']
        'FFFI'.location_of 'ffi' matcher=case_insensitive . should_equal (Span (Range 1 4) 'FFFI')

        'ffiffl'.location_of 'IF' matcher=case_insensitive . should_equal (Span (Range 0 2) 'ffiffl')
        'ffiffl'.location_of 'F' Matching_Mode.Last matcher=case_insensitive . should_equal (Span (Range 1 2) 'ffiffl')
        'ffiffl'.location_of_all 'F' matcher=case_insensitive . should_equal [Span (Range 0 1) 'ffiffl', Span (Range 0 1) 'ffiffl', Span (Range 1 2) 'ffiffl', Span (Range 1 2) 'ffiffl']
        'aaffibb'.location_of_all 'af' matcher=case_insensitive . should_equal [Span (Range 1 3) 'aaffibb']
        'aaffibb'.location_of_all 'affi' matcher=case_insensitive . should_equal [Span (Range 1 3) 'aaffibb']
        'aaffibb'.location_of_all 'ib' matcher=case_insensitive . should_equal [Span (Range 2 4) 'aaffibb']
        'aaffibb'.location_of_all 'ffib' matcher=case_insensitive . should_equal [Span (Range 2 4) 'aaffibb']

        "".location_of "foo" matcher=case_insensitive . should_equal Nothing
        "".location_of "foo" matcher=case_insensitive mode=Matching_Mode.Last . should_equal Nothing
        "".location_of_all "foo" matcher=case_insensitive . should_equal []
        "".location_of "" matcher=case_insensitive . should_equal (Span (Range 0 0) "")
        "".location_of "" matcher=case_insensitive mode=Matching_Mode.Last . should_equal (Span (Range 0 0) "")
        "".location_of_all "" matcher=case_insensitive . should_equal [Span (Range 0 0) ""]
        abc = 'A\u{301}ßC'
        abc.location_of "" matcher=case_insensitive . should_equal (Span (Range 0 0) abc)
        abc.location_of "" matcher=case_insensitive mode=Matching_Mode.Last . should_equal (Span (Range 3 3) abc)
        abc.location_of_all "" matcher=case_insensitive . should_equal [Span (Range 0 0) abc, Span (Range 1 1) abc, Span (Range 2 2) abc, Span (Range 3 3) abc]

    Test.specify "should allow regexes in location_of" <|
        hello = "Hello World!"
        regex = Regex_Matcher.new
        regex_insensitive = Regex_Matcher.new case_sensitive=Case_Insensitive.new
        hello.location_of ".o" Matching_Mode.First matcher=regex . should_equal (Span (Range 3 5) hello)
        hello.location_of ".o" Matching_Mode.Last matcher=regex . should_equal (Span (Range 6 8) hello)
        hello.location_of_all ".o" matcher=regex . map .start . should_equal [3, 6]

        "foobar".location_of "BAR" Mode.First matcher=regex_insensitive . should_equal (Span (Range 3 6) "foobar")

        ## Regex matching does not do case folding
        "Strasse".location_of "ß" Mode.First matcher=regex_insensitive . should_equal Nothing

        ## But it should handle the Unicode normalization
        accents = 'a\u{301}e\u{301}o\u{301}'
        accents.location_of accent_1 Mode.First matcher=regex . should_equal (Span (Range 1 2) accents)

    Test.specify "should correctly handle regex edge cases in location_of" pending="Figure out how to make Regex correctly handle empty patterns." <|
        regex = Regex_Matcher.new
        "".location_of "foo" matcher=regex . should_equal Nothing
        "".location_of "foo" matcher=regex mode=Matching_Mode.Last . should_equal Nothing
        "".location_of_all "foo" matcher=regex . should_equal []
        "".location_of "" matcher=regex . should_equal (Span (Range 0 0) "")
        "".location_of_all "" matcher=regex . should_equal [Span (Range 0 0) ""]
        "".location_of "" matcher=regex mode=Matching_Mode.Last . should_equal (Span (Range 0 0) "")
        abc = 'A\u{301}ßC'
        abc.location_of "" matcher=regex . should_equal (Span (Range 0 0) abc)
        abc.location_of_all "" matcher=regex . should_equal [Span (Range 0 0) abc, Span (Range 0 0) abc, Span (Range 1 1) abc, Span (Range 2 2) abc, Span (Range 3 3) abc]
        abc.location_of "" matcher=regex mode=Matching_Mode.Last . should_equal (Span (Range 3 3) abc)

    Test.group "Regex matching" <|
        Test.specify "should be possible on text" <|
            match = "My Text: Goes Here".match "^My Text: (.+)$" mode=Regex_Mode.First
@ -128,3 +128,4 @@ spec = Test.group "Examples" <|
        match.groups.length . should_equal 5
        match.named_groups.size . should_equal 2

main = Test.Suite.run_main here.spec
@ -34,6 +34,7 @@ import project.Data.Text_Spec
import project.Data.Time.Spec as Time_Spec
import project.Data.Vector_Spec
import project.Data.Text.Regex_Spec
import project.Data.Text.Utils_Spec
import project.Data.Text.Default_Regex_Engine_Spec
import project.Data.Text.Matching_Spec
import project.Data.Text.Span_Spec

@ -87,6 +88,7 @@ main = Test.Suite.run_main <|
    Runtime_Spec.spec
    Span_Spec.spec
    Stack_Traces_Spec.spec
    Utils_Spec.spec
    Text_Spec.spec
    Time_Spec.spec
    Uri_Spec.spec