Create Index_Sub_Range type and update Text.take and Text.drop (#3617)

This commit is contained in:
Radosław Waśko 2022-08-03 13:41:34 +02:00 committed by GitHub
parent 796b1b5b82
commit 0a2fea925c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 734 additions and 221 deletions

View File

@ -169,6 +169,8 @@
the Postgres connection][3593] the Postgres connection][3593]
- [Added `Regression` to the `Standard.Base` library and removed legacy `Model` - [Added `Regression` to the `Standard.Base` library and removed legacy `Model`
type from `Standard.Table`.][3601] type from `Standard.Table`.][3601]
- [Created `Index_Sub_Range` type and updated `Text.take` and
`Text.drop`.][3617]
[debug-shortcuts]: [debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -267,6 +269,7 @@
[3590]: https://github.com/enso-org/enso/pull/3590 [3590]: https://github.com/enso-org/enso/pull/3590
[3593]: https://github.com/enso-org/enso/pull/3593 [3593]: https://github.com/enso-org/enso/pull/3593
[3601]: https://github.com/enso-org/enso/pull/3601 [3601]: https://github.com/enso-org/enso/pull/3601
[3617]: https://github.com/enso-org/enso/pull/3617
#### Enso Compiler #### Enso Compiler

View File

@ -0,0 +1,45 @@
from Standard.Base import all
import Standard.Base.Random
type Index_Sub_Range
## Select the first `count` items.
Selects no items if `count` is less than or equal to 0.
Selects all items if `count` is greater than the length of the input.
type First (count : Integer = 1)
## Select the last `count` items.
Selects no items if `count` is less than or equal to 0.
Selects all items if `count` is greater than the length of the input.
type Last (count : Integer = 1)
## Select elements from the start while the predicate returns `True`.
type While (predicate : (Any -> Boolean))
## Selects specific indexes (starting from 0) either as an `Integer` or a
`Range`.
If the index or start of the Range is out of bounds, an error is
reported. If the end of the Range is out of bounds, all items until the
end of the input are selected.
Only ranges with positive step and non-negative indices are supported.
Individual integer indices can be negative which allows for indexing
from the end of the collection.
type By_Index (indexes : (Integer | Range | Vector (Integer | Range)) = [0])
## Gets a random sample of entries, without repetitions.
If `count` is greater than the length of the input, a random permutation
of all elements from the input is selected.
type Sample (count:Integer) (seed:Integer=Random.get_default_seed)
# NOTE(review): the `Text` handling of `Every` in `find_codepoint_ranges`
# returns an empty selection when `first` is at or past the end of the input,
# while the text below promises an error - confirm which one is intended.
## Gets every Nth entry.
Arguments:
- step: The step between consecutive entries that are included.
- first: The first entry to include. If it is outside of bounds of the
input, an error is raised.
type Every (step:Integer) (first:Integer=0)

View File

@ -10,6 +10,7 @@ import Standard.Base.Data.Text.Location
import Standard.Base.Data.Text.Line_Ending_Style import Standard.Base.Data.Text.Line_Ending_Style
import Standard.Base.Data.Text.Span as Span_Module import Standard.Base.Data.Text.Span as Span_Module
import Standard.Base.Data.Text.Text_Sub_Range import Standard.Base.Data.Text.Text_Sub_Range
from Standard.Base.Data.Text.Text_Sub_Range import First
from Standard.Base.Error.Problem_Behavior import Report_Warning from Standard.Base.Error.Problem_Behavior import Report_Warning
import Standard.Base.Data.Locale import Standard.Base.Data.Locale
import Standard.Base.Meta import Standard.Base.Meta
@ -25,15 +26,6 @@ polyglot java import java.lang.StringBuilder
polyglot java import org.enso.base.Text_Utils polyglot java import org.enso.base.Text_Utils
polyglot java import org.enso.base.Encoding_Utils polyglot java import org.enso.base.Encoding_Utils
## UNSTABLE
An error for when an index is out of bounds in a text.
Arguments:
- index: The requested index in the text.
- length: The length of the text.
type Index_Out_Of_Bounds_Error index length
## ALIAS Length ## ALIAS Length
Computes the number of characters in the text. Computes the number of characters in the text.
@ -1091,17 +1083,26 @@ Text.repeat self count=1 =
"Hello World!".take (After_Last "o") == "rld!" "Hello World!".take (After_Last "o") == "rld!"
"Hello World!".take (While c->c!=" ") == "Hello" "Hello World!".take (While c->c!=" ") == "Hello"
"Hello World!".take (Range 3 5) == "lo" "Hello World!".take (Range 3 5) == "lo"
"Hello World!".take (Range -3 -1) == "ld"
"Hello World!".take (Range -3 Nothing) == "ld!"
"Hello World!".take (Range 5 Nothing) == " World!" "Hello World!".take (Range 5 Nothing) == " World!"
"Hello World!".take (Range 5 12) == " World!" "Hello World!".take (Range 5 12) == " World!"
"Hello World!".take (Range 12 12) == "" "Hello World!".take (Range 6 12 2) == "Wrd"
Text.take : (Text_Sub_Range | Range) -> Text ! Index_Out_Of_Bounds_Error "Hello World!".take (Every 2 first=6) == "Wrd"
Text.take self range = "Hello World!".take (Every 3) == "Hl Wl"
char_range = case range of "Hello World!".take (By_Index 0) == "H"
Range _ _ _ -> Span_Module.range_to_char_indices self range "Hello World!".take (By_Index [1, 0, 0, 6, 0]) == "eHHWH"
_ -> range.to_char_range self "Hello World!".take (By_Index [Range 0 3, 6, Range 6 12 2]) == "HelWWrd"
Text_Utils.substring self char_range.start char_range.end "Hello World!".take (Sample 3 seed=42) == "l d"
Text.take : (Text_Sub_Range | Index_Sub_Range | Range) -> Text ! Index_Out_Of_Bounds_Error
Text.take self range=(First 1) =
    # The selector is first translated into index ranges that can be passed
    # directly to `Text_Utils.substring` / `StringBuilder.append`.
    ranges = Text_Sub_Range.find_codepoint_ranges self range
    case ranges of
        # A single contiguous range: one substring call is enough.
        Range start end _ ->
            Text_Utils.substring self start end
        # Several ranges: concatenate the pieces in the order they were
        # requested (the order is significant, e.g. for `By_Index [1, 0]`).
        Text_Sub_Range.Codepoint_Ranges char_ranges _ ->
            sb = StringBuilder.new
            # `each` rather than `map`: the loop is run only for its side
            # effect on `sb`, so no result vector needs to be built.
            char_ranges.each char_range->
                sb.append self char_range.start char_range.end
            sb.toString
## ALIAS skip, remove ## ALIAS skip, remove
Creates a new Text by removing the specified range of the input. Creates a new Text by removing the specified range of the input.
@ -1131,20 +1132,32 @@ Text.take self range =
"Hello World!".drop (After_Last "o") == "Hello Wo" "Hello World!".drop (After_Last "o") == "Hello Wo"
"Hello World!".drop (While c->c!=" ") == " World!" "Hello World!".drop (While c->c!=" ") == " World!"
"Hello World!".drop (Range 3 5) == "Hel World!" "Hello World!".drop (Range 3 5) == "Hel World!"
"Hello World!".drop (Range -3 -1) == "Hello Wor!"
"Hello World!".drop (Range -3 Nothing) == "Hello Wor"
"Hello World!".drop (Range 5 Nothing) == "Hello" "Hello World!".drop (Range 5 Nothing) == "Hello"
"Hello World!".drop (Range 5 12) == "Hello" "Hello World!".drop (Range 5 12) == "Hello"
"Hello World!".drop (Range 12 12) == "Hello World!" "Hello World!".drop (Range 6 12 2) == "Hello ol!"
Text.drop : (Text_Sub_Range | Range) -> Text ! Index_Out_Of_Bounds_Error "Hello World!".drop (Every 2 first=6) == "Hello ol!"
Text.drop self range = "Hello World!".drop (Every 3) == "elo ord!"
char_range = case range of "Hello World!".drop (By_Index 0) == "ello World!"
Range _ _ _ -> Span_Module.range_to_char_indices self range "Hello World!".drop (By_Index [1, 0, 0, 6, 0]) == "llo orld!"
_ -> range.to_char_range self "Hello World!".drop (By_Index [Range 0 3, 6, Range 6 12 2]) == "lo ol!"
if char_range.start == 0 then Text_Utils.drop_first self char_range.end else "Hello World!".drop (Sample 3 seed=42) == "HeloWorl!"
prefix = Text_Utils.substring self 0 char_range.start Text.drop : (Text_Sub_Range | Index_Sub_Range | Range) -> Text ! Index_Out_Of_Bounds_Error
if char_range.end == (Text_Utils.char_length self) then prefix else Text.drop self range=(First 1) =
prefix + Text_Utils.drop_first self char_range.end ranges = Text_Sub_Range.find_codepoint_ranges self range
case ranges of
Range start end _ ->
if start == 0 then Text_Utils.drop_first self end else
prefix = Text_Utils.substring self 0 start
if end == (Text_Utils.char_length self) then prefix else
prefix + Text_Utils.drop_first self end
Text_Sub_Range.Codepoint_Ranges _ _ ->
sorted_char_ranges_to_remove = ranges.sorted_and_distinct_ranges
len = Text_Utils.char_length self
sb = StringBuilder.new
ranges_with_sentinels = [Range 0 0] + sorted_char_ranges_to_remove + [Range len len]
ranges_with_sentinels.zip ranges_with_sentinels.tail prev-> next->
sb.append self prev.end next.start
sb.toString
## ALIAS lower, upper, title, proper ## ALIAS lower, upper, title, proper
Converts each character in `self` to the specified case. Converts each character in `self` to the specified case.

View File

@ -21,7 +21,8 @@ type Span
## A representation of a span of characters in Enso's `Text` type. ## A representation of a span of characters in Enso's `Text` type.
Arguments: Arguments:
- range: The range of characters over which the span exists. - range: The range of characters over which the span exists. The range is
assumed to have `step` equal to 1.
- text: The text over which the span exists. - text: The text over which the span exists.
! What is a Character? ! What is a Character?
@ -85,7 +86,8 @@ type Utf_16_Span
## A representation of a span of UTF-16 code units in Enso's `Text` type. ## A representation of a span of UTF-16 code units in Enso's `Text` type.
Arguments: Arguments:
- range: The range of code units over which the span exists. - range: The range of code units over which the span exists. The range is
assumed to have `step` equal to 1.
- text: The text over which the span exists. - text: The text over which the span exists.
> Example > Example

View File

@ -1,21 +1,17 @@
from Standard.Base import all from Standard.Base import all
import Standard.Base.Runtime.Ref
from Standard.Base.Data.Text.Extensions import Index_Out_Of_Bounds_Error from Standard.Base.Data.Text.Extensions import Index_Out_Of_Bounds_Error
from Standard.Base.Data.Text.Span as Span_Module import Span
from Standard.Base.Data.Index_Sub_Range import First, Last, While, By_Index, Sample, Every
import Standard.Base.Random
from Standard.Base.Data.Index_Sub_Range export First, Last, While, By_Index, Sample, Every
polyglot java import com.ibm.icu.text.BreakIterator polyglot java import com.ibm.icu.text.BreakIterator
polyglot java import org.enso.base.Text_Utils polyglot java import org.enso.base.Text_Utils
## Type defining a substring of a Text ## Type defining a substring of a Text
type Text_Sub_Range type Text_Sub_Range
## Select the first `count` characters.
Select an empty string if `count` is less than or equal to 0.
Select the entire string if `count` is greater than the length of the input.
type First (count : Integer = 1)
## Select the last `count` characters.
Select an empty string if `count` is less than or equal to 0.
Select the entire string if `count` is greater than the length of the input.
type Last (count : Integer = 1)
## Select characters until the first instance of `delimiter`. ## Select characters until the first instance of `delimiter`.
Select an empty string if `delimiter` is empty. Select an empty string if `delimiter` is empty.
Select the entire string if the input does not contain `delimiter`. Select the entire string if the input does not contain `delimiter`.
@ -34,71 +30,219 @@ type Text_Sub_Range
Select an empty string if the input does not contain `delimiter`. Select an empty string if the input does not contain `delimiter`.
type After_Last (delimiter : Text) type After_Last (delimiter : Text)
## Select characters while the predicate returns `True`. ## PRIVATE
type While (predicate : (Text -> Boolean)) Finds code-point indices corresponding to the part of the input matching the
range specified by one of the types: `Text_Sub_Range`, `Index_Sub_Range`,
`Range`.
This method may return either a single range instance or a vector of ranges.
While the input ranges may have varying steps, they are processed and split
in such a way that the ranges returned by this method always have a step
equal to 1.
find_codepoint_ranges : Text -> (Text_Sub_Range | Index_Sub_Range | Range) -> (Range | Codepoint_Ranges)
find_codepoint_ranges text subrange =
    # Every branch yields either one `Range` or a `Codepoint_Ranges`; the
    # index-based selectors are all funnelled through `By_Index`.
    case subrange of
        Before delimiter ->
            if delimiter.is_empty then (Range 0 0) else
                span = Text_Utils.span_of text delimiter
                if span.is_nothing then (Range 0 (Text_Utils.char_length text)) else
                    (Range 0 span.codeunit_start)
        Before_Last delimiter ->
            if delimiter.is_empty then (Range 0 (Text_Utils.char_length text)) else
                span = Text_Utils.last_span_of text delimiter
                if span.is_nothing then (Range 0 (Text_Utils.char_length text)) else
                    (Range 0 span.codeunit_start)
        After delimiter ->
            if delimiter.is_empty then (Range 0 (Text_Utils.char_length text)) else
                span = Text_Utils.span_of text delimiter
                if span.is_nothing then (Range 0 0) else
                    (Range span.codeunit_end (Text_Utils.char_length text))
        After_Last delimiter ->
            if delimiter.is_empty then (Range 0 0) else
                span = Text_Utils.last_span_of text delimiter
                if span.is_nothing then (Range 0 0) else
                    (Range span.codeunit_end (Text_Utils.char_length text))
        First count ->
            if count <= 0 then (Range 0 0) else
                iterator = BreakIterator.getCharacterInstance
                iterator.setText text
                # Boundary reached after advancing `count` steps from the
                # start; -1 means the text ended first, so take everything.
                end_index = iterator.next count
                Range 0 (if end_index == -1 then (Text_Utils.char_length text) else end_index)
        Last count ->
            if count <= 0 then (Range 0 0) else
                iterator = BreakIterator.getCharacterInstance
                iterator.setText text
                iterator.last
                # Walk backwards from the end; -1 means the start of the text
                # was passed, so the selection begins at 0.
                start_index = iterator.next -count
                Range (if start_index == -1 then 0 else start_index) (Text_Utils.char_length text)
        While predicate ->
            # Search for the first cluster that does NOT satisfy the predicate.
            indices = find_sub_range_end text _-> start-> end->
                predicate (Text_Utils.substring text start end) . not
            if indices.first.is_nothing then (Range 0 indices.second) else
                Range 0 indices.first
        By_Index indices ->
            case indices of
                Vector.Vector _ ->
                    # A one-element vector takes the single-descriptor path.
                    if indices.length == 1 then resolve_index_or_range text indices.first else
                        batch_resolve_indices_or_ranges text indices
                _ -> resolve_index_or_range text indices
        Sample count seed ->
            rng = Random.new seed
            indices = Random.random_indices text.length count rng
            find_codepoint_ranges text (By_Index indices)
        Every step start ->
            if step <= 0 then Error.throw (Illegal_Argument_Error "Step within Every must be positive.") else
                len = text.length
                if start >= len then Range 0 0 else
                    # Reuse `len` instead of measuring the text a second time.
                    range = Range start len step
                    find_codepoint_ranges text (By_Index range)
        Range _ _ _ ->
            find_codepoint_ranges text (By_Index subrange)
type Codepoint_Ranges
## PRIVATE
A list of codepoint ranges corresponding to the matched parts of the
input.
Arguments:
- ranges: the list of ranges. Each `Range` has `step` equal to 1.
- is_sorted_and_distinct: A helper value specifying if the ranges are
already sorted and non-intersecting.
type Codepoint_Ranges (ranges : Vector Range) (is_sorted_and_distinct : Boolean)
## PRIVATE ## PRIVATE
Finds code-point indices corresponding to the part of the input matching the `Text_Sub_Range`. Returns a new sorted list of ranges where intersecting ranges have been
to_char_range : Text -> Range merged.
to_char_range self text =
## Utility function to find char indices for Text_Sub_Range. Empty subranges are not discarded.
Arguments: sorted_and_distinct_ranges : Vector Range
- text: Text to search sorted_and_distinct_ranges self = if self.is_sorted_and_distinct then self.ranges else
- predicate: Function to test each character, receives: sorted = self.ranges.filter (range-> range.is_empty.not) . sort on=(.start)
- index: current index if sorted.is_empty then [] else
- start: index the char array to start of grapheme cluster current_ref = Ref.new sorted.first
- end: index the char array to start of next grapheme cluster builder = Vector.new_builder
If the predicate returns True for a given character, the loop will exit. sorted.tail.each range->
Returns: either a Pair of char indices for current grapheme cluster or current = current_ref.get
Pair -1 (char array length) if not found. case range.start <= current.end of
find_sub_range_end = text->predicate-> True -> current_ref.put (Range current.start (Math.max current.end range.end))
iterator = BreakIterator.getCharacterInstance False ->
iterator.setText text builder.append current
current_ref.put range
builder.append current_ref.get
builder.to_vector
loop index start end = ## PRIVATE
if end == -1 then (Pair -1 start) else Utility function to find char indices for Text_Sub_Range.
if predicate index start end then (Pair start end) else Arguments:
@Tail_Call loop (index + 1) end iterator.next - text: Text to search
- predicate: Function to test each character, receives:
- index: current index
- start: index the char array to start of grapheme cluster
- end: index the char array to start of next grapheme cluster
If the predicate returns True for a given character, the loop will exit.
Returns: either a Pair of char indices for current grapheme cluster or
Pair Nothing (char array length) if not found.
find_sub_range_end = text->predicate->
iterator = BreakIterator.getCharacterInstance
iterator.setText text
loop 0 0 iterator.next loop index start end =
if end == -1 then (Pair Nothing start) else
if predicate index start end then (Pair start end) else
@Tail_Call loop (index + 1) end iterator.next
case self of loop 0 0 iterator.next
First count ->
if count <= 0 then (Range 0 0) else ## PRIVATE
iterator = BreakIterator.getCharacterInstance resolve_index_or_range text descriptor = Panic.recover [Index_Out_Of_Bounds_Error, Illegal_Argument_Error] <|
iterator.setText text iterator = BreakIterator.getCharacterInstance
start_index = iterator.next count iterator.setText text
Range 0 (if start_index == -1 then (Text_Utils.char_length text) else start_index) case descriptor of
Last count -> Integer ->
if count <= 0 then (Range 0 0) else if descriptor < 0 then
iterator = BreakIterator.getCharacterInstance iterator.last
iterator.setText text start = iterator.next descriptor
iterator.last end = iterator.next
start_index = iterator.next -count if (start == -1) || (end == -1) then Error.throw (Index_Out_Of_Bounds_Error descriptor text.length) else
Range (if start_index == -1 then 0 else start_index) (Text_Utils.char_length text) Range start end
Before delimiter -> Range _ _ _ ->
if delimiter.is_empty then (Range 0 0) else len = text.length
span = Text_Utils.span_of text delimiter true_range = normalize_range descriptor len
if span.is_nothing then (Range 0 (Text_Utils.char_length text)) else if descriptor.is_empty then Range 0 0 else
(Range 0 span.codeunit_start) case true_range.step == 1 of
Before_Last delimiter -> True -> Span_Module.range_to_char_indices text true_range
if delimiter.is_empty then (Range 0 (Text_Utils.char_length text)) else False ->
span = Text_Utils.last_span_of text delimiter ranges = Vector.new_builder
if span.is_nothing then (Range 0 (Text_Utils.char_length text)) else if true_range.step <= 0 then panic_on_non_positive_step
(Range 0 span.codeunit_start) go start_index current_grapheme =
After delimiter -> end_index = iterator.next
if delimiter.is_empty then (Range 0 (Text_Utils.char_length text)) else if (start_index == -1) || (end_index == -1) || (current_grapheme >= true_range.end) then Nothing else
span = Text_Utils.span_of text delimiter ranges.append (Range start_index end_index)
if span.is_nothing then (Range 0 0) else ## We advance by step-1, because we already advanced by
(Range span.codeunit_end (Text_Utils.char_length text)) one grapheme when looking for the end of the previous
After_Last delimiter -> one.
if delimiter.is_empty then (Range 0 0) else @Tail_Call go (iterator.next true_range.step-1) current_grapheme+true_range.step
span = Text_Utils.last_span_of text delimiter
if span.is_nothing then (Range 0 0) else go (iterator.next true_range.start) true_range.start
(Range span.codeunit_end (Text_Utils.char_length text)) Codepoint_Ranges ranges.to_vector is_sorted_and_distinct=True
While predicate ->
indices = find_sub_range_end text _-> start-> end-> ## PRIVATE
predicate (Text_Utils.substring text start end) . not Returns an array of UTF-16 code-unit indices corresponding to the beginning
if indices.first == -1 then (Range 0 indices.second) else and end of each consecutive grapheme cluster.
Range 0 indices.first
These indices are consistent with the vector returned by `Text.char_vector`.
character_ranges text =
    break_iterator = BreakIterator.getCharacterInstance
    break_iterator.setText text
    builder = Vector.new_builder
    # Walks consecutive boundaries; each adjacent pair becomes one `Range`.
    # The walk stops once the iterator answers -1 for the next boundary.
    collect boundary_start boundary_end = if boundary_end == -1 then Nothing else
        builder.append (Range boundary_start boundary_end)
        @Tail_Call collect boundary_end break_iterator.next
    collect break_iterator.first break_iterator.next
    builder.to_vector
## PRIVATE
   Resolves a vector of descriptors (each an `Integer` or a `Range`) against
   `text` and returns a `Codepoint_Ranges` listing the matched ranges in the
   order the descriptors were given. The result is flagged as not sorted.

   Panics raised for `Index_Out_Of_Bounds_Error` or `Illegal_Argument_Error`
   are recovered at the outer level.
batch_resolve_indices_or_ranges text descriptors = Panic.recover [Index_Out_Of_Bounds_Error, Illegal_Argument_Error] <|
    # This is pre-computing the ranges for all characters in the string, which
    # may be much more than necessary, for example if all ranges reference only
    # the beginning of the string. In the future we may want to replace this
    # with a lazy data structure which advances the break iterator only on
    # demand, using a Vector.Builder to cache any prior ranges for random
    # access.
    characters = character_ranges text
    ranges = Vector.new_builder
    descriptors.each descriptor->
        case descriptor of
            Integer ->
                ranges.append (Panic.rethrow <| characters.at descriptor)
            Range _ _ _ ->
                # Empty ranges contribute nothing; they are skipped before any
                # validation takes place.
                if descriptor.is_empty then Nothing else
                    true_range = normalize_range descriptor characters.length
                    case true_range.step == 1 of
                        True ->
                            # A contiguous run collapses into a single range
                            # from its first to its last cluster.
                            first_grapheme = Panic.rethrow <| characters.at true_range.start
                            last_grapheme = Panic.rethrow <| characters.at true_range.end-1
                            ranges.append (Range first_grapheme.start last_grapheme.end)
                        False ->
                            # No extra start check is needed here:
                            # `normalize_range` has already panicked if the
                            # start lies outside `characters`.
                            true_range.to_vector.each ix->
                                ranges.append (Panic.rethrow <| characters.at ix)
    Codepoint_Ranges ranges.to_vector is_sorted_and_distinct=False
# Shared failure path for ranges whose step is zero or negative: raises a panic
# carrying an `Illegal_Argument_Error`.
## PRIVATE
panic_on_non_positive_step =
Panic.throw (Illegal_Argument_Error "Range step must be positive.")
## PRIVATE
   Validates a range that is about to be used for indexing and clips its end
   to `length`.

   Panics if the step is not positive, if either bound is negative, or if the
   start is not smaller than `length`.
normalize_range range length =
    if range.step <= 0 then panic_on_non_positive_step
    # We may add support for negative indices in the future.
    has_negative_bound = (range.start < 0) || (range.end < 0)
    if has_negative_bound then
        Panic.throw (Illegal_Argument_Error "Ranges with negative indices are not supported for indexing.")
    starts_outside = range.start >= length
    if starts_outside then
        Panic.throw (Index_Out_Of_Bounds_Error range.start length)
    case range.end >= length of
        True -> Range range.start length range.step
        False -> range

View File

@ -1128,22 +1128,6 @@ type Builder
Array.copy old_array 0 new_array 0 self.length Array.copy old_array 0 new_array 0 self.length
Vector new_array Vector new_array
## UNSTABLE
An error for when an index is out of bounds in a vector.
Arguments:
- index: The requested index in the vector.
- length: The length of the vector.
type Index_Out_Of_Bounds_Error index length
## UNSTABLE
Pretty prints an index out of bounds error.
Index_Out_Of_Bounds_Error.to_display_text : Text
Index_Out_Of_Bounds_Error.to_display_text self =
"The index " + self.index.to_text + " is out of bounds in a vector with length " + self.length.to_text + "."
## UNSTABLE ## UNSTABLE
An error that indicates that the vector is empty. An error that indicates that the vector is empty.

View File

@ -198,6 +198,22 @@ type Illegal_Argument_Error
handle_java_exception = handle_java_exception =
Panic.catch_java IllegalArgumentException handler=(cause-> Error.throw (Illegal_Argument_Error cause.getMessage cause)) Panic.catch_java IllegalArgumentException handler=(cause-> Error.throw (Illegal_Argument_Error cause.getMessage cause))
## UNSTABLE
An error indicating that a requested index was out of bounds of a collection.
Arguments:
- index: The requested index.
- length: The length of the collection.
type Index_Out_Of_Bounds_Error index length
## UNSTABLE
Pretty prints an index out of bounds error.
Index_Out_Of_Bounds_Error.to_display_text : Text
Index_Out_Of_Bounds_Error.to_display_text self =
"The index " + self.index.to_text + " is out of bounds in a collection of length " + self.length.to_text + "."
## PRIVATE ## PRIVATE
Wraps a dataflow error lifted to a panic, making possible to distinguish it Wraps a dataflow error lifted to a panic, making possible to distinguish it
from other panics. from other panics.

View File

@ -0,0 +1,39 @@
from Standard.Base import all
import Standard.Base.System
polyglot java import java.util.Random as Java_Random
polyglot java import org.enso.base.Random_Utils
# The seed is simply the value of `System.nano_time` at the moment of the call.
## UNSTABLE
Returns a default seed to use for random number generation.
The returned seed may differ between each call to this method.
get_default_seed : Integer
get_default_seed = System.nano_time
## Builds a random number generator from the given seed.

   When no seed is passed, `get_default_seed` supplies one.
new : Integer -> Random_Number_Generator
new seed=get_default_seed =
    java_rng = Java_Random.new seed
    Random_Number_Generator java_rng
# `java_random` is the wrapped Java generator; `new` in this module fills it
# with a `java.util.Random` created from the seed.
type Random_Number_Generator
## A random number generator.
type Random_Number_Generator java_random
## Produces a new vector holding a random sample of the input vector, drawn
   without replacement.

   When more elements are requested than the input vector holds, the result
   is a random permutation of the whole input vector.
sample : Vector Any -> Integer -> Random_Number_Generator -> Vector Any
sample vector k rng =
    Vector.Vector (Random_Utils.sample vector.to_array k rng.java_random)
## Draws `k` indices from the range [0, n-1] without replacement.

   For `k >= n` the result is a random permutation of all the indices.
random_indices : Integer -> Integer -> Random_Number_Generator -> Vector Integer
random_indices n k rng =
    Vector.Vector (Random_Utils.random_indices n k rng.java_random)

View File

@ -1279,15 +1279,6 @@ type Aggregate_Column
print : Nothing print : Nothing
print self = self.values.print print self = self.values.print
## UNSTABLE
A type representing an error for an out-of-bounds index in a column.
Arguments:
- index: The index of the element requested.
- length: The length of the column in which `index` was out of bounds.
type Index_Out_Of_Bounds_Error index length
## UNSTABLE ## UNSTABLE
Pretty-prints the index out of bounds error. Pretty-prints the index out of bounds error.

View File

@ -13,6 +13,9 @@ public abstract class ConstructorNode extends ExpressionNode {
private final AtomConstructor constructor; private final AtomConstructor constructor;
ConstructorNode(AtomConstructor constructor) { ConstructorNode(AtomConstructor constructor) {
if (constructor == null) {
throw new NullPointerException("Constructor cannot be null");
}
this.constructor = constructor; this.constructor = constructor;
} }

View File

@ -0,0 +1,38 @@
package org.enso.base;
import java.util.Arrays;
import java.util.Random;
/** Utilities for drawing random samples without replacement. */
public class Random_Utils {
  /**
   * Samples k random values from the input, without replacement.
   *
   * <p>The input array is not modified; the shuffle runs on a copy.
   *
   * @param array the values to sample from
   * @param k the requested sample size; values below 0 are treated as 0 and values above {@code
   *     array.length} are treated as {@code array.length}
   * @param rng the source of randomness
   * @return a new array holding the sampled values
   */
  public static Object[] sample(Object[] array, int k, Random rng) {
    int count = clampCount(k, array.length);
    var copy = Arrays.copyOf(array, array.length);
    shuffleFirstInPlace(copy, count, rng);
    return Arrays.copyOf(copy, count);
  }

  /**
   * Samples k distinct indices from the range [0, n-1].
   *
   * @param n the size of the index range
   * @param k the requested number of indices; values below 0 are treated as 0 and values above
   *     {@code n} are treated as {@code n} (yielding a random permutation of all indices)
   * @param rng the source of randomness
   * @return a new array holding the sampled indices
   */
  public static Long[] random_indices(int n, int k, Random rng) {
    /*
     * TODO while acceptable for `k` close to `n`, for `k <<< n`, this algorithm is not efficient, a
     * better one should be implemented, see: https://www.pivotaltracker.com/story/show/182853142
     */
    Long[] indices = new Long[n];
    for (int i = 0; i < n; ++i) {
      indices[i] = (long) i;
    }
    int count = clampCount(k, n);
    shuffleFirstInPlace(indices, count, rng);
    return Arrays.copyOf(indices, count);
  }

  /** Restricts a requested sample size to the range [0, available]. */
  private static int clampCount(int requested, int available) {
    // The lower bound matters: a negative size would make Arrays.copyOf throw.
    return Math.max(0, Math.min(requested, available));
  }

  /**
   * Moves a uniformly chosen sample into the first {@code k} slots of the array by swapping each
   * slot with a randomly chosen slot at or after it.
   */
  private static <T> void shuffleFirstInPlace(T[] array, int k, Random rng) {
    int n = array.length;
    int limit = Math.min(k, n);
    for (int i = 0; i < limit; ++i) {
      // i < n always holds here, so the bound given to nextInt is at least 1.
      int r = i + rng.nextInt(n - i);
      T tmp = array[i];
      array[i] = array[r];
      array[r] = tmp;
    }
  }
}

View File

@ -13,8 +13,8 @@ spec = Test.group "Columns" <|
test_column.at 0 . should_equal 1 test_column.at 0 . should_equal 1
test_column.at 2 . should_equal 5 test_column.at 2 . should_equal 5
test_column.at 5 . should_equal 6 test_column.at 5 . should_equal 6
test_column.at 6 . should_fail_with Column.Index_Out_Of_Bounds_Error test_column.at 6 . should_fail_with Index_Out_Of_Bounds_Error
empty_column.at 0 . should_fail_with Column.Index_Out_Of_Bounds_Error empty_column.at 0 . should_fail_with Index_Out_Of_Bounds_Error
Test.specify "should be able to take the first n elements" <| Test.specify "should be able to take the first n elements" <|
expected_1 = Column.from_vector "Test" [1, 3, 5] expected_1 = Column.from_vector "Test" [1, 3, 5]

View File

@ -0,0 +1,38 @@
from Standard.Base import all
from Standard.Base.Data.Text.Text_Sub_Range import all
import Standard.Test
spec = Test.group "Text_Sub_Range.Codepoint_Ranges" <|
# Helper: wraps `ranges` in a `Codepoint_Ranges` flagged as not yet sorted and
# returns its `sorted_and_distinct_ranges`.
run ranges =
Codepoint_Ranges ranges False . sorted_and_distinct_ranges
Test.specify "should be able to sort correctly merge neighboring sequences" <|
run [] . should_equal []
run [Range 0 0] . should_equal []
run [Range 0 10] . should_equal [Range 0 10]
run [Range 0 10, Range 2 4] . should_equal [Range 0 10]
run [Range 0 5, Range 5 10] . should_equal [Range 0 10]
run [Range 5 10, Range 0 0, Range 0 1, Range 1 5] . should_equal [Range 0 10]
run [Range 0 1, Range 1 2] . should_equal [Range 0 2]
run [Range 6 7, Range 7 8, Range 5 5, Range 0 1, Range 2 3] . should_equal [Range 0 1, Range 2 3, Range 6 8]
run [Range 5 10, Range 3 6, Range 3 6, Range 3 5, Range 3 7, Range 0 1] . should_equal [Range 0 1, Range 3 10]
run [Range 0 1, Range 0 1] . should_equal [Range 0 1]
# NOTE(review): the next assertion repeats the `[Range 0 1, Range 1 2]` case
# that is already checked a few lines above.
run [Range 0 1, Range 1 2] . should_equal [Range 0 2]
# The expected ranges below count UTF-16 code units rather than code points:
# `facepalm` consists of 5 code points yet is expected to span `Range 0 7`.
Test.specify "should correctly split a text into grapheme cluster ranges expressed in codepoint indices" <|
character_ranges "" . should_equal []
character_ranges "A" . should_equal [Range 0 1]
character_ranges "abc" . should_equal [Range 0 1, Range 1 2, Range 2 3]
character_ranges 'śs\u0301S' . should_equal [Range 0 1, Range 1 3, Range 3 4]
kshi = '\u0915\u094D\u0937\u093F'
facepalm = '\u{1F926}\u{1F3FC}\u200D\u2642\uFE0F'
accent_1 = '\u00E9'
accent_2 = '\u0065\u{301}'
character_ranges kshi . should_equal [Range 0 4]
character_ranges facepalm . should_equal [Range 0 7]
character_ranges accent_1 . should_equal [Range 0 1]
character_ranges accent_2 . should_equal [Range 0 2]
character_ranges kshi+facepalm+accent_1+accent_2 . should_equal [Range 0 4, Range 4 11, Range 11 12, Range 12 14]
# Entry point so the spec file can be run on its own.
main = Test.Suite.run_main spec

View File

@ -153,7 +153,9 @@ spec =
Test.specify "should return a dataflow error when accessing characters out of bounds" <| Test.specify "should return a dataflow error when accessing characters out of bounds" <|
str = kshi + facepalm + accent_1 + accent_2 str = kshi + facepalm + accent_1 + accent_2
str.at -5 . should_fail_with Index_Out_Of_Bounds_Error str.at -5 . should_fail_with Index_Out_Of_Bounds_Error
str.at -5 . catch . should_equal (Index_Out_Of_Bounds_Error -5 4)
str.at 4 . should_fail_with Index_Out_Of_Bounds_Error str.at 4 . should_fail_with Index_Out_Of_Bounds_Error
str.at 4 . catch . should_equal (Index_Out_Of_Bounds_Error 4 4)
Test.specify "should be able to split the text into words" <| Test.specify "should be able to split the text into words" <|
"I have not one, but two cats.".words . should_equal ['I', 'have', 'not', 'one', ',', 'but', 'two', 'cats', '.'] "I have not one, but two cats.".words . should_equal ['I', 'have', 'not', 'one', ',', 'but', 'two', 'cats', '.']
@ -239,12 +241,100 @@ spec =
text_2 = '\n\t\a\b\f\r\v\e\'' text_2 = '\n\t\a\b\f\r\v\e\''
text_2.to_text.should_equal "'\n\t\a\b\f\r\v\e\''" text_2.to_text.should_equal "'\n\t\a\b\f\r\v\e\''"
Test.specify "should allow taking or dropping every other character" <|
    abcd = "ABCD"
    abcde = "ABCDE"
    abcdefg = "ABCDEFG"

    # `take (Every n first=k)` is expected to keep positions k, k+n, k+2n, ...
    abcde.take (Every 2) . should_equal "ACE"
    abcd.take (Every 2) . should_equal "AC"
    abcd.take (Every 2 first=1) . should_equal "BD"
    abcde.take (Every 2 first=1) . should_equal "BD"
    abcde.take (Every 3) . should_equal "AD"
    abcdefg.take (Every 3) . should_equal "ADG"
    abcdefg.take (Every 3 first=1) . should_equal "BE"
    abcdefg.take (Every 3 first=6) . should_equal "G"
    abcdefg.take (Every 10) . should_equal "A"

    # `drop` with the same selector is expected to return the complement.
    abcde.drop (Every 2) . should_equal "BD"
    abcd.drop (Every 2) . should_equal "BD"
    abcd.drop (Every 2 first=1) . should_equal "AC"
    abcde.drop (Every 2 first=1) . should_equal "ACE"
    abcde.drop (Every 3) . should_equal "BCE"
    abcdefg.drop (Every 3) . should_equal "BCEF"
    abcdefg.drop (Every 3 first=1) . should_equal "ACDFG"
    "ABCDEFGH".drop (Every 3 first=1) . should_equal "ACDFG"
    "ABCDEFGHI".drop (Every 3 first=1) . should_equal "ACDFGI"
Test.specify "should allow selecting a random sample of a substring" <|
    # With identical characters the expected text does not depend on which
    # positions are picked, so these two checks need no seed.
    "AAAAA".take (Sample 3) . should_equal "AAA"
    "AAAAA".drop (Sample 3) . should_equal "AA"

    ## These tests are very brittle and can be invalidated by a valid
       implementation modification, so they may need to be updated.
    "ABCDEFGH".take (Sample 0) . should_equal ""
    "ABCDEFGH".take (Sample 8 seed=42) . should_equal "FGCHABED"
    "ABCDEFGH".take (Sample 4 seed=42) . should_equal "FGCH"
    "ABCDEFGH".take (Sample 2 seed=42) . should_equal "FG"
    "ABCDEFGH".take (Sample 1 seed=42) . should_equal "F"
    "ABCDEFGH".take (Sample 100 seed=42) . should_equal "FGCHABED"

    # Dropping a sample keeps the remaining characters in their original order.
    "ABCDEFGH".drop (Sample 0) . should_equal "ABCDEFGH"
    "ABCDEFGH".drop (Sample 1 seed=42) . should_equal "ABCDEGH"
    "ABCDEFGH".drop (Sample 2 seed=42) . should_equal "ABCDEH"
    "ABCDEFGH".drop (Sample 4 seed=42) . should_equal "ABDE"
    "ABCDEFGH".drop (Sample 8 seed=42) . should_equal ""
    "ABCDEFGH".drop (Sample 100 seed=42) . should_equal ""
Test.specify "should allow taking or dropping many indices or subranges (possibly overlapping)" <|
    # Large inputs shared by several assertions below.
    t123 = "123"*1000
    t123456 = "123456"*1000
    abcd = "ABCD"*1000
    digits = "0123456789"

    # Long index vectors, including repeated indices and overlapping ranges.
    t123.take (By_Index (Vector.new 3000 ix-> 2999-ix)) . should_equal "321"*1000
    t123.take (By_Index (Vector.new 3000 _-> 0)) . should_equal "1"*3000
    t123456.take (By_Index (Vector.new 100 ix-> Range 6*ix+1 6*ix+3)) . should_equal "23"*100
    "AB"*1000 . take (By_Index (Vector.new 100 ix-> Range ix+1 ix+5)) . should_equal "BABAABAB"*50

    t123.drop (By_Index (Vector.new 300 ix-> 2999-ix)) . should_equal "123"*900
    t123.drop (By_Index (Vector.new 3000 _-> 0)) . should_equal "23"+"123"*999
    t123456.drop (By_Index (Vector.new 1000 ix-> Range 6*ix+1 6*ix+3)) . should_equal "1456"*1000
    "ABCD"*25 . drop (By_Index (Vector.new 90 ix-> Range ix+1 ix+5)) . should_equal "ACDABCD"

    # Stepped ranges, alone and combined inside `By_Index`.
    abcd.take (Range 0 4000 4) . should_equal "A"*1000
    abcd.take (Every 4) . should_equal "A"*1000
    abcd.take (By_Index [Range 0 4000 4, Range 1 4000 4]) . should_equal ("A"*1000 + "B"*1000)
    abcd.take (By_Index [Range 0 4000 4, Range 2 4000 4]) . should_equal ("A"*1000 + "C"*1000)
    abcd.drop (Range 0 4000 4) . should_equal "BCD"*1000
    abcd.drop (Every 4) . should_equal "BCD"*1000
    abcd.drop (By_Index [Range 0 4000 4, Range 1 4000 4]) . should_equal "CD"*1000
    abcd.drop (By_Index [Range 0 4000 4, Range 2 4000 4]) . should_equal "BD"*1000

    # Small mixed cases: adjacent ranges, out-of-order ranges, duplicate indices.
    digits.take (By_Index [Range 0 4, Range 4 6, Range 8 9]) . should_equal "0123458"
    digits.take (By_Index [Range 4 6, Range 0 4, 0, 0]) . should_equal "45012300"
    digits.drop (By_Index [Range 0 4, Range 4 6, Range 8 9]) . should_equal "679"
    digits.drop (By_Index [Range 4 6, Range 0 4, 0, 0]) . should_equal "6789"
    digits.drop (By_Index [Range 2 5, Range 0 3, 0, 0]) . should_equal "56789"
Test.specify "should allow selecting substrings by characters" <| Test.specify "should allow selecting substrings by characters" <|
txt = kshi + facepalm + accent_1 + accent_2 txt = kshi + facepalm + accent_1 + accent_2
txt.take (First 2) . should_equal (kshi + facepalm) txt.take (First 2) . should_equal (kshi + facepalm)
txt.drop (First 2) . should_equal (accent_1 + accent_2) txt.drop (First 2) . should_equal (accent_1 + accent_2)
txt.take (Last 2) . should_equal (accent_1 + accent_2) txt.take (Last 2) . should_equal (accent_1 + accent_2)
txt.drop (Last 2) . should_equal (kshi + facepalm) txt.drop (Last 2) . should_equal (kshi + facepalm)
txt.take (Range 0 2) . should_equal (kshi + facepalm)
txt.take (By_Index (Range 0 2)) . should_equal (kshi + facepalm)
txt.drop (Range 0 2) . should_equal (accent_1 + accent_2)
txt.take (Range 2 4) . should_equal (accent_1 + accent_2)
txt.drop (Range 2 4) . should_equal (kshi + facepalm)
txt.take (Every 2) . should_equal (kshi + accent_1)
txt.take (Every 2 first=1) . should_equal (facepalm + accent_2)
txt.drop (Every 2) . should_equal (facepalm + accent_2)
txt.take (Range 0 4 2) . should_equal (kshi + accent_1)
txt.take (By_Index [0, 3]) . should_equal (kshi + accent_2)
txt.take (By_Index 0) . should_equal kshi
txt.take (By_Index 1) . should_equal facepalm
txt.take (By_Index 2) . should_equal accent_1
txt.take (By_Index 3) . should_equal accent_2
txt.drop (By_Index [0, 3]) . should_equal (facepalm + accent_1)
txt.drop (By_Index [0, 3, 0]) . should_equal (facepalm + accent_1)
txt.drop (By_Index [0, 3, 0, 2, 1]) . should_equal ""
txt.take (By_Index [0, 3, 0, 2, 1]) . should_equal (kshi + accent_2 + kshi + accent_1 + facepalm)
txt.take (By_Index [0, 0, Range 0 2]) . should_equal (kshi + kshi + kshi + facepalm)
txt.drop (By_Index [Range 2 4, Range 0 2]) . should_equal ""
Test.specify "take should work as in the examples" <| Test.specify "take should work as in the examples" <|
"Hello World!".take First . should_equal "H" "Hello World!".take First . should_equal "H"
@ -266,47 +356,70 @@ spec =
"Hello World!".take (While c->c!=" ") . should_equal "Hello" "Hello World!".take (While c->c!=" ") . should_equal "Hello"
"Hello World!".take (While c->c!="z") . should_equal "Hello World!" "Hello World!".take (While c->c!="z") . should_equal "Hello World!"
"Hello World!".take (Range 3 5) . should_equal "lo" "Hello World!".take (Range 3 5) . should_equal "lo"
"Hello World!".take (Range -3 -1) . should_equal "ld"
"Hello World!".take (Range -3 Nothing) . should_equal "ld!"
"Hello World!".take (Range 5 Nothing) . should_equal " World!"
"Hello World!".take (Range 5 12) . should_equal " World!" "Hello World!".take (Range 5 12) . should_equal " World!"
"Hello World!".take (Range 12 12) . should_equal "" "Hello World!".take (Range 6 12 2) . should_equal "Wrd"
"Hello World!".take (Every 2 first=6) . should_equal "Wrd"
"Hello World!".take (Every 3) . should_equal "HlWl"
"Hello World!".take (By_Index 0) . should_equal "H"
"Hello World!".take (By_Index [1, 0, 0, 6, 0]) . should_equal "eHHWH"
"Hello World!".take (By_Index [Range 0 3, 6, Range 6 12 2]) . should_equal "HelWWrd"
"Hello World!".take (Sample 3 seed=42) . should_equal "l d"
Test.specify "take should report errors for invalid Ranges" <| Test.specify "take should report errors for start indices out of bounds but just go till the end if the end index is OOB" <|
"Hello World!".take (Range 0 14) . should_fail_with Index_Out_Of_Bounds_Error txt = "Hello World!"
"Hello World!".take (Range 13 12) . should_fail_with Index_Out_Of_Bounds_Error txt.take (Range 0 14) . should_equal txt
"Hello World!".take (Range -13 10) . should_fail_with Index_Out_Of_Bounds_Error txt.take (Range 6 100) . should_equal "World!"
"Hello World!".take (Range 0 -20) . should_fail_with Index_Out_Of_Bounds_Error txt.take (Range txt.length-1 txt.length) . should_equal "!"
"Hello World!".take (Range 0 10 2) . should_fail_with Illegal_Argument_Error txt.take (Range txt.length txt.length) . should_fail_with Index_Out_Of_Bounds_Error
"Hello World!".take (Range 0 10 -1) . should_fail_with Illegal_Argument_Error txt.take (Range txt.length txt.length) . catch . should_equal (Index_Out_Of_Bounds_Error txt.length txt.length)
txt.take (Range txt.length 100) . should_fail_with Index_Out_Of_Bounds_Error
txt.take (First 100) . should_equal txt
txt.take (Last 100) . should_equal txt
txt.take (By_Index 100) . should_fail_with Index_Out_Of_Bounds_Error
txt.take (By_Index 13) . should_fail_with Index_Out_Of_Bounds_Error
txt.take (By_Index [0, 1, 13]) . should_fail_with Index_Out_Of_Bounds_Error
txt.take (By_Index [0, Range 14 15, 1]) . should_fail_with Index_Out_Of_Bounds_Error
txt.take (By_Index [0, 1, Range 6 100]) . should_equal "HeWorld!"
txt.take (By_Index [0, 1, Range 6 100 2]) . should_equal "HeWrd"
txt.take (Range 13 12) . should_fail_with Index_Out_Of_Bounds_Error
"".take (Range 0 0) . should_fail_with Index_Out_Of_Bounds_Error
"".take (Range 0 0) . catch . should_equal (Index_Out_Of_Bounds_Error 0 0)
"".take (By_Index 0) . should_fail_with Index_Out_Of_Bounds_Error
"ABC".take (By_Index 3) . should_fail_with Index_Out_Of_Bounds_Error
txt.take (Range 13 20) . should_fail_with Index_Out_Of_Bounds_Error
txt.take (Range 13 20 2) . should_fail_with Index_Out_Of_Bounds_Error
txt.take (By_Index [Range 0 2, Range 13 20]) . should_fail_with Index_Out_Of_Bounds_Error
txt.take (By_Index [Range 0 0, Range 13 10, Range 2 2 2]) . should_equal ""
txt.take (By_Index [Range 0 2 2, Range 13 20 2]) . should_fail_with Index_Out_Of_Bounds_Error
txt.take (By_Index [Range 0 2 2, Range 13 20 2]) . catch . should_equal (Index_Out_Of_Bounds_Error 13 12)
txt.take (By_Index [Range 0 2 2, Range txt.length 100 2]) . should_fail_with Index_Out_Of_Bounds_Error
"".take (By_Index 0) . should_fail_with Index_Out_Of_Bounds_Error
Test.specify "take should work on grapheme clusters" <| Test.specify "take should work on grapheme clusters" <|
'He\u{302}llo\u{308} Wo\u{301}rld!'.take (First 2) . should_equal 'He\u{302}' txt_1 = 'He\u0302llo\u0308 Wo\u0301rld!'
'He\u{302}llo\u{308} Wo\u{301}rld!'.take (First 5) . should_equal 'He\u{302}llo\u{308}' txt_2 = 'He\u0302llo\u0308 Wo\u0308rld!'
'He\u{302}llo\u{308} Wo\u{301}rld!'.take (Last 6) . should_equal 'Wo\u{301}rld!' txt_1.take (First 2) . should_equal 'He\u{302}'
'He\u{302}llo\u{308} Wo\u{301}rld!'.take (Last 5) . should_equal 'o\u{301}rld!' txt_1.take (First 5) . should_equal 'He\u{302}llo\u{308}'
'He\u{302}llo\u{308} Wo\u{301}rld!'.take (Before 'e\u{302}') . should_equal 'H' txt_1.take (Last 6) . should_equal 'Wo\u{301}rld!'
'He\u{302}llo\u{308} Wo\u{301}rld!'.take (Before 'ê') . should_equal 'H' txt_1.take (Last 5) . should_equal 'o\u{301}rld!'
'He\u{302}llo\u{308} Wo\u{301}rld!'.take (Before 'e') . should_equal 'He\u{302}llo\u{308} Wo\u{301}rld!' txt_1.take (Before 'e\u{302}') . should_equal 'H'
'He\u{302}llo\u{308} Wo\u{308}rld!'.take (Before_Last 'o\u{308}') . should_equal 'He\u{302}llo\u{308} W' txt_1.take (Before 'ê') . should_equal 'H'
'He\u{302}llo\u{308} Wo\u{308}rld!'.take (Before_Last 'ö') . should_equal 'He\u{302}llo\u{308} W' txt_1.take (Before 'e') . should_equal txt_1
'He\u{302}llo\u{308} Wo\u{308}rld!'.take (Before_Last 'o') . should_equal 'He\u{302}llo\u{308} Wo\u{308}rld!' txt_2.take (Before_Last 'o\u{308}') . should_equal 'He\u{302}llo\u{308} W'
'He\u{302}llo\u{308} Wo\u{301}rld!'.take (After 'e\u{302}') . should_equal 'llo\u{308} Wo\u{301}rld!' txt_2.take (Before_Last 'ö') . should_equal 'He\u{302}llo\u{308} W'
'He\u{302}llo\u{308} Wo\u{301}rld!'.take (After 'ê') . should_equal 'llo\u{308} Wo\u{301}rld!' txt_2.take (Before_Last 'o') . should_equal txt_2
'He\u{302}llo\u{308} Wo\u{301}rld!'.take (After 'e\u{308}') . should_equal '' txt_1.take (After 'e\u{302}') . should_equal 'llo\u{308} Wo\u{301}rld!'
'He\u{302}llo\u{308} Wo\u{301}rld!'.take (After 'e') . should_equal '' txt_1.take (After 'ê') . should_equal 'llo\u{308} Wo\u{301}rld!'
'He\u{302}llo\u{308} Wo\u{308}rld!'.take (After_Last 'o\u{308}') . should_equal 'rld!' txt_1.take (After 'e\u{308}') . should_equal ''
'He\u{302}llo\u{308} Wo\u{308}rld!'.take (After_Last 'ö') . should_equal 'rld!' txt_1.take (After 'e') . should_equal ''
'He\u{302}llo\u{308} Wo\u{308}rld!'.take (After_Last 'o') . should_equal '' txt_2.take (After_Last 'o\u{308}') . should_equal 'rld!'
'He\u{302}llo\u{308} Wo\u{308}rld!'.take (While c->c!='e\u{302}') . should_equal 'H' txt_2.take (After_Last 'ö') . should_equal 'rld!'
'He\u{302}llo\u{308} Wo\u{308}rld!'.take (While c->c!='ê') . should_equal 'H' txt_2.take (After_Last 'o') . should_equal ''
'He\u{302}llo\u{308} Wo\u{308}rld!'.take (While c->c!='e') . should_equal 'He\u{302}llo\u{308} Wo\u{308}rld!' txt_2.take (While c->c!='e\u{302}') . should_equal 'H'
'He\u{302}llo\u{308} Wo\u{308}rld!'.take (Range 3 5) . should_equal 'lo\u{308}' txt_2.take (While c->c!='ê') . should_equal 'H'
'He\u{302}llo\u{308} Wo\u{308}rld!'.take (Range -3 -1) . should_equal 'ld' txt_2.take (While c->c!='e') . should_equal txt_2
'He\u{302}llo\u{308} Wo\u{308}rld!'.take (Range -3 Nothing) . should_equal 'ld!' txt_2.take (Range 3 5) . should_equal 'lo\u{308}'
'He\u{302}llo\u{308} Wo\u{308}rld!'.take (Range 5 Nothing) . should_equal ' Wo\u{308}rld!' txt_2.take (Range 5 12) . should_equal ' Wo\u{308}rld!'
'He\u{302}llo\u{308} Wo\u{308}rld!'.take (Range 5 12) . should_equal ' Wo\u{308}rld!'
'He\u{302}llo\u{308} Wo\u{308}rld!'.take (Range 12 12) . should_equal ''
Test.specify "take should work on emojis" <| Test.specify "take should work on emojis" <|
'✨🚀🚧😍😃😎😙😉☺'.take First . should_equal '✨' '✨🚀🚧😍😃😎😙😉☺'.take First . should_equal '✨'
@ -320,11 +433,10 @@ spec =
'✨🚀🚧😍😃😍😎😙😉☺'.take (After_Last '😍') . should_equal '😎😙😉☺' '✨🚀🚧😍😃😍😎😙😉☺'.take (After_Last '😍') . should_equal '😎😙😉☺'
'✨🚀🚧😍😃😍😎😙😉☺'.take (While c->c!="😃") . should_equal '✨🚀🚧😍' '✨🚀🚧😍😃😍😎😙😉☺'.take (While c->c!="😃") . should_equal '✨🚀🚧😍'
'✨🚀🚧😍😃😍😎😙😉☺'.take (Range 3 6) . should_equal '😍😃😍' '✨🚀🚧😍😃😍😎😙😉☺'.take (Range 3 6) . should_equal '😍😃😍'
'✨🚀🚧😍😃😍😎😙😉☺'.take (Range 3 Nothing) . should_equal '😍😃😍😎😙😉☺'
'✨🚀🚧😍😃😍😎😙😉☺'.take (Range -3 Nothing) . should_equal '😙😉☺'
'✨🚀🚧😍😃😍😎😙😉☺'.take (Range -3 -1) . should_equal '😙😉'
Test.specify "take should correctly handle edge cases" <| Test.specify "take should correctly handle edge cases" <|
"ABC".take . should_equal "A"
"".take First . should_equal "" "".take First . should_equal ""
"".take Last . should_equal "" "".take Last . should_equal ""
@ -340,7 +452,6 @@ spec =
"".take (While _->True) . should_equal "" "".take (While _->True) . should_equal ""
"".take (Range 0 0) . should_equal ""
'ABC\u{301}'.take (Range 0 0) . should_equal "" 'ABC\u{301}'.take (Range 0 0) . should_equal ""
'ABC\u{301}'.take (After "") . should_equal 'ABC\u{301}' 'ABC\u{301}'.take (After "") . should_equal 'ABC\u{301}'
@ -348,6 +459,18 @@ spec =
'ABC\u{301}'.take (Before "") . should_equal "" 'ABC\u{301}'.take (Before "") . should_equal ""
'ABC\u{301}'.take (Before_Last "") . should_equal 'ABC\u{301}' 'ABC\u{301}'.take (Before_Last "") . should_equal 'ABC\u{301}'
"ABC".take (By_Index -1) . should_equal "C"
"ABC".take (By_Index [-1, -1, -1, -3, 2]) . should_equal "CCCAC"
"ABC".take (By_Index []) . should_equal ""
"ABC".take (By_Index (Range -2 -1)) . should_fail_with Illegal_Argument_Error
"".take (Every 2) . should_equal ""
"".take (Every 2 first=1) . should_equal ""
"ABC".take (Every 5) . should_equal "A"
"A".take (Every 5) . should_equal "A"
"ABC".take (Every 5 first=4) . should_equal ""
"".take (Sample 0) . should_equal ""
"".take (Sample 100) . should_equal ""
Test.specify "drop should work as in the examples" <| Test.specify "drop should work as in the examples" <|
"Hello World!".drop First . should_equal "ello World!" "Hello World!".drop First . should_equal "ello World!"
"Hello World!".drop (First 5) . should_equal " World!" "Hello World!".drop (First 5) . should_equal " World!"
@ -367,47 +490,61 @@ spec =
"Hello World!".drop (While c->c!=" ") . should_equal " World!" "Hello World!".drop (While c->c!=" ") . should_equal " World!"
"Hello World!".drop (While c->c!="z") . should_equal "" "Hello World!".drop (While c->c!="z") . should_equal ""
"Hello World!".drop (Range 3 5) . should_equal "Hel World!" "Hello World!".drop (Range 3 5) . should_equal "Hel World!"
"Hello World!".drop (Range -3 -1) . should_equal "Hello Wor!"
"Hello World!".drop (Range -3 Nothing) . should_equal "Hello Wor"
"Hello World!".drop (Range 5 Nothing) . should_equal "Hello"
"Hello World!".drop (Range 5 12) . should_equal "Hello" "Hello World!".drop (Range 5 12) . should_equal "Hello"
"Hello World!".drop (Range 12 12) . should_equal "Hello World!" "Hello World!".drop (Range 6 12 2) . should_equal "Hello ol!"
"Hello World!".drop (Every 2 first=6) . should_equal "Hello ol!"
"Hello World!".drop (Every 3) . should_equal "elo ord!"
"Hello World!".drop (By_Index 0) . should_equal "ello World!"
"Hello World!".drop (By_Index [1, 0, 0, 6, 0]) . should_equal "llo orld!"
"Hello World!".drop (By_Index [Range 0 3, 6, Range 6 12 2]) . should_equal "lo ol!"
"Hello World!".drop (Sample 3 seed=42) . should_equal "HeloWorl!"
Test.specify "drop should report errors for invalid Ranges" <| Test.specify "drop should report errors for start indices out of bounds but just go till the end if the end index is OOB" <|
"Hello World!".drop (Range 0 14) . should_fail_with Index_Out_Of_Bounds_Error txt = "Hello World!"
"Hello World!".drop (Range 13 12) . should_fail_with Index_Out_Of_Bounds_Error txt.drop (Range 0 14) . should_equal ""
"Hello World!".drop (Range -13 10) . should_fail_with Index_Out_Of_Bounds_Error txt.drop (First 100) . should_equal ""
"Hello World!".drop (Range 0 -20) . should_fail_with Index_Out_Of_Bounds_Error txt.drop (Last 100) . should_equal ""
"Hello World!".drop (Range 0 10 2) . should_fail_with Illegal_Argument_Error txt.drop (By_Index 100) . should_fail_with Index_Out_Of_Bounds_Error
"Hello World!".drop (Range 0 10 -1) . should_fail_with Illegal_Argument_Error txt.drop (By_Index 100) . catch . should_equal (Index_Out_Of_Bounds_Error 100 12)
txt.drop (By_Index 13) . should_fail_with Index_Out_Of_Bounds_Error
txt.drop (By_Index [0, 1, 13]) . should_fail_with Index_Out_Of_Bounds_Error
txt.drop (By_Index [0, Range 14 15, 1]) . should_fail_with Index_Out_Of_Bounds_Error
txt.drop (By_Index [0, 1, Range 6 100]) . should_equal "llo "
txt.drop (Range 13 12) . should_fail_with Index_Out_Of_Bounds_Error
txt.drop (Range 14 15) . should_fail_with Index_Out_Of_Bounds_Error
"".drop (By_Index 0) . should_fail_with Index_Out_Of_Bounds_Error
"".drop (Range 0 0) . should_fail_with Index_Out_Of_Bounds_Error
"".drop (Range 0 0) . catch . should_equal (Index_Out_Of_Bounds_Error 0 0)
txt.drop (Range 0 0) . should_equal txt
txt.drop (Range 5 100) . should_equal "Hello"
txt.drop (Range 5 100 2) . should_equal "HelloWrd"
txt.drop (By_Index [0, 1, 0, Range 5 100 2]) . should_equal "lloWrd"
Test.specify "drop should work on grapheme clusters" <| Test.specify "drop should work on grapheme clusters" <|
'He\u{302}llo\u{308} Wo\u{301}rld!'.drop (First 2) . should_equal 'llo\u{308} Wo\u{301}rld!' txt_1 = 'He\u0302llo\u0308 Wo\u0301rld!'
'He\u{302}llo\u{308} Wo\u{301}rld!'.drop (First 5) . should_equal ' Wo\u{301}rld!' txt_2 = 'He\u0302llo\u0308 Wo\u0308rld!'
'He\u{302}llo\u{308} Wo\u{301}rld!'.drop (Last 6) . should_equal 'He\u{302}llo\u{308} ' txt_1.drop (First 2) . should_equal 'llo\u{308} Wo\u{301}rld!'
'He\u{302}llo\u{308} Wo\u{301}rld!'.drop (Last 5) . should_equal 'He\u{302}llo\u{308} W' txt_1.drop (First 5) . should_equal ' Wo\u{301}rld!'
'He\u{302}llo\u{308} Wo\u{301}rld!'.drop (Before 'e\u{302}') . should_equal 'e\u{302}llo\u{308} Wo\u{301}rld!' txt_1.drop (Last 6) . should_equal 'He\u{302}llo\u{308} '
'He\u{302}llo\u{308} Wo\u{301}rld!'.drop (Before 'ê') . should_equal 'e\u{302}llo\u{308} Wo\u{301}rld!' txt_1.drop (Last 5) . should_equal 'He\u{302}llo\u{308} W'
'He\u{302}llo\u{308} Wo\u{301}rld!'.drop (Before 'e') . should_equal '' txt_1.drop (Before 'e\u{302}') . should_equal 'e\u{302}llo\u{308} Wo\u{301}rld!'
'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (Before_Last 'o\u{308}') . should_equal 'o\u{308}rld!' txt_1.drop (Before 'ê') . should_equal 'e\u{302}llo\u{308} Wo\u{301}rld!'
'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (Before_Last 'ö') . should_equal 'o\u{308}rld!' txt_1.drop (Before 'e') . should_equal ''
'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (Before_Last 'o') . should_equal '' txt_2.drop (Before_Last 'o\u{308}') . should_equal 'o\u{308}rld!'
'He\u{302}llo\u{308} Wo\u{301}rld!'.drop (After 'e\u{302}') . should_equal 'He\u{302}' txt_2.drop (Before_Last 'ö') . should_equal 'o\u{308}rld!'
'He\u{302}llo\u{308} Wo\u{301}rld!'.drop (After 'ê') . should_equal 'He\u{302}' txt_2.drop (Before_Last 'o') . should_equal ''
'He\u{302}llo\u{308} Wo\u{301}rld!'.drop (After 'e\u{308}') . should_equal 'He\u{302}llo\u{308} Wo\u{301}rld!' txt_1.drop (After 'e\u{302}') . should_equal 'He\u{302}'
'He\u{302}llo\u{308} Wo\u{301}rld!'.drop (After 'e') . should_equal 'He\u{302}llo\u{308} Wo\u{301}rld!' txt_1.drop (After 'ê') . should_equal 'He\u{302}'
'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (After_Last 'o\u{308}') . should_equal 'He\u{302}llo\u{308} Wo\u{308}' txt_1.drop (After 'e\u{308}') . should_equal txt_1
'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (After_Last 'ö') . should_equal 'He\u{302}llo\u{308} Wo\u{308}' txt_1.drop (After 'e') . should_equal txt_1
'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (After_Last 'o') . should_equal 'He\u{302}llo\u{308} Wo\u{308}rld!' txt_2.drop (After_Last 'o\u{308}') . should_equal 'He\u{302}llo\u{308} Wo\u{308}'
'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (While c->c!='e\u{302}') . should_equal 'e\u{302}llo\u{308} Wo\u{308}rld!' txt_2.drop (After_Last 'ö') . should_equal 'He\u{302}llo\u{308} Wo\u{308}'
'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (While c->c!='ê') . should_equal 'e\u{302}llo\u{308} Wo\u{308}rld!' txt_2.drop (After_Last 'o') . should_equal txt_2
'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (While c->c!='e') . should_equal '' txt_2.drop (While c->c!='e\u{302}') . should_equal 'e\u{302}llo\u{308} Wo\u{308}rld!'
'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (Range 3 5) . should_equal 'He\u{302}l Wo\u{308}rld!' txt_2.drop (While c->c!='ê') . should_equal 'e\u{302}llo\u{308} Wo\u{308}rld!'
'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (Range -3 -1) . should_equal 'He\u{302}llo\u{308} Wo\u{308}r!' txt_2.drop (While c->c!='e') . should_equal ''
'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (Range -3 Nothing) . should_equal 'He\u{302}llo\u{308} Wo\u{308}r' txt_2.drop (Range 3 5) . should_equal 'He\u{302}l Wo\u{308}rld!'
'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (Range 5 Nothing) . should_equal 'He\u{302}llo\u{308}' txt_2.drop (Range 5 12) . should_equal 'He\u{302}llo\u{308}'
'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (Range 5 12) . should_equal 'He\u{302}llo\u{308}'
'He\u{302}llo\u{308} Wo\u{308}rld!'.drop (Range 12 12) . should_equal 'He\u{302}llo\u{308} Wo\u{308}rld!'
Test.specify "drop should work on emojis" <| Test.specify "drop should work on emojis" <|
'✨🚀🚧😍😃😎😙😉☺'.drop First . should_equal '🚀🚧😍😃😎😙😉☺' '✨🚀🚧😍😃😎😙😉☺'.drop First . should_equal '🚀🚧😍😃😎😙😉☺'
@ -420,11 +557,10 @@ spec =
'✨🚀🚧😍😃😍😎😙😉☺'.drop (After_Last '😍') . should_equal '✨🚀🚧😍😃😍' '✨🚀🚧😍😃😍😎😙😉☺'.drop (After_Last '😍') . should_equal '✨🚀🚧😍😃😍'
'✨🚀🚧😍😃😍😎😙😉☺'.drop (While c->c!="😃") . should_equal '😃😍😎😙😉☺' '✨🚀🚧😍😃😍😎😙😉☺'.drop (While c->c!="😃") . should_equal '😃😍😎😙😉☺'
'✨🚀🚧😍😃😍😎😙😉☺'.drop (Range 3 6) . should_equal '✨🚀🚧😎😙😉☺' '✨🚀🚧😍😃😍😎😙😉☺'.drop (Range 3 6) . should_equal '✨🚀🚧😎😙😉☺'
'✨🚀🚧😍😃😍😎😙😉☺'.drop (Range 3 Nothing) . should_equal '✨🚀🚧'
'✨🚀🚧😍😃😍😎😙😉☺'.drop (Range -3 Nothing) . should_equal '✨🚀🚧😍😃😍😎'
'✨🚀🚧😍😃😍😎😙😉☺'.drop (Range -3 -1) . should_equal '✨🚀🚧😍😃😍😎☺'
Test.specify "drop should correctly handle edge cases" <| Test.specify "drop should correctly handle edge cases" <|
"ABC".drop . should_equal "BC"
"".drop First . should_equal "" "".drop First . should_equal ""
"".drop Last . should_equal "" "".drop Last . should_equal ""
@ -440,7 +576,7 @@ spec =
"".drop (While _->True) . should_equal "" "".drop (While _->True) . should_equal ""
"".drop (Range 0 0) . should_equal "" "".drop (Range 0 0) . should_fail_with Index_Out_Of_Bounds_Error
'ABC\u{301}'.drop (Range 0 0) . should_equal 'ABC\u{301}' 'ABC\u{301}'.drop (Range 0 0) . should_equal 'ABC\u{301}'
'ABC\u{301}'.drop (After "") . should_equal '' 'ABC\u{301}'.drop (After "") . should_equal ''
@ -448,6 +584,16 @@ spec =
'ABC\u{301}'.drop (Before "") . should_equal 'ABC\u{301}' 'ABC\u{301}'.drop (Before "") . should_equal 'ABC\u{301}'
'ABC\u{301}'.drop (Before_Last "") . should_equal '' 'ABC\u{301}'.drop (Before_Last "") . should_equal ''
"ABC".drop (By_Index -1) . should_equal "AB"
"ABC".drop (By_Index [-1, -1, -1, -3, 2]) . should_equal "B"
"ABC".drop (By_Index []) . should_equal "ABC"
"".drop (Every 2) . should_equal ""
"".drop (Every 2 first=1) . should_equal ""
"ABC".drop (Every 5) . should_equal "BC"
"ABC".drop (Every 5 first=4) . should_equal "ABC"
"".drop (Sample 0) . should_equal ""
"".drop (Sample 100) . should_equal ""
Test.specify "should correctly convert character case" <| Test.specify "should correctly convert character case" <|
"FooBar Baz".to_case Case.Lower . should_equal "foobar baz" "FooBar Baz".to_case Case.Lower . should_equal "foobar baz"
"FooBar Baz".to_case Case.Upper . should_equal "FOOBAR BAZ" "FooBar Baz".to_case Case.Upper . should_equal "FOOBAR BAZ"

View File

@ -1,5 +1,6 @@
from Standard.Base import all from Standard.Base import all
import Standard.Base.Data.Text.Text_Sub_Range
import Standard.Base.Data.Time import Standard.Base.Data.Time
import Standard.Base.Data.Time.Duration import Standard.Base.Data.Time.Duration
import Standard.Base.Data.Time.Time_Of_Day import Standard.Base.Data.Time.Time_Of_Day
@ -155,7 +156,7 @@ js_array_date year month=1 day=1 =
arr.at(0) arr.at(0)
java_date year month=1 day=1 = java_date year month=1 day=1 =
Panic.catch Any (LocalDate.of year month day) (err -> Error.throw (Time.Time_Error <| err.payload.to_display_text.take (Range 16 Nothing))) Panic.catch Any (LocalDate.of year month day) (err -> Error.throw (Time.Time_Error <| err.payload.to_display_text.drop (Text_Sub_Range.First 16)))
foreign js js_date_impl year month=1 day=1 = """ foreign js js_date_impl year month=1 day=1 = """
if (month > 12) { if (month > 12) {

View File

@ -55,8 +55,8 @@ spec = Test.group "Vectors" <|
[1,2,3].at -3 . should_equal 1 [1,2,3].at -3 . should_equal 1
Test.specify "should return a dataflow error when accessing elements out of bounds" <| Test.specify "should return a dataflow error when accessing elements out of bounds" <|
[1,2,3].at -4 . should_fail_with Vector.Index_Out_Of_Bounds_Error [1,2,3].at -4 . should_fail_with Index_Out_Of_Bounds_Error
[1,2,3].at 3 . should_fail_with Vector.Index_Out_Of_Bounds_Error [1,2,3].at 3 . should_fail_with Index_Out_Of_Bounds_Error
Test.specify "should have a well-defined length" <| Test.specify "should have a well-defined length" <|
[1,2,3].length . should_equal 3 [1,2,3].length . should_equal 3

View File

@ -36,17 +36,19 @@ import project.Data.Ordering.Natural_Order_Spec
import project.Data.Ordering.Vector_Lexicographic_Order_Spec import project.Data.Ordering.Vector_Lexicographic_Order_Spec
import project.Data.Range_Spec import project.Data.Range_Spec
import project.Data.Ref_Spec import project.Data.Ref_Spec
import project.Data.Text_Spec
import project.Data.Time.Spec as Time_Spec import project.Data.Time.Spec as Time_Spec
import project.Data.Vector_Spec import project.Data.Vector_Spec
import project.Data.Statistics_Spec import project.Data.Statistics_Spec
import project.Data.Regression_Spec import project.Data.Regression_Spec
import project.Data.Text.Regex_Spec
import project.Data.Text.Utils_Spec import project.Data.Text_Spec
import project.Data.Text.Codepoint_Ranges_Spec
import project.Data.Text.Default_Regex_Engine_Spec import project.Data.Text.Default_Regex_Engine_Spec
import project.Data.Text.Matching_Spec
import project.Data.Text.Span_Spec
import project.Data.Text.Encoding_Spec import project.Data.Text.Encoding_Spec
import project.Data.Text.Matching_Spec
import project.Data.Text.Regex_Spec
import project.Data.Text.Span_Spec
import project.Data.Text.Utils_Spec
import project.Network.Http.Header_Spec as Http_Header_Spec import project.Network.Http.Header_Spec as Http_Header_Spec
import project.Network.Http.Request_Spec as Http_Request_Spec import project.Network.Http.Request_Spec as Http_Request_Spec
@ -65,6 +67,8 @@ import project.System.Reporting_Stream_Decoder_Spec
import project.System.Reporting_Stream_Encoder_Spec import project.System.Reporting_Stream_Encoder_Spec
import project.System.System_Spec import project.System.System_Spec
import project.Random_Spec
main = Test.Suite.run_main <| main = Test.Suite.run_main <|
Any_Spec.spec Any_Spec.spec
Array_Spec.spec Array_Spec.spec
@ -110,6 +114,7 @@ main = Test.Suite.run_main <|
Runtime_Spec.spec Runtime_Spec.spec
Span_Spec.spec Span_Spec.spec
Encoding_Spec.spec Encoding_Spec.spec
Codepoint_Ranges_Spec.spec
Bracket_Spec.spec Bracket_Spec.spec
Lazy_Generator_Spec.spec Lazy_Generator_Spec.spec
Stack_Traces_Spec.spec Stack_Traces_Spec.spec
@ -122,3 +127,4 @@ main = Test.Suite.run_main <|
Regression_Spec.spec Regression_Spec.spec
Warnings_Spec.spec Warnings_Spec.spec
System_Spec.spec System_Spec.spec
Random_Spec.spec

View File

@ -0,0 +1,44 @@
from Standard.Base import all
import Standard.Base.Random
import Standard.Test
spec = Test.group "Random" <|
    Test.specify "should allow to generate random indices" <|
        generator = Random.new 0
        all_permutations = [[0, 1, 2], [0, 2, 1], [1, 0, 2], [1, 2, 0], [2, 0, 1], [2, 1, 0]]

        # All batches draw from the one seeded generator; the call order is
        # kept unchanged in case the results depend on it.
        pairs = 0.up_to 100 . map _-> Random.random_indices 3 2 generator
        shuffles = 0.up_to 100 . map _-> Random.random_indices 3 3 generator
        oversized_shuffles = 0.up_to 100 . map _-> Random.random_indices 3 100 generator

        # 100 draws are expected to hit every possible outcome at least once.
        pairs . should_contain_the_same_elements_as [[0, 1], [0, 2], [1, 2], [1, 0], [2, 0], [2, 1]]
        shuffles . should_contain_the_same_elements_as all_permutations
        oversized_shuffles . should_contain_the_same_elements_as all_permutations

        # Degenerate sizes have exactly one possible result.
        Random.random_indices 0 0 generator . should_equal []
        Random.random_indices 0 100 generator . should_equal []
        Random.random_indices 1 1 generator . should_equal [0]
        Random.random_indices 1 100 generator . should_equal [0]
        Random.random_indices 100 0 generator . should_equal []

    Test.specify "should allow to select a random sample from a vector" <|
        generator = Random.new 0
        letters = ["A", "B", "C"]

        pairs = 0.up_to 100 . map _-> Random.sample letters 2 generator
        pairs . should_contain_the_same_elements_as [["A", "B"], ["A", "C"], ["B", "A"], ["B", "C"], ["C", "A"], ["C", "B"]]

        # Requesting more elements than exist yields each element once.
        oversized = Random.sample letters 100 generator
        oversized.length . should_equal 3
        oversized.should_contain_the_same_elements_as letters

        # With identical elements the outcome is independent of the draw.
        Random.sample ["A", "A", "A"] 2 generator . should_equal ["A", "A"]
        Random.sample ["A", "A", "A"] 0 generator . should_equal []
        Random.sample ["A", "A", "A"] 3 generator . should_equal ["A", "A", "A"]
        Random.sample ["A", "A", "A"] 100 generator . should_equal ["A", "A", "A"]
# Entry point: hands `spec` to `Test.Suite.run_main`.
main = Test.Suite.run_main spec