Add Vector.take and Vector.drop functions (#3629)

Implements https://www.pivotaltracker.com/story/show/182307048
This commit is contained in:
Radosław Waśko 2022-08-10 18:02:02 +02:00 committed by GitHub
parent 1083a2532e
commit 3dca738cf7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
36 changed files with 449 additions and 173 deletions

View File

@ -171,6 +171,8 @@
type from `Standard.Table`.][3601]
- [Created `Index_Sub_Range` type and updated `Text.take` and
`Text.drop`.][3617]
- [Updated `Vector.take` and `Vector.drop` and removed their obsolete
counterparts.][3629]
[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -270,6 +272,7 @@
[3593]: https://github.com/enso-org/enso/pull/3593
[3601]: https://github.com/enso-org/enso/pull/3601
[3617]: https://github.com/enso-org/enso/pull/3617
[3629]: https://github.com/enso-org/enso/pull/3629
#### Enso Compiler

View File

@ -127,6 +127,30 @@ type Range
if self.step < 0 then go (<=) self.start else
throw_zero_step_error
## PRIVATE
   Calls `function` once for every element of the range, passing the
   zero-based position of the element within the range followed by the
   element itself.

   Behaves like `range.to_vector.each_with_index`, but iterates the range
   directly instead of going through an intermediate vector.

   Arguments:
   - function: A two-argument function; it receives the position within the
     range first and the range element second.

   > Example
     Print range elements with their indices within the range.

         (10.up_to 13).each_with_index ix-> elem-> IO.println (Pair ix elem) # Will print Pair 0 10, Pair 1 11, Pair 2 12
each_with_index : (Integer -> Any -> Any) -> Nothing
each_with_index self function =
    step = self.step
    limit = self.end
    loop is_finished position value =
        if is_finished value limit then Nothing else
            function position value
            @Tail_Call loop is_finished position+1 value+step
    # The termination test depends on the direction of iteration; a zero step
    # is reported as an error.
    if step > 0 then loop (>=) 0 self.start else
        if step < 0 then loop (<=) 0 self.start else
            throw_zero_step_error
## Combines all the elements of the range, by iteratively applying the
passed function with next elements of the range.

View File

@ -1099,10 +1099,7 @@ Text.take self range=(First 1) =
Range start end _ ->
Text_Utils.substring self start end
Text_Sub_Range.Codepoint_Ranges char_ranges _ ->
sb = StringBuilder.new
char_ranges.map char_range->
sb.append self char_range.start char_range.end
sb.toString
slice_text self char_ranges
## ALIAS skip, remove
Creates a new Text by removing the specified range of the input.
@ -1152,12 +1149,9 @@ Text.drop self range=(First 1) =
prefix + Text_Utils.drop_first self end
Text_Sub_Range.Codepoint_Ranges _ _ ->
sorted_char_ranges_to_remove = ranges.sorted_and_distinct_ranges
len = Text_Utils.char_length self
sb = StringBuilder.new
ranges_with_sentinels = [Range 0 0] + sorted_char_ranges_to_remove + [Range len len]
ranges_with_sentinels.zip ranges_with_sentinels.tail prev-> next->
sb.append self prev.end next.start
sb.toString
char_length = Text_Utils.char_length self
inverted = Vector.invert_range_selection sorted_char_ranges_to_remove char_length needs_sorting=False
slice_text self inverted
## ALIAS lower, upper, title, proper
Converts each character in `self` to the specified case.
@ -1504,3 +1498,15 @@ Text.location_of_all self term="" matcher=Text_Matcher = case matcher of
case matcher.compile term . match self Mode.All of
Nothing -> []
matches -> matches.map m-> m.span 0 . to_grapheme_span
## PRIVATE
   Builds a new Text by concatenating, in order, the slices of `text`
   described by `char_ranges`.

   Every range is assumed to have a step of 1 and to lie within the bounds of
   `text`. Range bounds are expressed in UTF-16 code units.
slice_text text char_ranges =
    builder = StringBuilder.new
    char_ranges.map piece->
        builder.append text piece.start piece.end
    builder.toString

View File

@ -1,5 +1,4 @@
from Standard.Base import all
import Standard.Base.Runtime.Ref
from Standard.Base.Data.Text.Extensions import Index_Out_Of_Bounds_Error
from Standard.Base.Data.Text.Span as Span_Module import Span
from Standard.Base.Data.Index_Sub_Range import First, Last, While, By_Index, Sample, Every
@ -118,19 +117,7 @@ type Codepoint_Ranges
Empty subranges are not discarded.
sorted_and_distinct_ranges : Vector Range
sorted_and_distinct_ranges self = if self.is_sorted_and_distinct then self.ranges else
sorted = self.ranges.filter (range-> range.is_empty.not) . sort on=(.start)
if sorted.is_empty then [] else
current_ref = Ref.new sorted.first
builder = Vector.new_builder
sorted.tail.each range->
current = current_ref.get
case range.start <= current.end of
True -> current_ref.put (Range current.start (Math.max current.end range.end))
False ->
builder.append current
current_ref.put range
builder.append current_ref.get
builder.to_vector
Vector.sort_and_merge_ranges self.ranges
## PRIVATE
Utility function to find char indices for Text_Sub_Range.

View File

@ -1,5 +1,8 @@
from Standard.Base import all
import Standard.Base.Runtime.Ref
import Standard.Base.Runtime.Unsafe
from Standard.Base.Data.Index_Sub_Range import While, By_Index, Sample, Every
import Standard.Base.Random
## Creates a new vector of the given length, initializing elements using
the provided constructor function.
@ -90,7 +93,7 @@ new_builder (capacity=1) = Builder.new capacity
If this didn't happen then it would be possible for the underlying array to
be mutated under the hood, and sneak mutability into our immutable data.
from_array : Any -> Vector.Vector Any
from_array : Any -> Vector Any
from_array arr = new (Polyglot.get_array_size arr) (arr.at _)
## The basic, immutable, vector type.
@ -553,7 +556,7 @@ type Vector
short_display_text : Integer -> Text
short_display_text self max_entries=10 =
if max_entries < 1 then Error.throw <| Illegal_Argument_Error "The `max_entries` parameter must be positive." else
prefix = self.take_start max_entries
prefix = self.take (First max_entries)
if prefix.length == self.length then self.to_text else
remaining_count = self.length - prefix.length
remaining_text = if remaining_count == 1 then "and 1 more element" else
@ -637,7 +640,8 @@ type Vector
if self.length == 1 then prefix + self.unsafe_at 0 + suffix else
prefix + self.unsafe_at 0 + (1.up_to self.length . fold "" acc-> i-> acc + separator + self.unsafe_at i) + suffix
## Creates a new vector with the skipping elements until `start` and then
## PRIVATE
Creates a new vector by skipping the elements before `start` and then
continuing until `end` index.
Arguments:
@ -648,89 +652,81 @@ type Vector
Remove the first 2 elements then continue until index 5 from the vector.
[1, 2, 3, 4, 5, 6, 7, 8].slice 2 5 == [3, 4, 5]
take : Integer -> Integer -> Vector Any
take self start end =
slice : Integer -> Integer -> Vector Any
slice self start end =
slice_start = Math.max 0 start
slice_end = Math.min self.length end
if slice_start >= slice_end then Vector (Array.new 0) else
if slice_start >= slice_end then [] else
if (slice_start == 0) && (slice_end == self.length) then self else
len = slice_end - slice_start
arr = Array.new len
Array.copy self.to_array slice_start arr 0 len
Vector arr
## Creates a new vector with the first `count` elements in `self` removed.
## Creates a new vector with only the specified range of elements from the
input, removing any elements outside the range.
Arguments:
- count: The number of elements to drop from the start of `self`.
- range: The section of this Vector to return.
If an `Index_Sub_Range`, then the selection is interpreted following
the rules of that type.
If a `Range`, the selection is specified by two indices, from and to.
take : (Index_Sub_Range | Range) -> Vector Any
take self range=(First 1) = case range of
    # A plain range is handled as a by-index selection.
    Range _ _ _ -> self.take (By_Index range)
    First count -> self.slice 0 (Math.min self.length count)
    Last count -> self.slice self.length-count self.length
    While predicate ->
        # Index of the first element that fails the predicate, if any.
        first_failing = 0.up_to self.length . find i-> (predicate (self.at i)).not
        if first_failing.is_nothing then self else self.slice 0 first_failing
    By_Index one_or_many_descriptors -> Panic.recover [Index_Out_Of_Bounds_Error, Illegal_Argument_Error] <|
        # Accept either a single descriptor or a vector of them.
        descriptors = case one_or_many_descriptors of
            Vector _ -> one_or_many_descriptors
            _ -> [one_or_many_descriptors]
        resolved = resolve_ranges descriptors self.length
        slice_ranges self resolved
    Sample count seed ->
        Random.sample self count (Random.new seed)
    Every step start ->
        if step <= 0 then Error.throw (Illegal_Argument_Error "Step within Every must be positive.") else
            if start >= self.length then [] else
                self.take (By_Index (Range start self.length step))
> Example
Remove the first element from the start of the vector.
[1, 2, 3, 4, 5].drop_start 1
drop_start : Integer -> Vector Any
drop_start self count = self.take count self.length
## Creates a new vector with elements at the beginning of the vector which
satisfy the provided predicate removed.
## Creates a new vector, removing any elements from the specified range of
the input.
Arguments:
- predicate: A function returning `Boolean` specifying the prefix to
remove.
> Example
Remove any odd elements at the beginining of the vector (but keeping
any odd elements later on).
[1, 3, 5, 6, 8, 9, 10, 11, 13].drop_while (x-> x%2 == 1) == [6, 8, 9, 10, 11, 13]
drop_while : (Any -> Boolean) -> Vector Any
drop_while self predicate =
find_first_false current =
if current >= self.length then Nothing else
case predicate (self.unsafe_at current) of
False -> current
True -> @Tail_Call find_first_false current+1
case find_first_false 0 of
Nothing -> []
first_false -> self.take first_false self.length
## Creates a new vector with the last `count` elements in `self` removed.
Arguments:
- count: The number of elements to drop from the end of `self`.
> Example
Remove the last two elements from the end of the vector.
[1, 2, 3, 4, 5].drop_end 2
drop_end : Integer -> Vector Any
drop_end self count = self.take 0 (self.length - count)
## Creates a new vector, consisting of the first `count` elements on the
left of `self`.
Arguments:
- count: The number of elements to take from the start of `self`.
> Example
Create a new vector from the first two elements of the vector.
[1, 2, 3, 4, 5].take_start 2
take_start : Integer -> Vector Any
take_start self count = self.take 0 count
## Creates a new vector, consisting of the last `count` elements on the
right of `self`.
Arguments:
- count: The number of elements to take from the end of `self`.
> Example
Create a new vector from the last two elements of the vector.
[1, 2, 3, 4, 5].take_end 3
take_end : Integer -> Vector Any
take_end self count = self.take (self.length - count) self.length
- range: The section of this Vector to remove.
If an `Index_Sub_Range`, then the selection is interpreted following
the rules of that type.
If a `Range`, the selection is specified by two indices, from and to.
drop : (Index_Sub_Range | Range) -> Vector Any
drop self range=(First 1) = case range of
    # A plain range is handled as a by-index selection.
    Range _ _ _ -> self.drop (By_Index range)
    First count -> self.slice count self.length
    Last count -> self.slice 0 self.length-count
    While predicate ->
        # Index of the first element that fails the predicate, if any.
        first_failing = 0.up_to self.length . find i-> (predicate (self.at i)).not
        if first_failing.is_nothing then [] else self.slice first_failing self.length
    By_Index one_or_many_descriptors -> Panic.recover [Index_Out_Of_Bounds_Error, Illegal_Argument_Error] <|
        # Accept either a single descriptor or a vector of them.
        descriptors = case one_or_many_descriptors of
            Vector _ -> one_or_many_descriptors
            _ -> [one_or_many_descriptors]
        resolved = resolve_ranges descriptors self.length
        unit_step_ranges = normalize_ranges resolved
        # The elements to keep are exactly those not covered by the selection.
        kept = invert_range_selection unit_step_ranges self.length needs_sorting=True
        slice_ranges self kept
    Sample count seed ->
        dropped = Random.random_indices self.length count (Random.new seed)
        self.drop (By_Index dropped)
    Every step start ->
        if step <= 0 then Error.throw (Illegal_Argument_Error "Step within Every must be positive.") else
            if start >= self.length then self else
                self.drop (By_Index (Range start self.length step))
## Performs a pair-wise operation passed in `function` on consecutive
elements of `self` and `that`.
@ -808,7 +804,7 @@ type Vector
[1, 2, 3, 4].tail
tail : Vector ! Empty_Error
tail self = if self.length >= 1 then self.drop_start 1 else
tail self = if self.length >= 1 then self.drop (First 1) else
Error.throw Empty_Error
## Get the all elements in the vector except the last.
@ -818,7 +814,7 @@ type Vector
[1, 2, 3, 4].init
init : Vector ! Empty_Error
init self = if self.length >= 1 then self.drop_end 1 else Error.throw Empty_Error
init self = if self.length >= 1 then self.drop (Last 1) else Error.throw Empty_Error
## Get the last element of the vector, or an `Empty_Error` if the vector is
empty.
@ -974,7 +970,7 @@ type Vector
visualization.
to_default_visualization_data : Text
to_default_visualization_data self =
json = self.take_start 100 . to_json
json = self.take (First 100) . to_json
json.to_text
## PRIVATE
@ -1169,3 +1165,115 @@ type Incomparable_Values_Error
handle_incomparable_value ~function =
handle t = Panic.catch t handler=(Error.throw Incomparable_Values_Error)
handle No_Such_Method_Error <| handle Type_Error <| handle Unsupported_Argument_Types <| function
## PRIVATE
   Validates a vector of descriptors (integer indices or ranges) against a
   sequence of the given length.

   Integer descriptors are returned as resolved indices (negative values count
   from the end of the sequence); range descriptors are returned as ranges
   whose end has been clamped to `length`.

   Invalid descriptors are reported by throwing a panic carrying either an
   `Index_Out_Of_Bounds_Error` or an `Illegal_Argument_Error`.
resolve_ranges : Vector (Integer | Range) -> Integer -> Vector Range
resolve_ranges ranges length =
    ## Checks a single descriptor and adjusts it so that it fits within the
       sequence.
    fit descriptor = case descriptor of
        Integer ->
            resolved = if descriptor < 0 then length + descriptor else descriptor
            is_outside = (resolved < 0) || (resolved >= length)
            if is_outside then Panic.throw (Index_Out_Of_Bounds_Error descriptor length) else
                resolved
        Range start end step ->
            if step <= 0 then Panic.throw (Illegal_Argument_Error "Range step must be positive.") else
                has_negative_bound = (start < 0) || (end < 0)
                if has_negative_bound then Panic.throw (Illegal_Argument_Error "Range start and end must not be negative.") else
                    if start >= length then Panic.throw (Index_Out_Of_Bounds_Error start length) else
                        # Clamp the end to the sequence length, but never
                        # below `start`, so the result is at worst empty.
                        clamped_end = Math.max start (Math.min end length)
                        Range start clamped_end step
    ranges.map fit
## PRIVATE
   Builds a new vector by concatenating, for each descriptor in `ranges`, the
   matching element or section of `vector`.

   The descriptors are assumed to be bounds-checked already (for example by
   passing them through `resolve_ranges`).
slice_ranges vector ranges =
    descriptor_count = ranges.length
    if descriptor_count == 0 then [] else
        if descriptor_count != 1 then slice_many_ranges vector ranges else
            only = ranges.first
            case only of
                Integer -> [vector.unsafe_at only]
                Range start end step ->
                    # A single contiguous range is a plain slice.
                    if step == 1 then vector.slice start end else
                        slice_many_ranges vector ranges
## PRIVATE
   General implementation behind `slice_ranges`: first computes the size of
   the result, then fills a freshly allocated array descriptor by descriptor.
slice_many_ranges vector ranges =
    total_length = ranges.fold 0 so_far-> descriptor-> case descriptor of
        Integer -> so_far+1
        Range _ _ _ -> so_far+descriptor.length
    target = Array.new total_length
    # The accumulator of this fold is the next free position in `target`.
    ranges.fold 0 offset-> descriptor-> case descriptor of
        Integer ->
            target.set_at offset (vector.unsafe_at descriptor)
            offset+1
        Range start end step -> case step == 1 of
            True ->
                # Contiguous section: copy it in one go.
                count = end-start
                Array.copy vector.to_array start target offset count
                offset+count
            False ->
                descriptor.each_with_index position-> source_ix->
                    target.set_at offset+position (vector.unsafe_at source_ix)
                offset+descriptor.length
    Vector target
## PRIVATE
   Rewrites a list of descriptors so that it consists only of ranges with step
   equal to 1: a single index becomes a one-element range and a range with a
   larger step is expanded into one-element ranges, one per index it covers.
normalize_ranges descriptors =
    single ix = Range ix ix+1
    expand descriptor = case descriptor of
        Integer -> [single descriptor]
        Range _ _ step ->
            if step == 1 then [descriptor] else
                descriptor.to_vector.map single
    descriptors.flat_map expand
## PRIVATE
   Computes the complement of a selection expressed as a list of ranges.

   The input ranges are assumed to be normalized (i.e. all of them have step
   equal to 1). The result may contain empty ranges, for example when a
   selected range starts at index 0.

   Arguments:
   - ranges: The ranges describing the selected indices. The result describes
     all the indices that were not selected.
   - length: Length of the related sequence.
   - needs_sorting: Whether `ranges` must first be sorted and merged, or can
     be assumed to be sorted already.
invert_range_selection : Vector Range -> Integer -> Boolean -> Vector Range
invert_range_selection ranges length needs_sorting =
    ordered = if needs_sorting then sort_and_merge_ranges ranges else ranges
    # Each gap lies between the end of one range and the start of the next;
    # the sentinels provide the gaps before the first and after the last range.
    preceding = [Range 0 0] + ordered
    following = ordered + [Range length length]
    preceding.zip following before-> after->
        Range before.end after.start
## PRIVATE
   Sorts the ranges by their start and merges those that overlap or touch, so
   that the result is a sorted list of disjoint ranges.

   Empty subranges are discarded.
sort_and_merge_ranges ranges =
    non_empty = ranges.filter (range-> range.is_empty.not)
    ordered = non_empty.sort on=(.start)
    if ordered.is_empty then [] else
        # `pending` holds the range currently being grown; it is emitted once a
        # range that starts past its end is encountered.
        pending = Ref.new ordered.first
        merged = new_builder
        ordered.tail.each range->
            accumulated = pending.get
            case range.start <= accumulated.end of
                True -> pending.put (Range accumulated.start (Math.max accumulated.end range.end))
                False ->
                    merged.append accumulated
                    pending.put range
        merged.append pending.get
        merged.to_vector

View File

@ -14,6 +14,7 @@ import project.Data.Ordering.Natural_Order
import project.Data.Ordering.Sort_Direction
import project.Data.Pair
import project.Data.Range
import project.Data.Index_Sub_Range
import project.Data.Regression
import project.Data.Statistics
import project.Data.Statistics.Rank_Method
@ -97,3 +98,4 @@ from project.Function export all
from project.Nothing export all
from project.Polyglot export all
from project.Runtime.Extensions export all
from project.Data.Index_Sub_Range export First, Last

View File

@ -1,4 +1,5 @@
import Standard.Base.Data.Vector
from Standard.Base.Data.Index_Sub_Range import First
import Standard.Base.Polyglot
import Standard.Base.Nothing
from Standard.Base.Runtime.Extensions import Source_Location
@ -23,7 +24,7 @@ get_stack_trace =
stack_with_prims = Vector.Vector prim_stack
stack = stack_with_prims.map wrap_primitive_stack_trace_element
# drop this frame and the one from `Runtime.primitive_get_stack_trace`
stack.drop_start 2
stack.drop (First 2)
## ADVANCED

View File

@ -1,4 +1,5 @@
from Standard.Base import all
from Standard.Base.Data.Index_Sub_Range import While
from Standard.Base.Runtime import Stack_Trace_Element
## A representation of a dataflow warning attached to a value.
@ -68,7 +69,7 @@ create payload origin = @Builtin_Method "Warning.create"
attach : Any -> Any -> Any
attach warning value =
origin = Runtime.get_stack_trace
attach_with_stacktrace value warning (origin.drop_start 1)
attach_with_stacktrace value warning (origin.drop (First 1))
## PRIVATE
@ -175,8 +176,8 @@ map_attached_warnings_helper mapper value frames_to_drop =
Maybe.Some new_payload ->
self_call_name = "Warning.map_attached_warnings_helper"
stack_trace = Runtime.get_stack_trace
stack_trace_up_to_this_function = stack_trace.drop_while element-> element.name != self_call_name
new_origin = stack_trace_up_to_this_function.drop_start 1+frames_to_drop
stack_trace_up_to_this_function = stack_trace.drop (While element-> element.name != self_call_name)
new_origin = stack_trace_up_to_this_function.drop (First 1+frames_to_drop)
create new_payload new_origin
## If the mapper did not want to affect this warning, we return the
original (unwrapped) warning instance.

View File

@ -1,4 +1,4 @@
from Standard.Base import all
from Standard.Base import all hiding First, Last
import Standard.Base.Error.Common as Errors
from Standard.Table.Data.Aggregate_Column import all

View File

@ -1,4 +1,4 @@
from Standard.Base import all
from Standard.Base import all hiding First, Last
from Standard.Table.Data.Aggregate_Column import all
from Standard.Database.Data.Sql import Sql_Type

View File

@ -1,4 +1,4 @@
from Standard.Base import all
from Standard.Base import all hiding First, Last
from Standard.Base.Data.Text.Text_Ordering import Text_Ordering
from Standard.Table.Data.Aggregate_Column import all

View File

@ -598,8 +598,8 @@ type Table
right_names_before = right_new_columns.map .name
new_names = combine_names left_names_before right_names_before left_suffix right_suffix
left_indices_count = left_new_meta_index.length
left_new_meta_index_names = new_names.first.take_start left_indices_count
left_new_columns_names = new_names.first.drop_start left_indices_count
left_new_meta_index_names = new_names.first.take (First left_indices_count)
left_new_columns_names = new_names.first.drop (First left_indices_count)
right_new_columns_names = new_names.second
# Rename columns to the newly allocated names

View File

@ -861,7 +861,7 @@ type Column
size = ['length', self.length]
name = ['name', self.name]
max_data = 100
data = ['data', self.to_vector.take_start max_data]
# Only the first `max_data` elements are included in the payload.
data = ['data', self.to_vector.take (First max_data)]
Json.from_pairs [size, name, data] . to_text
## ALIAS Sum Columns
@ -1021,7 +1021,7 @@ type Column
import Standard.Examples
example_take_start = Examples.integer_column.take_start 2
example_take_start = Examples.integer_column.take_start 2
take_start : Integer -> Column
take_start self count =
Column (self.java_column.slice 0 count)

View File

@ -219,7 +219,7 @@ type Table
row_count = ['number_of_rows', self.row_count]
cols = self.columns.map c->
name = c.name
items = c.to_vector.take_start max_size
items = c.to_vector.take (First max_size)
Json.from_pairs [['name', name], ['data', items]]
Json.from_pairs [row_count, ['columns', cols]] . to_text

View File

@ -1,4 +1,4 @@
from Standard.Base import all
from Standard.Base import all hiding First, Last
from Standard.Table.Data.Column import Column
from Standard.Table.Data.Aggregate_Column import all

View File

@ -214,7 +214,7 @@ newline_at_eof file encoding =
file_last_bytes = file.read_last_bytes most_bytes
result = newlines.zip newline_bytes . find pair->
bytes = pair.second
bytes == (file_last_bytes.take_end bytes.length)
bytes == (file_last_bytes.take (Last bytes.length))
result.first . catch Nothing
## PRIVATE

View File

@ -173,8 +173,8 @@ rename_columns internal_columns mapping on_problems =
Column_Name_Mapping.By_Position vec ->
good_names = case vec.length > col_count of
True ->
problem_builder.report_other_warning (Too_Many_Column_Names_Provided (vec.drop_start col_count))
vec.take_start col_count
problem_builder.report_other_warning (Too_Many_Column_Names_Provided (vec.drop (First col_count)))
vec.take (First col_count)
False -> vec
new_names = 0.up_to col_count . map i->if i>=good_names.length then Nothing else

View File

@ -516,14 +516,13 @@ Any.should_be_an self typ = self.should_be_a typ
example_should_equal = [1, 2] . should_contain_the_same_elements_as [2, 1]
Any.should_contain_the_same_elements_as : Any -> Integer -> Assertion
Any.should_contain_the_same_elements_as self that frames_to_skip=0 =
loc = Meta.get_source_location 1+frames_to_skip
that.each element->
if self.contains element . not then
loc = Meta.get_source_location 2+frames_to_skip
msg = "The collection (" + self.to_text + ") did not contain "+element.to_text+" (at " + loc + ")."
fail msg
self.each element->
if that.contains element . not then
loc = Meta.get_source_location 2+frames_to_skip
msg = "The collection contained an element ("+element.to_text+") which was not expected (at " + loc + ")."
fail msg

View File

@ -53,7 +53,7 @@ prepare_visualization x max_rows = Helpers.recover_errors <| case x of
# TODO [RW] Should we truncate Vectors?
# We also visualize Vectors and arrays
Vector.Vector _ ->
truncated = x.take_start max_rows
truncated = x.take (First max_rows)
Json.from_pairs [["json", truncated], ["all_rows_count", x.length]] . to_text
Array ->
prepare_visualization (Vector.Vector x) max_rows

View File

@ -8,7 +8,7 @@ public class Random_Utils {
public static Object[] sample(Object[] array, int k, Random rng) {
k = Math.min(k, array.length);
var copy = Arrays.copyOf(array, array.length);
shuffleFirstInPlace(copy, k, rng);
sampleInPlace(copy, k, rng);
return Arrays.copyOf(copy, k);
}
@ -22,11 +22,15 @@ public class Random_Utils {
indices[i] = (long) i;
}
k = Math.min(k, n);
shuffleFirstInPlace(indices, k, rng);
sampleInPlace(indices, k, rng);
return Arrays.copyOf(indices, k);
}
private static <T> void shuffleFirstInPlace(T[] array, int k, Random rng) {
/**
* Reorders the array in such a way that the first k elements contain a random selection (without
* replacement) of k elements from the whole array.
*/
private static <T> void sampleInPlace(T[] array, int k, Random rng) {
int n = array.length;
for (int i = 0; i < Math.min(k, n); ++i) {
int r = i + rng.nextInt(n - i);

View File

@ -27,8 +27,8 @@ main =
Bench.measure (random_vec + [1]) "Append Single" iter_size num_iterations
Bench.measure (random_vec + random_vec_2) "Append Large" iter_size num_iterations
Bench.measure (random_vec.sum) "Sum" iter_size num_iterations
Bench.measure ((random_vec.drop_start 20).sum) "Drop First 20 and Sum" iter_size num_iterations
Bench.measure ((random_vec.drop_end 20).sum) "Drop Last 20 and Sum" iter_size num_iterations
Bench.measure ((random_vec.drop (First 20)).sum) "Drop First 20 and Sum" iter_size num_iterations
Bench.measure ((random_vec.drop (Last 20)).sum) "Drop Last 20 and Sum" iter_size num_iterations
Bench.measure (random_vec.filter (x -> x % 3 == 1)) "Filter" iter_size num_iterations
Bench.measure (random_vec.filter_with_index (i->x -> (i+x) % 3 == 1)) "Filter With Index" iter_size num_iterations
Bench.measure (random_vec.partition (x -> x % 3 == 1)) "Partition" iter_size num_iterations

View File

@ -1,4 +1,4 @@
from Standard.Base import all
from Standard.Base import all hiding First, Last
import Standard.Table.Data.Table
from Standard.Table.Data.Aggregate_Column import all

View File

@ -1,4 +1,4 @@
from Standard.Base import all
from Standard.Base import all hiding First, Last
import Standard.Table
from Standard.Table.Data.Column_Selector import By_Name, By_Index

View File

@ -63,7 +63,7 @@ run_tests connection pending=Nothing =
agg_in_memory_table = (enso_project.data / "data.csv") . read
agg_table = connection.upload_table (Name_Generator.random_name "Agg1") agg_in_memory_table
tables.append agg_table.name
empty_agg_table = connection.upload_table (Name_Generator.random_name "Agg_Empty") (agg_in_memory_table.take_start 0)
# `agg_in_memory_table` is a Table, so the empty table is obtained with `take_start`.
empty_agg_table = connection.upload_table (Name_Generator.random_name "Agg_Empty") (agg_in_memory_table.take_start 0)
tables.append empty_agg_table.name
Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table table_builder materialize is_database=True selection pending=pending

View File

@ -486,12 +486,12 @@ spec =
t_1.take_start 10 . at 'col' . to_vector . should_equal (t_1.at 'col' . to_vector)
t_2 = t_1.take_start 2
t_2.index.to_vector . should_equal (t_1.index.to_vector . take_start 2)
t_2.at 'col' . to_vector . should_equal (t_1.at 'col' . to_vector . take_start 2)
t_2.at 'col2' . to_vector . should_equal (t_1.at 'col2' . to_vector . take_start 2)
t_2.at 'col3' . to_vector . should_equal (t_1.at 'col3' . to_vector . take_start 2)
t_2.index.to_vector . should_equal (t_1.index.to_vector . take (First 2))
t_2.at 'col' . to_vector . should_equal (t_1.at 'col' . to_vector . take (First 2))
t_2.at 'col2' . to_vector . should_equal (t_1.at 'col2' . to_vector . take (First 2))
t_2.at 'col3' . to_vector . should_equal (t_1.at 'col3' . to_vector . take (First 2))
t_1.at 'col' . take_start 2 . to_vector . should_equal (t_1.at 'col' . to_vector . take_start 2)
t_1.at 'col' . take_start 2 . to_vector . should_equal (t_1.at 'col' . to_vector . take (First 2))
Test.specify "should allow taking the last n rows" <|
i_1 = ['ix', [1, 2, 3]]
@ -503,10 +503,10 @@ spec =
t_1.take_end 10 . at 'col1' . to_vector . should_equal (t_1.at 'col1' . to_vector)
t_2 = t_1.take_end 2
t_2.index.to_vector . should_equal (t_1.index.to_vector . take_end 2)
t_2.at 'col1' . to_vector . should_equal (t_1.at 'col1' . to_vector . take_end 2)
t_2.at 'col2' . to_vector . should_equal (t_1.at 'col2' . to_vector . take_end 2)
t_2.at 'col3' . to_vector . should_equal (t_1.at 'col3' . to_vector . take_end 2)
t_2.index.to_vector . should_equal (t_1.index.to_vector . take (Last 2))
t_2.at 'col1' . to_vector . should_equal (t_1.at 'col1' . to_vector . take (Last 2))
t_2.at 'col2' . to_vector . should_equal (t_1.at 'col2' . to_vector . take (Last 2))
t_2.at 'col3' . to_vector . should_equal (t_1.at 'col3' . to_vector . take (Last 2))
Test.specify "should allow getting the first / head row" <|
i_1 = ['ix', [1, 2, 3]]

View File

@ -77,6 +77,16 @@ spec = Test.group "Range" <|
x = cell.get
cell.put x+1
cell.get . should_equal n
Test.specify "should allow iteration with index" <|
vec_mut = Vector.new_builder
5.up_to 8 . each_with_index ix-> elem->
vec_mut.append (Pair ix elem)
vec_mut.to_vector . should_equal [Pair 0 5, Pair 1 6, Pair 2 7]
vec_mut_2 = Vector.new_builder
5.up_to 10 . with_step 2 . each_with_index ix-> elem->
vec_mut_2.append (Pair ix elem)
vec_mut_2.to_vector . should_equal [Pair 0 5, Pair 1 7, Pair 2 9]
Test.specify "should be able to be folded" <|
1.up_to 6 . fold 0 (+) . should_equal 15
Test.specify "should check all" <|

View File

@ -8,10 +8,6 @@ polyglot java import java.util.regex.Pattern as Java_Pattern
import Standard.Test
Text.slice_utf_16 self start end =
chars = self.utf_16.drop_start start . drop_end (self.length - end)
Text.from_utf_16 chars
default_mask = Java_Pattern.CANON_EQ.bit_or Java_Pattern.UNICODE_CASE . bit_or Java_Pattern.UNICODE_CHARACTER_CLASS
spec =

View File

@ -4,21 +4,23 @@ from Standard.Base.Data.Text.Text_Sub_Range import all
import Standard.Test
spec = Test.group "Text_Sub_Range.Codepoint_Ranges" <|
run ranges =
Codepoint_Ranges ranges False . sorted_and_distinct_ranges
Test.specify "should be able to sort correctly merge neighboring sequences" <|
run [] . should_equal []
run [Range 0 0] . should_equal []
run [Range 0 10] . should_equal [Range 0 10]
run [Range 0 10, Range 2 4] . should_equal [Range 0 10]
run [Range 0 5, Range 5 10] . should_equal [Range 0 10]
run [Range 5 10, Range 0 0, Range 0 1, Range 1 5] . should_equal [Range 0 10]
run [Range 0 1, Range 1 2] . should_equal [Range 0 2]
run [Range 6 7, Range 7 8, Range 5 5, Range 0 1, Range 2 3] . should_equal [Range 0 1, Range 2 3, Range 6 8]
run [Range 5 10, Range 3 6, Range 3 6, Range 3 5, Range 3 7, Range 0 1] . should_equal [Range 0 1, Range 3 10]
run [Range 0 1, Range 0 1] . should_equal [Range 0 1]
run [Range 0 1, Range 1 2] . should_equal [Range 0 2]
spec = Test.group "Text_Sub_Range" <|
Test.specify "should correctly split a text into grapheme cluster ranges expressed in codepoint indices" <|
character_ranges "" . should_equal []
character_ranges "A" . should_equal [Range 0 1]
character_ranges "abc" . should_equal [Range 0 1, Range 1 2, Range 2 3]
character_ranges 'śs\u0301S' . should_equal [Range 0 1, Range 1 3, Range 3 4]
kshi = '\u0915\u094D\u0937\u093F'
facepalm = '\u{1F926}\u{1F3FC}\u200D\u2642\uFE0F'
accent_1 = '\u00E9'
accent_2 = '\u0065\u{301}'
character_ranges kshi . should_equal [Range 0 4]
character_ranges facepalm . should_equal [Range 0 7]
character_ranges accent_1 . should_equal [Range 0 1]
character_ranges accent_2 . should_equal [Range 0 2]
character_ranges kshi+facepalm+accent_1+accent_2 . should_equal [Range 0 4, Range 4 11, Range 11 12, Range 12 14]
Test.specify "should correctly split a text into grapheme cluster ranges expressed in codepoint indices" <|
character_ranges "" . should_equal []
character_ranges "A" . should_equal [Range 0 1]

View File

@ -4,6 +4,7 @@ from Standard.Base.Data.Text.Extensions import Index_Out_Of_Bounds_Error
import Standard.Base.Data.Text.Regex.Engine.Default as Default_Engine
from Standard.Base.Data.Text.Text_Sub_Range import all
from Standard.Base.Data.Index_Sub_Range import First, Last, While, By_Index, Sample, Every
import Standard.Test
@ -262,7 +263,7 @@ spec =
"ABCDEFGH".drop (Every 3 first=1) . should_equal "ACDFG"
"ABCDEFGHI".drop (Every 3 first=1) . should_equal "ACDFGI"
Test.specify "should allow selecting a random sample of a substring"
Test.specify "should allow taking or dropping a random sample of a substring"
"AAAAA".take (Sample 3) . should_equal "AAA"
"AAAAA".drop (Sample 3) . should_equal "AA"
@ -275,6 +276,10 @@ spec =
"ABCDEFGH".take (Sample 1 seed=42) . should_equal "F"
"ABCDEFGH".take (Sample 100 seed=42) . should_equal "FGCHABED"
samples_1 = 0.up_to 10000 . map seed->
"ABCD".take (Sample 2 seed)
samples_1.should_contain_the_same_elements_as ["AB", "BA", "AC", "CA", "AD", "DA", "BC", "CB", "BD", "DB", "CD", "DC"]
"ABCDEFGH".drop (Sample 0) . should_equal "ABCDEFGH"
"ABCDEFGH".drop (Sample 1 seed=42) . should_equal "ABCDEGH"
"ABCDEFGH".drop (Sample 2 seed=42) . should_equal "ABCDEH"
@ -282,6 +287,10 @@ spec =
"ABCDEFGH".drop (Sample 8 seed=42) . should_equal ""
"ABCDEFGH".drop (Sample 100 seed=42) . should_equal ""
samples_2 = 0.up_to 10000 . map seed->
"ABCD".drop (Sample 2 seed)
samples_2.should_contain_the_same_elements_as ["AB", "AC", "AD", "BC", "CD", "BD"]
Test.specify "should allow taking or dropping many indices or subranges (possibly overlapping)" <|
"123"*1000 . take (By_Index (Vector.new 3000 ix-> 2999-ix)) . should_equal "321"*1000
"123"*1000 . take (By_Index (Vector.new 3000 _-> 0)) . should_equal "1"*3000

View File

@ -0,0 +1,20 @@
from Standard.Base import all
import Standard.Test
## Test group for the range-merging helper used by the vector slicing
   operations (`Vector.sort_and_merge_ranges`).
spec = Test.group "Vector Slicing Helpers" <|
    Test.specify "should be able to sort and correctly merge neighboring sequences" <|
        merge = Vector.sort_and_merge_ranges

        # Empty input and empty ranges yield no ranges at all.
        merge [] . should_equal []
        merge [Range 0 0] . should_equal []

        # A single range is returned unchanged.
        merge [Range 0 10] . should_equal [Range 0 10]

        # Contained, adjacent and duplicated ranges collapse into one.
        merge [Range 0 10, Range 2 4] . should_equal [Range 0 10]
        merge [Range 0 5, Range 5 10] . should_equal [Range 0 10]
        merge [Range 0 1, Range 1 2] . should_equal [Range 0 2]
        merge [Range 0 1, Range 0 1] . should_equal [Range 0 1]

        # Unordered input is sorted; empty ranges are dropped and disjoint
        # ranges are kept separate.
        merge [Range 5 10, Range 0 0, Range 0 1, Range 1 5] . should_equal [Range 0 10]
        merge [Range 6 7, Range 7 8, Range 5 5, Range 0 1, Range 2 3] . should_equal [Range 0 1, Range 2 3, Range 6 8]
        merge [Range 5 10, Range 3 6, Range 3 6, Range 3 5, Range 3 7, Range 0 1] . should_equal [Range 0 1, Range 3 10]
## Entry point: passes the `spec` group defined in this file to
   `Test.Suite.run_main`.
main = Test.Suite.run_main spec

View File

@ -1,4 +1,5 @@
from Standard.Base import all
from Standard.Base.Data.Index_Sub_Range import While, By_Index, Sample, Every
import Standard.Test
@ -189,15 +190,110 @@ spec = Test.group "Vectors" <|
vec = [1, 2, 3, 4, 5, 6]
first_four = [1, 2, 3, 4]
last_four = [3, 4, 5, 6]
vec.take 2 4 . should_equal [3, 4]
vec.drop_start 2 . should_equal last_four
vec.drop_end 2 . should_equal first_four
vec.take_start 4 . should_equal first_four
vec.take_end 4 . should_equal last_four
[1, 3, 5, 6, 8, 9, 10, 11, 13].drop_while (x-> x%2 == 1) . should_equal [6, 8, 9, 10, 11, 13]
[1, 2, 3] . drop_while (_ > 10) . should_equal [1, 2, 3]
[1, 2, 3] . drop_while (_ < 10) . should_equal []
vec.take . should_equal [1]
vec.drop . should_equal [2, 3, 4, 5, 6]
vec.take (Range 2 4) . should_equal [3, 4]
vec.take (Range 0 0) . should_equal []
vec.take (Range 100 100) . should_fail_with Index_Out_Of_Bounds_Error
vec.take (Range 100 100) . catch . should_equal (Index_Out_Of_Bounds_Error 100 6)
vec.take (Range 0 100) . should_equal vec
[].take (Range 0 0) . should_fail_with Index_Out_Of_Bounds_Error
[].take (Range 0 0) . catch . should_equal (Index_Out_Of_Bounds_Error 0 0)
vec.take (Range 100 99) . should_fail_with Index_Out_Of_Bounds_Error
vec.drop (Range 2 4) . should_equal [1, 2, 5, 6]
vec.drop (Range 0 0) . should_equal vec
vec.drop (Range 100 100) . should_fail_with Index_Out_Of_Bounds_Error
vec.drop (Range 100 100) . catch . should_equal (Index_Out_Of_Bounds_Error 100 6)
vec.drop (Range 0 100) . should_equal []
[].drop (Range 0 0) . should_fail_with Index_Out_Of_Bounds_Error
[].drop (Range 0 0) . catch . should_equal (Index_Out_Of_Bounds_Error 0 0)
vec.drop (Range 100 99) . should_fail_with Index_Out_Of_Bounds_Error
vec.take (First 4) . should_equal first_four
vec.take (First 0) . should_equal []
vec.take (First -1) . should_equal []
vec.take (First 100) . should_equal vec
vec.drop (First 2) . should_equal last_four
vec.drop (First 0) . should_equal vec
vec.drop (First -1) . should_equal vec
vec.drop (First 100) . should_equal []
vec.take (Last 4) . should_equal last_four
vec.take (Last 0) . should_equal []
vec.take (Last -1) . should_equal []
vec.take (Last 100) . should_equal vec
vec.drop (Last 2) . should_equal first_four
vec.drop (Last 0) . should_equal vec
vec.drop (Last -1) . should_equal vec
vec.drop (Last 100) . should_equal []
vec.take (Every 3) . should_equal [1, 4]
vec.take (Every 3 first=1) . should_equal [2, 5]
vec.take (Every 2 first=1) . should_equal [2, 4, 6]
vec.take (Every 2 first=100) . should_equal []
vec.take (Every 200) . should_equal [1]
[].take (Every 2) . should_equal []
vec.take (Every 0) . should_fail_with Illegal_Argument_Error
[].take (Every 0) . should_fail_with Illegal_Argument_Error
vec.drop (Every 3) . should_equal [2, 3, 5, 6]
vec.drop (Every 3 first=1) . should_equal [1, 3, 4, 6]
vec.drop (Every 2 first=1) . should_equal [1, 3, 5]
vec.drop (Every 2 first=100) . should_equal vec
vec.drop (Every 200) . should_equal [2, 3, 4, 5, 6]
[].drop (Every 2) . should_equal []
vec.drop (Every 0) . should_fail_with Illegal_Argument_Error
[].drop (Every 0) . should_fail_with Illegal_Argument_Error
vec.take (By_Index 0) . should_equal [1]
[].take (By_Index 0) . should_fail_with Index_Out_Of_Bounds_Error
vec.take (By_Index []) . should_equal []
vec.take (By_Index [-1, -1]) . should_equal [6, 6]
vec.take (By_Index [0, 0, Range 3 100]) . should_equal [1, 1, 4, 5, 6]
vec.take (Range 0 100 2) . should_equal [1, 3, 5]
vec.take (By_Index [Range 0 100 2, Range 1 6 2]) . should_equal [1, 3, 5, 2, 4, 6]
vec.take (By_Index [Range 1 3, Range 2 5]) . should_equal [2, 3, 3, 4, 5]
vec.take (By_Index [Range 2 5, Range 1 3]) . should_equal [3, 4, 5, 2, 3]
vec.take (By_Index [0, 1, Range 100 200]) . should_fail_with Index_Out_Of_Bounds_Error
vec.take (By_Index 100) . should_fail_with Index_Out_Of_Bounds_Error
vec.drop (By_Index 0) . should_equal [2, 3, 4, 5, 6]
vec.drop (By_Index []) . should_equal vec
vec.drop (By_Index [-1, -1]) . should_equal [1, 2, 3, 4, 5]
vec.drop (By_Index [0, 0, Range 3 100]) . should_equal [2, 3]
vec.drop (Range 0 100 2) . should_equal [2, 4, 6]
vec.drop (By_Index [Range 0 100 2, Range 1 6 2]) . should_equal []
vec.drop (By_Index [Range 1 3, Range 2 5]) . should_equal [1, 6]
vec.drop (By_Index [Range 2 5, Range 1 3]) . should_equal [1, 6]
vec.drop (By_Index [0, 1, Range 100 200]) . should_fail_with Index_Out_Of_Bounds_Error
vec.drop (By_Index 100) . should_fail_with Index_Out_Of_Bounds_Error
[1, 3, 5, 6, 8, 9, 10, 11, 13].take (While (x-> x%2 == 1)) . should_equal [1, 3, 5]
[1, 2, 3] . take (While (_ > 10)) . should_equal []
[1, 2, 3] . take (While (_ < 10)) . should_equal [1, 2, 3]
[1, 3, 5, 6, 8, 9, 10, 11, 13].drop (While (x-> x%2 == 1)) . should_equal [6, 8, 9, 10, 11, 13]
[1, 2, 3] . drop (While (_ > 10)) . should_equal [1, 2, 3]
[1, 2, 3] . drop (While (_ < 10)) . should_equal []
vec.take (Sample 0) . should_equal []
[].take (Sample 0) . should_equal []
[].take (Sample 1) . should_equal []
["a"].take (Sample 1) . should_equal ["a"]
["a", "a", "a"].take (Sample 1) . should_equal ["a"]
["a", "a", "a"].take (Sample 100) . should_equal ["a", "a", "a"]
vec.drop (Sample 0) . should_equal vec
[].drop (Sample 0) . should_equal []
[].drop (Sample 1) . should_equal []
["a"].drop (Sample 1) . should_equal []
["a", "a", "a"].drop (Sample 1) . should_equal ["a", "a"]
["a", "a", "a"].drop (Sample 100) . should_equal []
Test.specify "should allow getting the head element" <|
non_empty_vec = [1, 2, 3, 4, 5]

View File

@ -42,7 +42,7 @@ import project.Data.Statistics_Spec
import project.Data.Regression_Spec
import project.Data.Text_Spec
import project.Data.Text.Codepoint_Ranges_Spec
import project.Data.Text.Text_Sub_Range_Spec
import project.Data.Text.Default_Regex_Engine_Spec
import project.Data.Text.Encoding_Spec
import project.Data.Text.Matching_Spec
@ -50,6 +50,8 @@ import project.Data.Text.Regex_Spec
import project.Data.Text.Span_Spec
import project.Data.Text.Utils_Spec
import project.Data.Vector.Slicing_Helpers_Spec
import project.Network.Http.Header_Spec as Http_Header_Spec
import project.Network.Http.Request_Spec as Http_Request_Spec
import project.Network.Http_Spec
@ -114,7 +116,7 @@ main = Test.Suite.run_main <|
Runtime_Spec.spec
Span_Spec.spec
Encoding_Spec.spec
Codepoint_Ranges_Spec.spec
Text_Sub_Range_Spec.spec
Bracket_Spec.spec
Lazy_Generator_Spec.spec
Stack_Traces_Spec.spec
@ -123,6 +125,7 @@ main = Test.Suite.run_main <|
Time_Spec.spec
URI_Spec.spec
Vector_Spec.spec
Slicing_Helpers_Spec.spec
Statistics_Spec.spec
Regression_Spec.spec
Warnings_Spec.spec

View File

@ -25,6 +25,11 @@ spec = Test.group "Random" <|
Random.random_indices 1 100 rng . should_equal [0]
Random.random_indices 100 0 rng . should_equal []
two_out_of_four = 0.up_to 10000 . map seed->
Random.random_indices 4 2 (Random.new seed)
two_out_of_four . should_contain_the_same_elements_as <|
[[0, 1], [0, 2], [1, 2], [1, 0], [2, 0], [2, 1], [0, 3], [1, 3], [2, 3], [3, 0], [3, 1], [3, 2]]
Test.specify "should allow to select a random sample from a vector" <|
rng = Random.new 0
vector = ["A", "B", "C"]

View File

@ -15,7 +15,7 @@ spec = Test.group "Stack traces" <|
modname = Meta.Constructor (Meta.meta Stack_Traces_Spec . constructor) . name
stack = My_Type.foo
names = [modname + ".bar", modname + ".baz", "Number.foo", modname + ".foo", "My_Type.foo"]
stack.take_start 5 . map .name . should_equal names
stack.take (First 5) . map .name . should_equal names
file = enso_project.root / 'src' / 'Runtime' / 'Stack_Traces_Spec.enso'
stack.take_start 5 . map (.source_location >> .file) . each (_.should_equal file)
stack.take (First 5) . map (.source_location >> .file) . each (_.should_equal file)

View File

@ -107,7 +107,7 @@ spec = Test.group "Dataflow Warnings" <|
current = Runtime.get_stack_trace
warned = foo "value"
warning_stack = Warning.get_all warned . head . origin
relevant = warning_stack . drop_end current.length
relevant = warning_stack . drop (Last current.length)
relevant.map .name . should_equal (['baz', 'bar', 'foo'].map ('Warnings_Spec.'+))
Test.specify "should attach reassignment info in the last-reassigned-first order" <|

View File

@ -84,12 +84,12 @@ spec =
Test.group "read_bytes" <|
Test.specify "should allow reading a file to byte vector" <|
contents = sample_file.read_bytes
contents.take_start 6 . should_equal [67, 117, 112, 99, 97, 107]
contents.take (First 6) . should_equal [67, 117, 112, 99, 97, 107]
Test.specify "should allow reading a file to byte vector via path" <|
full_path = sample_file . path
contents = File.read_bytes full_path
contents.take_start 6 . should_equal [67, 117, 112, 99, 97, 107]
contents.take (First 6) . should_equal [67, 117, 112, 99, 97, 107]
Test.specify "should allow to read last n bytes from a file" <|
file = enso_project.data / "transient" / "bytes.txt"
@ -111,10 +111,10 @@ spec =
Test.specify "should open and read the file in one shot" <|
path_name = sample_file.path
contents = File.read_bytes path_name
contents.take_start 6 . should_equal [67, 117, 112, 99, 97, 107]
contents.take (First 6) . should_equal [67, 117, 112, 99, 97, 107]
file = sample_file
contents_2 = File.read_bytes file
contents_2.take_start 6 . should_equal [67, 117, 112, 99, 97, 107]
contents_2.take (First 6) . should_equal [67, 117, 112, 99, 97, 107]
Test.group "read_text" <|
Test.specify "should allow reading a UTF-8 file" <|