Implement In-Memory Table order_by (#3515)

Implemented the `order_by` function with support for all modes of operation.
Added support for case insensitive natural order.

# Important Notes
- Improved MultiValueIndex/Key to read values directly from the column storage instead of allocating a new array for every row.
- Simplified the HashCode for MultiValueKey to a plain 31-multiplier hash over the key's values.
- Added Text_Utils.compare_normalized_ignoring_case to allow for case insensitive comparisons.
- Fixed issues with ObjectComparator and added some unit tests for it.
This commit is contained in:
James Dunkerley 2022-06-08 13:30:50 +01:00 committed by GitHub
parent c602404b1a
commit 8afba43add
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
24 changed files with 392 additions and 163 deletions

View File

@ -134,6 +134,7 @@
- [Added rank data, correlation and covariance statistics for `Vector`][3484] - [Added rank data, correlation and covariance statistics for `Vector`][3484]
- [Implemented `Table.order_by` for the SQLite backend.][3502] - [Implemented `Table.order_by` for the SQLite backend.][3502]
- [Implemented `Table.order_by` for the PostgreSQL backend.][3514] - [Implemented `Table.order_by` for the PostgreSQL backend.][3514]
- [Implemented `Table.order_by` for the in-memory table.][3515]
- [Renamed `File_Format.Text` to `Plain_Text`, updated `File_Format.Delimited` - [Renamed `File_Format.Text` to `Plain_Text`, updated `File_Format.Delimited`
API and added builders for customizing less common settings.][3516] API and added builders for customizing less common settings.][3516]
@ -212,6 +213,7 @@
[3484]: https://github.com/enso-org/enso/pull/3484 [3484]: https://github.com/enso-org/enso/pull/3484
[3502]: https://github.com/enso-org/enso/pull/3502 [3502]: https://github.com/enso-org/enso/pull/3502
[3514]: https://github.com/enso-org/enso/pull/3514 [3514]: https://github.com/enso-org/enso/pull/3514
[3515]: https://github.com/enso-org/enso/pull/3515
[3516]: https://github.com/enso-org/enso/pull/3516 [3516]: https://github.com/enso-org/enso/pull/3516
#### Enso Compiler #### Enso Compiler

View File

@ -1,4 +1,6 @@
from Standard.Base import Any, Ordering, Nothing, Vector from Standard.Base import all
import Standard.Base.Data.Ordering.Natural_Order
from Standard.Base.Data.Text.Text_Ordering as Text_Ordering_Module import Text_Ordering
polyglot java import org.enso.base.ObjectComparator polyglot java import org.enso.base.ObjectComparator
@ -9,10 +11,26 @@ polyglot java import org.enso.base.ObjectComparator
- custom_comparator: - custom_comparator:
If `Nothing` will get a singleton instance for `.compare_to`. If `Nothing` will get a singleton instance for `.compare_to`.
Otherwise can support a custom fallback comparator. Otherwise can support a custom fallback comparator.
new : Nothing | (Any->Any->Ordering) new : Nothing | (Any->Any->Ordering) -> ObjectComparator
new custom_comparator=Nothing = new custom_comparator=Nothing =
comparator_to_java cmp x y = Vector.handle_incomparable_value (cmp x y . to_sign) comparator_to_java cmp x y = Vector.handle_incomparable_value (cmp x y . to_sign)
case custom_comparator of case custom_comparator of
Nothing -> ObjectComparator.getInstance (comparator_to_java .compare_to) Nothing -> ObjectComparator.getInstance (comparator_to_java .compare_to)
_ -> ObjectComparator.new (comparator_to_java custom_comparator) _ -> ObjectComparator.new (comparator_to_java custom_comparator)
## ADVANCED
Create a Java Comparator with the specified Text_Ordering
Arguments:
- text_ordering:
Specifies how to compare Text values within the Comparator.
for_text_ordering : Text_Ordering -> ObjectComparator
for_text_ordering text_ordering =
case text_ordering.sort_digits_as_numbers of
True ->
txt_cmp a b = Natural_Order.compare a b text_ordering.case_sensitive . to_sign
here.new.withCustomTextComparator txt_cmp
False -> case text_ordering.case_sensitive of
Case_Insensitive locale -> here.new.withCaseInsensitivity locale.java_locale
_ -> here.new

View File

@ -18,8 +18,12 @@ polyglot java import com.ibm.icu.text.BreakIterator
Sort a vector of texts according to the natural dictionary ordering. Sort a vector of texts according to the natural dictionary ordering.
["a2", "a1", "a100", "a001", "a0001"].sort by=Natural_Order.compare . should_equal ["a0001", "a001", "a1", "a2", "a100"] ["a2", "a1", "a100", "a001", "a0001"].sort by=Natural_Order.compare . should_equal ["a0001", "a001", "a1", "a2", "a100"]
compare : Text -> Text -> Ordering compare : Text -> Text -> (True|Case_Insensitive) Ordering
compare text1 text2 = compare text1 text2 case_sensitive=True =
compare_text = case case_sensitive of
Case_Insensitive locale -> a -> b -> a.compare_to_ignore_case b locale
_ -> _.compare_to _
iter1 = BreakIterator.getCharacterInstance iter1 = BreakIterator.getCharacterInstance
iter1.setText text1 iter1.setText text1
@ -79,7 +83,7 @@ compare text1 text2 =
if (tmp.first.not && tmp.second) then Ordering.Greater else if (tmp.first.not && tmp.second) then Ordering.Greater else
case tmp.first.not of case tmp.first.not of
True -> True ->
text_comparison = substring1.compare_to substring2 text_comparison = compare_text substring1 substring2
if text_comparison != Ordering.Equal then text_comparison else if text_comparison != Ordering.Equal then text_comparison else
@Tail_Call order next1 iter1.next next2 iter2.next @Tail_Call order next1 iter1.next next2 iter2.next
False -> False ->
@ -93,7 +97,7 @@ compare text1 text2 =
value_comparison = value1.compare_to value2 value_comparison = value1.compare_to value2
if value_comparison != Ordering.Equal then value_comparison else if value_comparison != Ordering.Equal then value_comparison else
text_comparison = num_text1.compare_to num_text2 text_comparison = compare_text num_text1 num_text2
if text_comparison != Ordering.Equal then text_comparison else if text_comparison != Ordering.Equal then text_comparison else
@Tail_Call order (parsed1.at 2) (parsed1.at 3) (parsed2.at 2) (parsed2.at 3) @Tail_Call order (parsed1.at 2) (parsed1.at 3) (parsed2.at 2) (parsed2.at 3)

View File

@ -614,6 +614,22 @@ Text.compare_to that =
if comparison_result < 0 then Ordering.Less else if comparison_result < 0 then Ordering.Less else
Ordering.Greater Ordering.Greater
## Compare two texts to discover their ordering, ignoring case.
Arguments:
- that: The text to order `this` with respect to.
- locale: The locale used for case folding. Defaults to `Locale.default`.
> Example
Checking how "a" orders in relation to "b".
"a".compare_to_ignore_case "b"
Text.compare_to_ignore_case : Text -> Locale -> Ordering
Text.compare_to_ignore_case that locale=Locale.default =
comparison_result = Text_Utils.compare_normalized_ignoring_case this that locale.java_locale
if comparison_result == 0 then Ordering.Equal else
if comparison_result < 0 then Ordering.Less else
Ordering.Greater
## ALIAS Check Emptiness ## ALIAS Check Emptiness
Check if `this` is empty. Check if `this` is empty.

View File

@ -156,7 +156,7 @@ make_first_aggregator reverse ignore_null args =
filter_clause = if ignore_null.not then Sql.code "" else filter_clause = if ignore_null.not then Sql.code "" else
Sql.code " FILTER (WHERE " ++ result_expr.paren ++ Sql.code " IS NOT NULL)" Sql.code " FILTER (WHERE " ++ result_expr.paren ++ Sql.code " IS NOT NULL)"
modified_order_exprs = modified_order_exprs =
order_exprs.map expr-> expr ++ Sql.code " ASC NULLS LAST" order_exprs.map expr-> expr ++ Sql.code " ASC NULLS FIRST"
order_clause = order_clause =
Sql.code " ORDER BY " ++ Sql.join "," modified_order_exprs Sql.code " ORDER BY " ++ Sql.join "," modified_order_exprs
index_expr = case reverse of index_expr = case reverse of

View File

@ -19,10 +19,14 @@ from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_For
from Standard.Base.Data.Text.Text_Ordering as Text_Ordering_Module import Text_Ordering from Standard.Base.Data.Text.Text_Ordering as Text_Ordering_Module import Text_Ordering
from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior, Report_Warning from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior, Report_Warning
from Standard.Table.Error as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, Duplicate_Type_Selector from Standard.Table.Error as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, Duplicate_Type_Selector
import Standard.Table.Data.Column_Mapping import Standard.Table.Data.Column_Mapping
import Standard.Table.Data.Position import Standard.Table.Data.Position
import Standard.Table.Data.Sort_Column_Selector
import Standard.Table.Data.Sort_Column
import Standard.Table.Data.Aggregate_Column import Standard.Table.Data.Aggregate_Column
import Standard.Base.Data.Ordering.Comparator
polyglot java import org.enso.table.data.table.Table as Java_Table polyglot java import org.enso.table.data.table.Table as Java_Table
polyglot java import org.enso.table.data.table.Column as Java_Column polyglot java import org.enso.table.data.table.Column as Java_Column
@ -524,7 +528,7 @@ type Table
on_problems.attach_problems_before validated.problems <| on_problems.attach_problems_before validated.problems <|
java_key_columns = validated.key_columns.map .java_column java_key_columns = validated.key_columns.map .java_column
index = this.java_table.indexFromColumns java_key_columns.to_array index = this.java_table.indexFromColumns java_key_columns.to_array Comparator.new
new_columns = validated.valid_columns.map c->(Aggregate_Column_Helper.java_aggregator c.first c.second) new_columns = validated.valid_columns.map c->(Aggregate_Column_Helper.java_aggregator c.first c.second)
@ -535,6 +539,50 @@ type Table
problems = java_table.getProblems problems = java_table.getProblems
Aggregate_Column_Helper.parse_aggregated_problems problems Aggregate_Column_Helper.parse_aggregated_problems problems
## Sorts the rows of the table according to the specified columns and order.
Arguments:
- columns: The columns and order to sort the table.
- text_ordering: The ordering method to use on text values.
- on_problems: Specifies how to handle problems if they occur, reporting them
as warnings by default. The following problems can occur:
- If a column in `columns` is not present in the input table, a
`Missing_Input_Columns`.
- If duplicate columns, names or indices are provided, a
`Duplicate_Column_Selectors`.
- If a column index is out of range, a `Column_Indexes_Out_Of_Range`.
- If two distinct indices refer to the same column, an
`Input_Indices_Already_Matched`.
- If two name matchers match the same column, a
`Column_Matched_By_Multiple_Selectors`.
- If no valid columns are selected, a `No_Input_Columns_Selected`.
- If values do not implement an ordering, an
`Incomparable_Values_Error`.
> Example
Order the table by the column "alpha" in ascending order.
table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "alpha"])
> Example
Order the table by the second column in ascending order. In case of any
ties, break them based on the 7th column from the end of the table in
descending order.
table.order_by (Sort_Column_Selector.By_Index [Sort_Column.Index 1, Sort_Column.Index -7 Sort_Direction.Descending])
order_by : Sort_Column_Selector -> Text_Ordering -> Problem_Behavior -> Table
order_by (columns = (Sort_Column_Selector.By_Name [(Sort_Column.Name (this.columns.at 0 . name))])) text_ordering=Text_Ordering on_problems=Report_Warning =
columns_for_ordering = Table_Helpers.prepare_order_by this.columns columns on_problems
selected_columns = columns_for_ordering.map c->c.column.java_column
ordering = columns_for_ordering.map c->
case c.associated_selector.direction of
Sort_Direction.Ascending -> 1
Sort_Direction.Descending -> -1
comparator = Comparator.for_text_ordering text_ordering
Table <|
this.java_table.orderBy selected_columns.to_array ordering.to_array comparator
## Parses columns within a Table to a specific value type. ## Parses columns within a Table to a specific value type.
By default, it looks at all `Text` columns and attempts to deduce the By default, it looks at all `Text` columns and attempts to deduce the
type (columns with other types are not affected). If `column_types` are type (columns with other types are not affected). If `column_types` are

View File

@ -120,7 +120,7 @@ resolve_aggregate table problem_builder aggregate_column =
resolve_selector_to_vector : Column_Selector -> [Column] ! Internal_Missing_Column_Error resolve_selector_to_vector : Column_Selector -> [Column] ! Internal_Missing_Column_Error
resolve_selector_to_vector selector = resolve_selector_to_vector selector =
resolved = Table_Helpers.select_columns_helper table_columns selector reorder=False problem_builder resolved = Table_Helpers.select_columns_helper table_columns selector reorder=True problem_builder
if resolved.is_empty then Error.throw Internal_Missing_Column_Error else resolved if resolved.is_empty then Error.throw Internal_Missing_Column_Error else resolved
resolve_selector_or_nothing selector = case selector of resolve_selector_or_nothing selector = case selector of
@ -175,7 +175,7 @@ java_aggregator name column =
Count _ -> CountAggregator.new name Count _ -> CountAggregator.new name
Count_Distinct columns _ ignore_nothing -> Count_Distinct columns _ ignore_nothing ->
resolved = columns.map .java_column resolved = columns.map .java_column
CountDistinctAggregator.new name resolved.to_array ignore_nothing CountDistinctAggregator.new name resolved.to_array ignore_nothing Comparator.new
Count_Not_Nothing c _ -> CountNothingAggregator.new name c.java_column False Count_Not_Nothing c _ -> CountNothingAggregator.new name c.java_column False
Count_Nothing c _ -> CountNothingAggregator.new name c.java_column True Count_Nothing c _ -> CountNothingAggregator.new name c.java_column True
Count_Not_Empty c _ -> CountEmptyAggregator.new name c.java_column False Count_Not_Empty c _ -> CountEmptyAggregator.new name c.java_column False

View File

@ -4,27 +4,30 @@ import java.time.LocalDate;
import java.time.LocalDateTime; import java.time.LocalDateTime;
import java.time.LocalTime; import java.time.LocalTime;
import java.util.Comparator; import java.util.Comparator;
import java.util.Locale;
import java.util.function.BiFunction; import java.util.function.BiFunction;
public class ObjectComparator implements Comparator<Object> { public class ObjectComparator implements Comparator<Object> {
private static ObjectComparator INSTANCE; private static ObjectComparator INSTANCE;
/** /**
* A singleton instance of an ObjectComparator * A singleton instance of an ObjectComparator.
* *
* @param fallbackComparator this MUST be the default .compare_to function for Enso. Needs to be * @param fallbackComparator this MUST be the default .compare_to function for Enso. Needs to be
* passed to allow calling back from Java. * passed to allow calling back from Java.
* @return Comparator object * @return Comparator object.
*/ */
public static ObjectComparator getInstance(BiFunction<Object, Object, Long> fallbackComparator) { public static ObjectComparator getInstance(BiFunction<Object, Object, Long> fallbackComparator) {
if (INSTANCE == null) { if (INSTANCE == null) {
INSTANCE = new ObjectComparator((l, r) -> fallbackComparator.apply(l, r).intValue()); INSTANCE = new ObjectComparator(fallbackComparator);
} }
return INSTANCE; return INSTANCE;
} }
private final BiFunction<Object, Object, Integer> fallbackComparator; private final BiFunction<Object, Object, Long> fallbackComparator;
private final BiFunction<String, String, Long> textComparator;
public ObjectComparator() { public ObjectComparator() {
this( this(
@ -33,8 +36,31 @@ public class ObjectComparator implements Comparator<Object> {
}); });
} }
public ObjectComparator(BiFunction<Object, Object, Integer> fallbackComparator) { public ObjectComparator(BiFunction<Object, Object, Long> fallbackComparator) {
this(fallbackComparator, (a, b) -> Long.valueOf(Text_Utils.compare_normalized(a, b)));
}
private ObjectComparator(BiFunction<Object, Object, Long> fallbackComparator, BiFunction<String, String, Long> textComparator) {
this.fallbackComparator = fallbackComparator; this.fallbackComparator = fallbackComparator;
this.textComparator = textComparator;
}
/**
* Create a copy of the ObjectComparator with case-insensitive text comparisons.
* @param locale to use for case folding.
* @return Comparator object.
*/
public ObjectComparator withCaseInsensitivity(Locale locale) {
return new ObjectComparator(this.fallbackComparator, (a, b) -> Long.valueOf(Text_Utils.compare_normalized_ignoring_case(a, b, locale)));
}
/**
* Create a copy of the ObjectComparator that uses a custom comparator for text values.
* @param textComparator custom comparator for Text.
* @return Comparator object.
*/
public ObjectComparator withCustomTextComparator(BiFunction<String, String, Long> textComparator) {
return new ObjectComparator(this.fallbackComparator, textComparator);
} }
@Override @Override
@ -42,18 +68,16 @@ public class ObjectComparator implements Comparator<Object> {
// NULLs // NULLs
if (thisValue == null) { if (thisValue == null) {
if (thatValue != null) { if (thatValue != null) {
return 1; return -1;
} }
return 0; return 0;
} }
if (thatValue == null) { if (thatValue == null) {
return -1; return 1;
} }
// Booleans // Booleans
if (thisValue instanceof Boolean && thatValue instanceof Boolean) { if (thisValue instanceof Boolean thisBool && thatValue instanceof Boolean thatBool) {
boolean thisBool = (Boolean) thisValue;
boolean thatBool = (Boolean) thatValue;
if (thisBool == thatBool) { if (thisBool == thatBool) {
return 0; return 0;
} }
@ -61,13 +85,11 @@ public class ObjectComparator implements Comparator<Object> {
} }
// Long this // Long this
if (thisValue instanceof Long) { if (thisValue instanceof Long thisLong) {
Long thisLong = (Long) thisValue; if (thatValue instanceof Long thatLong) {
if (thatValue instanceof Long) { return thisLong.compareTo(thatLong);
return thisLong.compareTo((Long) thatValue);
} }
if (thatValue instanceof Double) { if (thatValue instanceof Double thatDouble) {
Double thatDouble = (Double) thatValue;
if (thisLong > thatDouble) { if (thisLong > thatDouble) {
return 1; return 1;
} }
@ -79,13 +101,11 @@ public class ObjectComparator implements Comparator<Object> {
} }
// Double this // Double this
if (thisValue instanceof Double) { if (thisValue instanceof Double thisDouble) {
Double thisDouble = (Double) thisValue; if (thatValue instanceof Double thatDouble) {
if (thatValue instanceof Double) { return thisDouble.compareTo(thatDouble);
return thisDouble.compareTo((Double) thatValue);
} }
if (thatValue instanceof Long) { if (thatValue instanceof Long thatLong) {
Long thatLong = (Long) thatValue;
if (thisDouble > thatLong) { if (thisDouble > thatLong) {
return 1; return 1;
} }
@ -97,39 +117,36 @@ public class ObjectComparator implements Comparator<Object> {
} }
// Text // Text
if (thisValue instanceof String && thatValue instanceof String) { if (thisValue instanceof String thisString && thatValue instanceof String thatString) {
return Text_Utils.compare_normalized((String) thisValue, (String) thatValue); return textComparator.apply(thisString, thatString).intValue();
} }
// DateTimes // DateTimes
if (thisValue instanceof LocalDate) { if (thisValue instanceof LocalDate thisDate) {
LocalDate thisDate = (LocalDate) thisValue; if (thatValue instanceof LocalDate thatDate) {
if (thatValue instanceof LocalDate) { return thisDate.compareTo(thatDate);
return thisDate.compareTo((LocalDate) thatValue);
} }
if (thatValue instanceof LocalDateTime) { if (thatValue instanceof LocalDateTime thatDateTime) {
return thisDate.atStartOfDay().compareTo((LocalDateTime) thatValue); return thisDate.atStartOfDay().compareTo(thatDateTime);
} }
} }
if (thisValue instanceof LocalDateTime) { if (thisValue instanceof LocalDateTime thisDateTime) {
LocalDateTime thisDateTime = (LocalDateTime) thisValue; if (thatValue instanceof LocalDate thatDate) {
if (thatValue instanceof LocalDate) { return thisDateTime.compareTo(thatDate.atStartOfDay());
return thisDateTime.compareTo(((LocalDate) thatValue).atStartOfDay());
} }
if (thatValue instanceof LocalDateTime) { if (thatValue instanceof LocalDateTime thatDateTime) {
return thisDateTime.compareTo((LocalDateTime) thatValue); return thisDateTime.compareTo(thatDateTime);
} }
} }
// TimeOfDay // TimeOfDay
if (thisValue instanceof LocalTime) { if (thisValue instanceof LocalTime thisTime) {
LocalTime thisTime = (LocalTime) thisValue; if (thatValue instanceof LocalTime thatTime) {
if (thatValue instanceof LocalTime) { return thisTime.compareTo(thatTime);
return thisTime.compareTo((LocalTime) thatValue);
} }
} }
// Fallback to Enso // Fallback to Enso
return fallbackComparator.apply(thisValue, thatValue); return fallbackComparator.apply(thisValue, thatValue).intValue();
} }
} }

View File

@ -124,9 +124,8 @@ public class Text_Utils {
* @return the result of comparison * @return the result of comparison
*/ */
public static boolean equals_ignore_case(String str1, Object str2, Locale locale) { public static boolean equals_ignore_case(String str1, Object str2, Locale locale) {
if (str2 instanceof String) { if (str2 instanceof String string2) {
Fold fold = CaseFoldedString.caseFoldAlgorithmForLocale(locale); return compare_normalized_ignoring_case(str1, string2, locale) == 0;
return compare_normalized(fold.apply(str1), fold.apply((String) str2)) == 0;
} else { } else {
return false; return false;
} }
@ -165,6 +164,21 @@ public class Text_Utils {
return Normalizer.compare(a, b, Normalizer.FOLD_CASE_DEFAULT); return Normalizer.compare(a, b, Normalizer.FOLD_CASE_DEFAULT);
} }
/**
* Compares {@code a} to {@code b} according to the lexicographical order, ignoring case (using
* the case folding rules of the given locale) and handling Unicode normalization.
*
* @param a the left operand
* @param b the right operand
* @param locale the locale to use for case folding
* @return a negative value if {@code a} is before {@code b}, 0 if both values are equal and a
* positive value if {@code a} is after {@code b}
*/
public static int compare_normalized_ignoring_case(String a, String b, Locale locale) {
Fold fold = CaseFoldedString.caseFoldAlgorithmForLocale(locale);
return Normalizer.compare(fold.apply(a), fold.apply(b), Normalizer.FOLD_CASE_DEFAULT);
}
/** /**
* Checks if {@code substring} is a substring of {@code string}. * Checks if {@code substring} is a substring of {@code string}.
* *

View File

@ -5,10 +5,7 @@ import org.enso.table.data.index.MultiValueKey;
import org.enso.table.data.table.Column; import org.enso.table.data.table.Column;
import org.enso.table.data.table.problems.FloatingPointGrouping; import org.enso.table.data.table.problems.FloatingPointGrouping;
import java.util.Arrays; import java.util.*;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/** /**
* Aggregate Column counting the number of distinct items in a group. If `ignoreAllNull` is true, * Aggregate Column counting the number of distinct items in a group. If `ignoreAllNull` is true,
@ -16,6 +13,7 @@ import java.util.Set;
*/ */
public class CountDistinct extends Aggregator { public class CountDistinct extends Aggregator {
private final Storage[] storage; private final Storage[] storage;
private final Comparator<Object> objectComparator;
private final boolean ignoreAllNull; private final boolean ignoreAllNull;
/** /**
@ -25,18 +23,19 @@ public class CountDistinct extends Aggregator {
* @param columns input columns * @param columns input columns
* @param ignoreAllNull if true ignore when all values are null * @param ignoreAllNull if true ignore when all values are null
*/ */
public CountDistinct(String name, Column[] columns, boolean ignoreAllNull) { public CountDistinct(
String name, Column[] columns, boolean ignoreAllNull, Comparator<Object> objectComparator) {
super(name, Storage.Type.LONG); super(name, Storage.Type.LONG);
this.storage = Arrays.stream(columns).map(Column::getStorage).toArray(Storage[]::new); this.storage = Arrays.stream(columns).map(Column::getStorage).toArray(Storage[]::new);
this.ignoreAllNull = ignoreAllNull; this.ignoreAllNull = ignoreAllNull;
this.objectComparator = objectComparator;
} }
@Override @Override
public Object aggregate(List<Integer> indexes) { public Object aggregate(List<Integer> indexes) {
Set<MultiValueKey> set = new HashSet<>(); Set<MultiValueKey> set = new HashSet<>();
for (int row : indexes) { for (int row : indexes) {
MultiValueKey key = MultiValueKey key = new MultiValueKey(storage, row, objectComparator);
new MultiValueKey(Arrays.stream(storage).map(s -> s.getItemBoxed(row)).toArray());
if (key.hasFloatValues()) { if (key.hasFloatValues()) {
this.addProblem(new FloatingPointGrouping(this.getName(), row)); this.addProblem(new FloatingPointGrouping(this.getName(), row));
} }

View File

@ -54,10 +54,7 @@ public class First extends Aggregator {
continue; continue;
} }
MultiValueKey newKey = MultiValueKey newKey = new MultiValueKey(this.ordering, row, objectComparator);
new MultiValueKey(
Arrays.stream(this.ordering).map(o -> o.getItemBoxed(row)).toArray(),
objectComparator);
if (key == null || key.compareTo(newKey) > 0) { if (key == null || key.compareTo(newKey) > 0) {
key = newKey; key = newKey;
current = storage.getItemBoxed(row); current = storage.getItemBoxed(row);

View File

@ -54,10 +54,7 @@ public class Last extends Aggregator {
continue; continue;
} }
MultiValueKey newKey = MultiValueKey newKey = new MultiValueKey(this.ordering, row, objectComparator);
new MultiValueKey(
Arrays.stream(this.ordering).map(o -> o.getItemBoxed(row)).toArray(),
objectComparator);
if (key == null || key.compareTo(newKey) < 0) { if (key == null || key.compareTo(newKey) < 0) {
key = newKey; key = newKey;
current = storage.getItemBoxed(row); current = storage.getItemBoxed(row);

View File

@ -9,11 +9,7 @@ import org.enso.table.data.table.Table;
import org.enso.table.data.table.problems.AggregatedProblems; import org.enso.table.data.table.problems.AggregatedProblems;
import org.enso.table.data.table.problems.FloatingPointGrouping; import org.enso.table.data.table.problems.FloatingPointGrouping;
import java.util.Arrays; import java.util.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.IntStream; import java.util.stream.IntStream;
@ -22,18 +18,20 @@ public class MultiValueIndex {
private final Map<MultiValueKey, List<Integer>> locs; private final Map<MultiValueKey, List<Integer>> locs;
private final AggregatedProblems problems; private final AggregatedProblems problems;
public MultiValueIndex(Column[] keyColumns, int tableSize) { public MultiValueIndex(Column[] keyColumns, int tableSize, Comparator<Object> objectComparator) {
this(keyColumns, tableSize, null, objectComparator);
}
public MultiValueIndex(Column[] keyColumns, int tableSize, int[] ordering, Comparator<Object> objectComparator) {
this.keyColumnsLength = keyColumns.length; this.keyColumnsLength = keyColumns.length;
this.locs = new HashMap<>(); this.locs = ordering == null ? new HashMap<>() : new TreeMap<>();
this.problems = new AggregatedProblems(); this.problems = new AggregatedProblems();
if (keyColumns.length != 0) { if (keyColumns.length != 0) {
int size = keyColumns[0].getSize(); int size = keyColumns[0].getSize();
Storage[] storage = Arrays.stream(keyColumns).map(Column::getStorage).toArray(Storage[]::new);
for (int i = 0; i < size; i++) { for (int i = 0; i < size; i++) {
int finalI = i; MultiValueKey key = new MultiValueKey(storage, i, ordering, objectComparator);
MultiValueKey key =
new MultiValueKey(
Arrays.stream(keyColumns).map(c -> c.getStorage().getItemBoxed(finalI)).toArray());
if (key.hasFloatValues()) { if (key.hasFloatValues()) {
problems.add(new FloatingPointGrouping("GroupBy", i)); problems.add(new FloatingPointGrouping("GroupBy", i));
@ -43,9 +41,7 @@ public class MultiValueIndex {
ids.add(i); ids.add(i);
} }
} else { } else {
this.locs.put( this.locs.put(new MultiValueKey(new Storage[0], 0, objectComparator), IntStream.range(0, tableSize).boxed().collect(Collectors.toList()));
new MultiValueKey(new Object[0]),
IntStream.range(0, tableSize).boxed().collect(Collectors.toList()));
} }
} }
@ -87,19 +83,31 @@ public class MultiValueIndex {
merged); merged);
} }
private static Builder getBuilderForType(int type, int size) { public int[] makeOrderMap(int rowCount) {
switch (type) { if (this.locs.size() == 0) {
case Storage.Type.BOOL: return new int[0];
return new BoolBuilder();
case Storage.Type.DOUBLE:
return NumericBuilder.createDoubleBuilder(size);
case Storage.Type.LONG:
return NumericBuilder.createLongBuilder(size);
case Storage.Type.STRING:
return new StringBuilder(size);
case Storage.Type.OBJECT:
return new ObjectBuilder(size);
} }
return new InferredBuilder(size);
int[] output = new int[rowCount];
int idx = 0;
for (List<Integer> rowIndexes : this.locs.values()) {
for (Integer rowIndex : rowIndexes) {
output[idx++] = rowIndex;
}
}
return output;
}
private static Builder getBuilderForType(int type, int size) {
return switch (type) {
case Storage.Type.BOOL -> new BoolBuilder();
case Storage.Type.DOUBLE -> NumericBuilder.createDoubleBuilder(size);
case Storage.Type.LONG -> NumericBuilder.createLongBuilder(size);
case Storage.Type.STRING -> new StringBuilder(size);
case Storage.Type.OBJECT -> new ObjectBuilder(size);
default -> new InferredBuilder(size);
};
} }
} }

View File

@ -1,45 +1,63 @@
package org.enso.table.data.index; package org.enso.table.data.index;
import org.enso.table.data.column.storage.Storage;
import java.util.Arrays; import java.util.Arrays;
import java.util.Comparator; import java.util.Comparator;
import java.util.Objects;
public class MultiValueKey implements Comparable<MultiValueKey> { public class MultiValueKey implements Comparable<MultiValueKey> {
private final Object[] values; private final Storage[] storage;
private final int[] directions;
private final int rowIndex;
private final Comparator<Object> objectComparator; private final Comparator<Object> objectComparator;
private final int hashCodeValue; private final int hashCodeValue;
private final boolean allNull; private final boolean allNull;
private final boolean floatValue; private final boolean floatValue;
public MultiValueKey(Object[] values) { public MultiValueKey(Storage[] storage, int rowIndex, Comparator<Object> objectComparator) {
this(values, null); this(storage, rowIndex, null, objectComparator);
} }
public MultiValueKey(Object[] values, Comparator<Object> objectComparator) { public MultiValueKey(
this.values = values; Storage[] storage, int rowIndex, int[] directions, Comparator<Object> objectComparator) {
this.storage = storage;
this.rowIndex = rowIndex;
if (directions == null) {
directions = new int[storage.length];
Arrays.fill(directions, 1);
}
this.directions = directions;
this.objectComparator = objectComparator; this.objectComparator = objectComparator;
boolean allNull = true; boolean allNull = true;
boolean floatValue = false; boolean floatValue = false;
// Precompute HashCode - using Apache.Commons.Collections.Map.MultiKeyMap.hash algorithm // Precompute HashCode - using Apache.Commons.Collections.Map.MultiKeyMap.hash algorithm
int h = 0; int h = 1;
for (Object value : this.values) { for (int i = 0; i < storage.length; i++) {
h = 31 * h;
Object value = this.get(i);
if (value != null) { if (value != null) {
Object folded = foldObject(value); Object folded = foldObject(value);
floatValue = floatValue || (folded instanceof Double); floatValue = floatValue || (folded instanceof Double);
h ^= folded.hashCode(); h += folded.hashCode();
allNull = false; allNull = false;
} }
} }
h += ~(h << 9);
h ^= h >>> 14;
h += h << 4;
this.hashCodeValue = h ^ (h >>> 10); this.hashCodeValue = h;
this.allNull = allNull; this.allNull = allNull;
this.floatValue = floatValue; this.floatValue = floatValue;
} }
public Object get(int column) {
return storage[column].getItemBoxed(rowIndex);
}
@Override @Override
public int hashCode() { public int hashCode() {
return this.hashCodeValue; return this.hashCodeValue;
@ -48,9 +66,16 @@ public class MultiValueKey implements Comparable<MultiValueKey> {
@Override @Override
public boolean equals(Object o) { public boolean equals(Object o) {
if (this == o) return true; if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false; if (!(o instanceof MultiValueKey that)) return false;
MultiValueKey that = (MultiValueKey) o; if (storage.length != that.storage.length) return false;
return hashCodeValue == that.hashCodeValue && Arrays.equals(values, that.values); if (hashCodeValue != that.hashCodeValue) return false;
for (int i = 0; i < storage.length; i++) {
if (objectComparator.compare(get(i), that.get(i)) != 0) {
return false;
}
}
return true;
} }
public boolean areAllNull() { public boolean areAllNull() {
@ -87,14 +112,14 @@ public class MultiValueKey implements Comparable<MultiValueKey> {
throw new NullPointerException(); throw new NullPointerException();
} }
if (that.values.length != values.length) { if (that.storage.length != storage.length) {
throw new ClassCastException("Incomparable keys."); throw new ClassCastException("Incomparable keys.");
} }
for (int i = 0; i < values.length; i++) { for (int i = 0; i < storage.length; i++) {
int comparison = objectComparator.compare(values[i], that.values[i]); int comparison = objectComparator.compare(get(i), that.get(i));
if (comparison != 0) { if (comparison != 0) {
return comparison; return comparison * directions[i];
} }
} }

View File

@ -1,11 +1,6 @@
package org.enso.table.data.table; package org.enso.table.data.table;
import java.util.ArrayList; import java.util.*;
import java.util.Arrays;
import java.util.BitSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.enso.table.data.column.builder.object.InferredBuilder; import org.enso.table.data.column.builder.object.InferredBuilder;
import org.enso.table.data.column.storage.BoolStorage; import org.enso.table.data.column.storage.BoolStorage;
@ -115,11 +110,10 @@ public class Table {
* @return the result of masking this table with the provided column * @return the result of masking this table with the provided column
*/ */
public Table mask(Column maskCol) { public Table mask(Column maskCol) {
if (!(maskCol.getStorage() instanceof BoolStorage)) { if (!(maskCol.getStorage() instanceof BoolStorage storage)) {
throw new UnexpectedColumnTypeException("Boolean"); throw new UnexpectedColumnTypeException("Boolean");
} }
BoolStorage storage = (BoolStorage) maskCol.getStorage();
var mask = BoolStorage.toMask(storage); var mask = BoolStorage.toMask(storage);
var localStorageMask = new BitSet(); var localStorageMask = new BitSet();
localStorageMask.set(0, rowCount()); localStorageMask.set(0, rowCount());
@ -212,13 +206,28 @@ public class Table {
} }
/** /**
* Creates an index fpr this table by using values from the specified columns. * Creates an index for this table by using values from the specified columns.
* *
* @param columns set of columns to use as an Index * @param columns set of columns to use as an Index
* @param objectComparator Object comparator allowing calling back to `compare_to` when needed.
* @return a table indexed by the proper column * @return a table indexed by the proper column
*/ */
public MultiValueIndex indexFromColumns(Column[] columns) { public MultiValueIndex indexFromColumns(Column[] columns, Comparator<Object> objectComparator) {
return new MultiValueIndex(columns, this.rowCount()); return new MultiValueIndex(columns, this.rowCount(), objectComparator);
}
/**
* Creates a new table with the rows sorted
*
* <p>A {@code MultiValueIndex} is built over the given columns for every row of this table, its
* order map is wrapped in an {@code OrderMask}, and that mask is applied to this table.
*
* @param columns the columns whose values make up the sort key (NOTE(review): presumably in
*     order of precedence, first column most significant; confirm against MultiValueIndex)
* @param directions one entry per sort column, narrowed to {@code int} via {@code Long::intValue}
*     and passed to the index; MultiValueKey.compareTo multiplies the comparator result by this
*     value (NOTE(review): presumably 1 for ascending and -1 for descending; confirm against
*     the caller). A null element would fail during the conversion.
* @param objectComparator Object comparator allowing calling back to `compare_to` when needed.
* @return the table produced by applying the computed order mask to this table
*/
public Table orderBy(Column[] columns, Long[] directions, Comparator<Object> objectComparator) {
// The index works with primitive int directions, so unbox and narrow the Long values first.
int[] directionInts = Arrays.stream(directions).mapToInt(Long::intValue).toArray();
MultiValueIndex index = new MultiValueIndex(columns, this.rowCount(), directionInts, objectComparator);
// The order map produced by the index drives the row permutation applied below.
OrderMask mask = new OrderMask(index.makeOrderMap(this.rowCount()));
return this.applyMask(mask);
}
/** /**

View File

@ -350,11 +350,7 @@ public class Reader {
* @throws IOException when the input stream cannot be read. * @throws IOException when the input stream cannot be read.
*/ */
public static Table readSheetByName( public static Table readSheetByName(
InputStream stream, InputStream stream, String sheetName, int skip_rows, Integer row_limit, boolean xls_format)
String sheetName,
int skip_rows,
Integer row_limit,
boolean xls_format)
throws IOException, IllegalArgumentException { throws IOException, IllegalArgumentException {
Workbook workbook = getWorkbook(stream, xls_format); Workbook workbook = getWorkbook(stream, xls_format);
@ -364,11 +360,7 @@ public class Reader {
} }
return readSheetToTable( return readSheetToTable(
workbook, workbook, sheetIndex, null, skip_rows, row_limit == null ? Integer.MAX_VALUE : row_limit);
sheetIndex,
null,
skip_rows,
row_limit == null ? Integer.MAX_VALUE : row_limit);
} }
/** /**
@ -394,11 +386,7 @@ public class Reader {
} }
return readSheetToTable( return readSheetToTable(
workbook, workbook, index - 1, null, skip_rows, row_limit == null ? Integer.MAX_VALUE : row_limit);
index - 1,
null,
skip_rows,
row_limit == null ? Integer.MAX_VALUE : row_limit);
} }
/** /**
@ -447,18 +435,13 @@ public class Reader {
return xls_format ? new HSSFWorkbook(stream) : new XSSFWorkbook(stream); return xls_format ? new HSSFWorkbook(stream) : new XSSFWorkbook(stream);
} }
private static Table readRange( private static Table readRange(Workbook workbook, Range range, int skip_rows, Integer row_limit) {
Workbook workbook, Range range, int skip_rows, Integer row_limit) {
int sheetIndex = getSheetIndex(workbook, range.getSheetName()); int sheetIndex = getSheetIndex(workbook, range.getSheetName());
if (sheetIndex == -1) { if (sheetIndex == -1) {
throw new IllegalArgumentException("Unknown sheet '" + range.getSheetName() + "'."); throw new IllegalArgumentException("Unknown sheet '" + range.getSheetName() + "'.");
} }
return readSheetToTable( return readSheetToTable(
workbook, workbook, sheetIndex, range, skip_rows, row_limit == null ? Integer.MAX_VALUE : row_limit);
sheetIndex,
range,
skip_rows,
row_limit == null ? Integer.MAX_VALUE : row_limit);
} }
} }

View File

@ -147,7 +147,7 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te
grouped.row_count . should_equal 1 grouped.row_count . should_equal 1
materialized.columns.length . should_equal 2 materialized.columns.length . should_equal 2
materialized.columns.at 0 . name . should_equal "First Index" materialized.columns.at 0 . name . should_equal "First Index"
materialized.columns.at 0 . at 0 . should_equal 9 materialized.columns.at 0 . at 0 . should_equal 5
materialized.columns.at 1 . name . should_equal "Last ValueWithNothing" materialized.columns.at 1 . name . should_equal "Last ValueWithNothing"
materialized.columns.at 1 . at 0 . should_equal -89.78 epsilon=0.000001 materialized.columns.at 1 . at 0 . should_equal -89.78 epsilon=0.000001
@ -517,17 +517,17 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te
materialized.columns.at 6 . at idx . should_equal -18.802000 epsilon=0.000001 materialized.columns.at 6 . at idx . should_equal -18.802000 epsilon=0.000001
Test.specify "should be able to get first and last values" (pending = resolve_pending test_selection.first_last) <| Test.specify "should be able to get first and last values" (pending = resolve_pending test_selection.first_last) <|
grouped = table.aggregate [Group_By "Index", First "TextWithNothing" (order_by = By_Name ["Hexadecimal", "Flag"]), Last "ValueWithNothing" (order_by = By_Name ["Value"])] grouped = table.aggregate [Group_By "Index", First "TextWithNothing" (order_by = By_Name ["Value", "Flag"]), Last "ValueWithNothing" (order_by = By_Name ["Value"])]
materialized = materialize grouped materialized = materialize grouped
grouped.row_count . should_equal 10 grouped.row_count . should_equal 10
materialized.columns.length . should_equal 3 materialized.columns.length . should_equal 3
materialized.columns.at 0 . name . should_equal "Index" materialized.columns.at 0 . name . should_equal "Index"
idx = find_row [6] materialized idx = find_row [7] materialized
idx.is_nothing . should_be_false idx.is_nothing . should_be_false
materialized.columns.at 1 . name . should_equal "First TextWithNothing" materialized.columns.at 1 . name . should_equal "First TextWithNothing"
materialized.columns.at 1 . at idx . should_equal "v78nbv8fr1" materialized.columns.at 1 . at idx . should_equal "8g6kidngic"
materialized.columns.at 2 . name . should_equal "Last ValueWithNothing" materialized.columns.at 2 . name . should_equal "Last ValueWithNothing"
materialized.columns.at 2 . at idx . should_equal 19.77 epsilon=0.000001 materialized.columns.at 2 . at idx . should_equal -89.78 epsilon=0.000001
Test.specify "should be able to get first and last values with default row order" (pending = resolve_pending test_selection.first_last_row_order) <| Test.specify "should be able to get first and last values with default row order" (pending = resolve_pending test_selection.first_last_row_order) <|
grouped = table.aggregate [Group_By "Index", First "TextWithNothing", Last "Value"] grouped = table.aggregate [Group_By "Index", First "TextWithNothing", Last "Value"]
@ -701,18 +701,18 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te
materialized.columns.at 7 . at idx . should_equal -17.174000 epsilon=0.000001 materialized.columns.at 7 . at idx . should_equal -17.174000 epsilon=0.000001
Test.specify "should be able to get first and last values" (pending = resolve_pending test_selection.first_last) <| Test.specify "should be able to get first and last values" (pending = resolve_pending test_selection.first_last) <|
grouped = table.aggregate [Group_By "Flag", First "TextWithNothing" (order_by = By_Name ["Hexadecimal", "Flag"]), Last "ValueWithNothing" (order_by = By_Name ["Value"]), Group_By "Index"] grouped = table.aggregate [Group_By "Flag", First "TextWithNothing" (order_by = By_Name ["Value", "Flag"]), Last "ValueWithNothing" (order_by = By_Name ["Value"]), Group_By "Index"]
materialized = materialize grouped materialized = materialize grouped
grouped.row_count . should_equal 20 grouped.row_count . should_equal 20
materialized.columns.length . should_equal 4 materialized.columns.length . should_equal 4
materialized.columns.at 0 . name . should_equal "Flag" materialized.columns.at 0 . name . should_equal "Flag"
materialized.columns.at 3 . name . should_equal "Index" materialized.columns.at 3 . name . should_equal "Index"
idx = find_row [False, 6] materialized [0, 3] idx = find_row [False, 7] materialized [0, 3]
idx.is_nothing . should_be_false idx.is_nothing . should_be_false
materialized.columns.at 1 . name . should_equal "First TextWithNothing" materialized.columns.at 1 . name . should_equal "First TextWithNothing"
materialized.columns.at 1 . at idx . should_equal "v78nbv8fr1" materialized.columns.at 1 . at idx . should_equal "8g6kidngic"
materialized.columns.at 2 . name . should_equal "Last ValueWithNothing" materialized.columns.at 2 . name . should_equal "Last ValueWithNothing"
materialized.columns.at 2 . at idx . should_equal 42.17 epsilon=0.000001 materialized.columns.at 2 . at idx . should_equal -89.78 epsilon=0.000001
Test.specify "should be able to get first and last values with default row order" (pending = resolve_pending test_selection.first_last_row_order) <| Test.specify "should be able to get first and last values with default row order" (pending = resolve_pending test_selection.first_last_row_order) <|
grouped = table.aggregate [Group_By "Flag", First "TextWithNothing", Last "Value", Group_By "Index"] grouped = table.aggregate [Group_By "Flag", First "TextWithNothing", Last "Value", Group_By "Index"]

View File

@ -122,6 +122,13 @@ spec prefix table_builder test_selection pending=Nothing =
problems = [Duplicate_Column_Selectors ["foo"]] problems = [Duplicate_Column_Selectors ["foo"]]
Problems.test_problem_handling action problems tester Problems.test_problem_handling action problems tester
Test.specify "should correctly handle problems: duplicate matches due to case insensitivity" pending="TODO needs fixing" <|
selector = By_Name.new ["FOO", "foo"] (Text_Matcher case_sensitive=Case_Insensitive)
action = table.select_columns selector on_problems=_
tester = expect_column_names ["foo"]
problems = [Duplicate_Column_Selectors ["foo"]]
Problems.test_problem_handling action problems tester
Test.specify "should correctly handle problems: unmatched names" <| Test.specify "should correctly handle problems: unmatched names" <|
weird_name = '.*?-!@#!"' weird_name = '.*?-!@#!"'
selector = By_Name ["foo", "hmm", weird_name] selector = By_Name ["foo", "hmm", weird_name]
@ -238,6 +245,13 @@ spec prefix table_builder test_selection pending=Nothing =
problems = [Duplicate_Column_Selectors ["foo"]] problems = [Duplicate_Column_Selectors ["foo"]]
Problems.test_problem_handling action problems tester Problems.test_problem_handling action problems tester
Test.specify "should correctly handle problems: duplicate matches due to case insensitivity" pending="TODO needs fixing" <|
selector = By_Name.new ["FOO", "foo"] (Text_Matcher case_sensitive=Case_Insensitive)
action = table.remove_columns selector on_problems=_
tester = expect_column_names ["bar", "Baz", "foo_1", "foo_2", "ab.+123", "abcd123"]
problems = [Duplicate_Column_Selectors ["foo"]]
Problems.test_problem_handling action problems tester
Test.specify "should correctly handle problems: unmatched names" <| Test.specify "should correctly handle problems: unmatched names" <|
weird_name = '.*?-!@#!"' weird_name = '.*?-!@#!"'
selector = By_Name ["foo", "hmm", weird_name] selector = By_Name ["foo", "hmm", weird_name]
@ -723,16 +737,16 @@ spec prefix table_builder test_selection pending=Nothing =
Test.specify "should support natural and case insensitive ordering at the same time" pending=(if (test_selection.natural_ordering.not || test_selection.case_insensitive_ordering.not) then "Natural ordering or case sensitive ordering is not supported.") <| Test.specify "should support natural and case insensitive ordering at the same time" pending=(if (test_selection.natural_ordering.not || test_selection.case_insensitive_ordering.not) then "Natural ordering or case sensitive ordering is not supported.") <|
t1 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "psi"]) text_ordering=(Text_Ordering sort_digits_as_numbers=True case_sensitive=Case_Insensitive) t1 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "psi"]) text_ordering=(Text_Ordering sort_digits_as_numbers=True case_sensitive=Case_Insensitive)
t1.at "psi" . to_vector . should_equal ["c01", "C2", "c10", Nothing] t1.at "psi" . to_vector . should_equal [Nothing, "c01", "C2", "c10"]
t2 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "psi"]) text_ordering=(Text_Ordering sort_digits_as_numbers=True) t2 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "psi"]) text_ordering=(Text_Ordering sort_digits_as_numbers=True)
t2.at "psi" . to_vector . should_equal ["C2", "c01", "c10", Nothing] t2.at "psi" . to_vector . should_equal [Nothing, "C2", "c01", "c10"]
t3 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "psi"]) text_ordering=(Text_Ordering case_sensitive=Case_Insensitive) t3 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "psi"]) text_ordering=(Text_Ordering case_sensitive=Case_Insensitive)
t3.at "psi" . to_vector . should_equal ["c01", "c10", "C2", Nothing] t3.at "psi" . to_vector . should_equal [Nothing, "c01", "c10", "C2"]
t4 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "psi"]) t4 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "psi"])
t4.at "psi" . to_vector . should_equal ["C2", "c01", "c10", Nothing] t4.at "psi" . to_vector . should_equal [Nothing, "C2", "c01", "c10"]
Test.specify "text ordering settings should not affect numeric columns" <| Test.specify "text ordering settings should not affect numeric columns" <|
ordering = Text_Ordering sort_digits_as_numbers=True case_sensitive=Case_Insensitive ordering = Text_Ordering sort_digits_as_numbers=True case_sensitive=Case_Insensitive

View File

@ -635,7 +635,7 @@ spec =
t_3 = Table.new [c_3_1, c_3_2, c_3_3] t_3 = Table.new [c_3_1, c_3_2, c_3_3]
t_3.default_visualization.should_equal Visualization.Id.table t_3.default_visualization.should_equal Visualization.Id.table
selection = Common_Table_Spec.Test_Selection supports_case_sensitive_columns=True order_by=False natural_ordering=True case_insensitive_ordering=True order_by_unicode_normalization_by_default=True selection = Common_Table_Spec.Test_Selection supports_case_sensitive_columns=True order_by=True natural_ordering=True case_insensitive_ordering=True order_by_unicode_normalization_by_default=True
Common_Table_Spec.spec "[In-Memory] " table_builder=Table.new test_selection=selection Common_Table_Spec.spec "[In-Memory] " table_builder=Table.new test_selection=selection
Test.group "Use First Row As Names" <| Test.group "Use First Row As Names" <|

View File

@ -0,0 +1,63 @@
from Standard.Base import all
import Standard.Base.Data.Ordering.Comparator
from Standard.Base.Data.Text.Text_Ordering as Text_Ordering_Module import Text_Ordering
import Standard.Test
polyglot java import java.lang.ClassCastException
# === Test Resources ===
# Wrapper around a number whose `compare_to` compares `that` against `this`, i.e. the
# reverse of the wrapped numbers' own ordering. The spec below relies on this: it expects
# `Ord 1` to compare as less than `Ord 0`.
type Ord number
Ord.compare_to : Ord -> Ordering
Ord.compare_to that = that.number.compare_to this.number
# Wrapper around a number with no `compare_to` defined in this file; the spec expects
# comparing two of these to fail with an incomparable-values error.
type No_Ord number
# Tests
# Exercises `Comparator.compare` on numbers, booleans, Nothing, Text (default and
# case-insensitive) and custom types, plus the failure path for incomparable values.
spec = Test.group "Object Comparator" <|
# Catches a Java ClassCastException panic and replaces it with the
# `Vector.Incomparable_Values_Error` dataflow error so the spec can assert on it.
handle_classcast = Panic.catch ClassCastException handler=(Error.throw Vector.Incomparable_Values_Error)
# Comparison using a default-constructed Comparator.
default_comparator a b = handle_classcast <| Comparator.new.compare a b
# Comparison using a Comparator built from a Case_Insensitive Text_Ordering.
# NOTE(review): the positional `False` is presumably `sort_digits_as_numbers` - confirm.
case_insensitive a b = handle_classcast <| Comparator.for_text_ordering (Text_Ordering False Case_Insensitive) . compare a b
Test.specify "can compare numbers" <|
((default_comparator 1 2) < 0) . should_equal True
((default_comparator 1 1.2) < 0) . should_equal True
((default_comparator 1 1) == 0) . should_equal True
Test.specify "can compare booleans" <|
((default_comparator True False) > 0) . should_equal True
((default_comparator True True) == 0) . should_equal True
((default_comparator False False) == 0) . should_equal True
Test.specify "can compare Nothing and it ends up as lowest value" <|
((default_comparator 1 Nothing) > 0) . should_equal True
((default_comparator Nothing 1.235) < 0) . should_equal True
((default_comparator True Nothing) > 0) . should_equal True
((default_comparator Nothing False) < 0) . should_equal True
((default_comparator "A" Nothing) > 0) . should_equal True
((default_comparator Nothing "ZSA") < 0) . should_equal True
((default_comparator Nothing Nothing) == 0) . should_equal True
Test.specify "can compare Text with Enso standard defaults" <|
((default_comparator "A" "a") < 0) . should_equal True
((default_comparator "ABBA" "ABBA") == 0) . should_equal True
# Precomposed U+00E9 versus 'e' (U+0065) followed by combining U+0301: expected equal.
((default_comparator '\u00E9' '\u0065\u{301}') == 0) . should_equal True
Test.specify "can compare Text with case-insensitive comparisons" <|
((case_insensitive "A" "a") == 0) . should_equal True
((case_insensitive "ABBA" "abba") == 0) . should_equal True
((case_insensitive '\u00E9' '\u0065\u{301}') == 0) . should_equal True
Test.specify "can compare custom types" <|
# Ord.compare_to compares `that` to `this`, so Ord 1 is expected to come before Ord 0.
((default_comparator (Ord 1) (Ord 0)) < 0) . should_equal True
((default_comparator (Ord 1) (Ord 1)) == 0) . should_equal True
Test.specify "should fail gracefully for incomparable items" <|
# A number against a boolean, and two values of a type without compare_to, are both
# expected to surface as Incomparable_Values_Error via handle_classcast.
(default_comparator 1 True).should_fail_with Vector.Incomparable_Values_Error
(default_comparator (No_Ord 1) (No_Ord 2)).should_fail_with Vector.Incomparable_Values_Error
# Runs this module's `spec` through `Test.Suite.run_main`.
main = Test.Suite.run_main here.spec

View File

@ -5,9 +5,13 @@ import Standard.Base.Data.Ordering.Natural_Order
import Standard.Test import Standard.Test
spec = Test.group "Natural Order" <| spec = Test.group "Natural Order" <|
case_insensitive_compare a b = Natural_Order.compare a b Case_Insensitive
Test.specify "should behave as shown in examples" <| Test.specify "should behave as shown in examples" <|
Natural_Order.compare "a2" "a100" . should_equal Ordering.Less Natural_Order.compare "a2" "a100" . should_equal Ordering.Less
["a2", "a1", "a100", "a001", "a0001"].sort by=Natural_Order.compare . should_equal ["a0001", "a001", "a1", "a2", "a100"] ["a2", "a1", "a100", "a001", "a0001"].sort by=Natural_Order.compare . should_equal ["a0001", "a001", "a1", "a2", "a100"]
["A2", "a1", "A100", "A001", "a0001"].sort by=Natural_Order.compare . should_equal ["A001", "A2", "A100", "a0001", "a1"]
["A2", "a1", "A100", "A001", "a0001"].sort by=case_insensitive_compare . should_equal ["a0001", "A001", "a1", "A2", "A100"]
Test.specify "should correctly compare values" <| Test.specify "should correctly compare values" <|
Natural_Order.compare "a1" "a2" . should_equal Ordering.Less Natural_Order.compare "a1" "a2" . should_equal Ordering.Less

View File

@ -247,5 +247,4 @@ spec =
Statistics.covariance_matrix series . should_fail_with Illegal_Argument_Error Statistics.covariance_matrix series . should_fail_with Illegal_Argument_Error
Statistics.pearson_correlation series . should_fail_with Illegal_Argument_Error Statistics.pearson_correlation series . should_fail_with Illegal_Argument_Error
main = Test.Suite.run_main here.spec main = Test.Suite.run_main here.spec

View File

@ -93,8 +93,14 @@ spec =
'a\u0321\u0302'=='a\u0302\u0321' . should_be_true 'a\u0321\u0302'=='a\u0302\u0321' . should_be_true
'a\u0321\u0302'=='A\u0302\u0321' . should_be_false 'a\u0321\u0302'=='A\u0302\u0321' . should_be_false
accent_1+"a" . compare_to accent_2+"a" . should_equal Ordering.Equal
accent_1+"A" . compare_to accent_2+"a" . should_equal Ordering.Less
accent_1+"A" . compare_to_ignore_case accent_2+"a" . should_equal Ordering.Equal
accent_1+"a" . compare_to accent_2+"b" . should_equal Ordering.Less accent_1+"a" . compare_to accent_2+"b" . should_equal Ordering.Less
accent_1+"a" . compare_to_ignore_case accent_2+"B" . should_equal Ordering.Less
accent_2+"a" . compare_to accent_1+"b" . should_equal Ordering.Less accent_2+"a" . compare_to accent_1+"b" . should_equal Ordering.Less
accent_1+"a" . compare_to accent_2+"B" . should_equal Ordering.Greater
accent_1+"a" . compare_to_ignore_case accent_2+"B" . should_equal Ordering.Less
accent_1+"b" . compare_to accent_2+"a" . should_equal Ordering.Greater accent_1+"b" . compare_to accent_2+"a" . should_equal Ordering.Greater
accent_2+"b" . compare_to accent_1+"a" . should_equal Ordering.Greater accent_2+"b" . compare_to accent_1+"a" . should_equal Ordering.Greater

View File

@ -30,6 +30,9 @@ import project.Data.Noise.Generator_Spec as Noise_Generator_Spec
import project.Data.Noise_Spec import project.Data.Noise_Spec
import project.Data.Numbers_Spec import project.Data.Numbers_Spec
import project.Data.Ordering_Spec import project.Data.Ordering_Spec
import project.Data.Ordering.Comparator_Spec
import project.Data.Ordering.Natural_Order_Spec
import project.Data.Ordering.Vector_Lexicographic_Order_Spec
import project.Data.Range_Spec import project.Data.Range_Spec
import project.Data.Ref_Spec import project.Data.Ref_Spec
import project.Data.Text_Spec import project.Data.Text_Spec
@ -88,6 +91,9 @@ main = Test.Suite.run_main <|
Noise_Spec.spec Noise_Spec.spec
Numbers_Spec.spec Numbers_Spec.spec
Ordering_Spec.spec Ordering_Spec.spec
Comparator_Spec.spec
Natural_Order_Spec.spec
Vector_Lexicographic_Order_Spec.spec
Process_Spec.spec Process_Spec.spec
Python_Interop_Spec.spec Python_Interop_Spec.spec
R_Interop_Spec.spec R_Interop_Spec.spec