Add Is_Empty, Not_Empty, Like and Not_Like to Filter_Condition (#3775)

Implements https://www.pivotaltracker.com/story/show/183389890
This commit is contained in:
Radosław Waśko 2022-10-11 01:11:04 +02:00 committed by GitHub
parent af3ebccb39
commit 592a8516a8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 466 additions and 45 deletions

View File

@ -207,6 +207,8 @@
- [Added `Date_Period.Week` to `start_of` and `end_of` methods.][3733]
- [Replaced `Table.where` with a new API relying on `Table.filter`.][3750]
- [Added `Filter_Condition` to `Vector`, `Range` and `List`.][3770]
- [Extended `Filter_Condition` with `Is_Empty`, `Not_Empty`, `Like` and
`Not_Like`.][3775]
[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -332,6 +334,7 @@
[3749]: https://github.com/enso-org/enso/pull/3749
[3750]: https://github.com/enso-org/enso/pull/3750
[3770]: https://github.com/enso-org/enso/pull/3770
[3775]: https://github.com/enso-org/enso/pull/3775
#### Enso Compiler

View File

@ -2039,7 +2039,7 @@ buildEngineDistribution := {
log.info(s"Engine package created at $root")
}
val stdBitsProjects = List("Base", "Database", "Google_Api", "Image", "Table")
val stdBitsProjects = List("Base", "Database", "Google_Api", "Image", "Table", "All")
val allStdBits: Parser[String] =
stdBitsProjects.map(v => v: Parser[String]).reduce(_ | _)
@ -2057,7 +2057,7 @@ buildStdLib := Def.inputTaskDyn {
}.evaluated
lazy val pkgStdLibInternal = inputKey[Unit]("Use `buildStdLib`")
pkgStdLibInternal := Def.inputTaskDyn {
pkgStdLibInternal := Def.inputTask {
val cmd = allStdBits.parsed
val root = engineDistributionRoot.value
val log: sbt.Logger = streams.value.log
@ -2073,15 +2073,27 @@ pkgStdLibInternal := Def.inputTaskDyn {
(`std-image` / Compile / packageBin).value
case "Table" =>
(`std-table` / Compile / packageBin).value
case "All" =>
(`std-base` / Compile / packageBin).value
(`std-table` / Compile / packageBin).value
(`std-database` / Compile / packageBin).value
(`std-image` / Compile / packageBin).value
(`std-google-api` / Compile / packageBin).value
case _ =>
}
StdBits.buildStdLibPackage(
cmd,
root,
cacheFactory,
log,
defaultDevEnsoVersion
)
val libs = if (cmd != "All") Seq(cmd) else {
val prefix = "Standard."
Editions.standardLibraries.filter(_.startsWith(prefix)).map(_.stripPrefix(prefix))
}
libs.foreach { lib =>
StdBits.buildStdLibPackage(
lib,
root,
cacheFactory,
log,
defaultDevEnsoVersion
)
}
}.evaluated
lazy val buildLauncherDistribution =

View File

@ -2,6 +2,8 @@ from Standard.Base import all
from Standard.Base.Data.Filter_Condition.Filter_Condition import all
polyglot java import org.enso.base.Regex_Utils
type Filter_Condition
## Is less than a value (or another column, in case of Table operations)?
Less than:Any
@ -57,6 +59,52 @@ type Filter_Condition
## Is the value equal to False (Boolean only)?
Is_False
## Is equal to "" or Nothing (Text only)?
Is_Empty
## Is not equal to "" and Nothing (Text only)?
Not_Empty
## Does the value match the SQL pattern (Text only)?
It accepts a Text value representing the matching pattern. In case of
Table operations, it can accept another column - then the corresponding
values from the source column and the provided column are checked.
The pattern is interpreted according to the standard SQL convention:
- the `%` character matches any sequence of characters,
- the `_` character matches any single character,
- any other character is matched literally.
! Known Bugs
There is a known bug in Java Regex where escape characters are not
handled properly in Unicode-normalized matching mode. Due to this
limitation, Unicode normalization has been disabled for this function,
so beware that some equivalent graphemes like 'ś' and 's\u0301' will
not be matched.
See https://bugs.java.com/bugdatabase/view_bug.do?bug_id=8032926
Like pattern:Text
## Does the value not match the SQL pattern (Text only)?
It accepts a Text value representing the matching pattern. In case of
Table operations, it can accept another column - then the corresponding
values from the source column and the provided column are checked.
The pattern is interpreted according to the standard SQL convention:
- the `%` character matches any sequence of characters,
- the `_` character matches any single character,
- any other character is matched literally.
! Known Bugs
There is a known bug in Java Regex where escape characters are not
handled properly in Unicode-normalized matching mode. Due to this
limitation, Unicode normalization has been disabled for this function,
so beware that some equivalent graphemes like 'ś' and 's\u0301' will
not be matched.
See https://bugs.java.com/bugdatabase/view_bug.do?bug_id=8032926
Not_Like pattern:Text
## Converts a `Filter_Condition` condition into a predicate taking an
element and returning a value indicating whether the element should be
accepted by the filter.
@ -80,3 +128,25 @@ type Filter_Condition
_ -> True
Is_True -> ==True
Is_False -> ==False
Is_Empty -> elem -> case elem of
Nothing -> True
"" -> True
_ -> False
Not_Empty -> elem -> case elem of
Nothing -> False
"" -> False
_ -> True
Like sql_pattern ->
regex = sql_like_to_regex sql_pattern
regex.matches
Not_Like sql_pattern ->
regex = sql_like_to_regex sql_pattern
elem -> regex.matches elem . not
## PRIVATE
   Turns an SQL-like pattern into a compiled `Regex`.

   The pattern text is translated by `Regex_Utils.sql_like_pattern_to_regex`
   and then compiled with `dot_matches_newline=True` and `match_ascii=True`.

   There is a bug with Java Regex in Unicode normalized mode (CANON_EQ) with
   quoting: https://bugs.java.com/bugdatabase/view_bug.do?bug_id=8032926
   Once that bug is fixed, `match_ascii` may be set back to `False`.
sql_like_to_regex sql_pattern =
    translated = Regex_Utils.sql_like_pattern_to_regex sql_pattern
    Regex.compile translated dot_matches_newline=True match_ascii=True

View File

@ -113,6 +113,34 @@ type Column
to_sql : SQL_Statement
to_sql self = self.to_table.to_sql
## PRIVATE
   Builds a new column whose expression applies an operation of arbitrary
   arity to this column and the given operands.

   Arguments:
   - op_kind: The kind of the operation.
   - operands: A vector of additional operation arguments (the expression of
     this column is always placed first, before these).
   - new_type: The SQL type of the resulting column. If `Nothing`, the type
     of this column is used.
   - operand_types: The SQL types for the additional arguments, one per
     operand. A type is only consulted when the matching operand is a
     constant; if it is `Nothing` (or the whole vector is not provided), the
     type of this column is used. Operands that are columns contribute their
     own expression instead.

   An operand column that fails the `Helpers.check_integrity` check against
   this column results in an `Unsupported_Database_Operation_Error`.
make_op self op_kind operands new_type=Nothing operand_types=Nothing =
    result_type = new_type.if_nothing self.sql_type
    types_or_defaults = operand_types.if_nothing (Vector.fill operands.length Nothing)
    as_expression operand operand_type = case operand of
        other_column : Column ->
            if Helpers.check_integrity self other_column then other_column.expression else
                Error.throw <| Unsupported_Database_Operation_Error "Cannot use columns coming from different contexts in one expression without a join."
        constant ->
            Expression.Constant (operand_type.if_nothing self.sql_type) constant
    operand_expressions = operands.zip types_or_defaults as_expression
    all_arguments = [self.expression] + operand_expressions
    Column.Value self.name self.connection result_type (Expression.Operation op_kind all_arguments) self.context
## PRIVATE
Creates a binary operation with given kind and operand.
@ -129,20 +157,7 @@ type Column
defaults to the current type if not provided.
make_binary_op : Text -> Text -> (Column | Any) -> (SQL_Type | Nothing) -> (SQL_Type | Nothing) -> Column
make_binary_op self op_kind operand new_type=Nothing operand_type=Nothing =
actual_new_type = new_type.if_nothing self.sql_type
case operand of
Column.Value _ _ _ other_expr _ ->
case Helpers.check_integrity self operand of
False ->
Error.throw <| Unsupported_Database_Operation_Error "Cannot compare columns coming from different contexts. Only columns of a single table can be compared."
True ->
new_expr = Expression.Operation op_kind [self.expression, other_expr]
Column.Value self.name self.connection actual_new_type new_expr self.context
_ ->
actual_operand_type = operand_type.if_nothing self.sql_type
other = Expression.Constant actual_operand_type operand
new_expr = Expression.Operation op_kind [self.expression, other]
Column.Value self.name self.connection actual_new_type new_expr self.context
self.make_op op_kind [operand] new_type [operand_type]
## PRIVATE
@ -153,10 +168,7 @@ type Column
- new_type: The type of the SQL column that results from applying the
operator.
make_unary_op : Text -> Text -> (SQL_Type | Nothing) -> Column
make_unary_op self op_kind new_type=Nothing =
actual_new_type = new_type.if_nothing self.sql_type
new_expr = Expression.Operation op_kind [self.expression]
Column.Value self.name self.connection actual_new_type new_expr self.context
make_unary_op self op_kind new_type=Nothing = self.make_op op_kind [] new_type
## UNSTABLE
@ -314,6 +326,22 @@ type Column
< : Column | Any -> Column
< self other = self.make_binary_op "<" other new_type=SQL_Type.boolean
## Element-wise inclusive bounds check.
Arguments:
- lower: The lower bound to compare elements of `self` against. If
`lower` is a column, the comparison is performed pairwise between
corresponding elements of `self` and `lower`.
- upper: The upper bound to compare elements of `self` against. If
`upper` is a column, the comparison is performed pairwise between
corresponding elements of `self` and `upper`.
Returns a column with boolean values indicating whether values of this
column fit between the lower and upper bounds (both ends inclusive).
The check is built as a single `BETWEEN` operation with `lower` and
`upper` as its additional operands, and the resulting column is typed as
`SQL_Type.boolean`.
between : (Column | Any) -> (Column | Any) -> Column
between self lower upper =
self.make_op "BETWEEN" [lower, upper] new_type=SQL_Type.boolean
## UNSTABLE
Element-wise addition.
@ -407,6 +435,12 @@ type Column
is_missing : Column
is_missing self = self.make_unary_op "ISNULL" new_type=SQL_Type.boolean
## PRIVATE
Returns a column of booleans, with `True` items at the positions where
this column contains an empty string or `Nothing`.
The check is expressed as the unary `ISEMPTY` operation and the resulting
column is typed as `SQL_Type.boolean`.
is_empty : Column
is_empty self = self.make_unary_op "ISEMPTY" new_type=SQL_Type.boolean
## UNSTABLE
Returns a new column where missing values have been replaced with the
@ -517,6 +551,11 @@ type Column
contains : Column | Text -> Column
contains self other = self.make_binary_op "contains" other new_type=SQL_Type.boolean
## PRIVATE
Checks for each element of the column if it matches an SQL-like pattern.
Arguments:
- other: The pattern to match against. It is either a single Text value or
a column, in which case the values of this column are checked against the
corresponding values of `other`.
The check is expressed as the binary `LIKE` operation and the resulting
column is typed as `SQL_Type.boolean`.
like : Column | Text -> Column
like self other = self.make_binary_op "LIKE" other new_type=SQL_Type.boolean
## PRIVATE
as_internal : Internal_Column
as_internal self = Internal_Column.Value self.name self.sql_type self.expression

View File

@ -168,15 +168,39 @@ base_dialect =
bin = name -> [name, make_binary_op name]
unary = name -> [name, make_unary_op name]
fun = name -> [name, make_function name]
arith = [bin "+", bin "-", bin "*", bin "/"]
logic = [bin "AND", bin "OR", unary "NOT"]
compare = [bin "=", bin "!=", bin "<", bin ">", bin "<=", bin ">="]
compare = [bin "=", bin "!=", bin "<", bin ">", bin "<=", bin ">=", ["BETWEEN", make_between]]
agg = [fun "MAX", fun "MIN", fun "AVG", fun "SUM"]
counts = [fun "COUNT", ["COUNT_ROWS", make_constant "COUNT(*)"]]
text = [["ISEMPTY", make_is_empty], bin "LIKE"]
nulls = [["ISNULL", make_right_unary_op "IS NULL"], ["FILLNULL", make_function "COALESCE"]]
base_map = Map.from_vector (arith + logic + compare + agg + nulls + counts)
base_map = Map.from_vector (arith + logic + compare + agg + counts + text + nulls)
Internal_Dialect.Value base_map wrap_in_quotes
## PRIVATE
   Builds the SQL code for the `ISEMPTY` operation.

   Expects exactly one argument and produces a parenthesized expression that
   holds when that argument `IS NULL` or is equal to `''`. Any other number of
   arguments results in an `Illegal_State_Error_Data` error.
make_is_empty : Vector Builder -> Builder
make_is_empty arguments =
    if arguments.length != 1 then Error.throw (Illegal_State_Error_Data "Invalid amount of arguments for operation ISEMPTY") else
        operand = arguments.at 0
        null_check = (operand ++ " IS NULL").paren
        blank_check = (operand ++ " = ''").paren
        (null_check ++ " OR " ++ blank_check).paren
## PRIVATE
   Builds the SQL code for the `BETWEEN` operation.

   Expects exactly three arguments - the checked expression, the lower bound
   and the upper bound, in that order - and produces a parenthesized
   `<expression> BETWEEN <lower> AND <upper>` fragment. Any other number of
   arguments results in an `Illegal_State_Error_Data` error.
make_between : Vector Builder -> Builder
make_between arguments =
    if arguments.length != 3 then Error.throw (Illegal_State_Error_Data "Invalid amount of arguments for operation BETWEEN") else
        subject = arguments.at 0
        lower_bound = arguments.at 1
        upper_bound = arguments.at 2
        (subject ++ " BETWEEN " ++ lower_bound ++ " AND " ++ upper_bound).paren
## PRIVATE
Builds code for an expression.

View File

@ -247,6 +247,21 @@ type Column
< : Column | Any -> Column
< self other = run_vectorized_binary_op self "<" (<) other
## Element-wise inclusive bounds check.
Arguments:
- lower: The lower bound to compare elements of `self` against. If
`lower` is a column, the comparison is performed pairwise between
corresponding elements of `self` and `lower`.
- upper: The upper bound to compare elements of `self` against. If
`upper` is a column, the comparison is performed pairwise between
corresponding elements of `self` and `upper`.
Returns a column with boolean values indicating whether values of this
column fit between the lower and upper bounds (both ends inclusive).
The result is computed as the conjunction of the two element-wise
comparisons `self >= lower` and `self <= upper`.
between : (Column | Any) -> (Column | Any) -> Column
between self lower upper = (self >= lower) && (self <= upper)
## ALIAS Add Columns
Element-wise addition.
@ -444,6 +459,12 @@ type Column
is_missing : Column
is_missing self = run_vectorized_unary_op self "is_missing" (== Nothing)
## PRIVATE
Returns a column of booleans, with `True` items at the positions where
this column contains an empty string or `Nothing`.
Runs the `is_empty` operation through `run_vectorized_unary_op`, passing
the `Filter_Condition.Is_Empty` predicate as its function argument
(presumably the fallback used when no vectorized implementation is
available - confirm against `run_vectorized_unary_op`).
is_empty : Column
is_empty self = run_vectorized_unary_op self "is_empty" Filter_Condition.Is_Empty.to_predicate
## Returns a column of booleans, with `True` items at the positions where
this column does not contain a `Nothing`.
@ -564,6 +585,12 @@ type Column
contains self other =
run_vectorized_binary_op self "contains" (a -> b -> a.contains b) other
## PRIVATE
   Checks for each element of the column if it matches an SQL-like pattern.

   Arguments:
   - other: The pattern to match against. It is either a single Text value or
     a column, in which case the values of this column are checked against
     the corresponding values of `other`.

   The work is done by the `like` operation run through
   `run_vectorized_binary_op`. The function passed alongside it only raises
   an illegal-state error (presumably it is the fallback used when the
   storage has no `like` implementation, i.e. for non-Text columns - confirm
   against `run_vectorized_binary_op`).
like : Column | Text -> Column
like self other =
    # The error is built with the `Illegal_State_Error_Data` constructor, in
    # line with how illegal-state errors are raised elsewhere in this change.
    fallback = _ -> _ -> Error.throw (Illegal_State_Error_Data "The `Like` operation should only be used on Text columns.")
    run_vectorized_binary_op self "like" fallback other
## ALIAS Transform Column
Applies `function` to each item in this column and returns the column

View File

@ -12,13 +12,24 @@ from Standard.Base.Data.Filter_Condition.Filter_Condition import all
It also performs validation and will throw errors if unexpected column types
are encountered.
make_filter_column source_column filter_condition = case filter_condition of
# Equality
Equal value -> (source_column == value)
Not_Equal value -> (source_column != value)
# Nothing
Is_Nothing -> source_column.is_missing
Not_Nothing -> source_column.is_missing.not
# Boolean
Is_True ->
Value_Type.expect_boolean source_column.value_type <| source_column
Is_False ->
Value_Type.expect_boolean source_column.value_type <| source_column.not
# Comparisons
Less value -> (source_column < value)
Equal_Or_Less value -> (source_column <= value)
Equal value -> (source_column == value)
Equal_Or_Greater value -> (source_column >= value)
Greater value -> (source_column > value)
Not_Equal value -> (source_column != value)
Between lower upper -> ((source_column >= lower) && (source_column <= upper))
Between lower upper -> source_column.between lower upper
# Text
Starts_With prefix ->
Value_Type.expect_text source_column.value_type <|
expect_column_or_value_as_text "prefix" prefix <|
@ -31,12 +42,20 @@ make_filter_column source_column filter_condition = case filter_condition of
Value_Type.expect_text source_column.value_type <|
expect_column_or_value_as_text "substring" substring <|
source_column.contains substring
Is_Nothing -> source_column.is_missing
Not_Nothing -> source_column.is_missing.not
Is_True ->
Value_Type.expect_boolean source_column.value_type <| source_column
Is_False ->
Value_Type.expect_boolean source_column.value_type <| source_column.not
Is_Empty ->
Value_Type.expect_text source_column.value_type <|
source_column.is_empty
Not_Empty ->
Value_Type.expect_text source_column.value_type <|
source_column.is_empty.not
Like pattern ->
Value_Type.expect_text source_column.value_type <|
expect_column_or_value_as_text "pattern" pattern <|
source_column.like pattern
Not_Like pattern ->
Value_Type.expect_text source_column.value_type <|
expect_column_or_value_as_text "pattern" pattern <|
source_column.like pattern . not
## PRIVATE
expect_column_or_value_as_text field_name column_or_value ~action = case column_or_value of

View File

@ -132,7 +132,7 @@ object StdBits {
cacheFactory: sbt.util.CacheStoreFactory,
log: sbt.Logger,
defaultDevEnsoVersion: String
) = Def.task {
) = {
log.info(s"Building standard library package for '$name'")
val prefix = "Standard"
val targetPkgRoot = root / "lib" / prefix / name / defaultDevEnsoVersion

View File

@ -70,4 +70,42 @@ public class Regex_Utils {
}
return allMatches.toArray(new String[0]);
}
/**
 * Translates a SQL-like pattern into a regular expression with the same semantics.
 *
 * <p>Each {@code %} wildcard becomes {@code .*} and each {@code _} wildcard becomes {@code .}.
 * Every run of characters between wildcards is passed through {@link Pattern#quote(String)}, so
 * regex metacharacters present in the input are matched literally.
 *
 * @param sql_pattern the SQL-like pattern to translate
 * @return the source text of the equivalent regular expression
 */
public static String sql_like_pattern_to_regex(String sql_pattern) {
  final int length = sql_pattern.length();
  StringBuilder regex = new StringBuilder();
  // Index of the first character of the literal run that has not been emitted yet.
  int literalStart = 0;
  for (int pos = 0; pos < length; pos++) {
    char current = sql_pattern.charAt(pos);
    boolean matchesAnySequence = current == '%';
    if (!matchesAnySequence && current != '_') {
      continue;
    }
    // Flush the pending literal run (quoted as a whole) before emitting the wildcard.
    if (literalStart < pos) {
      regex.append(Pattern.quote(sql_pattern.substring(literalStart, pos)));
    }
    regex.append(matchesAnySequence ? ".*" : ".");
    literalStart = pos + 1;
  }
  // Emit whatever literal text follows the last wildcard.
  if (literalStart < length) {
    regex.append(Pattern.quote(sql_pattern.substring(literalStart)));
  }
  return regex.toString();
}
}

View File

@ -0,0 +1,60 @@
package org.enso.table.data.column.operation.map.text;
import java.util.BitSet;
import java.util.regex.Pattern;
import com.ibm.icu.impl.UnicodeRegex;
import org.enso.base.Regex_Utils;
import org.enso.table.data.column.storage.BoolStorage;
import org.enso.table.data.column.storage.SpecializedStorage;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.error.UnexpectedTypeException;
/**
 * A text map operation, created under the name {@code Storage.Maps.LIKE}, that checks values
 * against SQL-like patterns by translating each pattern into a Java regular expression.
 */
public class LikeOp extends StringBooleanOp {
  /**
   * There is <a href="https://bugs.java.com/bugdatabase/view_bug.do?bug_id=8032926">a bug with Java Regex in Unicode normalized mode (CANON_EQ) with quoting</a>.
   * Once that bug is fixed, we should add all relevant Unicode flags here too,
   * consistently with the Default Enso regex engine.
   */
  private static final int REGEX_FLAGS = Pattern.DOTALL;

  public LikeOp() {
    super(Storage.Maps.LIKE);
  }

  /**
   * Compiles a SQL-like pattern: it is translated to regex source, passed through {@code
   * UnicodeRegex.fix} and compiled with {@link #REGEX_FLAGS}.
   */
  private Pattern createRegexPatternFromSql(String sqlPattern) {
    String translated = Regex_Utils.sql_like_pattern_to_regex(sqlPattern);
    return Pattern.compile(UnicodeRegex.fix(translated), REGEX_FLAGS);
  }

  /** Checks whether the whole of {@code a} matches the SQL-like pattern {@code b}. */
  @Override
  protected boolean doString(String a, String b) {
    Pattern compiled = createRegexPatternFromSql(b);
    return compiled.matcher(a).matches();
  }

  /**
   * Matches every item of the storage against a single pattern.
   *
   * <p>A {@code null} pattern yields a result in which every row is marked missing. For a {@code
   * String} pattern, the pattern is compiled once; rows that are missing in the input stay
   * missing and the remaining rows are set where the item matches in full.
   *
   * @throws UnexpectedTypeException if {@code arg} is neither {@code null} nor a {@code String}
   */
  @Override
  public Storage runMap(SpecializedStorage<String> storage, Object arg) {
    if (arg == null) {
      BitSet noMatches = new BitSet();
      BitSet allMissing = new BitSet();
      allMissing.set(0, storage.size());
      return new BoolStorage(noMatches, allMissing, storage.size(), false);
    }

    if (arg instanceof String argString) {
      Pattern compiled = createRegexPatternFromSql(argString);
      BitSet matched = new BitSet();
      BitSet missing = new BitSet();
      for (int row = 0; row < storage.size(); row++) {
        if (storage.isNa(row)) {
          missing.set(row);
          continue;
        }
        if (compiled.matcher(storage.getItem(row)).matches()) {
          matched.set(row);
        }
      }
      return new BoolStorage(matched, missing, storage.size(), false);
    }

    throw new UnexpectedTypeException("a Text");
  }
}

View File

@ -78,9 +78,11 @@ public abstract class Storage {
public static final String AND = "&&";
public static final String OR = "||";
public static final String IS_MISSING = "is_missing";
public static final String IS_EMPTY = "is_empty";
public static final String STARTS_WITH = "starts_with";
public static final String ENDS_WITH = "ends_with";
public static final String CONTAINS = "contains";
public static final String LIKE = "like";
}
public static final class Aggregators {

View File

@ -5,6 +5,8 @@ import org.enso.base.Text_Utils;
import org.enso.table.data.column.builder.object.StringBuilder;
import org.enso.table.data.column.operation.map.MapOpStorage;
import org.enso.table.data.column.operation.map.MapOperation;
import org.enso.table.data.column.operation.map.UnaryMapOperation;
import org.enso.table.data.column.operation.map.text.LikeOp;
import org.enso.table.data.column.operation.map.text.StringBooleanOp;
import org.graalvm.polyglot.Value;
@ -93,6 +95,20 @@ public class StringStorage extends SpecializedStorage<String> {
return new BoolStorage(r, missing, storage.size(), false);
}
});
t.add(
new UnaryMapOperation<>(Maps.IS_EMPTY) {
@Override
protected Storage run(SpecializedStorage<String> storage) {
BitSet r = new BitSet();
for (int i = 0; i < storage.size; i++) {
String s = storage.data[i];
if (s == null || s.isEmpty()) {
r.set(i);
}
}
return new BoolStorage(r, new BitSet(), storage.size, false);
}
});
t.add(
new StringBooleanOp(Maps.STARTS_WITH) {
@Override
@ -114,6 +130,7 @@ public class StringStorage extends SpecializedStorage<String> {
return Text_Utils.contains(a, b);
}
});
t.add(new LikeOp());
return t;
}
}

View File

@ -16,7 +16,7 @@ import Standard.Test.Problems
from project.Util import all
type Test_Selection
Config supports_case_sensitive_columns=True order_by=True natural_ordering=False case_insensitive_ordering=True order_by_unicode_normalization_by_default=False case_insensitive_ascii_only=False take_drop=True allows_mixed_type_comparisons=True
Config supports_case_sensitive_columns=True order_by=True natural_ordering=False case_insensitive_ordering=True order_by_unicode_normalization_by_default=False case_insensitive_ascii_only=False take_drop=True allows_mixed_type_comparisons=True supports_unicode_normalization=False
## A common test suite for shared operations on the Table API.
@ -1118,8 +1118,8 @@ spec prefix table_builder test_selection pending=Nothing =
t.filter "X" (Filter_Condition.Equal to=(t.at "Y")) . at "X" . to_vector . should_equal ["b", "c"]
t.filter "X" (Filter_Condition.Between (t.at "Y") "bzzzz") . at "X" . to_vector . should_equal ["abb", "baca", "b"]
Test.specify "by text search (contains, starts_with, ends_with)" <|
t = table_builder [["ix", [1, 2, 3, 4, 5]], ["X", ["abb", "baca", "banana", Nothing, "nana"]], ["Y", ["a", "b", "b", "c", "a"]]]
Test.specify "by text search (contains, starts_with, ends_with, like)" <|
t = table_builder [["ix", [1, 2, 3, 4, 5]], ["X", ["abb", "baca", "banana", Nothing, "nana"]], ["Y", ["a", "b", "b", "c", "a"]], ["Z", ["aaaaa", "bbbbb", "[ab]", "[ab]aaaa", "[ab]ccc"]]]
t.filter "X" (Filter_Condition.Starts_With "ba") on_problems=Report_Error . at "X" . to_vector . should_equal ["baca", "banana"]
t.filter "X" (Filter_Condition.Ends_With "na") on_problems=Report_Error . at "X" . to_vector . should_equal ["banana", "nana"]
@ -1129,8 +1129,51 @@ spec prefix table_builder test_selection pending=Nothing =
t.filter "X" (Filter_Condition.Ends_With (t.at "Y")) on_problems=Report_Error . at "X" . to_vector . should_equal ["nana"]
t.filter "X" (Filter_Condition.Contains (t.at "Y")) on_problems=Report_Error . at "X" . to_vector . should_equal ["abb", "baca", "banana", "nana"]
t.filter "X" (Filter_Condition.Like "%an%") on_problems=Report_Error . at "X" . to_vector . should_equal ["banana", "nana"]
t.filter "X" (Filter_Condition.Like "_a%") on_problems=Report_Error . at "X" . to_vector . should_equal ["baca", "banana", "nana"]
t.filter "X" (Filter_Condition.Like "%b") on_problems=Report_Error . at "X" . to_vector . should_equal ["abb"]
t.filter "X" (Filter_Condition.Like "nana") on_problems=Report_Error . at "X" . to_vector . should_equal ["nana"]
t.filter "Z" (Filter_Condition.Like "[ab]_%") on_problems=Report_Error . at "Z" . to_vector . should_equal ["[ab]aaaa", "[ab]ccc"]
t.filter "X" (Filter_Condition.Not_Like "%b") on_problems=Report_Error . at "X" . to_vector . should_equal ["baca", "banana", "nana"]
t.filter "Z" (Filter_Condition.Not_Like "[ab]%") on_problems=Report_Error . at "Z" . to_vector . should_equal ["aaaaa", "bbbbb"]
Test.specify "text operations should also match newlines" <|
t = table_builder [["X", ['a\n\n\n', 'a\n', 'a\n\n\nb', 'a\nb', 'caa\nbb']]]
t.filter "X" (Filter_Condition.Like 'a_') on_problems=Report_Error . at "X" . to_vector . should_equal ['a\n']
t.filter "X" (Filter_Condition.Like 'a%') on_problems=Report_Error . at "X" . to_vector . should_equal ['a\n\n\n', 'a\n', 'a\n\n\nb', 'a\nb']
t.filter "X" (Filter_Condition.Like 'a_b') on_problems=Report_Error . at "X" . to_vector . should_equal ['a\nb']
t.filter "X" (Filter_Condition.Like '%\nb') on_problems=Report_Error . at "X" . to_vector . should_equal ['a\n\n\nb', 'a\nb']
t.filter "X" (Filter_Condition.Contains '\nb') on_problems=Report_Error . at "X" . to_vector . should_equal ['a\n\n\nb', 'a\nb', 'caa\nbb']
t.filter "X" (Filter_Condition.Ends_With '\nb') on_problems=Report_Error . at "X" . to_vector . should_equal ['a\n\n\nb', 'a\nb']
t.filter "X" (Filter_Condition.Ends_With '\n') on_problems=Report_Error . at "X" . to_vector . should_equal ['a\n\n\n', 'a\n']
t.filter "X" (Filter_Condition.Starts_With 'c') on_problems=Report_Error . at "X" . to_vector . should_equal ['caa\nbb']
if test_selection.supports_unicode_normalization then
t = table_builder [["X", ['śnieg', 's\u0301nieg', 'X', Nothing, 'połać', 'połac\u0301']]]
Test.specify "text operations should support Unicode normalization" <|
t.filter "X" (Filter_Condition.Starts_With 'ś') on_problems=Report_Error . at "X" . to_vector . should_equal ['śnieg', 's\u0301nieg']
t.filter "X" (Filter_Condition.Contains 'ś') on_problems=Report_Error . at "X" . to_vector . should_equal ['śnieg', 's\u0301nieg']
t.filter "X" (Filter_Condition.Ends_With 'ś') on_problems=Report_Error . at "X" . to_vector . should_equal []
t.filter "X" (Filter_Condition.Ends_With 'ć') on_problems=Report_Error . at "X" . to_vector . should_equal ['połać', 'połac\u0301']
# This should be replaced with the disabled test below, once the related bug is fixed.
t.filter "X" (Filter_Condition.Like 'ś%') on_problems=Report_Error . at "X" . to_vector . should_equal ['śnieg']
# This test is split off just to mark it as pending; once resolved, it can be merged with the one above.
Test.specify "text operations should support Unicode normalization (like)" pending='There is a bug with Java Regex in Unicode normalized mode (CANON_EQ) with quoting.\nhttps://bugs.java.com/bugdatabase/view_bug.do?bug_id=8032926' <|
t.filter "X" (Filter_Condition.Like 'ś%') on_problems=Report_Error . at "X" . to_vector . should_equal ['śnieg', 's\u0301nieg']
Test.specify "by empty text" <|
t = table_builder [["ix", [1, 2, 3, 4, 5]], ["X", ["abb", "", " ", Nothing, "nana"]]]
t.filter "X" Filter_Condition.Is_Empty on_problems=Report_Error . at "X" . to_vector . should_equal ["", Nothing]
t.filter "X" Filter_Condition.Not_Empty on_problems=Report_Error . at "X" . to_vector . should_equal ["abb", " ", "nana"]
Test.specify "should check types for text operations" <|
t = table_builder [["ix", [1, 2, 3, 4]], ["X", [Nothing, "A", "", " "]]]
check_column_type_error_handling action =
tester = check_empty ["ix", "X", "Y"]
tester = check_empty ["ix", "X"]
check_problem problem =
problem.should_be_a Invalid_Value_Type.Invalid_Value_Type_Data
problem.expected . should_equal Value_Type.Char
@ -1142,17 +1185,27 @@ spec prefix table_builder test_selection pending=Nothing =
check_column_type_error_handling (t.filter "X" (Filter_Condition.Starts_With (t.at "ix")) on_problems=_)
check_column_type_error_handling (t.filter "X" (Filter_Condition.Ends_With (t.at "ix")) on_problems=_)
check_column_type_error_handling (t.filter "X" (Filter_Condition.Contains (t.at "ix")) on_problems=_)
check_column_type_error_handling (t.filter "X" (Filter_Condition.Like (t.at "ix")) on_problems=_)
check_column_type_error_handling (t.filter "X" (Filter_Condition.Not_Like (t.at "ix")) on_problems=_)
check_column_type_error_handling (t.filter "ix" (Filter_Condition.Starts_With "A") on_problems=_)
check_column_type_error_handling (t.filter "ix" (Filter_Condition.Ends_With "A") on_problems=_)
check_column_type_error_handling (t.filter "ix" (Filter_Condition.Contains "A") on_problems=_)
check_column_type_error_handling (t.filter "ix" (Filter_Condition.Like "A") on_problems=_)
check_column_type_error_handling (t.filter "ix" (Filter_Condition.Not_Like "A") on_problems=_)
check_column_type_error_handling (t.filter "ix" Filter_Condition.Is_Empty on_problems=_)
check_column_type_error_handling (t.filter "ix" Filter_Condition.Not_Empty on_problems=_)
check_scalar_type_error_handling name action =
tester = check_empty ["ix", "X", "Y"]
tester = check_empty ["ix", "X"]
problems = [Type_Error_Data Text Integer name]
Problems.test_problem_handling action problems tester
check_scalar_type_error_handling "prefix" (t.filter "X" (Filter_Condition.Starts_With 42) on_problems=_)
check_scalar_type_error_handling "suffix" (t.filter "X" (Filter_Condition.Ends_With 42) on_problems=_)
check_scalar_type_error_handling "substring" (t.filter "X" (Filter_Condition.Contains 42) on_problems=_)
check_scalar_type_error_handling "pattern" (t.filter "X" (Filter_Condition.Like 42) on_problems=_)
check_scalar_type_error_handling "pattern" (t.filter "X" (Filter_Condition.Not_Like 42) on_problems=_)
Test.specify "by nulls" <|
t = table_builder [["ix", [1, 2, 3, 4]], ["X", [Nothing, 1, Nothing, 4]]]
@ -1169,6 +1222,8 @@ spec prefix table_builder test_selection pending=Nothing =
t.filter "b" on_problems=Report_Error . at "ix" . to_vector . should_equal [1, 4, 5]
t.filter "b" Filter_Condition.Is_False on_problems=Report_Error . at "ix" . to_vector . should_equal [2]
Test.specify "should check types of boolean operations" <|
t = table_builder [["ix", [1, 2, 3, 4, 5]], ["b", [True, False, Nothing, True, True]]]
tester = check_empty ["ix", "b"]
check_problem problem =
problem.should_be_a Invalid_Value_Type.Invalid_Value_Type_Data

View File

@ -98,6 +98,16 @@ spec =
c2 = t1.filter (t1.at "A" == t1.at "C") . at "B"
c2.to_sql.prepare . should_equal ['SELECT "T1"."B" AS "B" FROM "T1" AS "T1" WHERE ("T1"."A" = "T1"."C")', []]
Test.specify "should generate a single BETWEEN expression" <|
t2 = t1.filter "A" (Filter_Condition.Between 10 20)
t2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE ("T1"."A" BETWEEN ? AND ?)', [[10, int], [20, int]]]
t3 = t1.filter "A" (Filter_Condition.Between (t1.at "B") (t1.at "C"))
t3.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE ("T1"."A" BETWEEN "T1"."B" AND "T1"."C")', []]
t4 = t1.filter "A" (Filter_Condition.Between (t1.at "B") 33)
t4.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE ("T1"."A" BETWEEN "T1"."B" AND ?)', [[33, int]]]
Test.group "[Codegen] Joining Tables" <|
t2 = test_connection.query (SQL_Query.Table_Name "T2")
t3 = test_connection.query (SQL_Query.Table_Name "T3")

View File

@ -670,7 +670,7 @@ spec =
t_3 = Table.new [c_3_1, c_3_2, c_3_3]
t_3.default_visualization.should_equal Visualization.Id.table
selection = Common_Table_Spec.Test_Selection.Config supports_case_sensitive_columns=True order_by=True natural_ordering=True case_insensitive_ordering=True order_by_unicode_normalization_by_default=True
selection = Common_Table_Spec.Test_Selection.Config supports_case_sensitive_columns=True order_by=True natural_ordering=True case_insensitive_ordering=True order_by_unicode_normalization_by_default=True supports_unicode_normalization=True
Common_Table_Spec.spec "[In-Memory] " table_builder=Table.new test_selection=selection
Test.group "Use First Row As Names" <|

View File

@ -49,6 +49,7 @@ spec = Test.group "List" <|
# Numeric comparison conditions applied to `list`. The expected results suggest
# `list` holds the integers 1 through 5 - TODO confirm against its definition,
# which is outside this hunk.
list.filter (Filter_Condition.Greater than=3) . should_equal [4, 5].to_list
list.filter (Filter_Condition.Less than=3.5) . should_equal [1, 2, 3].to_list
list.filter (Filter_Condition.Equal to=3) . should_equal (Cons 3 Nil)
# `Not_Equal` is expected to keep every element except the one equal to 3.
list.filter (Filter_Condition.Not_Equal to=3) . should_equal [1, 2, 4, 5].to_list
list.filter (Filter_Condition.Equal_Or_Greater than=3) . should_equal [3, 4, 5].to_list
# No element satisfies the condition, so the result is the empty list `Nil`.
list.filter (Filter_Condition.Equal_Or_Less than=(-1)) . should_equal Nil
# `Between 2 4` is expected to include both bounds.
list.filter (Filter_Condition.Between 2 4) . should_equal [2, 3, 4].to_list
@ -69,6 +70,13 @@ spec = Test.group "List" <|
# Text conditions on `txt` (defined outside this hunk).
txt.filter (Filter_Condition.Between "b" "c") . should_equal ["bbb", "baaa"].to_list
# A non-text argument to `Starts_With` is expected to panic rather than return
# an empty result.
Test.expect_panic_with (txt.filter (Filter_Condition.Starts_With 42)) Unsupported_Argument_Types_Data
# `Is_Empty` is expected to accept both the empty text and `Nothing`; a single
# space counts as non-empty.
["", Nothing, " ", "a"].to_list.filter (Filter_Condition.Is_Empty) . should_equal ["", Nothing].to_list
["", Nothing, " ", "a"].to_list.filter (Filter_Condition.Not_Empty) . should_equal [" ", "a"].to_list
# `Like` must match the whole value: a pattern without wildcards only matches
# the identical text.
["abab", "aaabaaaa", "ba"].to_list.filter (Filter_Condition.Like "ba") . should_equal (Cons "ba" Nil)
# `_` stands for exactly one character, `%` for any run of characters.
["abab", "aaabaaaa"].to_list.filter (Filter_Condition.Like "_ba_") . should_equal ["abab"].to_list
["abab", "aaabaaaa"].to_list.filter (Filter_Condition.Like "%ba__%") . should_equal ["aaabaaaa"].to_list
# Both inputs contain "ba", so `Not_Like "%ba%"` leaves nothing.
["abab", "aaabaaaa"].to_list.filter (Filter_Condition.Not_Like "%ba%") . should_equal Nil
# `Is_Nothing` / `Not_Nothing` on a list mixing a number, `Nothing` and a text.
mixed = [1, Nothing, "b"].to_list
mixed.filter Filter_Condition.Is_Nothing . should_equal (Cons Nothing Nil)
mixed.filter Filter_Condition.Not_Nothing . should_equal (Cons 1 (Cons "b" Nil))

View File

@ -72,16 +72,21 @@ spec = Test.group "Range" <|
# Numeric comparison conditions applied to `range`. The expected results
# suggest it covers the integers 1 through 5 - TODO confirm against its
# definition, which is outside this hunk.
range.filter (Filter_Condition.Greater than=3) . should_equal [4, 5]
range.filter (Filter_Condition.Less than=3.5) . should_equal [1, 2, 3]
range.filter (Filter_Condition.Equal to=3) . should_equal [3]
range.filter (Filter_Condition.Not_Equal to=3) . should_equal [1, 2, 4, 5]
range.filter (Filter_Condition.Equal_Or_Greater than=3) . should_equal [3, 4, 5]
range.filter (Filter_Condition.Equal_Or_Less than=(-1)) . should_equal []
# `Between` is expected to include both bounds; fractional bounds select the
# integers that lie between them.
range.filter (Filter_Condition.Between 2 4) . should_equal [2, 3, 4]
range.filter (Filter_Condition.Between 2.1 4.5) . should_equal [3, 4]
# Text conditions on the range's elements are expected to panic. Note the two
# different panic payloads: `Starts_With` is expected to raise
# `No_Such_Method_Error_Data`, while `Like` / `Not_Like` are expected to raise
# `Unsupported_Argument_Types_Data`.
Test.expect_panic_with (range.filter (Filter_Condition.Starts_With "a")) No_Such_Method_Error_Data
Test.expect_panic_with (range.filter (Filter_Condition.Like "a%")) Unsupported_Argument_Types_Data
Test.expect_panic_with (range.filter (Filter_Condition.Not_Like "a_")) Unsupported_Argument_Types_Data
# Boolean and `Nothing` checks do not panic: nothing in the range is `True`,
# `False` or `Nothing`.
range.filter Filter_Condition.Is_True . should_equal []
range.filter Filter_Condition.Is_False . should_equal []
range.filter Filter_Condition.Is_Nothing . should_equal []
range.filter Filter_Condition.Not_Nothing . should_equal [1, 2, 3, 4, 5]
# Emptiness checks do not panic either: no element counts as empty.
range.filter Filter_Condition.Is_Empty . should_equal []
range.filter Filter_Condition.Not_Empty . should_equal [1, 2, 3, 4, 5]
Test.specify "should allow iteration" <|
vec_mut = Vector.new_builder

View File

@ -144,6 +144,9 @@ spec = Test.group "Vectors" <|
# Numeric comparison conditions applied to `vec`. The expected results suggest
# `vec` is [1, 2, 3, 4, 5] - TODO confirm against its definition, which is
# outside this hunk.
vec.filter (Filter_Condition.Greater than=3) . should_equal [4, 5]
vec.filter (Filter_Condition.Less than=3.5) . should_equal [1, 2, 3]
vec.filter (Filter_Condition.Equal to=3) . should_equal [3]
# Equality is expected to hold across integer/decimal representations: 3.0
# selects the element 3, whereas 3.1 selects nothing.
vec.filter (Filter_Condition.Equal to=3.0) . should_equal [3]
vec.filter (Filter_Condition.Equal to=3.1) . should_equal []
vec.filter (Filter_Condition.Not_Equal to=3) . should_equal [1, 2, 4, 5]
vec.filter (Filter_Condition.Equal_Or_Greater than=3) . should_equal [3, 4, 5]
vec.filter (Filter_Condition.Equal_Or_Less than=(-1)) . should_equal []
# `Between 2 4` is expected to include both bounds.
vec.filter (Filter_Condition.Between 2 4) . should_equal [2, 3, 4]
@ -165,9 +168,38 @@ spec = Test.group "Vectors" <|
# Text conditions on `txtvec` (defined outside this hunk).
txtvec.filter (Filter_Condition.Between "b" "c") . should_equal ["bbb", "baaa"]
# A non-text argument to `Starts_With` is expected to panic.
Test.expect_panic_with (txtvec.filter (Filter_Condition.Starts_With 42)) Unsupported_Argument_Types_Data
# `Is_Empty` is expected to accept both the empty text and `Nothing`; a single
# space counts as non-empty.
["", Nothing, " ", "a"].filter (Filter_Condition.Is_Empty) . should_equal ["", Nothing]
["", Nothing, " ", "a"].filter (Filter_Condition.Not_Empty) . should_equal [" ", "a"]
# `Like` must match the whole value; `_` stands for exactly one character and
# `%` for any run of characters.
["abab", "aaabaaaa", "ba"].filter (Filter_Condition.Like "ba") . should_equal ["ba"]
["abab", "aaabaaaa"].filter (Filter_Condition.Like "_ba_") . should_equal ["abab"]
["abab", "aaabaaaa"].filter (Filter_Condition.Like "%ba__%") . should_equal ["aaabaaaa"]
# Only `_` and `%` are special in a `Like` pattern. The next cases check that
# regex-looking syntax (`[ab]`, `\Q...\E`, `.*`) is matched literally.
["aaaa", "bbbbb", "[ab]aaaa"].filter (Filter_Condition.Like "[ab]%") . should_equal ["[ab]aaaa"]
["a\Qa\Eabb", "aaabb"].filter (Filter_Condition.Like "_\Qa\Ea%") . should_equal ["a\Qa\Eabb"]
["f.txt", "abc.*"].filter (Filter_Condition.Like "%.*") . should_equal ["abc.*"]
["f.txt", "abc.*"].filter (Filter_Condition.Not_Like "%.*") . should_equal ["f.txt"]
# Both wildcards are expected to match newline characters: `_` matches a single
# '\n' and `%` spans multiple lines.
txt2 = ['a\n\n\n', 'a\n', 'a\n\n\nb', 'a\nb', 'caa\nbb']
txt2.filter (Filter_Condition.Like 'a_') . should_equal ['a\n']
txt2.filter (Filter_Condition.Like 'a%') . should_equal ['a\n\n\n', 'a\n', 'a\n\n\nb', 'a\nb']
txt2.filter (Filter_Condition.Like 'a_b') . should_equal ['a\nb']
txt2.filter (Filter_Condition.Like '%\nb') . should_equal ['a\n\n\nb', 'a\nb']
# `txt3` holds the same words in precomposed form and in decomposed form (base
# letter followed by the combining acute accent U+0301). `Starts_With`,
# `Contains` and `Ends_With` are expected to treat both spellings as equal.
txt3 = ['śnieg', 's\u0301nieg', 'X', 'połać', 'połac\u0301']
txt3.filter (Filter_Condition.Starts_With 'ś') . should_equal ['śnieg', 's\u0301nieg']
txt3.filter (Filter_Condition.Contains 'ś') . should_equal ['śnieg', 's\u0301nieg']
txt3.filter (Filter_Condition.Ends_With 'ś') . should_equal []
txt3.filter (Filter_Condition.Ends_With 'ć') . should_equal ['połać', 'połac\u0301']
# `Like` currently does not get the same treatment: the active assertion below
# pins the present behaviour (only the precomposed spelling matches), and the
# commented-out assertion is the intended one.
## There is a bug with Java Regex in Unicode normalized mode (CANON_EQ) with quoting.
https://bugs.java.com/bugdatabase/view_bug.do?bug_id=8032926
# txt3.filter (Filter_Condition.Like 'ś%') . should_equal ['śnieg', 's\u0301nieg']
# This should be replaced with the disabled test above, once the related bug is fixed.
txt3.filter (Filter_Condition.Like 'ś%') . should_equal ['śnieg']
# On a vector mixing a number, `Nothing` and a text, only `Nothing` counts as
# nothing/empty; the number 1 and the text "b" are both non-empty.
mixed = [1, Nothing, "b"]
mixed.filter Filter_Condition.Is_Nothing . should_equal [Nothing]
mixed.filter Filter_Condition.Not_Nothing . should_equal [1, "b"]
mixed.filter Filter_Condition.Is_Empty . should_equal [Nothing]
mixed.filter Filter_Condition.Not_Empty . should_equal [1, "b"]
boolvec = [True, False, Nothing, True]
boolvec.filter Filter_Condition.Is_True . should_equal [True, True]