Add Fallback to Prim_Text_Helper.compile_regex; accept Regex in Text.parse_to_table (#7297)

This PR does three related things:
- Fails more gracefully when a non-string is passed to compile_regex
- Avoids passing a non-string to compile_regex
- Allows a Regex param to be passed to parse_to_table
This commit is contained in:
GregoryTravis 2023-07-18 15:55:56 -04:00 committed by GitHub
parent fd0bdc86dd
commit 2fb5c3710b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 33 additions and 6 deletions

View File

@ -525,6 +525,7 @@
`date_part` and some shorthands. Extended `Time_Period` with milli-, micro-
and nanosecond periods.][7221]
- [Retire `Column_Selector` and allow regex based selection of columns.][7295]
- [`Text.parse_to_table` can take a `Regex`.][7297]
[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -755,6 +756,7 @@
[7234]: https://github.com/enso-org/enso/pull/7234
[7221]: https://github.com/enso-org/enso/pull/7221
[7295]: https://github.com/enso-org/enso/pull/7295
[7297]: https://github.com/enso-org/enso/pull/7297
#### Enso Compiler

View File

@ -103,7 +103,7 @@ Table.from_objects value fields=Nothing =
regex).
Arguments:
- pattern: The pattern used to search within the text.
- pattern: The regular expression as either `Text` or `Regex` to search within the text.
- case_sensitivity: Specifies if the text values should be compared case
sensitively.
- parse_values: Parse any values using the default value parser.
@ -114,8 +114,8 @@ Table.from_objects value fields=Nothing =
If the marked groups are named, the names will be used otherwise the column
will be named `Column <N>` where `N` is the number of the marked group.
(Group 0 is not included.)
Text.parse_to_table : Text -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! Type_Error | Regex_Syntax_Error | Illegal_Argument
Text.parse_to_table self pattern case_sensitivity=Case_Sensitivity.Sensitive parse_values=True on_problems=Report_Warning =
Text.parse_to_table : Text | Regex -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! Type_Error | Regex_Syntax_Error | Illegal_Argument
Text.parse_to_table self (pattern : Text | Regex) case_sensitivity=Case_Sensitivity.Sensitive parse_values=True on_problems=Report_Warning =
Parse_To_Table.parse_text_to_table self pattern case_sensitivity parse_values on_problems
## PRIVATE

View File

@ -15,10 +15,12 @@ from project.Errors import Duplicate_Output_Column_Names
Converts a Text into a Table using a regular expression pattern.
See Table.parse_text_to_table.
parse_text_to_table : Text -> Text -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! Type_Error | Regex_Syntax_Error | Illegal_Argument
parse_text_to_table text pattern_string="." case_sensitivity=Case_Sensitivity.Sensitive parse_values=True on_problems=Report_Warning =
parse_text_to_table : Text | Regex -> Text -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! Type_Error | Regex_Syntax_Error | Illegal_Argument
parse_text_to_table text regex_or_pattern_string="." case_sensitivity=Case_Sensitivity.Default parse_values=True on_problems=Report_Warning =
case_insensitive = case_sensitivity.is_case_insensitive_in_memory
pattern = Regex.compile pattern_string case_insensitive=case_insensitive
pattern = case regex_or_pattern_string of
_ : Regex -> regex_or_pattern_string.recompile case_sensitivity
_ : Text -> Regex.compile regex_or_pattern_string case_insensitive=case_insensitive
matches = pattern.match_all text
columns = case pattern.group_count == 1 of

View File

@ -2,6 +2,7 @@ package org.enso.interpreter.node.expression.builtin.text;
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
import com.oracle.truffle.api.dsl.Cached;
import com.oracle.truffle.api.dsl.Fallback;
import com.oracle.truffle.api.dsl.Specialization;
import com.oracle.truffle.api.exception.AbstractTruffleException;
import com.oracle.truffle.api.nodes.Node;
@ -45,6 +46,13 @@ public abstract class RegexCompileNode extends Node {
return compile(pattern.toString(), options.toString());
}
/**
 * Fallback specialization: used when no other specialization of this node
 * accepts the given arguments. It never returns normally; it always raises a
 * type error as a panic.
 *
 * NOTE(review): the error always names the {@code pattern} argument, even
 * though the fallback could presumably also be reached when only
 * {@code options} has an unexpected type -- confirm against the other
 * specializations of this node.
 *
 * @param pattern the value received in the pattern position
 * @param options the value received in the options position (not inspected here)
 * @return never returns normally
 * @throws PanicException always, carrying the constructed type error
 */
@Fallback
Object doOther(Object pattern, Object options) {
Builtins builtins = EnsoContext.get(this).getBuiltins();
// Build a type error with the Text builtin as the expected type, the received
// pattern as the offending value, and "pattern" as the argument name.
Atom err = builtins.error().makeTypeError(builtins.text(), pattern, "pattern");
// Raise the error as a panic attributed to this node.
throw new PanicException(err, this);
}
@TruffleBoundary
Object compile(String pattern, String options) {
var ctx = EnsoContext.get(this);

View File

@ -1,6 +1,7 @@
from Standard.Base import all
import Standard.Base.Data.Text.Regex.Regex_Syntax_Error
import Standard.Base.Errors.Common.Type_Error
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Table.Data.Table_Conversions
import Standard.Test.Extensions
@ -19,6 +20,12 @@ spec =
actual = "a 7 ab12 bt100 c12d20q 12".parse_to_table "[a-z]+\d*"
actual.should_equal expected
Test.specify "text_to_table with a regex" <|
expected = Table.from_rows ["Column"]
[["a"], ["ab12"], ["bt100"], ["c12"], ["d20"], ["q"]]
actual = "a 7 ab12 bt100 c12d20q 12".parse_to_table "[a-z]+\d*".to_regex
actual.should_equal expected
Test.group "Text.parse_to_table with groups" <|
Test.specify "with groups" <|
expected = Table.from_rows ["Column 1", "Column 2"]
@ -68,4 +75,7 @@ spec =
Test.specify "enpty pattern" <|
"abc".parse_to_table "" . should_fail_with Illegal_Argument
Test.specify "bad arg" <|
Test.expect_panic_with (actual = "a 7 ab12 bt100 c12d20q 12".parse_to_table 12) Type_Error
main = Test_Suite.run_main spec

View File

@ -33,6 +33,11 @@ spec =
Test.specify "should disallow empty patterns in `compile`" <|
Regex.compile "" . should_fail_with Illegal_Argument
Test.specify "passing a non-string should fail with a type error" <|
Test.expect_panic_with (Regex.compile 12) Type_Error
p = Regex.compile "[a-z]"
Test.expect_panic_with (Regex.compile p) Type_Error
Test.group "Escape" <|
Test.specify "should escape an expression for use as a literal" <|
Regex.escape "[a-z\d]+" . should_equal '\\[a-z\\d\\]\\+'