Implement better splitting for Text (#1298)

This commit is contained in:
Ara Adkins 2020-11-19 13:28:03 +00:00 committed by GitHub
parent ab2c5ed097
commit fbe1f4c439
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 511 additions and 115 deletions

View File

@ -1,6 +1,8 @@
from Base import all
from Builtins import Text, Prim_Text_Helpers
import Base.Data.Text.Split_Kind
from Builtins export Text
polyglot java import com.ibm.icu.text.BreakIterator
@ -69,9 +71,13 @@ Text.characters =
"ham,eggs,cheese,tomatoes".split ","
The code above returns:
["ham", "eggs", "cheese", "tomatoes"]
Text.split : Text -> Vector.Vector
Text.split (separator = " ") =
Vector.from_polyglot_array (Text_Utils.split_at [this, separator])
Text.split : Split_Kind -> Vector.Vector
Text.split (separator = Split_Kind.Whitespace) =
result = case separator of
Split_Kind.Whitespace -> Text_Utils.split_on_whitespace [this]
Split_Kind.Lines -> Text_Utils.split_on_lines [this]
Text -> Text_Utils.split_by_literal [this, separator]
Vector.from_polyglot_array result
## Checks whether `this` is equal to `that`.

View File

@ -0,0 +1,11 @@
from Base import all
## The kind of separator to use when splitting a text.
type Split_Kind
## Split on runs of unicode whitespace.
type Whitespace
## Split into lines, treating runs of vertical whitespace as line breaks.
type Lines
## Split on every occurrence of a literal text separator.
Text

View File

@ -0,0 +1,48 @@
package org.enso.interpreter.node.controlflow;
import com.oracle.truffle.api.RootCallTarget;
import com.oracle.truffle.api.dsl.Fallback;
import com.oracle.truffle.api.dsl.Specialization;
import com.oracle.truffle.api.frame.VirtualFrame;
import com.oracle.truffle.api.nodes.NodeInfo;
import com.oracle.truffle.api.profiles.ConditionProfile;
import org.enso.interpreter.runtime.callable.atom.Atom;
import org.enso.interpreter.runtime.callable.atom.AtomConstructor;
import org.enso.interpreter.runtime.data.Array;
/** A case branch that is selected when the scrutinee belongs to the builtin Array type. */
@NodeInfo(shortName = "ArrayMatch", description = "Allows matching on the Array type.")
public abstract class ArrayBranchNode extends BranchNode {
  private final ConditionProfile profile = ConditionProfile.createCountingProfile();
  private final AtomConstructor array;

  ArrayBranchNode(AtomConstructor array, RootCallTarget branch) {
    super(branch);
    this.array = array;
  }

  /**
   * Builds a branch node that matches against the Array constructor.
   *
   * @param array the constructor used for matching in this case
   * @param branch the code to execute in this case
   * @return an array branch node
   */
  public static ArrayBranchNode build(AtomConstructor array, RootCallTarget branch) {
    return ArrayBranchNodeGen.create(array, branch);
  }

  /**
   * Atom scrutinee: the branch is taken only when the atom was built by the Array constructor, in
   * which case the atom's fields are handed to the branch body.
   */
  @Specialization
  void doConstructor(VirtualFrame frame, Object state, Atom target) {
    final boolean isArrayConstructor = target.getConstructor() == array;
    if (!profile.profile(isArrayConstructor)) {
      return;
    }
    accept(frame, state, target.getFields());
  }

  /** Array scrutinee: always taken, and the branch body receives no arguments. */
  @Specialization
  void doArray(VirtualFrame frame, Object state, Array target) {
    final Object[] noFields = new Object[0];
    accept(frame, state, noFields);
  }

  /** Any other scrutinee: the branch does not match, so nothing happens. */
  @Fallback
  void doFallback(VirtualFrame frame, Object state, Object target) {}
}

View File

@ -23,11 +23,10 @@ import org.enso.interpreter.runtime.type.TypesGen;
public abstract class BooleanBranchNode extends BranchNode {
private final boolean matched;
private final ConditionProfile profile = ConditionProfile.createCountingProfile();
private @Child DirectCallNode callNode;
BooleanBranchNode(boolean matched, RootCallTarget branch) {
super(branch);
this.matched = matched;
this.callNode = DirectCallNode.create(branch);
}
/**
@ -41,39 +40,13 @@ public abstract class BooleanBranchNode extends BranchNode {
return BooleanBranchNodeGen.create(matched, branch);
}
/**
* Handles the boolean scrutinee case.
*
* @param frame the stack frame in which to execute
* @param state current monadic state
* @param target the atom to destructure
*/
@Specialization
public void doAtom(VirtualFrame frame, Object state, boolean target) {
void doAtom(VirtualFrame frame, Object state, boolean target) {
if (profile.profile(matched == target)) {
Stateful result =
(Stateful)
callNode.call(
Function.ArgumentsHelper.buildArguments(
frame.materialize(), state, new Object[0]));
// Note [Caller Info For Case Branches]
throw new BranchSelectedException(result);
accept(frame, state, new Object[0]);
}
}
/**
* The fallback specialisation for executing the boolean branch node.
*
* @param frame the stack frame in which to execute
* @param target the object to execute on
*/
@Fallback
public void doFallback(VirtualFrame frame, Object state, Object target) {}
/* Note [Caller Info For Case Branches]
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* It is assumed that functions serving as pattern match logic branches are always function
* literals, not references, curried functions etc. Therefore, as function literals, they
* have no way of accessing the caller frame and can safely be passed null.
*/
void doFallback(VirtualFrame frame, Object state, Object target) {}
}

View File

@ -1,15 +1,23 @@
package org.enso.interpreter.node.controlflow;
import com.oracle.truffle.api.RootCallTarget;
import com.oracle.truffle.api.frame.VirtualFrame;
import com.oracle.truffle.api.nodes.DirectCallNode;
import com.oracle.truffle.api.nodes.NodeInfo;
import com.oracle.truffle.api.nodes.UnexpectedResultException;
import org.enso.interpreter.node.BaseNode;
import org.enso.interpreter.runtime.callable.atom.Atom;
import org.enso.interpreter.runtime.callable.function.Function;
import org.enso.interpreter.runtime.state.Stateful;
/** An abstract representation of a case branch. */
@NodeInfo(shortName = "case_branch", description = "Represents a case branch at runtime.")
public abstract class BranchNode extends BaseNode {
private @Child DirectCallNode callNode;
BranchNode(RootCallTarget branch) {
this.callNode = DirectCallNode.create(branch);
}
/**
* Executes the case branch.
@ -19,4 +27,36 @@ public abstract class BranchNode extends BaseNode {
* @param target the object to match against
*/
public abstract void execute(VirtualFrame frame, Object state, Object target);
/**
 * Runs the body of this branch and signals that the branch has been selected.
 *
 * <p>This method never returns normally: the value produced by the branch body is wrapped in a
 * {@link BranchSelectedException}, which is then thrown.
 *
 * @param frame the stack frame in which to execute
 * @param state current monadic state
 * @param args the arguments to be passed to the branch body
 */
protected void accept(VirtualFrame frame, Object state, Object[] args) {
  // Note [Caller Info For Case Branches]
  final Object rawResult =
      callNode.call(Function.ArgumentsHelper.buildArguments(frame.materialize(), state, args));
  final Stateful result = (Stateful) rawResult;
  throw new BranchSelectedException(result);
}
/* Note [Caller Info For Case Branches]
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* It is assumed that functions serving as pattern match logic branches are always function
* literals, not references, curried functions etc. Therefore, as function literals, they
* have no way of accessing the caller frame and can safely be passed null.
*/
/* Note [Safe Casting to Function in Catch All Branches]
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* The syntactic nature of a catch all node guarantees that it has _only one_
* matcher in its pattern, regardless of whether it is named or a blank. As
* a result, we _know_ that the expression of the branch will _always_ be a
* function at code generation time, and hence we know that we can safely cast
* it to a function during execution.
*/
}

View File

@ -1,20 +1,8 @@
package org.enso.interpreter.node.controlflow;
import com.oracle.truffle.api.RootCallTarget;
import com.oracle.truffle.api.frame.FrameUtil;
import com.oracle.truffle.api.frame.VirtualFrame;
import com.oracle.truffle.api.nodes.DirectCallNode;
import com.oracle.truffle.api.nodes.NodeInfo;
import com.oracle.truffle.api.nodes.UnexpectedResultException;
import com.oracle.truffle.api.profiles.ConditionProfile;
import org.enso.interpreter.node.ExpressionNode;
import org.enso.interpreter.node.callable.ExecuteCallNode;
import org.enso.interpreter.node.callable.ExecuteCallNodeGen;
import org.enso.interpreter.node.callable.function.CreateFunctionNode;
import org.enso.interpreter.runtime.callable.atom.Atom;
import org.enso.interpreter.runtime.callable.function.Function;
import org.enso.interpreter.runtime.state.Stateful;
import org.enso.interpreter.runtime.type.TypesGen;
/**
* This node represents an explicit catch-all case in a pattern match, as provided by the user. It
@ -24,10 +12,9 @@ import org.enso.interpreter.runtime.type.TypesGen;
shortName = "Catch_All",
description = "An explicit catch-all branch in a case expression")
public class CatchAllBranchNode extends BranchNode {
private @Child DirectCallNode callNode;
private CatchAllBranchNode(RootCallTarget functionNode) {
this.callNode = DirectCallNode.create(functionNode);
super(functionNode);
}
/**
@ -48,21 +35,6 @@ public class CatchAllBranchNode extends BranchNode {
* @param target the object to match against
*/
public void execute(VirtualFrame frame, Object state, Object target) {
// Note [Safe Casting to Function in Catch All Branches]
Stateful result =
(Stateful)
callNode.call(
Function.ArgumentsHelper.buildArguments(
frame.materialize(), state, new Object[] {target}));
throw new BranchSelectedException(result);
accept(frame, state, new Object[] {target});
}
/* Note [Safe Casting to Function in Catch All Branches]
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* The syntactic nature of a catch all node guarantees that it has _only one_
* matcher in its pattern, regardless of whether it is named or a blank. As
* a result, we _know_ that the expression of the branch will _always_ be a
* function at code generation time, and hence we know that we can safely cast
* it to a function during execution.
*/
}

View File

@ -1,36 +1,23 @@
package org.enso.interpreter.node.controlflow;
import com.oracle.truffle.api.CompilerDirectives;
import com.oracle.truffle.api.RootCallTarget;
import com.oracle.truffle.api.dsl.Fallback;
import com.oracle.truffle.api.dsl.Specialization;
import com.oracle.truffle.api.frame.FrameUtil;
import com.oracle.truffle.api.frame.VirtualFrame;
import com.oracle.truffle.api.nodes.DirectCallNode;
import com.oracle.truffle.api.nodes.ExplodeLoop;
import com.oracle.truffle.api.nodes.NodeInfo;
import com.oracle.truffle.api.profiles.ConditionProfile;
import org.enso.compiler.Compiler;
import org.enso.interpreter.node.ExpressionNode;
import org.enso.interpreter.node.callable.ExecuteCallNode;
import org.enso.interpreter.node.callable.ExecuteCallNodeGen;
import org.enso.interpreter.node.callable.function.CreateFunctionNode;
import org.enso.interpreter.runtime.callable.atom.Atom;
import org.enso.interpreter.runtime.callable.atom.AtomConstructor;
import org.enso.interpreter.runtime.callable.function.Function;
import org.enso.interpreter.runtime.state.Stateful;
import org.enso.interpreter.runtime.type.TypesGen;
/** An implementation of the case expression specialised to working on constructors. */
@NodeInfo(shortName = "ConstructorMatch")
public abstract class ConstructorBranchNode extends BranchNode {
private final AtomConstructor matcher;
private @Child DirectCallNode callNode;
private final ConditionProfile profile = ConditionProfile.createCountingProfile();
ConstructorBranchNode(AtomConstructor matcher, RootCallTarget branch) {
super(branch);
this.matcher = matcher;
this.callNode = DirectCallNode.create(branch);
}
/**
@ -44,42 +31,13 @@ public abstract class ConstructorBranchNode extends BranchNode {
return ConstructorBranchNodeGen.create(matcher, branch);
}
/**
* Handles the atom scrutinee case.
*
* <p>The atom's constructor is checked and if it matches the conditional branch is executed with
* all the atom's fields as arguments.
*
* @param frame the stack frame in which to execute
* @param state current monadic state
* @param target the atom to destructure
*/
@Specialization
public void doAtom(VirtualFrame frame, Object state, Atom target) {
void doAtom(VirtualFrame frame, Object state, Atom target) {
if (profile.profile(matcher == target.getConstructor())) {
// Note [Caller Info For Case Branches]
Stateful result =
(Stateful)
callNode.call(
Function.ArgumentsHelper.buildArguments(
frame.materialize(), state, target.getFields()));
throw new BranchSelectedException(result);
accept(frame, state, target.getFields());
}
}
/**
* The fallback specialisation for executing the constructor branch node.
*
* @param frame the stack frame in which to execute
* @param target the object to execute on
*/
@Fallback
public void doFallback(VirtualFrame frame, Object state, Object target) {}
/* Note [Caller Info For Case Branches]
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* It is assumed that functions serving as pattern match logic branches are always function
* literals, not references, curried functions etc. Therefore, as function literals, they
* have no way of accessing the caller frame and can safely be passed null.
*/
void doFallback(VirtualFrame frame, Object state, Object target) {}
}

View File

@ -0,0 +1,47 @@
package org.enso.interpreter.node.controlflow;
import com.oracle.truffle.api.RootCallTarget;
import com.oracle.truffle.api.dsl.Fallback;
import com.oracle.truffle.api.dsl.Specialization;
import com.oracle.truffle.api.frame.VirtualFrame;
import com.oracle.truffle.api.nodes.NodeInfo;
import com.oracle.truffle.api.profiles.ConditionProfile;
import org.enso.interpreter.runtime.callable.atom.Atom;
import org.enso.interpreter.runtime.callable.atom.AtomConstructor;
/** A case branch that is selected when the scrutinee belongs to the builtin Decimal type. */
@NodeInfo(shortName = "DecimalMatch", description = "Allows matching on the Decimal type.")
public abstract class DecimalBranchNode extends BranchNode {
  private final AtomConstructor decimal;
  private final ConditionProfile profile = ConditionProfile.createCountingProfile();

  DecimalBranchNode(AtomConstructor decimal, RootCallTarget branch) {
    super(branch);
    this.decimal = decimal;
  }

  /**
   * Create a new node to handle matching with the Decimal constructor.
   *
   * @param decimal the constructor used for matching
   * @param branch the code to execute in this case
   * @return a decimal branch node
   */
  public static DecimalBranchNode build(AtomConstructor decimal, RootCallTarget branch) {
    return DecimalBranchNodeGen.create(decimal, branch);
  }

  /**
   * Atom scrutinee: the branch is taken only when the atom was built by the Decimal constructor,
   * in which case the atom's fields are handed to the branch body.
   */
  @Specialization
  void doConstructor(VirtualFrame frame, Object state, Atom target) {
    if (profile.profile(decimal == target.getConstructor())) {
      accept(frame, state, target.getFields());
    }
  }

  /** {@code double} scrutinee: always taken, and the branch body receives no arguments. */
  @Specialization
  void doLiteral(VirtualFrame frame, Object state, double target) {
    accept(frame, state, new Object[0]);
  }

  /** Any other scrutinee: the branch does not match, so nothing happens. */
  @Fallback
  void doFallback(VirtualFrame frame, Object state, Object target) {}
}

View File

@ -0,0 +1,53 @@
package org.enso.interpreter.node.controlflow;
import com.oracle.truffle.api.RootCallTarget;
import com.oracle.truffle.api.dsl.Fallback;
import com.oracle.truffle.api.dsl.Specialization;
import com.oracle.truffle.api.frame.VirtualFrame;
import com.oracle.truffle.api.nodes.NodeInfo;
import com.oracle.truffle.api.profiles.ConditionProfile;
import org.enso.interpreter.runtime.callable.atom.Atom;
import org.enso.interpreter.runtime.callable.atom.AtomConstructor;
import org.enso.interpreter.runtime.number.EnsoBigInteger;
/** A case branch that is selected when the scrutinee belongs to the builtin Integer type. */
@NodeInfo(shortName = "IntegerMatch", description = "Allows matching on the Integer type.")
public abstract class IntegerBranchNode extends BranchNode {
  private final ConditionProfile profile = ConditionProfile.createCountingProfile();
  private final AtomConstructor integer;

  public IntegerBranchNode(AtomConstructor integer, RootCallTarget branch) {
    super(branch);
    this.integer = integer;
  }

  /**
   * Builds a branch node that matches against the Integer constructor.
   *
   * @param integer the constructor used for matching
   * @param branch the code to execute
   * @return an integer branch node
   */
  public static IntegerBranchNode build(AtomConstructor integer, RootCallTarget branch) {
    return IntegerBranchNodeGen.create(integer, branch);
  }

  /**
   * Atom scrutinee: the branch is taken only when the atom was built by the Integer constructor,
   * in which case the atom's fields are handed to the branch body.
   */
  @Specialization
  void doConstructor(VirtualFrame frame, Object state, Atom target) {
    final boolean isIntegerConstructor = target.getConstructor() == integer;
    if (!profile.profile(isIntegerConstructor)) {
      return;
    }
    accept(frame, state, target.getFields());
  }

  /** {@code long} scrutinee: always taken, and the branch body receives no arguments. */
  @Specialization
  void doSmallInteger(VirtualFrame frame, Object state, long target) {
    final Object[] noFields = new Object[0];
    accept(frame, state, noFields);
  }

  /** Big integer scrutinee: always taken, and the branch body receives no arguments. */
  @Specialization
  void doBigInteger(VirtualFrame frame, Object state, EnsoBigInteger target) {
    final Object[] noFields = new Object[0];
    accept(frame, state, noFields);
  }

  /** Any other scrutinee: the branch does not match, so nothing happens. */
  @Fallback
  void doFallback(VirtualFrame frame, Object state, Object target) {}
}

View File

@ -0,0 +1,58 @@
package org.enso.interpreter.node.controlflow;
import com.oracle.truffle.api.RootCallTarget;
import com.oracle.truffle.api.dsl.Fallback;
import com.oracle.truffle.api.dsl.Specialization;
import com.oracle.truffle.api.frame.VirtualFrame;
import com.oracle.truffle.api.nodes.NodeInfo;
import com.oracle.truffle.api.profiles.ConditionProfile;
import org.enso.interpreter.runtime.callable.atom.Atom;
import org.enso.interpreter.runtime.callable.atom.AtomConstructor;
import org.enso.interpreter.runtime.number.EnsoBigInteger;
/** A case branch that is selected when the scrutinee belongs to the builtin Number type. */
@NodeInfo(shortName = "NumberMatch", description = "Allows matching on the Number type.")
public abstract class NumberBranchNode extends BranchNode {
  private final ConditionProfile profile = ConditionProfile.createCountingProfile();
  private final AtomConstructor number;

  NumberBranchNode(AtomConstructor number, RootCallTarget branch) {
    super(branch);
    this.number = number;
  }

  /**
   * Builds a branch node that matches against the Number constructor.
   *
   * @param number the constructor used for matching
   * @param branch the code to execute
   * @return a number branch node
   */
  public static NumberBranchNode build(AtomConstructor number, RootCallTarget branch) {
    return NumberBranchNodeGen.create(number, branch);
  }

  /**
   * Atom scrutinee: the branch is taken only when the atom was built by the Number constructor, in
   * which case the atom's fields are handed to the branch body.
   */
  @Specialization
  void doConstructor(VirtualFrame frame, Object state, Atom target) {
    final boolean isNumberConstructor = target.getConstructor() == number;
    if (!profile.profile(isNumberConstructor)) {
      return;
    }
    accept(frame, state, target.getFields());
  }

  /** {@code long} scrutinee: always taken, and the branch body receives no arguments. */
  @Specialization
  void doSmallInteger(VirtualFrame frame, Object state, long target) {
    final Object[] noFields = new Object[0];
    accept(frame, state, noFields);
  }

  /** Big integer scrutinee: always taken, and the branch body receives no arguments. */
  @Specialization
  void doBigInteger(VirtualFrame frame, Object state, EnsoBigInteger target) {
    final Object[] noFields = new Object[0];
    accept(frame, state, noFields);
  }

  /** {@code double} scrutinee: always taken, and the branch body receives no arguments. */
  @Specialization
  void doDecimal(VirtualFrame frame, Object state, double target) {
    final Object[] noFields = new Object[0];
    accept(frame, state, noFields);
  }

  /** Any other scrutinee: the branch does not match, so nothing happens. */
  @Fallback
  void doFallback(VirtualFrame frame, Object state, Object target) {}
}

View File

@ -0,0 +1,61 @@
package org.enso.interpreter.node.controlflow;
import com.oracle.truffle.api.RootCallTarget;
import com.oracle.truffle.api.dsl.CachedContext;
import com.oracle.truffle.api.dsl.Fallback;
import com.oracle.truffle.api.dsl.Specialization;
import com.oracle.truffle.api.frame.VirtualFrame;
import com.oracle.truffle.api.nodes.NodeInfo;
import com.oracle.truffle.api.profiles.ConditionProfile;
import org.enso.interpreter.Language;
import org.enso.interpreter.runtime.Context;
import org.enso.interpreter.runtime.callable.atom.Atom;
import org.enso.interpreter.runtime.callable.atom.AtomConstructor;
/** A case branch that matches on the Polyglot constructor and on polyglot (host) objects. */
@NodeInfo(shortName = "PolyglotMatch", description = "Allows matching on polyglot objects.")
public abstract class PolyglotBranchNode extends BranchNode {
private final AtomConstructor polyglot;
// Separate counting profiles: one for the constructor comparison, one for the host-object check.
private final ConditionProfile constructorProfile = ConditionProfile.createCountingProfile();
private final ConditionProfile polyglotProfile = ConditionProfile.createCountingProfile();
PolyglotBranchNode(AtomConstructor polyglot, RootCallTarget branch) {
super(branch);
this.polyglot = polyglot;
}
/**
* Create a new node to handle matching with the Polyglot constructor.
*
* @param polyglot the constructor used for matching
* @param branch the code to execute
* @return a polyglot branch node
*/
public static PolyglotBranchNode build(AtomConstructor polyglot, RootCallTarget branch) {
return PolyglotBranchNodeGen.create(polyglot, branch);
}
/**
* Atom scrutinee: the branch is taken only when the atom was built by the Polyglot constructor,
* in which case the atom's fields are passed to the branch body.
*/
@Specialization
void doConstructor(VirtualFrame frame, Object state, Atom target) {
if (constructorProfile.profile(polyglot == target.getConstructor())) {
accept(frame, state, target.getFields());
}
}
/**
* Host-object scrutinee: the branch is taken with no arguments passed to the branch body.
*
* <p>NOTE(review): the body repeats the same {@code isPolyglotObject} check that the
* specialization guard already performs; confirm whether the second check is intentional.
*/
@Specialization(guards = "isPolyglotObject(context,obj)")
void doLiteral(
VirtualFrame frame,
Object state,
Object obj,
@CachedContext(Language.class) Context context) {
if (polyglotProfile.profile(isPolyglotObject(context, obj))) {
accept(frame, state, new Object[0]);
}
}
/** Any other scrutinee: the branch does not match, so nothing happens. */
@Fallback
void doFallback(VirtualFrame frame, Object state, Object target) {}
/**
* Checks whether the given object is a host object according to the context's environment.
*
* @param context the language context whose environment is queried
* @param o the object to check
* @return the result of {@code isHostObject} for {@code o}
*/
boolean isPolyglotObject(Context context, Object o) {
return context.getEnvironment().isHostObject(o);
}
}

View File

@ -0,0 +1,48 @@
package org.enso.interpreter.node.controlflow;
import com.oracle.truffle.api.RootCallTarget;
import com.oracle.truffle.api.dsl.Fallback;
import com.oracle.truffle.api.dsl.Specialization;
import com.oracle.truffle.api.frame.VirtualFrame;
import com.oracle.truffle.api.nodes.NodeInfo;
import com.oracle.truffle.api.profiles.ConditionProfile;
import org.enso.interpreter.runtime.callable.atom.Atom;
import org.enso.interpreter.runtime.callable.atom.AtomConstructor;
import org.enso.interpreter.runtime.data.text.Text;
/** A case branch that is selected when the scrutinee belongs to the builtin Text type. */
@NodeInfo(shortName = "TextMatch", description = "Allows matching on the Text type.")
public abstract class TextBranchNode extends BranchNode {
  private final ConditionProfile profile = ConditionProfile.createCountingProfile();
  private final AtomConstructor text;

  TextBranchNode(AtomConstructor text, RootCallTarget branch) {
    super(branch);
    this.text = text;
  }

  /**
   * Builds a node for handling matching on a text in a case expression.
   *
   * @param text the constructor to use for matching
   * @param branch the expression to be executed if {@code text} matches
   * @return a node for matching on text in a case expression
   */
  public static TextBranchNode build(AtomConstructor text, RootCallTarget branch) {
    return TextBranchNodeGen.create(text, branch);
  }

  /**
   * Atom scrutinee: the branch is taken only when the atom was built by the Text constructor, in
   * which case the atom's fields are handed to the branch body.
   */
  @Specialization
  void doConstructor(VirtualFrame frame, Object state, Atom target) {
    final boolean isTextConstructor = target.getConstructor() == text;
    if (!profile.profile(isTextConstructor)) {
      return;
    }
    accept(frame, state, target.getFields());
  }

  /** Text scrutinee: always taken, and the branch body receives no arguments. */
  @Specialization
  void doLiteral(VirtualFrame frame, Object state, Text target) {
    final Object[] noFields = new Object[0];
    accept(frame, state, noFields);
  }

  /** Any other scrutinee: the branch does not match, so nothing happens. */
  @Fallback
  void doFallback(VirtualFrame frame, Object state, Object target) {}
}

View File

@ -16,6 +16,7 @@ import org.enso.interpreter.runtime.scope.ModuleScope;
/** A container class for all Polyglot-related stdlib builtins. */
public class Polyglot {
private final AtomConstructor polyglot;
private final RootCallTarget interopDispatchRoot;
private final FunctionSchema interopDispatchSchema;
private final Function newInstanceFunction;
@ -30,6 +31,7 @@ public class Polyglot {
* @param scope the builtin scope.
*/
public Polyglot(Language language, ModuleScope scope) {
this.polyglot = new AtomConstructor("Polyglot", scope).initializeFields();
// Note [Syntactic Functions]
interopDispatchRoot = Truffle.getRuntime().createCallTarget(MethodDispatchNode.build(language));
@ -60,7 +62,6 @@ public class Polyglot {
*/
private void createPolyglot(Language language, ModuleScope scope) {
AtomConstructor polyglot = new AtomConstructor("Polyglot", scope).initializeFields();
scope.registerConstructor(polyglot);
scope.registerMethod(polyglot, "execute", ExecuteMethodGen.makeFunction(language));
scope.registerMethod(polyglot, "invoke", InvokeMethodGen.makeFunction(language));
@ -81,6 +82,11 @@ public class Polyglot {
return new Function(interopDispatchRoot, null, interopDispatchSchema, preAppliedArr, null);
}
/**
* Gets the atom constructor named {@code Polyglot} that this container holds.
*
* @return the atom constructor for polyglot
*/
public AtomConstructor getPolyglot() {
return polyglot;
}
/** @return a function taking a polyglot array and returning its length. */
public Function getPolyglotArrayLengthFunction() {
return polyglotArrayLengthFunction;

View File

@ -701,12 +701,30 @@ class IrToTruffle(
)
runtimeConsOpt.map { atomCons =>
val bool = context.getBuiltins.bool()
val array = context.getBuiltins.mutable.constructor
val bool = context.getBuiltins.bool
val decimal = context.getBuiltins.number.getDecimal
val integer = context.getBuiltins.number.getInteger
val number = context.getBuiltins.number.getNumber
val polyglot = context.getBuiltins.polyglot.getPolyglot
val text = context.getBuiltins.text
val branchNode: BranchNode =
if (atomCons == bool.getTrue) {
BooleanBranchNode.build(true, branchCodeNode.getCallTarget)
} else if (atomCons == bool.getFalse) {
BooleanBranchNode.build(false, branchCodeNode.getCallTarget)
} else if (atomCons == text.getText) {
TextBranchNode.build(text.getText, branchCodeNode.getCallTarget)
} else if (atomCons == integer) {
IntegerBranchNode.build(integer, branchCodeNode.getCallTarget)
} else if (atomCons == decimal) {
DecimalBranchNode.build(decimal, branchCodeNode.getCallTarget)
} else if (atomCons == number) {
NumberBranchNode.build(number, branchCodeNode.getCallTarget)
} else if (atomCons == array) {
ArrayBranchNode.build(array, branchCodeNode.getCallTarget)
} else if (atomCons == polyglot) {
PolyglotBranchNode.build(polyglot, branchCodeNode.getCallTarget)
} else {
ConstructorBranchNode.build(
atomCons,

View File

@ -1,12 +1,16 @@
package org.enso.base;
import com.ibm.icu.text.Normalizer2;
import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;
/** Utils for standard library operations on Text. */
public class Text_Utils {
private static final Pattern whitespace =
Pattern.compile("\\s+", Pattern.UNICODE_CHARACTER_CLASS);
private static final Pattern vertical_space =
Pattern.compile("\\v+", Pattern.UNICODE_CHARACTER_CLASS);
/**
* Creates a substring of the given string, indexing using the Java standard (UTF-16) indexing
* mechanism.
@ -59,10 +63,32 @@ public class Text_Utils {
* @param sep the separator string
* @return array of substrings of {@code str} contained between occurrences of {@code sep}
*/
public static String[] split_at(String str, String sep) {
public static String[] split_by_literal(String str, String sep) {
return str.split(Pattern.quote(sep));
}
/**
 * Splits the string around Unicode whitespace, returning the resulting substrings in an array.
 *
 * <p>A run of consecutive whitespace characters acts as a single separator.
 *
 * @param str the string to split
 * @return the array of substrings of {@code str}
 */
public static String[] split_on_whitespace(String str) {
  // A limit of zero is exactly what the single-argument `split` uses.
  return whitespace.split(str, 0);
}
/**
 * Splits the string around Unicode vertical whitespace, returning the resulting substrings in an
 * array.
 *
 * <p>A run of consecutive vertical whitespace characters acts as a single separator.
 *
 * @param str the string to split
 * @return the array of substrings of {@code str}
 */
public static String[] split_on_lines(String str) {
  // A limit of zero is exactly what the single-argument `split` uses.
  return vertical_space.split(str, 0);
}
/**
* Checks whether two strings are equal up to Unicode canonicalization.
*

View File

@ -1,6 +1,8 @@
from Base import all
import Test
import Base.Data.Text.Split_Kind
type Auto a
type Manual b
@ -14,6 +16,10 @@ spec = describe "Text" <|
facepalm_codes = [129318, 127996, 8205, 9794, 65039]
accent_1 = '\u00E9'
accent_2 = '\u0065\u{301}'
utf_8_whitespace = 'foo\n bar baz \u202F quux'
utf_8_whitespace_split = ["foo", "bar", "baz", "quux"]
utf_8_vertical = 'foo\n bar \v baz \r quux'
utf_8_vertical_split = ["foo", " bar ", " baz ", " quux"]
it "should allow naive length computation over grapheme clusters" <|
kshi.length . should_equal 1
facepalm.length . should_equal 1
@ -23,7 +29,11 @@ spec = describe "Text" <|
it "should split the text into grapheme clusters" <|
str = kshi + facepalm + accent_1 + accent_2
str.characters . should_equal [kshi, facepalm, accent_1, accent_2]
it "should split the text on arbitrary sequence" <|
it "should be able to split the text on UTF-8 whitespace" <|
utf_8_whitespace.split . should_equal utf_8_whitespace_split
it "should be able to split the text on UTF-8 newlines" <|
utf_8_vertical.split Split_Kind.Lines . should_equal utf_8_vertical_split
it "should be able to split the text on arbitrary text sequence" <|
"foo, bar, baz" . split ", " . should_equal ["foo", "bar", "baz"]
it "should dump utf-8 bytes to a vector" <|
kshi.utf_8.should_equal kshi_utf_8

View File

@ -1,5 +1,6 @@
import Test
import Tests.Semantic.Case_Spec
import Tests.Semantic.Deep_Export.Spec as Deep_Export_Spec
import Tests.Semantic.Error_Spec
import Tests.Semantic.Import_Loop.Spec as Import_Loop_Spec
@ -27,6 +28,7 @@ import Tests.System.File_Spec
import Tests.System.Process_Spec
main = Test.Suite.runMain <|
Case_Spec.spec
Deep_Export_Spec.spec
Error_Spec.spec
File_Spec.spec

View File

@ -0,0 +1,59 @@
from Base import all
import Test
polyglot java import java.util.Random
spec = describe "Pattern Matches" <|
it "should be able to match on the Integer type" <|
case 1 of
Integer -> Nothing
_ -> Test.fail "Expected an integer to match."
case 100000000000000000000000000000000000 of
Integer -> Nothing
_ -> Test.fail "Expected an integer to match."
case Integer of
Integer -> Nothing
_ -> Test.fail "Expected the Integer constructor to match."
it "should be able to match on the Decimal type" <|
case 1.7 of
Decimal -> Nothing
_ -> Test.fail "Expected a decimal to match."
case Decimal of
Decimal -> Nothing
_ -> Test.fail "Expected the Decimal constructor to match."
it "should be able to match on the Number type" <|
case 1 of
Number -> Nothing
_ -> Test.fail "Expected a number to match."
case 100000000000000000000000000000000000 of
Number -> Nothing
_ -> Test.fail "Expected a number to match."
case 1.7 of
Number -> Nothing
_ -> Test.fail "Expected a number to match."
case Number of
Number -> Nothing
_ -> Test.fail "Expected the Number constructor to match."
it "should be able to match on the Text type" <|
case "foo" of
Text -> Nothing
_ -> Test.fail "Expected a text to match."
case Text of
Text -> Nothing
_ -> Test.fail "Expected the Text constructor to match."
it "should be able to match on the Array type" <|
case [].to_array of
Array -> Nothing
_ -> Test.fail "Expected an array to match."
case Array of
Array -> Nothing
_ -> Test.fail "Expected the Array constructor to match."
it "should be able to match on the Polyglot type" <|
random_gen = Random.new [].to_array
case random_gen of
Polyglot -> Nothing
_ -> Test.fail "Expected a polyglot object to match."
case Polyglot of
Polyglot -> Nothing
_ -> Test.fail "Expected the Polyglot constructor to match."