JSON Library (#1241)

This commit is contained in:
Marcin Kostrzewa 2020-10-23 14:16:48 +02:00 committed by GitHub
parent 746521f8b2
commit c0de753d95
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
36 changed files with 1074 additions and 146 deletions

View File

@ -896,7 +896,7 @@ lazy val runtime = (project in file("engine/runtime"))
logBuffered in Test := false,
scalacOptions += "-Ymacro-annotations",
scalacOptions ++= Seq("-Ypatmat-exhaust-depth", "off"),
libraryDependencies ++= circe ++ jmh ++ jaxb ++ Seq(
libraryDependencies ++= jmh ++ jaxb ++ Seq(
"com.chuusai" %% "shapeless" % shapelessVersion,
"org.apache.commons" % "commons-lang3" % commonsLangVersion,
"org.apache.tika" % "tika-core" % tikaVersion,

View File

@ -0,0 +1,119 @@
from Base import all
import Base.Data.Json.Internal
## Represents a JSON structure.
type Json
type Object fields
type Array items
type String value
type Number value
type Boolean value
type Null
## Marshalls this JSON into an arbitrary value described by
`type_descriptor`.
The type descriptor is a fully-applied type, describing all required
sub-types. It can either be an Atom or one of the primitive types
(`Number`, `Text`, `Boolean`, `Vector`).
> Example
The following shows an example of reading a nested JSON into a desired
type. It will return a vector of `Book` objects containing data from
`json_string`.
type Book title author
type Author name year_of_birth
read_data =
json_string = '''
[
{
"title": "Lord of the Rings",
"author": {
"name": "J. R. R. Tolkien",
"year_of_birth": 1892
}
},
{
"title": "The Little Prince",
"author": {
"name": "Antoine de Saint-Exupéry",
"year_of_birth": 1900
}
},
{
"title": "And Then There Were None",
"author": {
"name": "Agatha Christie",
"year_of_birth": 1890
}
}
]
parsed = Json.parse json_string
parsed.into (Vector (Book title=Text (Author name=Text year_of_birth=Number)))
into : Any -> Any ! Marshalling_Error
into type_descriptor =
Panic.recover (Internal.into_helper type_descriptor this)
## Returns this Json object.
Included to implement the `to_json` interface.
to_json : Json
to_json = this
## Renders this JSON into an RFC-8259 compliant text.
to_text : Text
to_text = Internal.render_helper this
## A failure indicating malformed text input into the JSON parser.
Check the `message` field for detailed information on the specific failure.
type Parse_Error message
## Parses an RFC-8259 compliant JSON text into a `Json` structure.
parse : Text -> Json ! Parse_Error
parse json_text =
r = Panic.recover (Internal.parse_helper json_text)
r.catch <| case _ of
Polyglot_Error err -> Error.throw (Parse_Error (err.getMessage []))
p -> Panic.throw p
## A failure indicating the inability to marshall a `Json` object into the
specified format.
type Marshalling_Error
## The `json` object could not be converted into `format`, due to a type
mismatch.
This can occur e.g. when trying to reinterpret a number as a `Text`, etc.
type Type_Mismatch_Error json format
## The `json` object could not be converted into `format`, due to a field
missing in the `json` structure.
This can occur when trying to reinterpret a JSON object into an atom,
when the JSON does not contain all the fields required by the atom.
type Missing_Field_Error json field format
## Generically converts an atom into a JSON object.
The input atom is converted into a JSON object, with a `"type"` field set to
the atom's type name and all other fields serialized with their name as
object key and the value as the object value.
Any.to_json =
# Reflect on `this` to find out which kind of runtime entity it is.
m = Meta.meta this
case m of
# A fully applied atom: serialize every field, then tag with the type name.
Meta.Atom _ ->
cons = Meta.Constructor m.constructor
fs = m.fields
fnames = cons.fields
# Pairs each field name with the JSON form of the field value at the same
# index. Note that the lambda parameter `m` (the map being built) shadows
# the outer `m` (the meta object) inside the fold body.
json_fs = 0.upto fnames.length . fold Map.empty m-> i->
m.insert (fnames.at i) (fs.at i . to_json)
# The "type" key is inserted after the fields.
# NOTE(review): presumably this overrides a field that is itself named
# `type` - confirm against `Map.insert` semantics.
with_tp = json_fs . insert "type" (String cons.name)
Object with_tp
# A bare constructor: an object carrying only the "type" key.
Meta.Constructor _ ->
Object (Map.empty . insert "type" (String m.name))
## The following two cases cannot be handled generically and should
instead define their own `to_json` implementations.
Meta.Polyglot _ -> Null
Meta.Primitive _ -> Null

View File

@ -0,0 +1,170 @@
from Base import all hiding Number, Boolean, Array
from Base.Data.Json import all
polyglot java import org.enso.base.json.Parser
polyglot java import org.enso.base.json.Printer
## A JSON parser event consumer, passed to the Java parser backend.
Conforms to the `org.enso.base.json.Parser.JsonConsumer` Java interface.
type Consumer
type Consumer child_consumer value
## Helper for handling "value emitted" events.
on_value v = case Ref.get this.child_consumer of
Nil -> Ref.put this.value v
cons -> cons.on_value v
## Closes the child consumer and either sets the current consumer to its
parent, or takes its returned value as the final result of parsing.
seal_child =
child = Ref.get this.child_consumer
val = child.seal
case child.parent of
Nil ->
Ref.put this.value val
p ->
Ref.put this.child_consumer p
p.on_value val
## Consumes the `start_object` event.
on_start_object =
parent = Ref.get this.child_consumer
Ref.put this.child_consumer (here.mk_object_consumer parent)
## Consumes the `key` event.
on_key k = Ref.get this.child_consumer . on_key k
## Consumes the `end_object` event.
on_end_object = this.seal_child
## Consumes the `start_array` event.
on_start_array =
parent = Ref.get this.child_consumer
Ref.put this.child_consumer (here.mk_array_consumer parent)
## Consumes the `end_array` event.
on_end_array = this.seal_child
## Consumes the `long` event.
on_long n = this.on_value (Number n)
## Consumes the `double` event.
on_double n = this.on_value (Number n)
## Consumes the `string` event.
on_string s = this.on_value (String s)
## Consumes the `true` event.
on_true = this.on_value (Boolean True)
## Consumes the `false` event.
on_false = this.on_value (Boolean False)
## Consumes the `null` event.
on_null = this.on_value Null
## A child consumer, used to process events inside arrays.
type Array_Consumer
type Array_Consumer builder parent
## Consumes a value.
on_value v = this.builder.append v
## Returns the final value built by this consumer.
seal =
vec = this.builder.to_vector
Array vec
## A child consumer, used to process events inside objects.
type Object_Consumer
type Object_Consumer last_key map parent
## Consumes a key.
on_key k = Ref.put this.last_key k
## Consumes a value.
on_value v =
k = Ref.get this.last_key
m = Ref.get this.map
new_m = m.insert k v
Ref.put this.map new_m
## Returns the final value built by this consumer.
seal =
m = Ref.get this.map
Object m
## Creates a new object consumer with the given parent.
mk_object_consumer parent =
k = Ref.new ""
m = Ref.new Map.empty
Object_Consumer k m parent
## Creates a new array consumer with the given parent.
mk_array_consumer parent =
bldr = Vector.new_builder
Array_Consumer bldr parent
## Creates a new top-level consumer.
mk_consumer =
child = Ref.new Nil
val = Ref.new Unit
Consumer child val
## Helper method for printing JSON values to Text.
render_helper json = case json of
Object fields ->
fs = fields.to_vector.map item->
key = item.at 0
value = item.at 1
value_json = here.render_helper value
key_json = Printer.json_escape [key]
key_json + ":" + value_json
"{" + (fs.join ",") + "}"
Array items ->
its = items.map here.render_helper
"[" + (its.join ",") + "]"
String value -> Printer.json_escape [value]
Number value -> value.to_text
Boolean value -> if value then "true" else "false"
Null -> "null"
## Helper method for converting JSON objects into arbitrary types.
See `Json.into` for semantics documentation.
# Arguments:
# - fmt: the type descriptor to convert into.
# - json: the JSON value being converted.
# Every failure is reported with `Panic.throw`; `Json.into` wraps the call to
# this helper in `Panic.recover`.
into_helper fmt json = case fmt of
# Vector descriptor: the JSON must be an array; every item is converted with
# the element descriptor `field`.
Base.Vector field -> case json of
Array items -> items.map (here.into_helper field)
_ -> Panic.throw (Type_Mismatch_Error json fmt)
# Primitive descriptors: unwrap the matching JSON constructor, anything else
# is a type mismatch.
Base.Boolean -> case json of
Boolean v -> v
_ -> Panic.throw (Type_Mismatch_Error json fmt)
Base.Number -> case json of
Number v -> v
_ -> Panic.throw (Type_Mismatch_Error json fmt)
Base.Text -> case json of
String v -> v
_ -> Panic.throw (Type_Mismatch_Error json fmt)
# Any other descriptor is inspected reflectively; only atoms are supported.
_ ->
m = Meta.meta fmt
case m of
Meta.Atom _ -> case json of
Object json_fields ->
cons = Meta.Constructor m.constructor
# `fnames` are the constructor's field names, `ffmts` are the values stored
# in the descriptor atom's fields, i.e. the nested descriptors.
fnames = cons.fields
ffmts = m.fields
# Convert each field: look its name up in the JSON object and recurse with
# the matching nested descriptor.
field_values = zip fnames ffmts n-> inner_fmt->
# A failed lookup is reported as a missing field of the whole object.
fjson = json_fields . get n . catch _->
Panic.throw (Missing_Field_Error json fmt n)
here.into_helper inner_fmt fjson
# Build the resulting atom from the converted field values.
cons.new field_values
_ -> Panic.throw (Type_Mismatch_Error json fmt)
_ -> Panic.throw (Type_Mismatch_Error json fmt)
## Helper used to parse text into a JSON value.
parse_helper : Text -> Json ! Polyglot_Error
parse_helper json_text =
consumer = here.mk_consumer
Parser.parse [json_text, consumer]
Ref.get consumer.value

View File

@ -8,16 +8,18 @@ import Base.Meta.Meta
import Base.Error.Extensions
import Base.Polyglot.Java
import Base.Data.Map
from Builtins import Unit, Number, Integer, Any, True, False, Cons
import Base.Data.Json
from Builtins import Unit, Number, Integer, Any, True, False, Cons, Boolean
export Base.Meta.Meta
from Builtins export all hiding Meta
export Base.Data.Map
export Base.Data.Json
from Base.Meta.Enso_Project export all
from Base.List export Nil, Cons
from Base.Vector export Vector
from Base.Number.Extensions export all hiding Math
from Base.Number.Extensions export all hiding Math, String
from Base.Text.Extensions export Text
from Base.Error.Extensions export all
from Base.Polyglot.Java export all
@ -95,3 +97,6 @@ Any.== that = if Meta.is_same_object this that then True else
Therefore, there is no more cases to handle in this method.
_ -> False
## Boolean to JSON conversion.
Boolean.to_json : Json.Boolean
Boolean.to_json = Json.Boolean this

View File

@ -91,3 +91,7 @@ Number.min that = if this < that then this else that
## Returns the larger value of `this` and `that`.
Number.max : Number -> Number
Number.max that = if this > that then this else that
## Number to JSON conversion.
Number.to_json : Json.Number
Number.to_json = Json.Number this

View File

@ -38,7 +38,7 @@ type Verbs
equal subject argument =
if subject == argument then Success else
msg = this.to_text + " did not equal " + argument.to_text + "."
msg = subject.to_text + " did not equal " + argument.to_text + "."
here.fail msg
be subject argument = this.equal subject argument

View File

@ -113,3 +113,11 @@ Text.from_codepoints codepoints = Text_Utils.from_codepoints [codepoints.to_arra
## Checks whether `this` starts with `prefix`.
Text.starts_with : Text -> Boolean
Text.starts_with prefix = Text_Utils.starts_with [this, prefix]
## Checks whether `this` contains `sequence` as its substring.
Text.contains : Text -> Boolean
Text.contains sequence = Text_Utils.contains [this, sequence]
## Text to JSON conversion.
Text.to_json : Json.String
Text.to_json = Json.String this

View File

@ -206,6 +206,10 @@ type Vector
len = min this.length that.length
Vector.new len i-> function (this.at i) (that.at i)
## Vector to JSON conversion.
to_json : Json.Array
to_json = Json.Array (this.map to_json)
## A builder type for Enso vectors.

View File

@ -423,7 +423,7 @@ public abstract class MethodResolverNode extends Node {
@CompilerDirectives.TruffleBoundary
Function resolveMethodOnArray(Context context, UnresolvedSymbol symbol) {
return symbol.resolveFor(
context.getBuiltins().array().constructor(), context.getBuiltins().any());
context.getBuiltins().mutable().constructor(), context.getBuiltins().any());
}
@CompilerDirectives.TruffleBoundary

View File

@ -1,4 +1,4 @@
package org.enso.interpreter.node.expression.builtin.array;
package org.enso.interpreter.node.expression.builtin.mutable;
import com.oracle.truffle.api.nodes.Node;
import org.enso.interpreter.dsl.BuiltinMethod;

View File

@ -1,4 +1,4 @@
package org.enso.interpreter.node.expression.builtin.array;
package org.enso.interpreter.node.expression.builtin.mutable;
import com.oracle.truffle.api.nodes.Node;
import org.enso.interpreter.dsl.BuiltinMethod;

View File

@ -0,0 +1,16 @@
package org.enso.interpreter.node.expression.builtin.mutable;
import com.oracle.truffle.api.nodes.Node;
import org.enso.interpreter.dsl.BuiltinMethod;
import org.enso.interpreter.runtime.data.Ref;
/** Builtin method node registered as {@code get} on the {@code Ref} type. */
@BuiltinMethod(
type = "Ref",
name = "get",
description = "Gets the value stored in the reference.")
public class GetRefNode extends Node {
/**
 * Reads the value currently held by the given reference.
 *
 * @param _this the receiver of the method call; not used by this node.
 * @param ref the reference to read from.
 * @return whatever {@link Ref#getValue()} returns for {@code ref}.
 */
Object execute(Object _this, Ref ref) {
return ref.getValue();
}
}

View File

@ -1,4 +1,4 @@
package org.enso.interpreter.node.expression.builtin.array;
package org.enso.interpreter.node.expression.builtin.mutable;
import com.oracle.truffle.api.nodes.Node;
import org.enso.interpreter.dsl.BuiltinMethod;

View File

@ -1,4 +1,4 @@
package org.enso.interpreter.node.expression.builtin.array;
package org.enso.interpreter.node.expression.builtin.mutable;
import com.oracle.truffle.api.nodes.Node;
import org.enso.interpreter.dsl.BuiltinMethod;

View File

@ -1,4 +1,4 @@
package org.enso.interpreter.node.expression.builtin.array;
package org.enso.interpreter.node.expression.builtin.mutable;
import com.oracle.truffle.api.nodes.Node;
import org.enso.interpreter.dsl.BuiltinMethod;

View File

@ -1,4 +1,4 @@
package org.enso.interpreter.node.expression.builtin.array;
package org.enso.interpreter.node.expression.builtin.mutable;
import com.oracle.truffle.api.nodes.Node;
import org.enso.interpreter.dsl.BuiltinMethod;

View File

@ -1,4 +1,4 @@
package org.enso.interpreter.node.expression.builtin.array;
package org.enso.interpreter.node.expression.builtin.mutable;
import com.oracle.truffle.api.nodes.Node;
import org.enso.interpreter.dsl.BuiltinMethod;

View File

@ -1,4 +1,4 @@
package org.enso.interpreter.node.expression.builtin.array;
package org.enso.interpreter.node.expression.builtin.mutable;
import com.oracle.truffle.api.nodes.Node;
import org.enso.interpreter.dsl.BuiltinMethod;

View File

@ -0,0 +1,14 @@
package org.enso.interpreter.node.expression.builtin.mutable;
import com.oracle.truffle.api.nodes.Node;
import org.enso.interpreter.dsl.BuiltinMethod;
import org.enso.interpreter.runtime.data.Array;
import org.enso.interpreter.runtime.data.Ref;
@BuiltinMethod(type = "Ref", name = "new", description = "Creates an empty ref.")
public class NewRefNode extends Node {
Object execute(Object _this, Object value) {
return new Ref(value);
}
}

View File

@ -0,0 +1,15 @@
package org.enso.interpreter.node.expression.builtin.mutable;
import com.oracle.truffle.api.nodes.Node;
import org.enso.interpreter.dsl.BuiltinMethod;
import org.enso.interpreter.runtime.data.Ref;
/** Builtin method node registered as {@code put} on the {@code Ref} type. */
@BuiltinMethod(type = "Ref", name = "put", description = "Stores a new value in the reference.")
public class PutRefNode extends Node {
/**
 * Replaces the value held by the given reference.
 *
 * @param _this the receiver of the method call; not used by this node.
 * @param ref the reference to update.
 * @param new_value the value to store in {@code ref}.
 * @return the value that {@code ref} held before the update.
 */
Object execute(Object _this, Ref ref, Object new_value) {
// Read the previous contents first so they can be handed back to the caller.
Object old = ref.getValue();
ref.setValue(new_value);
return old;
}
}
}

View File

@ -1,4 +1,4 @@
package org.enso.interpreter.node.expression.builtin.array;
package org.enso.interpreter.node.expression.builtin.mutable;
import com.oracle.truffle.api.nodes.Node;
import org.enso.interpreter.dsl.BuiltinMethod;

View File

@ -1,4 +1,4 @@
package org.enso.interpreter.node.expression.builtin.array;
package org.enso.interpreter.node.expression.builtin.mutable;
import com.oracle.truffle.api.nodes.Node;
import org.enso.interpreter.dsl.BuiltinMethod;

View File

@ -1,15 +0,0 @@
package org.enso.interpreter.node.expression.builtin.text;
import com.oracle.truffle.api.CompilerDirectives;
import com.oracle.truffle.api.nodes.Node;
import org.enso.interpreter.dsl.BuiltinMethod;
import org.enso.interpreter.runtime.builtin.LanguageEntitySerializer;
import org.enso.interpreter.runtime.data.text.Text;
@BuiltinMethod(type = "Any", name = "json_serialize", description = "Generic JSON serialization.")
public class JsonSerializeNode extends Node {
@CompilerDirectives.TruffleBoundary
Text execute(Object _this) {
return Text.create(LanguageEntitySerializer.serialize(_this));
}
}

View File

@ -18,7 +18,6 @@ import org.enso.interpreter.node.expression.builtin.state.GetStateMethodGen;
import org.enso.interpreter.node.expression.builtin.state.PutStateMethodGen;
import org.enso.interpreter.node.expression.builtin.state.RunStateMethodGen;
import org.enso.interpreter.node.expression.builtin.text.AnyToTextMethodGen;
import org.enso.interpreter.node.expression.builtin.text.JsonSerializeMethodGen;
import org.enso.interpreter.node.expression.builtin.thread.WithInterruptHandlerMethodGen;
import org.enso.interpreter.node.expression.builtin.unsafe.SetAtomFieldMethodGen;
import org.enso.interpreter.runtime.Context;
@ -51,7 +50,7 @@ public class Builtins {
private final Error error;
private final Bool bool;
private final System system;
private final Array array;
private final Mutable mutable;
private final Polyglot polyglot;
private final Resource resource;
private final Meta meta;
@ -70,7 +69,7 @@ public class Builtins {
any = new AtomConstructor("Any", scope).initializeFields();
bool = new Bool(language, scope);
error = new Error(language, scope);
array = new Array(language, scope);
mutable = new Mutable(language, scope);
function = new AtomConstructor("Function", scope).initializeFields();
text = new Text(language, scope);
debug = new AtomConstructor("Debug", scope).initializeFields();
@ -147,7 +146,6 @@ public class Builtins {
scope.registerMethod(function, "<|", ApplicationOperatorMethodGen.makeFunction(language));
scope.registerMethod(any, "to_text", AnyToTextMethodGen.makeFunction(language));
scope.registerMethod(any, "json_serialize", JsonSerializeMethodGen.makeFunction(language));
scope.registerMethod(java, "add_to_class_path", AddToClassPathMethodGen.makeFunction(language));
scope.registerMethod(java, "lookup_class", LookupClassMethodGen.makeFunction(language));
@ -234,9 +232,9 @@ public class Builtins {
return system;
}
/** @return the container for array-related builtins. */
public Array array() {
return array;
/** @return the container for mutable memory related builtins. */
public Mutable mutable() {
return mutable;
}
/** @return the container for polyglot-related builtins. */

View File

@ -1,12 +1,12 @@
package org.enso.interpreter.runtime.builtin;
import org.enso.interpreter.Language;
import org.enso.interpreter.node.expression.builtin.array.*;
import org.enso.interpreter.node.expression.builtin.mutable.*;
import org.enso.interpreter.runtime.callable.atom.AtomConstructor;
import org.enso.interpreter.runtime.scope.ModuleScope;
/** Container for builtin array-related types and functions. */
public class Array {
public class Mutable {
private final AtomConstructor array;
/**
@ -15,7 +15,7 @@ public class Array {
* @param language the current language instance.
* @param scope the scope for builtin methods.
*/
public Array(Language language, ModuleScope scope) {
public Mutable(Language language, ModuleScope scope) {
array = new AtomConstructor("Array", scope).initializeFields();
scope.registerConstructor(array);
scope.registerMethod(array, "empty", EmptyMethodGen.makeFunction(language));
@ -28,6 +28,12 @@ public class Array {
scope.registerMethod(array, "to_array", ToArrayMethodGen.makeFunction(language));
scope.registerMethod(array, "at", GetAtMethodGen.makeFunction(language));
scope.registerMethod(array, "set_at", SetAtMethodGen.makeFunction(language));
AtomConstructor ref = new AtomConstructor("Ref", scope).initializeFields();
scope.registerConstructor(ref);
scope.registerMethod(ref, "new", NewRefMethodGen.makeFunction(language));
scope.registerMethod(ref, "get", GetRefMethodGen.makeFunction(language));
scope.registerMethod(ref, "put", PutRefMethodGen.makeFunction(language));
}
/** @return the Array constructor. */

View File

@ -0,0 +1,31 @@
package org.enso.interpreter.runtime.data;
import com.oracle.truffle.api.interop.TruffleObject;
/** A mutable reference cell: it holds a single value that can be read and replaced. */
public class Ref implements TruffleObject {
  private Object contents;

  /**
   * Creates a new reference.
   *
   * @param value the initial value to store in the reference.
   */
  public Ref(Object value) {
    contents = value;
  }

  /** @return the current value of the reference. */
  public Object getValue() {
    return contents;
  }

  /**
   * Stores a new value in the reference, discarding the previous one.
   *
   * @param value the value to store.
   */
  public void setValue(Object value) {
    contents = value;
  }
}

View File

@ -12,6 +12,7 @@ import org.enso.interpreter.runtime.callable.atom.AtomConstructor;
import org.enso.interpreter.runtime.callable.function.Function;
import org.enso.interpreter.runtime.data.Array;
import org.enso.interpreter.runtime.data.ManagedResource;
import org.enso.interpreter.runtime.data.Ref;
import org.enso.interpreter.runtime.data.text.Text;
import org.enso.interpreter.runtime.error.RuntimeError;
import org.enso.interpreter.runtime.number.EnsoBigInteger;
@ -41,7 +42,8 @@ import org.enso.interpreter.runtime.scope.ModuleScope;
Array.class,
EnsoBigInteger.class,
ManagedResource.class,
ModuleScope.class
ModuleScope.class,
Ref.class
})
public class Types {

View File

@ -1,37 +0,0 @@
package org.enso.interpreter.runtime.builtin
import com.oracle.truffle.api.interop.InteropLibrary
import io.circe.Json
import org.enso.interpreter.runtime.callable.atom.{Atom, AtomConstructor}
/** Helper for JSON-serializing runtime entities of the language.
*/
object LanguageEntitySerializer {
private val interopLibrary: InteropLibrary = InteropLibrary.getUncached()
/** Serializes a language entity into a JSON string. Returns null JSON for
* unexpected entities.
*
* @param obj any object representing an Enso language entity.
* @return the JSON string representing `obj` or `"null"` if the object
* is not a serializable language entity.
*/
final def serialize(obj: Object): String = toJson(obj).noSpaces
private def toJson(obj: Any): Json = obj match {
case l: Long => Json.fromLong(l)
case cons: AtomConstructor =>
Json.obj("type" -> Json.fromString(cons.getName), "fields" -> Json.arr())
case atom: Atom =>
Json.obj(
"type" -> Json.fromString(atom.getConstructor.getName),
"fields" -> Json.arr(atom.getFields.map(toJson).toIndexedSeq: _*)
)
case _ =>
if (interopLibrary.isString(obj)) {
Json.fromString(interopLibrary.asString(obj))
} else {
Json.Null
}
}
}

View File

@ -1,68 +0,0 @@
package org.enso.std.test
import org.enso.interpreter.test.{InterpreterContext, InterpreterTest}
class JsonSerializationTest extends InterpreterTest {
override def subject = "Automatic JSON serialization"
override def specify(implicit
interpreterContext: InterpreterContext
): Unit = {
"support strings" in {
val code =
"""
|main = "it's a \"string\"" . json_serialize
|""".stripMargin
eval(code) shouldEqual "\"it's a \\\"string\\\"\""
}
"support nubmers" in {
val code =
"""
|main = 1234 . json_serialize
|""".stripMargin
eval(code) shouldEqual "1234"
}
"support atoms" in {
val code =
"""from Builtins import all
|
|type X a b c
|
|main = X 123 "foo" Unit . json_serialize
|""".stripMargin
eval(
code
) shouldEqual """{"type":"X","fields":[123,"foo",{"type":"Unit","fields":[]}]}"""
}
"support functions" in {
val code =
"""
|main = (x -> x).json_serialize
|""".stripMargin
eval(code) shouldEqual "null"
}
"support nested types" in {
val code =
"""from Builtins import all
|
|main =
| test_val = Cons 1 (Cons "\"foo\"" (Cons Unit (Cons (x -> x) Nil)))
| test_val.json_serialize
|""".stripMargin
val expectedResult =
"""{"type":"Cons","fields":[1,{"type":"Cons","fields":["\"foo\"",{"type":
|"Cons","fields":[{"type":"Unit","fields":[]},{"type":"Cons","fields":
|[null,{"type":"Nil","fields":[]}]}]}]}]}""".stripMargin.linesIterator
.mkString("")
eval(code) shouldEqual expectedResult
}
}
}

View File

@ -106,4 +106,15 @@ public class Text_Utils {
public static boolean lt(String a, String b) {
return a.compareTo(b) < 0;
}
/**
 * Checks if {@code substring} occurs anywhere inside {@code string}.
 *
 * @param string the containing string.
 * @param substring the contained string.
 * @return whether {@code substring} is a substring of {@code string}.
 */
public static boolean contains(String string, String substring) {
  // A non-negative index means the sequence was found somewhere in `string`.
  final int firstOccurrence = string.indexOf(substring);
  return firstOccurrence >= 0;
}
}

View File

@ -0,0 +1,449 @@
package org.enso.base.json;
import java.util.ArrayDeque;
import java.util.Deque;
public class Parser {
/** An exception thrown when an unexpected token is encountered in JSON. */
public static class UnexpectedTokenException extends RuntimeException {
/**
* Creates a new instance of this error. The exception message names both the position and the
* expectation.
*
* @param position the position in input where the exception occurred.
* @param expected a description of expected tokens.
*/
public UnexpectedTokenException(int position, String expected) {
super("Unexpected token at position " + position + ". Expected " + expected + ".");
}
}
/** An exception thrown when the input ends unexpectedly. */
public static class UnexpectedEndOfInputException extends RuntimeException {
/** Creates a new instance of this error, with a message that names no specific expectation. */
public UnexpectedEndOfInputException() {
super("Unexpected end of input.");
}
/**
* Creates a new instance of this error, with a message that also names what was expected.
*
* @param expected a description of expected tokens.
*/
public UnexpectedEndOfInputException(String expected) {
super("Unexpected end of input. Expected " + expected + ".");
}
}
/**
* A consumer of parsing events. Called iteratively, whenever one of the events occurs in parsing.
* An event may either denote a parsed value or a start or end of a new nesting level.
*/
public interface JsonConsumer {
/** Reported when an opening brace starts a new object. */
void on_start_object();
/**
* Reported when an object key has been read.
*
* @param name the key, with escape sequences already decoded.
*/
void on_key(String name);
/** Reported when a closing brace ends the current object. */
void on_end_object();
/** Reported when an opening bracket starts a new array. */
void on_start_array();
/** Reported when a closing bracket ends the current array. */
void on_end_array();
/**
* Reports a floating point number value. NOTE(review): presumably emitted by the number parsing
* code, which is outside the part of the file reviewed here - confirm.
*
* @param n the parsed value.
*/
void on_double(double n);
/**
* Reports an integer number value. NOTE(review): presumably emitted by the number parsing code,
* which is outside the part of the file reviewed here - confirm.
*
* @param n the parsed value.
*/
void on_long(long n);
/**
* Reported when a string value (as opposed to an object key) has been read.
*
* @param str the string contents, with escape sequences already decoded.
*/
void on_string(String str);
/** Reported when the literal {@code true} has been read. */
void on_true();
/** Reported when the literal {@code false} has been read. */
void on_false();
/** Reported when the literal {@code null} has been read. */
void on_null();
}
/**
 * The expectations the parser can have about the next token. {@code parse} keeps a stack of these
 * and dispatches on the top one in each iteration.
 */
private enum State {
// Any JSON value may start here.
ANY,
// Directly after an opening bracket: a closing bracket or the first element.
ARRAY_END_OR_VALUE,
// After an array element: a closing bracket or a comma.
ARRAY_END_OR_COMMA,
// After a comma inside an array: an element is required.
ARRAY_VALUE,
// Directly after an opening brace: a closing brace or the first key.
OBJECT_KEY_OR_END,
// After a key and its colon: the member value is required.
OBJECT_VALUE,
// After a member value: a closing brace or a comma.
OBJECT_END_OR_COMMA,
// After a comma inside an object: a key is required.
OBJECT_KEY
}
/**
 * Parses a JSON string, iteratively calling the provided consumer on each JSON event.
 *
 * <p>Note that this parser internally checks the integrity of the parsed JSON, therefore it is
 * guaranteed that no invalid sequences of events can be reported in the consumer. In case an
 * invalid sequence of characters is encountered, an {@link UnexpectedEndOfInputException} or
 * {@link UnexpectedTokenException} is thrown instead.
 *
 * @param jsonString the string to parse.
 * @param consumer the consumer for reported events.
 */
public static void parse(String jsonString, JsonConsumer consumer) {
  final char[] input = jsonString.toCharArray();
  // Stack of expectations that still have to be satisfied; parsing of the value is complete
  // once it drains.
  final Deque<State> pending = new ArrayDeque<>();
  pending.push(State.ANY);
  int cursor = 0;

  while (!pending.isEmpty()) {
    final State expectation = pending.pop();
    cursor = consumeWhiteSpace(input, cursor);
    // NOTE(review): assertInput is defined outside the reviewed part of the file; presumably it
    // rejects a cursor that has run past the end of the input - confirm.
    assertInput(input, cursor);
    switch (expectation) {
      case ANY:
        cursor = consumeAny(input, cursor, consumer, pending);
        break;
      case ARRAY_END_OR_VALUE:
        cursor = consumeArrayEndOrValue(input, cursor, consumer, pending);
        break;
      case ARRAY_END_OR_COMMA:
        cursor = consumeArrayEndOrComa(input, cursor, consumer, pending);
        break;
      case ARRAY_VALUE:
        // Schedule the follow-up expectation first: a nested value pushes its own states on top.
        pending.push(State.ARRAY_END_OR_COMMA);
        cursor = consumeAny(input, cursor, consumer, pending);
        break;
      case OBJECT_KEY_OR_END:
        cursor = consumeObjectKeyOrEnd(input, cursor, consumer, pending);
        break;
      case OBJECT_VALUE:
        // Same ordering as for array values.
        pending.push(State.OBJECT_END_OR_COMMA);
        cursor = consumeAny(input, cursor, consumer, pending);
        break;
      case OBJECT_END_OR_COMMA:
        cursor = consumeObjectEndOrComma(input, cursor, consumer, pending);
        break;
      case OBJECT_KEY:
        cursor = consumeObjectKey(input, cursor, consumer, pending);
        break;
    }
  }

  // Nothing but whitespace may follow the top-level value.
  cursor = consumeWhiteSpace(input, cursor);
  if (cursor < input.length) {
    throw new UnexpectedTokenException(cursor, "end of input");
  }
}
/**
 * Handles the token that follows an object member value: a closing brace ends the object, a comma
 * schedules the next key.
 *
 * @param chars the input being parsed.
 * @param position the index of the token to inspect.
 * @param consumer the consumer notified when the object ends.
 * @param state the stack of pending parser expectations.
 * @return the index just past the consumed token.
 * @throws UnexpectedTokenException when the token is neither a closing brace nor a comma.
 */
private static int consumeObjectEndOrComma(
    char[] chars, int position, JsonConsumer consumer, Deque<State> state) {
  switch (chars[position]) {
    case '}':
      consumer.on_end_object();
      return position + 1;
    case ',':
      state.push(State.OBJECT_KEY);
      return position + 1;
    default:
      throw new UnexpectedTokenException(position, "a comma or a closing brace");
  }
}
/**
 * Reads an object key together with the colon that follows it, and schedules the member value.
 *
 * @param chars the input being parsed.
 * @param position the index at which the key is expected to start.
 * @param consumer the consumer that receives the key.
 * @param state the stack of pending parser expectations.
 * @return the index just past the colon.
 * @throws UnexpectedTokenException when the key is not followed by a colon.
 */
private static int consumeObjectKey(
    char[] chars, int position, JsonConsumer consumer, Deque<State> state) {
  final int afterKey = consumeString(chars, position, consumer, true);
  state.push(State.OBJECT_VALUE);
  // Whitespace is allowed between the key and the colon.
  final int colonAt = consumeWhiteSpace(chars, afterKey);
  assertInput(chars, colonAt);
  if (chars[colonAt] != ':') {
    throw new UnexpectedTokenException(colonAt, "a colon");
  }
  return colonAt + 1;
}
/**
 * Handles the token that follows an opening brace: either the object is closed immediately, or
 * its first key is read.
 *
 * @param chars the input being parsed.
 * @param position the index of the token to inspect.
 * @param consumer the consumer notified of the key or of the end of the object.
 * @param state the stack of pending parser expectations.
 * @return the index just past the consumed tokens.
 */
private static int consumeObjectKeyOrEnd(
    char[] chars, int position, JsonConsumer consumer, Deque<State> state) {
  if (chars[position] != '}') {
    return consumeObjectKey(chars, position, consumer, state);
  }
  // Empty object.
  consumer.on_end_object();
  return position + 1;
}
/**
 * Handles the token that follows an opening bracket: either the array is closed immediately, or
 * its first element is read.
 *
 * @param chars the input being parsed.
 * @param position the index of the token to inspect.
 * @param consumer the consumer notified of the element or of the end of the array.
 * @param state the stack of pending parser expectations.
 * @return the index just past the consumed tokens.
 */
private static int consumeArrayEndOrValue(
    char[] chars, int position, JsonConsumer consumer, Deque<State> state) {
  final boolean closesArray = chars[position] == ']';
  if (!closesArray) {
    // After the element, either a comma or the closing bracket must follow.
    state.push(State.ARRAY_END_OR_COMMA);
    return consumeAny(chars, position, consumer, state);
  }
  consumer.on_end_array();
  return position + 1;
}
/**
 * Handles the token that follows an array element: a closing bracket ends the array, a comma
 * schedules the next element.
 *
 * @param chars the input being parsed.
 * @param position the index of the token to inspect.
 * @param consumer the consumer notified when the array ends.
 * @param state the stack of pending parser expectations.
 * @return the index just past the consumed token.
 * @throws UnexpectedTokenException when the token is neither a closing bracket nor a comma.
 */
private static int consumeArrayEndOrComa(
    char[] chars, int position, JsonConsumer consumer, Deque<State> state) {
  final char token = chars[position];
  if (token == ']') {
    consumer.on_end_array();
    return position + 1;
  }
  if (token == ',') {
    state.push(State.ARRAY_VALUE);
    return position + 1;
  }
  throw new UnexpectedTokenException(position, "a comma or a closing bracket");
}
/**
 * Starts reading a JSON value of any kind, choosing the specialised routine by looking at the
 * first character of the value.
 *
 * @param chars the input being parsed.
 * @param position the index of the first character of the value.
 * @param consumer the consumer notified of the events produced by the value.
 * @param state the stack of pending parser expectations; arrays and objects push onto it.
 * @return the index just past the consumed part of the value.
 * @throws UnexpectedTokenException when the character cannot start a JSON value.
 */
private static int consumeAny(
    char[] chars, int position, JsonConsumer consumer, Deque<State> state) {
  final char lead = chars[position];
  if (lead == '[') {
    consumer.on_start_array();
    state.push(State.ARRAY_END_OR_VALUE);
    return position + 1;
  }
  if (lead == '{') {
    consumer.on_start_object();
    state.push(State.OBJECT_KEY_OR_END);
    return position + 1;
  }
  if (lead == '"') {
    return consumeString(chars, position, consumer, false);
  }
  // A number starts with a minus sign or an ASCII digit.
  if (lead == '-' || (lead >= '0' && lead <= '9')) {
    return consumeNumber(chars, position, consumer);
  }
  if (lead == 'n') {
    return consumeNull(chars, position, consumer);
  }
  if (lead == 't') {
    return consumeTrue(chars, position, consumer);
  }
  if (lead == 'f') {
    return consumeFalse(chars, position, consumer);
  }
  throw new UnexpectedTokenException(position, "a start of a JSON value");
}
/**
 * Reads the literal {@code null} and reports it to the consumer.
 *
 * @param chars the input being parsed.
 * @param position the index at which the literal is expected to start.
 * @param consumer the consumer notified of the value.
 * @return the index just past the literal.
 * @throws UnexpectedEndOfInputException when fewer than four characters remain.
 * @throws UnexpectedTokenException when the next four characters are not {@code null}.
 */
private static int consumeNull(char[] chars, int position, JsonConsumer consumer) {
  final String literal = "null";
  final int end = position + literal.length();
  // The length check comes first, so a truncated literal is reported as an end of input.
  if (end > chars.length) {
    throw new UnexpectedEndOfInputException("a null");
  }
  if (!literal.equals(new String(chars, position, literal.length()))) {
    throw new UnexpectedTokenException(position, "a null");
  }
  consumer.on_null();
  return end;
}
/**
 * Reads the literal {@code true} and reports it to the consumer.
 *
 * @param chars the input being parsed.
 * @param position the index at which the literal is expected to start.
 * @param consumer the consumer notified of the value.
 * @return the index just past the literal.
 * @throws UnexpectedEndOfInputException when fewer than four characters remain.
 * @throws UnexpectedTokenException when the next four characters are not {@code true}.
 */
private static int consumeTrue(char[] chars, int position, JsonConsumer consumer) {
  final String literal = "true";
  final int end = position + literal.length();
  // The length check comes first, so a truncated literal is reported as an end of input.
  if (end > chars.length) {
    throw new UnexpectedEndOfInputException("a true");
  }
  if (!literal.equals(new String(chars, position, literal.length()))) {
    throw new UnexpectedTokenException(position, "a true");
  }
  consumer.on_true();
  return end;
}
/**
 * Reads the literal {@code false} and reports it to the consumer.
 *
 * @param chars the input being parsed.
 * @param position the index at which the literal is expected to start.
 * @param consumer the consumer notified of the value.
 * @return the index just past the literal.
 * @throws UnexpectedEndOfInputException when fewer than five characters remain.
 * @throws UnexpectedTokenException when the next five characters are not {@code false}.
 */
private static int consumeFalse(char[] chars, int position, JsonConsumer consumer) {
  final String literal = "false";
  final int end = position + literal.length();
  // The length check comes first, so a truncated literal is reported as an end of input.
  if (end > chars.length) {
    throw new UnexpectedEndOfInputException("a false");
  }
  if (!literal.equals(new String(chars, position, literal.length()))) {
    throw new UnexpectedTokenException(position, "a false");
  }
  consumer.on_false();
  return end;
}
/**
 * Reads a double-quoted string, decoding escape sequences, and reports it to the consumer either
 * as an object key or as a string value.
 *
 * @param chars the input being parsed.
 * @param position the index at which the opening quote is expected.
 * @param consumer the consumer that receives the decoded text.
 * @param isKey whether the text is reported through {@code on_key} rather than {@code on_string}.
 * @return the index just past the closing quote.
 * @throws UnexpectedTokenException when the character at {@code position} is not a quote.
 * @throws UnexpectedEndOfInputException when the input ends before the closing quote.
 */
private static int consumeString(
    char[] chars, int position, JsonConsumer consumer, boolean isKey) {
  if (chars[position] != '"') {
    throw new UnexpectedTokenException(position, "a string");
  }
  final StringBuilder text = new StringBuilder();
  int cursor = position + 1;
  while (cursor < chars.length) {
    final char current = chars[cursor];
    if (current == '"') {
      final String decoded = text.toString();
      if (isKey) {
        consumer.on_key(decoded);
      } else {
        consumer.on_string(decoded);
      }
      return cursor + 1;
    }
    if (current == '\\') {
      // The escape routine starts after the backslash and returns the index following the escape.
      cursor = consumeEscape(chars, cursor + 1, text);
    } else {
      text.append(current);
      cursor++;
    }
  }
  throw new UnexpectedEndOfInputException("a closing quote");
}
/**
 * Decodes a single escape sequence. {@code position} points at the character that follows the
 * backslash; the decoded character is appended to {@code builder}.
 *
 * @param chars the input being parsed
 * @param position the index of the escape designator (the character after the backslash)
 * @param builder the buffer receiving the decoded character
 * @return the index of the first character after the escape sequence
 * @throws UnexpectedEndOfInputException if the input ends right after the backslash
 * @throws UnexpectedTokenException if the designator is not a recognised escape character
 */
private static int consumeEscape(char[] chars, int position, StringBuilder builder) {
  if (position >= chars.length) {
    throw new UnexpectedEndOfInputException("an escape sequence");
  }
  final char decoded;
  switch (chars[position]) {
    case '"':
    case '\\':
    case '/':
      // These three escapes stand for themselves.
      decoded = chars[position];
      break;
    case 'b':
      decoded = '\b';
      break;
    case 'f':
      decoded = '\f';
      break;
    case 'n':
      decoded = '\n';
      break;
    case 'r':
      decoded = '\r';
      break;
    case 't':
      decoded = '\t';
      break;
    case 'u':
      // Four hexadecimal digits follow; the helper appends the decoded character itself.
      return consumeHexEscape(chars, position + 1, builder);
    default:
      throw new UnexpectedTokenException(position, "a valid escape character");
  }
  builder.append(decoded);
  return position + 1;
}
/**
 * Reads exactly four hexadecimal digits (either case) starting at {@code position}, combines
 * them into one UTF-16 code unit and appends it to {@code builder}.
 *
 * @param chars the input being parsed
 * @param position the index of the first hexadecimal digit
 * @param builder the buffer receiving the decoded character
 * @return the index of the first character after the four digits
 * @throws UnexpectedEndOfInputException if fewer than four characters remain
 * @throws UnexpectedTokenException if one of the four characters is not a hexadecimal digit
 */
private static int consumeHexEscape(char[] chars, int position, StringBuilder builder) {
  final int end = position + 4;
  if (end > chars.length) {
    throw new UnexpectedEndOfInputException("four hexadecimal digits");
  }
  int codeUnit = 0;
  for (int cursor = position; cursor < end; cursor++) {
    char current = Character.toLowerCase(chars[cursor]);
    int nibble;
    if ('0' <= current && current <= '9') {
      nibble = current - '0';
    } else if ('a' <= current && current <= 'f') {
      nibble = current - 'a' + 10;
    } else {
      throw new UnexpectedTokenException(cursor, "a hexadecimal digit");
    }
    // Four nibbles yield at most 0xFFFF, so the value always fits in a char.
    codeUnit = (codeUnit << 4) | nibble;
  }
  builder.append((char) codeUnit);
  return end;
}
/**
 * Consumes a non-empty run of ASCII decimal digits starting at {@code position} and appends it
 * to {@code bldr}.
 *
 * <p>Only the characters '0' through '9' are accepted. {@code Character.isDigit} is
 * deliberately not used: it also accepts non-ASCII Unicode digits, which are not part of the
 * JSON number grammar and which {@code Double.parseDouble} rejects.
 *
 * @param chars the input being parsed
 * @param position the index at which the first digit is expected
 * @param bldr the buffer receiving the digits
 * @return the index of the first character after the digit run
 * @throws UnexpectedTokenException if there is no ASCII digit at {@code position} (including
 *     when {@code position} is at or past the end of the input)
 */
private static int consumeDigits(char[] chars, int position, StringBuilder bldr) {
  int cursor = position;
  while (cursor < chars.length && chars[cursor] >= '0' && chars[cursor] <= '9') {
    cursor++;
  }
  if (cursor == position) {
    throw new UnexpectedTokenException(position, "a digit");
  }
  // Copy the whole run in one call instead of appending digit by digit.
  bldr.append(chars, position, cursor - position);
  return cursor;
}
/**
 * Consumes a number starting at {@code position}: an optional minus sign, an integer part, an
 * optional fraction and an optional exponent. The collected text is reported through
 * {@code on_long} when it parses as a {@code long}, otherwise through {@code on_double}.
 *
 * @param chars the input being parsed
 * @param position the index of the first character of the number
 * @param consumer the consumer to notify
 * @return the index of the first character after the number
 * @throws UnexpectedEndOfInputException if the input ends right after the exponent marker
 */
private static int consumeNumber(char[] chars, int position, JsonConsumer consumer) {
  StringBuilder literal = new StringBuilder();
  int cursor = position;
  if (chars[cursor] == '-') {
    literal.append('-');
    cursor++;
  }
  cursor = consumeDigits(chars, cursor, literal);

  boolean hasFraction = cursor < chars.length && chars[cursor] == '.';
  if (hasFraction) {
    literal.append('.');
    cursor = consumeDigits(chars, cursor + 1, literal);
  }

  boolean hasExponent =
      cursor < chars.length && Character.toLowerCase(chars[cursor]) == 'e';
  if (hasExponent) {
    // The exponent marker is always recorded in upper case, whatever the input used.
    literal.append('E');
    cursor++;
    if (cursor >= chars.length) {
      throw new UnexpectedEndOfInputException("an exponent");
    }
    char sign = chars[cursor];
    if (sign == '+' || sign == '-') {
      literal.append(sign);
      cursor++;
    }
    cursor = consumeDigits(chars, cursor, literal);
  }

  String text = literal.toString();
  try {
    consumer.on_long(Long.parseLong(text, 10));
  } catch (NumberFormatException e) {
    // Not representable as a long (fraction, exponent or out of range): report a double.
    consumer.on_double(Double.parseDouble(text));
  }
  return cursor;
}
/**
 * Verifies that {@code position} still refers to a character inside {@code chars}.
 *
 * @param chars the input being parsed
 * @param position the index about to be read
 * @throws UnexpectedEndOfInputException if {@code position} is at or past the end of the input
 */
private static void assertInput(char[] chars, int position) {
  boolean hasInput = position < chars.length;
  if (!hasInput) {
    throw new UnexpectedEndOfInputException();
  }
}
/**
 * Skips tabs, line feeds, carriage returns and spaces starting at {@code position}.
 *
 * @param chars the input being parsed
 * @param position the index at which to start skipping
 * @return the index of the first character that is not one of those four, or the input length
 *     if only such characters remain
 */
private static int consumeWhiteSpace(char[] chars, int position) {
  int cursor = position;
  while (cursor < chars.length) {
    char current = chars[cursor];
    boolean blank = current == ' ' || current == '\t' || current == '\n' || current == '\r';
    if (!blank) {
      break;
    }
    cursor++;
  }
  return cursor;
}
}

View File

@ -0,0 +1,50 @@
package org.enso.base.json;
/** Helpers for rendering values as JSON text. */
public class Printer {
  /**
   * Escapes a string into an RFC-8259 compliant format.
   *
   * <p>The result is wrapped in double quotes. Backslash, double quote, backspace, form feed,
   * line feed, carriage return and tab use their two-character escapes. Every other character
   * up to and including 0x1F is written as a unicode escape with exactly four hexadecimal
   * digits, as the JSON grammar requires. All remaining characters are copied unchanged.
   *
   * @param string the string to escape
   * @return the original string with special characters escaped.
   */
  public static String json_escape(String string) {
    // Reserve room for the text plus the two surrounding quotes.
    StringBuilder builder = new StringBuilder(string.length() + 2);
    builder.append('"');
    for (int i = 0; i < string.length(); i++) {
      char ch = string.charAt(i);
      switch (ch) {
        case '\\':
          builder.append("\\\\");
          break;
        case '\"':
          builder.append("\\\"");
          break;
        case '\b':
          builder.append("\\b");
          break;
        case '\f':
          builder.append("\\f");
          break;
        case '\n':
          builder.append("\\n");
          break;
        case '\r':
          builder.append("\\r");
          break;
        case '\t':
          builder.append("\\t");
          break;
        default:
          if (ch <= 0x1F) {
            // Four digits, not eight: a longer escape is not valid JSON. The cast is
            // needed because the X conversion does not accept a Character argument.
            builder.append(String.format("\\u%04X", (int) ch));
          } else {
            builder.append(ch);
          }
      }
    }
    builder.append('"');
    return builder.toString();
  }
}

View File

@ -0,0 +1,13 @@
from Base import all
import Base.Bench_Utils
## Builds the benchmark input: the text of a JSON array made of the `single`
   fragment (two small objects) repeated `size` times, joined with commas and
   wrapped in square brackets.
prep_json size =
single = '{"foo": 543}, {"bar": false}'
many = Vector.new size (_ -> single)
str = "[" + (many.join ",") + "]"
str
## Benchmark entry point: measures `Base.Json.parse` on the text produced by
   `prep_json 1000000`, reported under the label "parse json".
main =
large_json = here.prep_json 1000000
# NOTE(review): the two trailing `10` arguments are presumably iteration and
# warm-up counts - confirm against `Bench_Utils.measure`.
Bench_Utils.measure (Base.Json.parse large_json) "parse json" 10 10

23
test/Test/data/books.json Normal file
View File

@ -0,0 +1,23 @@
[
{
"title": "Lord of the Rings",
"author": {
"name": "J. R. R. Tolkien",
"year_of_birth": 1892
}
},
{
"title": "The Little Prince",
"author": {
"name": "Antoine de Saint-Exupéry",
"year_of_birth": 1900
}
},
{
"title": "And Then There Were None",
"author": {
"name": "Agatha Christie",
"year_of_birth": 1890
}
}
]

View File

@ -0,0 +1,108 @@
from Base import all
import Base.Test
# Domain types used by the tests in this file: JSON is marshalled into them
# with `into` and they are serialized back with `to_json`.
type Author name year_of_birth
type Book title author
## Test verb asserting that `Json.parse subject` fails with a
   `Json.Parse_Error` whose message contains the text `expected`.

   Yields `Test.Success` in that case. A successful parse, a parse error with
   a different message, or any other error produces a `Test.Failure`, which is
   then thrown with `Panic.throw`.
Test.Verbs.fail_parsing_with subject expected =
# Any value that reaches the `case` means parsing succeeded, which is a failure here.
# NOTE(review): presumably an error returned by `Json.parse` propagates through
# the `case` so that `catch` below receives it - confirm.
as_fail = case Json.parse subject of
_ -> Test.Failure "Expected a parse error, but no error reported."
result = as_fail.catch e-> case e of
Json.Parse_Error msg ->
if msg.contains expected then Test.Success else
fail_msg = "The reported message " + msg.to_text + " did not contain " + expected.to_text + "."
Test.Failure fail_msg
_ ->
fail_msg = "Expected a parser error, but " + e.to_text + " was thrown."
Test.Failure fail_msg
case result of
Test.Success -> Test.Success
fail -> Panic.throw fail
## Test verb asserting that parsing the text `subject` yields the same JSON
   value as `expected.to_json`.
Test.Verbs.parse_as subject expected =
Test.Verbs.equal (Json.parse subject) expected.to_json
## Asserts a parse/print round trip: parsing this text as JSON and rendering
   the result with `to_text` must give back exactly this text.
Text.should_render_itself =
Test.Verbs.equal (Json.parse this . to_text) this
## Test specification for the JSON library: deserialization (parsing, error
   reporting, marshalling into domain types) and serialization (rendering to
   text, converting values with `to_json`).
spec =
describe "JSON Deserialization" <|
# Literals, numbers, strings with escapes, arrays and nested objects.
it "should parse JSON structures" <|
"0 ".should parse_as 0
" 123 ".should parse_as 123
"15.0643".should parse_as 15.0643
"32.5E-1".should parse_as 3.25
"32.5E+1".should parse_as 325.0
"32.5e1".should parse_as 325.0
"false".should parse_as False
"true".should parse_as True
"null".should parse_as Json.Null
"[null, null, true, false]".should parse_as <|
[Json.Null, Json.Null, True, False]
"[]".should parse_as []
'[[1, 3], [5 , "foo"], 7, 8]'.should parse_as <|
[[1, 3], [5, 'foo'], 7, 8]
'"foo"'.should parse_as 'foo'
'"foo\\n\\t\\u20AC\\u20AC"'.should parse_as 'foo\n\t€€'
'["foo", "foo\\n\\u00aB", null, true]'.should parse_as <|
["foo", 'foo\n\u{AB}', Json.Null, True]
object_json = '''
{ "foo": "bar",
"baz": ["foo", "x", false],
"y": {"z": null, "w": null} }
object_json.should parse_as <|
foo_v = "bar".to_json
baz_v = ["foo", "x", False].to_json
y_v = Json.Object (Map.empty . insert "z" Json.Null . insert "w" Json.Null)
Json.Object (Map.empty . insert "foo" foo_v . insert "baz" baz_v . insert "y" y_v)
# Each malformed input must produce a parse error containing the given text.
it "should report meaningful parsing errors" <|
"foo".should fail_parsing_with "Expected a false"
"[,]".should fail_parsing_with "Expected a start of a JSON value"
"{,}".should fail_parsing_with "Expected a string"
deep_err = '''
{ "foo": "bar",
"baz": ["foo", "x"", false],
"y": {"z": null, "w": null} }
deep_err.should fail_parsing_with "Expected a comma"
"123 4".should fail_parsing_with "Expected end of input"
# Reads the books.json data file and marshals it into `Book`/`Author` values.
it "should parse and convert JSON into domain model" <|
book_1 = Book "Lord of the Rings" <|
Author "J. R. R. Tolkien" 1892
book_2 = Book "The Little Prince" <|
Author "Antoine de Saint-Exupéry" 1900
book_3 = Book "And Then There Were None" <|
Author "Agatha Christie" 1890
books = [book_1, book_2, book_3]
json_string = (Enso_Project.data / "books.json").read
parsed = Json.parse json_string
domain = parsed.into (Vector (Book title=Text (Author name=Text year_of_birth=Number)))
domain.should equal books
describe "JSON Serialization" <|
# Round trips: each text is parsed and must render back to itself unchanged.
it "should print JSON structures to valid json" <|
"0".should_render_itself
"123".should_render_itself
"15.0643".should_render_itself
"false".should_render_itself
"true".should_render_itself
"null".should_render_itself
"[null,null,true,false]".should_render_itself
"[]".should_render_itself
'[[1,3],[5,"foo"],7,8]'.should_render_itself
'"foo"'.should_render_itself
'"foo\\n\\t\\r\\f\\b\\"\\\\"'.should_render_itself
'["foo","foo\\n",null,true]'.should_render_itself
object_json = '{"baz":["foo","x",false],"foo":"bar","y":{"w":null,"z":null}}'
object_json.should_render_itself
# `to_json` on numbers, vectors and atoms; the expected atom encoding is an
# object with a "type" field plus one entry per field.
it "should convert arbitrary types to JSON" <|
1.to_json.should equal (Json.Number 1)
1.54.to_json.should equal (Json.Number 1.54)
["foo", "bar", "baz"].to_json.should equal <|
(Json.Array [Json.String "foo", Json.String "bar", Json.String "baz"])
Author "Tolkien" 1892 . to_json . should equal <|
n = Json.String "Tolkien"
y = Json.Number 1892
t = Json.String "Author"
fields = Map.empty . insert "type" t . insert "name" n . insert "year_of_birth" y
Json.Object fields

View File

@ -9,6 +9,7 @@ import Test.Semantic.Meta_Spec
import Test.List_Spec
import Test.Data.Map_Spec
import Test.Data.Json_Spec
import Test.Number_Spec
import Test.Process_Spec
import Test.Vector.Spec as Vector_Spec
@ -33,3 +34,4 @@ main = Test.Suite.runMain <|
File_Spec.spec
Meta_Spec.spec
Map_Spec.spec
Json_Spec.spec