Qualified defs (#3785)

Allow qualified names in LHS of type signatures and method definitions.
This commit is contained in:
Kaz Wesley 2022-10-12 10:40:16 -07:00 committed by GitHub
parent 102dd9a790
commit 5668cbcc24
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 456 additions and 309 deletions

12
Cargo.lock generated
View File

@ -2212,6 +2212,18 @@ dependencies = [
"uuid 1.1.2",
]
[[package]]
name = "enso-parser-debug"
version = "0.1.0"
dependencies = [
"enso-metamodel",
"enso-metamodel-lexpr",
"enso-parser",
"enso-reflect",
"lexpr",
"serde",
]
[[package]]
name = "enso-parser-generate-java"
version = "0.1.0"

View File

@ -12,6 +12,7 @@ members = [
"lib/rust/parser/src/syntax/tree/visitor",
"lib/rust/parser/jni",
"lib/rust/parser/generate-java",
"lib/rust/parser/debug",
"lib/rust/profiler/data",
"lib/rust/profiler/demo-data",
"integration-test",

View File

@ -294,7 +294,7 @@ final class TreeToIr {
);
}
case Tree.Function fn -> {
var nameId = buildName(fn, fn.getName());
var nameId = buildName(fn.getName());
/*
case AstView.MethodDefinition(targetPath, name, args, definition) =>
@ -386,7 +386,7 @@ final class TreeToIr {
*/
case Tree.TypeSignature sig -> {
// case AstView.TypeAscription(typed, sig) =>
var methodName = buildName(sig, sig.getVariable());
var methodName = buildName(sig.getVariable());
var methodReference = new IR$Name$MethodReference(
Option.empty(),
methodName,
@ -457,7 +457,7 @@ final class TreeToIr {
yield null;
}
case Tree.TypeSignature sig -> {
var typeName = buildName(sig, sig.getVariable(), false);
var typeName = buildName(sig.getVariable());
var fn = switch (sig.getType()) {
case Tree.OprApp app when "->".equals(app.getOpr().getRight().codeRepr()) -> {
@ -486,7 +486,7 @@ final class TreeToIr {
yield new IR$Type$Ascription(typeName, fn, getIdentifiedLocation(sig), meta(), diag());
}
case Tree.Function fun -> {
var name = buildName(fun, fun.getName(), false);
var name = buildName(fun.getName());
var args = translateArgumentsDefinition(fun.getArgs());
var body = translateExpression(fun.getBody(), false);
@ -685,31 +685,6 @@ final class TreeToIr {
yield new IR$Function$Lambda(args, body, getIdentifiedLocation(tree), true, meta(), diag());
}
}
case "=" -> {
var ap = app.getLhs();
List<IR.DefinitionArgument> args = nil();
while (ap instanceof Tree.App leftApp) {
var isSuspended = false;
var a = new IR$DefinitionArgument$Specified(
buildName(leftApp.getArg()),
Option.empty(),
Option.empty(),
isSuspended,
getIdentifiedLocation(leftApp),
meta(),
diag()
);
args = cons(a, args);
ap = leftApp.getFunc();
}
var name = buildName(ap);
var lhs = translateCallArgument(app.getLhs(), insideTypeSignature);
var rhs = translateExpression(app.getRhs(), insideTypeSignature);
yield new IR$Function$Binding(
(IR.Name)name, args, rhs,
getIdentifiedLocation(app), true, meta(), diag()
);
}
default -> {
var lhs = translateCallArgument(app.getLhs(), insideTypeSignature);
var rhs = translateCallArgument(app.getRhs(), insideTypeSignature);
@ -876,7 +851,7 @@ final class TreeToIr {
case IR.Expression e -> e;
};
case Tree.TypeSignature sig -> {
var methodName = buildName(sig, sig.getVariable());
var methodName = buildName(sig.getVariable());
var methodReference = new IR$CallArgument$Specified(
Option.empty(),
methodName,
@ -1848,7 +1823,7 @@ final class TreeToIr {
}
private IR$Name$Literal buildName(Tree ident) {
return switch (ident) {
case Tree.Ident id -> buildName(id.getToken());
case Tree.Ident id -> buildName(ident, id.getToken(), false);
default -> throw new UnhandledEntity(ident, "buildName");
};
}

View File

@ -506,18 +506,20 @@ public class EnsoCompilerTest {
}
@Test
@Ignore
public void testTypeSignatureQualified() throws Exception {
parseTest("""
type Baz
resolve : Integer -> Column
Foo.resolve : Integer -> Column
""");
}
@Test
@Ignore
public void testMethodDefQualified() throws Exception {
parseTest("""
type Foo
id x = x
Identity.id x = x
""");
}

View File

@ -0,0 +1,18 @@
[package]
name = "enso-parser-debug"
version = "0.1.0"
authors = ["Enso Team <enso-dev@enso.org>"]
edition = "2021"
description = "Enso parser debugging tools."
readme = "README.md"
homepage = "https://github.com/enso-org/enso"
repository = "https://github.com/enso-org/enso"
license-file = "../../LICENSE"
[dependencies]
enso-parser = { path = "../" }
enso-metamodel = { path = "../../metamodel", features = ["rust"] }
enso-metamodel-lexpr = { path = "../../metamodel/lexpr" }
enso-reflect = { path = "../../reflect" }
lexpr = "0.2.6"
serde = { version = "1.0", features = ["derive"] }

View File

@ -0,0 +1,185 @@
//! Debugging utilities for the parser.
// === Features ===
#![feature(exact_size_is_empty)]
#![feature(let_chains)]
#![feature(if_let_guard)]
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![warn(unsafe_code)]
#![allow(clippy::bool_to_int_with_if)]
#![allow(clippy::let_and_return)]
// === Non-Standard Linter Configuration ===
#![allow(clippy::option_map_unit_fn)]
#![allow(clippy::precedence)]
#![allow(dead_code)]
#![deny(unconditional_recursion)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
#![warn(unused_qualifications)]
use enso_metamodel_lexpr::ToSExpr;
use enso_reflect::Reflect;
use lexpr::Value;
use std::collections::HashSet;
// =====================
// === S-expressions ===
// =====================
/// Produce an S-expression representation of the input AST type.
///
/// `value` is the value to render, and `code` is the source text its tokens refer to. The text of
/// identifier, number, operator and text-section tokens is recovered by slicing `code`, using the
/// address of `code`'s first byte as the base for the token offsets.
///
/// The raw output of [`ToSExpr`] is simplified for readability: span/offset fields are removed
/// from the root type, several token types are passed to `ToSExpr::skip`, some tree types are
/// rewritten by dedicated mappers, and finally field names are dropped by `tuplify`.
///
/// # Panics
///
/// The simplification closures panic if a value does not have the list shape they expect.
pub fn to_s_expr<T>(value: &T, code: &str) -> Value
where T: serde::Serialize + Reflect {
use enso_parser::syntax::token::variant::*;
use enso_parser::syntax::tree;
let (graph, rust_to_meta) = enso_metamodel::rust::to_meta(value.reflect_type());
let ast_ty = rust_to_meta[&value.reflect_type().id];
// Address of the first byte of the caller's `code`; token positions are resolved against it.
let base = code.as_bytes().as_ptr() as usize;
// Owned copy of the text, so that the `move` closure below does not borrow from the caller.
let code: Box<str> = Box::from(code);
let mut to_s_expr = ToSExpr::new(&graph);
to_s_expr.mapper(ast_ty, strip_hidden_fields);
// Tokens rendered as a string containing their source text.
let stringish_tokens =
vec![Digits::reflect(), NumberBase::reflect(), Operator::reflect(), TextSection::reflect()];
let stringish_tokens = stringish_tokens.into_iter().map(|t| rust_to_meta[&t.id]);
// Tokens registered with `ToSExpr::skip` (presumably omitted from the output -- confirm against
// `enso_metamodel_lexpr`).
let skip_tokens = vec![
AutoScope::reflect(),
CloseSymbol::reflect(),
Newline::reflect(),
OpenSymbol::reflect(),
TextEnd::reflect(),
TextStart::reflect(),
Wildcard::reflect(),
];
skip_tokens.into_iter().for_each(|token| to_s_expr.skip(rust_to_meta[&token.id]));
let ident_token = rust_to_meta[&Ident::reflect().id];
let text_escape_token = rust_to_meta[&TextEscape::reflect().id];
// Look up the source text a token covers, as an owned string.
let token_to_str = move |token: Value| {
let range = token_code_range(&token, base);
code[range].to_owned().into_boxed_str()
};
// Identifiers are rendered as symbols; each mapper gets its own clone of the lookup closure.
let token_to_str_ = token_to_str.clone();
to_s_expr.mapper(ident_token, move |token| Value::symbol(token_to_str_(token)));
for token in stringish_tokens {
let token_to_str_ = token_to_str.clone();
to_s_expr.mapper(token, move |token| Value::string(token_to_str_(token)));
}
// Replace a list by its first element.
let into_car = |cons| match cons {
Value::Cons(cons) => cons.into_pair().0,
_ => panic!(),
};
// After removing hidden fields: drop the first element, keep the second (`expression`), drop the
// third, and prepend `expression` to whatever remains.
let simplify_case = |list| {
let list = strip_hidden_fields(list);
let (_, list) = match list {
Value::Cons(cons) => cons.into_pair(),
_ => panic!(),
};
let (expression, list) = match list {
Value::Cons(cons) => cons.into_pair(),
_ => panic!(),
};
let (_, list) = match list {
Value::Cons(cons) => cons.into_pair(),
_ => panic!(),
};
Value::cons(expression, list)
};
// Replace a list by its last element; panics (via `unwrap`) if there are no elements.
let simplify_escape = |mut list| {
let mut last = None;
while let Value::Cons(cons) = list {
let (car, cdr) = cons.into_pair();
last = Some(car);
list = cdr;
}
last.unwrap()
};
// Keep only the first element, as a one-element list.
let strip_invalid = |list| {
let Value::Cons(cons) = list else { unreachable!() };
let (car, _) = cons.into_pair();
Value::cons(car, Value::Null)
};
let line = rust_to_meta[&tree::block::Line::reflect().id];
let operator_line = rust_to_meta[&tree::block::OperatorLine::reflect().id];
let case = rust_to_meta[&tree::CaseOf::reflect().id];
let invalid = rust_to_meta[&tree::Invalid::reflect().id];
to_s_expr.mapper(line, into_car);
to_s_expr.mapper(operator_line, into_car);
to_s_expr.mapper(case, simplify_case);
to_s_expr.mapper(invalid, strip_invalid);
to_s_expr.mapper(text_escape_token, simplify_escape);
// Render the value, then drop the field names so structs print like tuple-structs.
tuplify(to_s_expr.value(ast_ty, &value))
}
/// Remove the span/offset bookkeeping fields from the S-expression of a tree node, leaving only
/// the fields a human reader cares about.
///
/// Elements that are not pairs are kept unchanged. Panics if `tree` is not a proper list, or if a
/// pair element does not start with a symbol.
fn strip_hidden_fields(tree: Value) -> Value {
    let hidden: HashSet<_> = [
        ":spanLeftOffsetVisible",
        ":spanLeftOffsetCodeReprBegin",
        ":spanLeftOffsetCodeReprLen",
        ":spanLeftOffsetCodeUtf16",
        ":spanCodeLengthUtf8",
        ":spanCodeLengthUtf16",
    ]
    .into_iter()
    .collect();
    let entries = tree.to_vec().unwrap();
    let visible = entries.into_iter().filter(|entry| {
        // Anything that is not a `(name . value)` pair passes through untouched.
        let Value::Cons(pair) = entry else { return true };
        match pair.car() {
            Value::Symbol(name) => !hidden.contains(name.as_ref()),
            _ => panic!(),
        }
    });
    Value::list(visible)
}
/// Given an S-expression representation of a [`Token`] and the base address for `Code` `Cow`s,
/// return the range of the input code the token references.
///
/// The `:codeReprBegin` field is interpreted as the low 32 bits of the absolute address of the
/// token's first byte. The full address is reconstructed relative to `base` (the address of the
/// first byte of the input code), and the returned range is expressed as byte offsets from `base`.
///
/// # Panics
///
/// Panics if `token` is not a list, or if it has no integer `:codeReprBegin` or `:codeReprLen`
/// field.
fn token_code_range(token: &Value, base: usize) -> std::ops::Range<usize> {
    let get_u32 =
        |field| fields(token).find(|(name, _)| *name == field).unwrap().1.as_u64().unwrap() as u32;
    let begin = get_u32(":codeReprBegin");
    let len = get_u32(":codeReprLen");
    // Combine the token's low 32 address bits with the high bits of the base address.
    let begin = (begin as u64) | (base as u64 & !(u32::MAX as u64));
    // A result below `base` means the low 32 bits wrapped around between `base` and the token, so
    // the token lies in the next 4 GiB window. The parentheses matter: `+` binds tighter than
    // `<<`, so `begin + 1 << 32` would be evaluated as `(begin + 1) << 32`.
    let begin = if begin < (base as u64) { begin + (1u64 << 32) } else { begin };
    let begin = begin as usize - base;
    let len = len as usize;
    begin..(begin + len)
}
/// Yield the `(name, value)` pair of every named field in the S-expression of a struct.
///
/// List elements that are not pairs, or whose first component is not a symbol, are skipped.
/// Panics if `value` is not a list.
fn fields(value: &'_ Value) -> impl Iterator<Item = (&'_ str, &'_ Value)> {
    let entries = value.list_iter().unwrap();
    entries.filter_map(|entry| {
        let Value::Cons(pair) = entry else { return None };
        let Value::Symbol(name) = pair.car() else { return None };
        Some((&name[..], pair.cdr()))
    })
}
/// Recursively drop field-name symbols (those starting with `:`) from an S-expression, so that
/// structs with named fields print as compactly as tuple-structs would.
fn tuplify(value: Value) -> Value {
    match value {
        Value::Cons(pair) => {
            let (head, tail) = pair.into_pair();
            let head_is_field_name =
                matches!(&head, Value::Symbol(name) if name.starts_with(':'));
            if head_is_field_name {
                // `(:name . rest)` collapses to the processed `rest`.
                tuplify(tail)
            } else {
                let head = tuplify(head);
                let tail = tuplify(tail);
                Value::Cons(lexpr::Cons::new(head, tail))
            }
        }
        Value::Vector(mut items) => {
            // Process the elements in place, moving each one out through a placeholder.
            for slot in items.iter_mut() {
                *slot = tuplify(std::mem::replace(slot, Value::Nil));
            }
            Value::Vector(items)
        }
        other => other,
    }
}

View File

@ -0,0 +1,48 @@
//! Show debug-representation of AST of input sources.
// === Features ===
#![feature(exact_size_is_empty)]
#![feature(let_chains)]
#![feature(if_let_guard)]
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![warn(unsafe_code)]
#![allow(clippy::bool_to_int_with_if)]
#![allow(clippy::let_and_return)]
// === Non-Standard Linter Configuration ===
#![allow(clippy::option_map_unit_fn)]
#![allow(clippy::precedence)]
#![allow(dead_code)]
#![deny(unconditional_recursion)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
#![warn(unused_qualifications)]
// ===================
// === Debug-parse ===
// ===================
/// Read a program from standard input, parse it, and print the S-expression form of its AST.
fn main() {
    use std::io::Read;
    let mut source = String::new();
    let mut stdin = std::io::stdin();
    stdin.read_to_string(&mut source).unwrap();
    check_file("<stdin>", &source);
}
/// Parse `code`, check that the AST reproduces the input line by line, and print the AST as an
/// S-expression to standard output.
///
/// If `enso_parser::metadata::parse` recognizes `code`, only the code part it returns is parsed.
/// `path` is used solely to label the assertion message.
fn check_file(path: &str, mut code: &str) {
    if let Some((_meta, body)) = enso_parser::metadata::parse(code) {
        code = body;
    }
    let ast = enso_parser::Parser::new().run(code);
    let reproduced = ast.code();
    // NOTE(review): `zip` stops at the shorter side, so extra trailing lines on either side are
    // never compared.
    reproduced.lines().zip(code.lines()).for_each(|(parsed, original)| {
        assert_eq!(parsed, original, "Bug: dropped tokens, while parsing: {}", path);
    });
    let s_expr = enso_parser_debug::to_s_expr(&ast, code);
    println!("{}", s_expr);
}

View File

@ -19,8 +19,7 @@
mod metadata;
use lexpr::sexp;
use lexpr::Value;
use enso_parser_debug::to_s_expr;
@ -31,7 +30,7 @@ use lexpr::Value;
/// Parses input as a sequence of S-expressions, and wraps it in a `BodyBlock`.
macro_rules! block {
( $($statements:tt)* ) => {
sexp![(BodyBlock #( $( $statements )* ) )]
lexpr::sexp![(BodyBlock #( $( $statements )* ) )]
}
}
@ -77,6 +76,31 @@ fn comments() {
test("# a b c", block![()()]);
}
/// A single-line `if ... then ... else ...` is expected to parse to one `MultiSegmentApp` with
/// three segments, each holding its keyword and an identifier body.
#[test]
fn inline_if() {
#[rustfmt::skip]
test("if True then True else False", block![
(MultiSegmentApp #(((Ident if) (Ident True))
((Ident then) (Ident True))
((Ident else) (Ident False))))]);
}
/// When the body of `then` is on a following indented line, the `then` segment is expected to
/// hold a `BodyBlock`.
#[test]
fn then_block() {
#[rustfmt::skip]
test("if True then\n True", block![
(MultiSegmentApp #(((Ident if) (Ident True)) ((Ident then) (BodyBlock #((Ident True))))))]);
}
/// When the body of `else` is on a following indented line, the `else` segment is expected to
/// hold a `BodyBlock`, while the inline `then` body stays a plain expression.
#[test]
fn else_block() {
#[rustfmt::skip]
test("if True then True else\n False", block![
(MultiSegmentApp #(((Ident if) (Ident True))
((Ident then) (Ident True))
((Ident else) (BodyBlock #((Ident False))))))]);
}
// === Type Definitions ===
@ -122,8 +146,9 @@ fn type_methods() {
#[rustfmt::skip]
let expected = block![
(TypeDef type Geo #() #()
#((Function number #() "=" (BodyBlock #((Ident x))))
(Function area #((() (Ident self) () ())) "=" (OprApp (Ident x) (Ok "+") (Ident x)))))
#((Function (Ident number) #() "=" (BodyBlock #((Ident x))))
(Function (Ident area) #((() (Ident self) () ())) "="
(OprApp (Ident x) (Ok "+") (Ident x)))))
];
test(&code.join("\n"), expected);
}
@ -139,9 +164,9 @@ fn type_operator_methods() {
#[rustfmt::skip]
let expected = block![
(TypeDef type Foo #() #()
#((TypeSignature #"+" ":"
#((TypeSignature (Ident #"+") ":"
(OprApp (Ident Foo) (Ok "->") (OprApp (Ident Foo) (Ok "->") (Ident Foo))))
(Function #"+" #((() (Ident self) () ()) (() (Ident b) () ()))
(Function (Ident #"+") #((() (Ident self) () ()) (() (Ident b) () ()))
"=" (Ident b))))];
test(&code.join("\n"), expected);
}
@ -169,8 +194,8 @@ fn type_def_full() {
((Rectangle #((() (Ident width) () ()) (() (Ident height) () ())) #()))
((Point #() #()))
(()))
#((Function number #() "=" (BodyBlock #((Ident x))))
(Function area #((() (Ident self) () ())) "=" (OprApp (Ident x) (Ok "+") (Ident x)))))
#((Function (Ident number) #() "=" (BodyBlock #((Ident x))))
(Function (Ident area) #((() (Ident self) () ())) "=" (OprApp (Ident x) (Ok "+") (Ident x)))))
];
test(&code.join("\n"), expected);
}
@ -219,37 +244,41 @@ fn assignment_simple() {
#[test]
fn function_inline_simple_args() {
test(" foo a = x", block![(Function foo #((() (Ident a) () ())) "=" (Ident x))]);
test(" foo a = x", block![(Function (Ident foo) #((() (Ident a) () ())) "=" (Ident x))]);
#[rustfmt::skip]
test("foo a b = x",
block![(Function foo #((() (Ident a) () ()) (() (Ident b) () ())) "=" (Ident x))]);
block![(Function (Ident foo) #((() (Ident a) () ()) (() (Ident b) () ())) "=" (Ident x))]);
#[rustfmt::skip]
test(
"foo a b c = x", block![
(Function foo
(Function (Ident foo)
#((() (Ident a) () ()) (() (Ident b) () ()) (() (Ident c) () ()))
"=" (Ident x))],
);
test(" foo _ = x", block![(Function foo #((() (Wildcard -1) () ())) "=" (Ident x))]);
test(" foo _ = x", block![(Function (Ident foo) #((() (Wildcard -1) () ())) "=" (Ident x))]);
}
#[test]
fn function_block_noargs() {
test("foo =", block![(Function foo #() "=" ())]);
test("foo =", block![(Function (Ident foo) #() "=" ())]);
}
#[test]
fn function_block_simple_args() {
test("foo a =", block![(Function foo #((() (Ident a) () ())) "=" ())]);
test("foo a b =", block![(Function foo #((() (Ident a) () ())
(() (Ident b) () ())) "=" ())]);
test("foo a =", block![(Function (Ident foo) #((() (Ident a) () ())) "=" ())]);
#[rustfmt::skip]
test(
"foo a b c =", block![
(Function foo
#((() (Ident a) () ()) (() (Ident b) () ()) (() (Ident c) () ()))
"=" ())],
);
test("foo a b =", block![(Function (Ident foo) #((() (Ident a) () ())
(() (Ident b) () ())) "=" ())]);
#[rustfmt::skip]
test("foo a b c =", block![
(Function (Ident foo) #((() (Ident a) () ()) (() (Ident b) () ()) (() (Ident c) () ())) "="
())]);
}
/// A definition whose name is qualified (`Id.id`) is expected to parse to a `Function` whose name
/// is the `.` operator application, rather than a bare identifier.
#[test]
fn function_qualified() {
test("Id.id x = x", block![
(Function (OprApp (Ident Id) (Ok ".") (Ident id)) #((() (Ident x) () ())) "=" (Ident x))]);
}
@ -277,17 +306,17 @@ fn default_app() {
fn default_arguments() {
#[rustfmt::skip]
let cases = [
("f x=1 = x",
block![(Function f #((() (Ident x) () ("=" (Number () "1" ())))) "=" (Ident x))]),
("f (x = 1) = x",
block![(Function f #((() (Ident x) () ("=" (Number () "1" ())))) "=" (Ident x))]),
("f x=1 = x", block![
(Function (Ident f) #((() (Ident x) () ("=" (Number () "1" ())))) "=" (Ident x))]),
("f (x = 1) = x", block![
(Function (Ident f) #((() (Ident x) () ("=" (Number () "1" ())))) "=" (Ident x))]),
// Pattern in LHS:
("f ~x=1 = x", block![
(Function f
(Function (Ident f)
#(("~" (Ident x) () ("=" (Number () "1" ()))))
"=" (Ident x))]),
("f (~x = 1) = x", block![
(Function f
(Function (Ident f)
#(("~" (Ident x) () ("=" (Number () "1" ()))))
"=" (Ident x))]),
];
@ -300,15 +329,15 @@ fn default_arguments() {
#[test]
fn code_block_body() {
let code = ["main =", " x"];
test(&code.join("\n"), block![(Function main #() "=" (BodyBlock #((Ident x))))]);
test(&code.join("\n"), block![(Function (Ident main) #() "=" (BodyBlock #((Ident x))))]);
let code = ["main =", " ", " x"];
test(&code.join("\n"), block![(Function main #() "=" (BodyBlock #(() (Ident x))))]);
test(&code.join("\n"), block![(Function (Ident main) #() "=" (BodyBlock #(() (Ident x))))]);
let code = ["main =", " ", " x"];
test(&code.join("\n"), block![(Function main #() "=" (BodyBlock #(() (Ident x))))]);
test(&code.join("\n"), block![(Function (Ident main) #() "=" (BodyBlock #(() (Ident x))))]);
let code = ["main =", " ", " x"];
test(&code.join("\n"), block![(Function main #() "=" (BodyBlock #(() (Ident x))))]);
test(&code.join("\n"), block![(Function (Ident main) #() "=" (BodyBlock #(() (Ident x))))]);
let code = ["main =", "", " x"];
test(&code.join("\n"), block![(Function main #() "=" (BodyBlock #(() (Ident x))))]);
test(&code.join("\n"), block![(Function (Ident main) #() "=" (BodyBlock #(() (Ident x))))]);
#[rustfmt::skip]
let code = [
@ -318,7 +347,7 @@ fn code_block_body() {
];
#[rustfmt::skip]
let expect = block![
(Function main #() "=" (BodyBlock #(
(Function (Ident main) #() "=" (BodyBlock #(
(OprSectionBoundary 1 (OprApp () (Ok "+") (Ident x)))
(App (Ident print) (Ident x)))))
];
@ -386,18 +415,18 @@ fn code_block_empty() {
// No input would parse as an empty `ArgumentBlock` or `OperatorBlock`, because those types are
// distinguished from a body continuation by the presence of non-empty indented lines.
let code = ["foo =", "bar"];
test(&code.join("\n"), block![(Function foo #() "=" ()) (Ident bar)]);
test(&code.join("\n"), block![(Function (Ident foo) #() "=" ()) (Ident bar)]);
// This parses similarly to above; a line with no non-whitespace content does not create a code
// block.
let code = ["foo =", " ", "bar"];
test(&code.join("\n"), block![(Function foo #() "=" ()) () (Ident bar)]);
test(&code.join("\n"), block![(Function (Ident foo) #() "=" ()) () (Ident bar)]);
}
#[test]
fn code_block_bad_indents1() {
let code = ["main =", " foo", " bar", " baz"];
let expected = block![
(Function main #() "=" (BodyBlock #((Ident foo) (Ident bar) (Ident baz))))
(Function (Ident main) #() "=" (BodyBlock #((Ident foo) (Ident bar) (Ident baz))))
];
test(&code.join("\n"), expected);
}
@ -406,7 +435,7 @@ fn code_block_bad_indents1() {
fn code_block_bad_indents2() {
let code = ["main =", " foo", " bar", "baz"];
let expected = block![
(Function main #() "=" (BodyBlock #((Ident foo) (Ident bar))))
(Function (Ident main) #() "=" (BodyBlock #((Ident foo) (Ident bar))))
(Ident baz)
];
test(&code.join("\n"), expected);
@ -416,7 +445,7 @@ fn code_block_bad_indents2() {
fn code_block_with_following_statement() {
let code = ["main =", " foo", "bar"];
let expected = block![
(Function main #() "=" (BodyBlock #((Ident foo))))
(Function (Ident main) #() "=" (BodyBlock #((Ident foo))))
(Ident bar)
];
test(&code.join("\n"), expected);
@ -546,7 +575,7 @@ fn template_functions() {
fn unevaluated_argument() {
let code = ["main ~foo = x"];
let expected = block![
(Function main #(("~" (Ident foo) () ())) "=" (Ident x))
(Function (Ident main) #(("~" (Ident foo) () ())) "=" (Ident x))
];
test(&code.join("\n"), expected);
}
@ -555,7 +584,7 @@ fn unevaluated_argument() {
fn unary_operator_missing_operand() {
let code = ["main ~ = x"];
let expected = block![
(Function main #((() (UnaryOprApp "~" ()) () ())) "=" (Ident x))
(Function (Ident main) #((() (UnaryOprApp "~" ()) () ())) "=" (Ident x))
];
test(&code.join("\n"), expected);
}
@ -727,8 +756,8 @@ fn metadata_parsing() {
#[test]
fn type_signatures() {
let cases = [
("val : Bool", block![(TypeSignature val ":" (Ident Bool))]),
("val : List Int", block![(TypeSignature val ":" (App (Ident List) (Ident Int)))]),
("val : Bool", block![(TypeSignature (Ident val) ":" (Ident Bool))]),
("val : List Int", block![(TypeSignature (Ident val) ":" (App (Ident List) (Ident Int)))]),
];
cases.into_iter().for_each(|(code, expected)| test(code, expected));
}
@ -746,7 +775,8 @@ fn type_annotations() {
("(x : My_Type _)", block![
(Group (TypeAnnotated (Ident x) ":" (App (Ident My_Type) (Wildcard -1))))]),
("x : List Int -> Int", block![
(TypeSignature x ":" (OprApp (App (Ident List) (Ident Int)) (Ok "->") (Ident Int)))]),
(TypeSignature (Ident x) ":"
(OprApp (App (Ident List) (Ident Int)) (Ok "->") (Ident Int)))]),
];
cases.into_iter().for_each(|(code, expected)| test(code, expected));
}
@ -1012,7 +1042,7 @@ fn trailing_whitespace() {
let cases = [
("a ", block![(Ident a) ()]),
("a \n", block![(Ident a) ()]),
("a = \n x", block![(Function a #() "=" (BodyBlock #((Ident x))))]),
("a = \n x", block![(Function (Ident a) #() "=" (BodyBlock #((Ident x))))]),
];
cases.into_iter().for_each(|(code, expected)| test(code, expected));
}
@ -1059,10 +1089,6 @@ fn multiline_annotations() {
// === Test Support ===
// ====================
use enso_metamodel_lexpr::ToSExpr;
use enso_reflect::Reflect;
use std::collections::HashSet;
/// Given a block of input Enso code, test that:
/// - The given code parses to the AST represented by the given S-expression.
/// - The AST pretty-prints back to the original code.
@ -1075,7 +1101,7 @@ use std::collections::HashSet;
/// - Most token types are represented as their contents, rather than as a token struct. For
/// example, a `token::Number` may be represented like: `sexp![10]`, and a `token::Ident` may look
/// like `sexp![foo]`.
fn test(code: &str, expect: Value) {
fn test(code: &str, expect: lexpr::Value) {
let ast = enso_parser::Parser::new().run(code);
let ast_s_expr = to_s_expr(&ast, code);
assert_eq!(ast_s_expr.to_string(), expect.to_string(), "{:?}", &ast);
@ -1084,171 +1110,3 @@ fn test(code: &str, expect: Value) {
let deserialized = enso_parser::serialization::deserialize_tree(&serialized);
deserialized.unwrap();
}
// =====================
// === S-expressions ===
// =====================
/// Produce an S-expression representation of the input AST type.
pub fn to_s_expr<T>(value: &T, code: &str) -> Value
where T: serde::Serialize + Reflect {
use enso_parser::syntax::token;
use enso_parser::syntax::tree;
let (graph, rust_to_meta) = enso_metamodel::rust::to_meta(value.reflect_type());
let ast_ty = rust_to_meta[&value.reflect_type().id];
let base = code.as_bytes().as_ptr() as usize;
let code: Box<str> = Box::from(code);
let mut to_s_expr = ToSExpr::new(&graph);
to_s_expr.mapper(ast_ty, strip_hidden_fields);
let ident_token = rust_to_meta[&token::variant::Ident::reflect().id];
let operator_token = rust_to_meta[&token::variant::Operator::reflect().id];
let open_symbol_token = rust_to_meta[&token::variant::OpenSymbol::reflect().id];
let close_symbol_token = rust_to_meta[&token::variant::CloseSymbol::reflect().id];
let number_token = rust_to_meta[&token::variant::Digits::reflect().id];
let number_base_token = rust_to_meta[&token::variant::NumberBase::reflect().id];
let newline_token = rust_to_meta[&token::variant::Newline::reflect().id];
let text_start_token = rust_to_meta[&token::variant::TextStart::reflect().id];
let text_end_token = rust_to_meta[&token::variant::TextEnd::reflect().id];
let text_section_token = rust_to_meta[&token::variant::TextSection::reflect().id];
let text_escape_token = rust_to_meta[&token::variant::TextEscape::reflect().id];
let wildcard_token = rust_to_meta[&token::variant::Wildcard::reflect().id];
let autoscope_token = rust_to_meta[&token::variant::AutoScope::reflect().id];
// TODO: Implement `#[reflect(flag = "enso::concrete")]`, which just attaches user data to the
// type info; then filter by flag here instead of hard-coding these simplifications.
let token_to_str = move |token: Value| {
let range = token_code_range(&token, base);
code[range].to_owned().into_boxed_str()
};
let token_to_str_ = token_to_str.clone();
to_s_expr.mapper(ident_token, move |token| Value::symbol(token_to_str_(token)));
let token_to_str_ = token_to_str.clone();
to_s_expr.mapper(operator_token, move |token| Value::string(token_to_str_(token)));
let token_to_str_ = token_to_str.clone();
to_s_expr.mapper(text_section_token, move |token| Value::string(token_to_str_(token)));
let token_to_str_ = token_to_str.clone();
to_s_expr.mapper(number_token, move |token| Value::string(token_to_str_(token)));
let token_to_str_ = token_to_str;
to_s_expr.mapper(number_base_token, move |token| Value::string(token_to_str_(token)));
let into_car = |cons| match cons {
Value::Cons(cons) => cons.into_pair().0,
_ => panic!(),
};
let simplify_case = |list| {
let list = strip_hidden_fields(list);
let (_, list) = match list {
Value::Cons(cons) => cons.into_pair(),
_ => panic!(),
};
let (expression, list) = match list {
Value::Cons(cons) => cons.into_pair(),
_ => panic!(),
};
let (_, list) = match list {
Value::Cons(cons) => cons.into_pair(),
_ => panic!(),
};
Value::cons(expression, list)
};
let simplify_escape = |mut list| {
let mut last = None;
while let Value::Cons(cons) = list {
let (car, cdr) = cons.into_pair();
last = Some(car);
list = cdr;
}
last.unwrap()
};
let strip_invalid = |list| {
let Value::Cons(cons) = list else { unreachable!() };
let (car, _) = cons.into_pair();
Value::cons(car, Value::Null)
};
let line = rust_to_meta[&tree::block::Line::reflect().id];
let operator_line = rust_to_meta[&tree::block::OperatorLine::reflect().id];
let case = rust_to_meta[&tree::CaseOf::reflect().id];
let invalid = rust_to_meta[&tree::Invalid::reflect().id];
to_s_expr.mapper(line, into_car);
to_s_expr.mapper(operator_line, into_car);
to_s_expr.mapper(case, simplify_case);
to_s_expr.mapper(invalid, strip_invalid);
to_s_expr.mapper(text_escape_token, simplify_escape);
to_s_expr.skip(newline_token);
to_s_expr.skip(wildcard_token);
to_s_expr.skip(autoscope_token);
to_s_expr.skip(text_start_token);
to_s_expr.skip(text_end_token);
to_s_expr.skip(open_symbol_token);
to_s_expr.skip(close_symbol_token);
tuplify(to_s_expr.value(ast_ty, &value))
}
/// Strip certain fields that should be excluded from output.
fn strip_hidden_fields(tree: Value) -> Value {
let hidden_tree_fields = [
":spanLeftOffsetVisible",
":spanLeftOffsetCodeReprBegin",
":spanLeftOffsetCodeReprLen",
":spanLeftOffsetCodeUtf16",
":spanCodeLengthUtf8",
":spanCodeLengthUtf16",
];
let hidden_tree_fields: HashSet<_> = hidden_tree_fields.into_iter().collect();
Value::list(tree.to_vec().unwrap().into_iter().filter(|val| match val {
Value::Cons(cons) => match cons.car() {
Value::Symbol(symbol) => !hidden_tree_fields.contains(symbol.as_ref()),
_ => panic!(),
},
_ => true,
}))
}
/// Given an S-expression representation of a [`Token`] and the base address for `Code` `Cow`s,
/// return the range of the input code the token references.
fn token_code_range(token: &Value, base: usize) -> std::ops::Range<usize> {
let get_u32 =
|field| fields(token).find(|(name, _)| *name == field).unwrap().1.as_u64().unwrap() as u32;
let begin = get_u32(":codeReprBegin");
let len = get_u32(":codeReprLen");
let begin = (begin as u64) | (base as u64 & !0xFFFF_FFFF);
let begin = if begin < (base as u64) { begin + 0x1_0000_0000 } else { begin };
let begin = begin as usize - base;
let len = len as usize;
begin..(begin + len)
}
/// Iterate the field `(name, value)` pairs of the S-expression of a struct with named fields.
fn fields(value: &'_ Value) -> impl Iterator<Item = (&'_ str, &'_ Value)> {
value.list_iter().unwrap().filter_map(|value| match value {
Value::Cons(cons) => match cons.car() {
Value::Symbol(symbol) => Some((&symbol[..], cons.cdr())),
_ => None,
},
_ => None,
})
}
/// Strip field names from struct representations, so that they are printed more concisely, as if
/// they were tuple-structs.
fn tuplify(value: Value) -> Value {
let (car, cdr) = match value {
Value::Cons(cons) => cons.into_pair(),
Value::Vector(mut vector) => {
for value in vector.iter_mut() {
let original = std::mem::replace(value, Value::Nil);
*value = tuplify(original);
}
return Value::Vector(vector);
}
value => return value,
};
if let Value::Symbol(symbol) = &car {
if let Some(':') = symbol.chars().next() {
return tuplify(cdr);
}
}
let car = tuplify(car);
let cdr = tuplify(cdr);
Value::Cons(lexpr::Cons::new(car, cdr))
}

View File

@ -672,7 +672,8 @@ fn analyze_operator(token: &str) -> token::OperatorProperties {
.with_binary_infix_precedence(1)
.as_compile_time_operation()
.as_sequence(),
"." => return operator.with_binary_infix_precedence(21).with_decimal_interpretation(),
"." =>
return operator.with_binary_infix_precedence(21).with_decimal_interpretation().as_dot(),
_ => (),
}
// "The precedence of all other operators is determined by the operator's Precedence Character:"

View File

@ -210,17 +210,12 @@ fn expression_to_statement(mut tree: syntax::Tree<'_>) -> syntax::Tree<'_> {
use syntax::tree::*;
let mut left_offset = source::span::Offset::default();
if let Tree { variant: box Variant::TypeAnnotated(annotated), span } = tree {
if let Tree { variant: box Variant::Ident(ident), span: _ } = annotated.expression {
let operator = annotated.operator;
let type_ = annotated.type_;
let variable = ident.token;
let variant = TypeSignature { variable, operator, type_ };
let variant = Box::new(Variant::TypeSignature(variant));
return Tree { variant, span };
}
let err = Error::new("Expected identifier in left-hand operand of type signature.");
let variant = Box::new(Variant::TypeAnnotated(annotated));
return Tree::invalid(err, Tree { variant, span });
let operator = annotated.operator;
let type_ = annotated.type_;
let variable = annotated.expression;
let mut tree = Tree::type_signature(variable, operator, type_);
tree.span.left_offset += span.left_offset;
return tree;
}
let tree_ = &mut tree;
let opr_app = match tree_ {
@ -231,7 +226,7 @@ fn expression_to_statement(mut tree: syntax::Tree<'_>) -> syntax::Tree<'_> {
_ => return tree,
};
if let OprApp { lhs: Some(lhs), opr: Ok(opr), rhs } = opr_app && opr.properties.is_assignment() {
let (mut leftmost, args) = collect_arguments(lhs.clone());
let (leftmost, args) = collect_arguments(lhs.clone());
if let Some(rhs) = rhs {
if let Variant::Ident(ident) = &*leftmost.variant && ident.token.variant.is_type {
// If the LHS is a type, this is a (destructuring) assignment.
@ -248,10 +243,10 @@ fn expression_to_statement(mut tree: syntax::Tree<'_>) -> syntax::Tree<'_> {
return result;
}
}
if let Variant::Ident(Ident { token }) = &mut *leftmost.variant {
if is_qualified_name(&leftmost) {
// If this is not a variable assignment, and the leftmost leaf of the `App` tree is
// an identifier, this is a function definition.
let mut result = Tree::function(mem::take(token), args, mem::take(opr), mem::take(rhs));
// a qualified name, this is a function definition.
let mut result = Tree::function(leftmost, args, mem::take(opr), mem::take(rhs));
result.span.left_offset += left_offset;
return result;
}
@ -259,6 +254,17 @@ fn expression_to_statement(mut tree: syntax::Tree<'_>) -> syntax::Tree<'_> {
tree
}
/// Return whether `tree` is a (possibly qualified) name: either a single identifier, or a chain of
/// dot-operator applications in which every right-hand operand is an identifier and the leftmost
/// operand is an identifier.
fn is_qualified_name(tree: &syntax::Tree) -> bool {
    use syntax::tree::*;
    let mut current = tree;
    loop {
        match &*current.variant {
            Variant::Ident(_) => return true,
            Variant::OprApp(OprApp { lhs: Some(lhs), opr: Ok(opr), rhs: Some(rhs) }) => {
                let rhs_is_ident = matches!(&*rhs.variant, Variant::Ident(_));
                if !(rhs_is_ident && opr.properties.is_dot()) {
                    return false;
                }
                // Walk down the left spine of the dotted chain.
                current = lhs;
            }
            _ => return false,
        }
    }
}
fn expression_to_type(mut input: syntax::Tree<'_>) -> syntax::Tree<'_> {
use syntax::tree::*;
if let Variant::Wildcard(wildcard) = &mut *input.variant {

View File

@ -131,12 +131,38 @@ fn export_body(segments: NonEmptyVec<MatchedSegment>) -> syntax::Tree {
/// If-then-else macro definition.
pub fn if_then_else<'s>() -> Definition<'s> {
crate::macro_definition! {("if", everything(), "then", everything(), "else", everything())}
crate::macro_definition! {
("if", everything(), "then", everything(), "else", everything()) if_body}
}
/// If-then macro definition.
pub fn if_then<'s>() -> Definition<'s> {
crate::macro_definition! {("if", everything(), "then", everything())}
crate::macro_definition! {("if", everything(), "then", everything()) if_body}
}
/// Build the syntax tree for the `if`-`then` and `if`-`then`-`else` macros.
///
/// Each matched segment becomes a `MultiSegmentAppSegment` that keeps the segment's header and
/// whose body is the result of `operator::resolve_operator_precedence_if_non_empty` on the
/// segment's tokens. When that result is an argument block applied to nothing (`lhs: None`), its
/// lines are turned into a block body via `block::body_from_lines`, and the left offset of the
/// replaced tree is added to the new block's left offset.
fn if_body(segments: NonEmptyVec<MatchedSegment>) -> syntax::Tree {
    use syntax::tree::*;
    Tree::multi_segment_app(segments.mapped(|segment| {
        let header = segment.header;
        let resolved = operator::resolve_operator_precedence_if_non_empty(segment.result.tokens());
        let body = resolved.map(|tree| match tree {
            Tree {
                variant:
                    box Variant::ArgumentBlockApplication(ArgumentBlockApplication {
                        lhs: None,
                        arguments,
                    }),
                span,
            } => {
                // A bare argument block: reinterpret its lines as a block body.
                let mut body_block = block::body_from_lines(arguments);
                body_block.span.left_offset += span.left_offset;
                body_block
            }
            // Anything else is used as the segment body unchanged.
            other => other,
        });
        MultiSegmentAppSegment { header, body }
    }))
}
/// Group macro definition.
@ -339,12 +365,12 @@ fn case_body(segments: NonEmptyVec<MatchedSegment>) -> syntax::Tree {
match body.variant {
box Variant::ArgumentBlockApplication(ArgumentBlockApplication { lhs, arguments }) => {
if let Some(lhs) = lhs {
case_lines.push(CaseLine { case: Some(lhs.into()), ..default() });
case_lines.push(CaseLine { case: Some(parse_case(lhs)), ..default() });
}
case_lines.extend(arguments.into_iter().map(
|block::Line { newline, expression }| CaseLine {
newline: newline.into(),
case: expression.map(Case::from),
case: expression.map(parse_case),
},
));
if let Some(left_offset) =
@ -353,12 +379,25 @@ fn case_body(segments: NonEmptyVec<MatchedSegment>) -> syntax::Tree {
*left_offset += body.span.left_offset;
}
}
_ => case_lines.push(CaseLine { case: Some(body.into()), ..default() }),
_ => case_lines.push(CaseLine { case: Some(parse_case(body)), ..default() }),
}
}
Tree::case_of(case_, expression, of_, case_lines)
}
/// Convert an expression tree into a `Case` (one line of a `case ... of` body).
///
/// If `tree` is an operator application whose operator parsed successfully and is the arrow
/// operator (per `is_arrow`), its left operand (if any) is converted with
/// `crate::expression_to_pattern` and becomes the case's pattern, the operator becomes the
/// arrow, and the right operand becomes the case's expression. Any other tree is stored whole
/// as the case's expression, with all remaining fields left at their defaults.
fn parse_case(tree: syntax::tree::Tree) -> syntax::tree::Case {
use syntax::tree::*;
match tree.variant {
box Variant::OprApp(OprApp { lhs, opr: Ok(opr), rhs }) if opr.properties.is_arrow() => {
let pattern = lhs.map(crate::expression_to_pattern);
let mut case = Case { pattern, arrow: opr.into(), expression: rhs };
// The original tree's span is discarded here, so carry its left offset over to the case.
// NOTE(review): the `unwrap` presumably cannot fail because the arrow is always present
// in this arm — confirm against `left_offset_mut`.
*case.left_offset_mut().unwrap() += tree.span.left_offset;
case
}
// Not an arrow application: the whole tree is the case's expression.
_ => Case { expression: tree.into(), ..default() },
}
}
/// Array literal.
pub fn array<'s>() -> Definition<'s> {
crate::macro_definition! {("[", everything(), "]", nothing()) array_body}

View File

@ -3,10 +3,12 @@
#![recursion_limit = "256"]
// === Features ===
#![allow(incomplete_features)]
#![feature(assert_matches)]
#![feature(allocator_api)]
#![feature(exact_size_is_empty)]
#![feature(test)]
#![feature(specialization)]
#![feature(let_chains)]
#![feature(if_let_guard)]
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]
@ -60,21 +62,25 @@ fn check_file(path: &str, mut code: &str) {
});
for (error, span) in &*errors.borrow() {
let whitespace = &span.left_offset.code.repr;
let start = whitespace.as_ptr() as usize + whitespace.len() - code.as_ptr() as usize;
let mut line = 1;
let mut char = 0;
for (i, c) in code.char_indices() {
if i >= start {
break;
if matches!(whitespace, Cow::Borrowed(_)) {
let start = whitespace.as_ptr() as usize + whitespace.len() - code.as_ptr() as usize;
let mut line = 1;
let mut char = 0;
for (i, c) in code.char_indices() {
if i >= start {
break;
}
if c == '\n' {
line += 1;
char = 0;
} else {
char += 1;
}
}
if c == '\n' {
line += 1;
char = 0;
} else {
char += 1;
}
}
eprintln!("{path}:{line}:{char}: {}", &error.error.message);
eprintln!("{path}:{line}:{char}: {}", &error.error.message);
} else {
eprintln!("{path}:?:?: {}", &error.error.message);
};
}
for (parsed, original) in ast.code().lines().zip(code.lines()) {
assert_eq!(parsed, original, "Bug: dropped tokens, while parsing: {}", path);

View File

@ -338,6 +338,7 @@ pub struct OperatorProperties {
is_sequence: bool,
is_suspension: bool,
is_annotation: bool,
is_dot: bool,
}
impl OperatorProperties {
@ -403,6 +404,11 @@ impl OperatorProperties {
Self { is_suspension: true, ..self }
}
/// Return these properties with the dot-operator flag set; all other properties are unchanged.
pub fn as_dot(self) -> Self {
    let mut properties = self;
    properties.is_dot = true;
    properties
}
/// Return a copy of this operator, modified to allow an interpretation as a decimal point.
pub fn with_decimal_interpretation(self) -> Self {
Self { can_be_decimal_operator: true, ..self }
@ -458,6 +464,11 @@ impl OperatorProperties {
self.is_annotation
}
/// Return whether this operator is the dot operator, i.e. whether its `is_dot` flag has been
/// set (see `as_dot`).
pub fn is_dot(&self) -> bool {
self.is_dot
}
/// Return this operator's associativity.
pub fn associativity(&self) -> Associativity {
match self.is_right_associative {

View File

@ -4,7 +4,6 @@ use crate::prelude::*;
use crate::source::*;
use crate::syntax::*;
use crate::expression_to_pattern;
use crate::span_builder;
use enso_parser_syntax_tree_visitor::Visitor;
@ -218,8 +217,8 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
},
/// A function definition, like `add x y = x + y`.
Function {
/// The identifier to which the function should be bound.
pub name: token::Ident<'s>,
/// The (qualified) name to which the function should be bound.
pub name: Tree<'s>,
/// The argument patterns.
pub args: Vec<ArgumentDefinition<'s>>,
/// The `=` token.
@ -255,13 +254,13 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
},
/// Statement declaring the type of a variable.
TypeSignature {
/// The variable whose type is being declared.
pub variable: token::Ident<'s>,
/// (Qualified) name of the item whose type is being declared.
pub variable: Tree<'s>,
/// The `:` token.
pub operator: token::Operator<'s>,
/// The variable's type.
#[reflect(rename = "type")]
pub type_: Tree<'s>,
pub type_: Tree<'s>,
},
/// An expression with explicit type information attached.
TypeAnnotated {
@ -626,20 +625,6 @@ impl<'s> span::Builder<'s> for Case<'s> {
}
}
impl<'s> From<Tree<'s>> for Case<'s> {
fn from(tree: Tree<'s>) -> Self {
match tree.variant {
box Variant::OprApp(OprApp { lhs, opr: Ok(opr), rhs }) if opr.properties.is_arrow() => {
let pattern = lhs.map(expression_to_pattern);
let mut case = Case { pattern, arrow: opr.into(), expression: rhs };
*case.left_offset_mut().unwrap() += tree.span.left_offset;
case
}
_ => Case { expression: tree.into(), ..default() },
}
}
}
// === OprApp ===
@ -976,12 +961,10 @@ pub fn recurse_left_mut_while<'s>(
| Variant::UnaryOprApp(_)
| Variant::MultiSegmentApp(_)
| Variant::TypeDef(_)
| Variant::Function(_)
| Variant::Import(_)
| Variant::Export(_)
| Variant::Group(_)
| Variant::CaseOf(_)
| Variant::TypeSignature(_)
| Variant::Lambda(_)
| Variant::Array(_)
| Variant::Annotated(_)
@ -1002,6 +985,8 @@ pub fn recurse_left_mut_while<'s>(
| Variant::TemplateFunction(TemplateFunction { ast: lhs, .. })
| Variant::DefaultApp(DefaultApp { func: lhs, .. })
| Variant::Assignment(Assignment { pattern: lhs, .. })
| Variant::TypeSignature(TypeSignature { variable: lhs, .. })
| Variant::Function(Function { name: lhs, .. })
| Variant::TypeAnnotated(TypeAnnotated { expression: lhs, .. }) => lhs,
}
}