diff --git a/Cargo.lock b/Cargo.lock index e8932507427..e5688693491 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2212,6 +2212,18 @@ dependencies = [ "uuid 1.1.2", ] +[[package]] +name = "enso-parser-debug" +version = "0.1.0" +dependencies = [ + "enso-metamodel", + "enso-metamodel-lexpr", + "enso-parser", + "enso-reflect", + "lexpr", + "serde", +] + [[package]] name = "enso-parser-generate-java" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 867be911a79..2f8ef6ec978 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ members = [ "lib/rust/parser/src/syntax/tree/visitor", "lib/rust/parser/jni", "lib/rust/parser/generate-java", + "lib/rust/parser/debug", "lib/rust/profiler/data", "lib/rust/profiler/demo-data", "integration-test", diff --git a/engine/runtime/src/main/java/org/enso/compiler/TreeToIr.java b/engine/runtime/src/main/java/org/enso/compiler/TreeToIr.java index 11a0900e2e5..7eb01aea390 100644 --- a/engine/runtime/src/main/java/org/enso/compiler/TreeToIr.java +++ b/engine/runtime/src/main/java/org/enso/compiler/TreeToIr.java @@ -294,7 +294,7 @@ final class TreeToIr { ); } case Tree.Function fn -> { - var nameId = buildName(fn, fn.getName()); + var nameId = buildName(fn.getName()); /* case AstView.MethodDefinition(targetPath, name, args, definition) => @@ -386,7 +386,7 @@ final class TreeToIr { */ case Tree.TypeSignature sig -> { // case AstView.TypeAscription(typed, sig) => - var methodName = buildName(sig, sig.getVariable()); + var methodName = buildName(sig.getVariable()); var methodReference = new IR$Name$MethodReference( Option.empty(), methodName, @@ -457,7 +457,7 @@ final class TreeToIr { yield null; } case Tree.TypeSignature sig -> { - var typeName = buildName(sig, sig.getVariable(), false); + var typeName = buildName(sig.getVariable()); var fn = switch (sig.getType()) { case Tree.OprApp app when "->".equals(app.getOpr().getRight().codeRepr()) -> { @@ -486,7 +486,7 @@ final class TreeToIr { yield new IR$Type$Ascription(typeName, fn, 
getIdentifiedLocation(sig), meta(), diag()); } case Tree.Function fun -> { - var name = buildName(fun, fun.getName(), false); + var name = buildName(fun.getName()); var args = translateArgumentsDefinition(fun.getArgs()); var body = translateExpression(fun.getBody(), false); @@ -685,31 +685,6 @@ final class TreeToIr { yield new IR$Function$Lambda(args, body, getIdentifiedLocation(tree), true, meta(), diag()); } } - case "=" -> { - var ap = app.getLhs(); - List args = nil(); - while (ap instanceof Tree.App leftApp) { - var isSuspended = false; - var a = new IR$DefinitionArgument$Specified( - buildName(leftApp.getArg()), - Option.empty(), - Option.empty(), - isSuspended, - getIdentifiedLocation(leftApp), - meta(), - diag() - ); - args = cons(a, args); - ap = leftApp.getFunc(); - } - var name = buildName(ap); - var lhs = translateCallArgument(app.getLhs(), insideTypeSignature); - var rhs = translateExpression(app.getRhs(), insideTypeSignature); - yield new IR$Function$Binding( - (IR.Name)name, args, rhs, - getIdentifiedLocation(app), true, meta(), diag() - ); - } default -> { var lhs = translateCallArgument(app.getLhs(), insideTypeSignature); var rhs = translateCallArgument(app.getRhs(), insideTypeSignature); @@ -876,7 +851,7 @@ final class TreeToIr { case IR.Expression e -> e; }; case Tree.TypeSignature sig -> { - var methodName = buildName(sig, sig.getVariable()); + var methodName = buildName(sig.getVariable()); var methodReference = new IR$CallArgument$Specified( Option.empty(), methodName, @@ -1848,7 +1823,7 @@ final class TreeToIr { } private IR$Name$Literal buildName(Tree ident) { return switch (ident) { - case Tree.Ident id -> buildName(id.getToken()); + case Tree.Ident id -> buildName(ident, id.getToken(), false); default -> throw new UnhandledEntity(ident, "buildName"); }; } diff --git a/engine/runtime/src/test/java/org/enso/compiler/EnsoCompilerTest.java b/engine/runtime/src/test/java/org/enso/compiler/EnsoCompilerTest.java index d37acece228..f5dcc413503 
100644 --- a/engine/runtime/src/test/java/org/enso/compiler/EnsoCompilerTest.java +++ b/engine/runtime/src/test/java/org/enso/compiler/EnsoCompilerTest.java @@ -506,18 +506,20 @@ public class EnsoCompilerTest { } @Test + @Ignore public void testTypeSignatureQualified() throws Exception { parseTest(""" type Baz - resolve : Integer -> Column + Foo.resolve : Integer -> Column """); } @Test + @Ignore public void testMethodDefQualified() throws Exception { parseTest(""" type Foo - id x = x + Identity.id x = x """); } diff --git a/lib/rust/parser/debug/Cargo.toml b/lib/rust/parser/debug/Cargo.toml new file mode 100644 index 00000000000..ba073126fdb --- /dev/null +++ b/lib/rust/parser/debug/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "enso-parser-debug" +version = "0.1.0" +authors = ["Enso Team "] +edition = "2021" +description = "Enso parser debugging tools." +readme = "README.md" +homepage = "https://github.com/enso-org/enso" +repository = "https://github.com/enso-org/enso" +license-file = "../../LICENSE" + +[dependencies] +enso-parser = { path = "../" } +enso-metamodel = { path = "../../metamodel", features = ["rust"] } +enso-metamodel-lexpr = { path = "../../metamodel/lexpr" } +enso-reflect = { path = "../../reflect" } +lexpr = "0.2.6" +serde = { version = "1.0", features = ["derive"] } diff --git a/lib/rust/parser/debug/src/lib.rs b/lib/rust/parser/debug/src/lib.rs new file mode 100644 index 00000000000..0213380edde --- /dev/null +++ b/lib/rust/parser/debug/src/lib.rs @@ -0,0 +1,185 @@ +//! Debugging utilities for the parser. 
+
+// === Features ===
+#![feature(exact_size_is_empty)]
+#![feature(let_chains)]
+#![feature(if_let_guard)]
+// === Standard Linter Configuration ===
+#![deny(non_ascii_idents)]
+#![warn(unsafe_code)]
+#![allow(clippy::bool_to_int_with_if)]
+#![allow(clippy::let_and_return)]
+// === Non-Standard Linter Configuration ===
+#![allow(clippy::option_map_unit_fn)]
+#![allow(clippy::precedence)]
+#![allow(dead_code)]
+#![deny(unconditional_recursion)]
+#![warn(missing_copy_implementations)]
+#![warn(missing_debug_implementations)]
+#![warn(missing_docs)]
+#![warn(trivial_casts)]
+#![warn(trivial_numeric_casts)]
+#![warn(unused_import_braces)]
+#![warn(unused_qualifications)]
+
+use enso_metamodel_lexpr::ToSExpr;
+use enso_reflect::Reflect;
+use lexpr::Value;
+use std::collections::HashSet;
+
+
+
+// =====================
+// === S-expressions ===
+// =====================
+
+/// Produce an S-expression representation of the input AST type.
+///
+/// `value` is the reflected value to render. `code` is the source text that the tokens inside
+/// `value` refer to: token text is recovered by `token_code_range`, which is given the address of
+/// `code`'s first byte as its base, and the resulting range is used to slice a copy of `code`.
+///
+/// The raw reflected representation is simplified for readability:
+/// - the hidden span fields listed in `strip_hidden_fields` are removed from the root type;
+/// - `Ident` tokens become symbols holding their source text;
+/// - `Digits`, `NumberBase`, `Operator` and `TextSection` tokens become strings holding their
+///   source text;
+/// - `AutoScope`, `CloseSymbol`, `Newline`, `OpenSymbol`, `TextEnd`, `TextStart` and `Wildcard`
+///   tokens are registered with `ToSExpr::skip`;
+/// - `Line`, `OperatorLine`, `CaseOf`, `Invalid` and `TextEscape` are reduced as described at the
+///   closures below;
+/// - finally, field names are removed by `tuplify`.
+///
+/// # Panics
+///
+/// Panics if any of the referenced types is missing from the reflected type graph, or if a value
+/// handed to one of the simplifying closures does not have the list shape that closure expects.
+pub fn to_s_expr<T>(value: &T, code: &str) -> Value
+where T: serde::Serialize + Reflect {
+    use enso_parser::syntax::token::variant::*;
+    use enso_parser::syntax::tree;
+    let (graph, rust_to_meta) = enso_metamodel::rust::to_meta(value.reflect_type());
+    let ast_ty = rust_to_meta[&value.reflect_type().id];
+    // Record the address of the caller's buffer before taking an owned copy: the copy is what the
+    // closures below slice, while `base` is what token addresses are measured against.
+    let base = code.as_bytes().as_ptr() as usize;
+    let code: Box<str> = Box::from(code);
+    let mut to_s_expr = ToSExpr::new(&graph);
+    to_s_expr.mapper(ast_ty, strip_hidden_fields);
+    let stringish_tokens =
+        vec![Digits::reflect(), NumberBase::reflect(), Operator::reflect(), TextSection::reflect()];
+    let stringish_tokens = stringish_tokens.into_iter().map(|t| rust_to_meta[&t.id]);
+    let skip_tokens = vec![
+        AutoScope::reflect(),
+        CloseSymbol::reflect(),
+        Newline::reflect(),
+        OpenSymbol::reflect(),
+        TextEnd::reflect(),
+        TextStart::reflect(),
+        Wildcard::reflect(),
+    ];
+    skip_tokens.into_iter().for_each(|token| to_s_expr.skip(rust_to_meta[&token.id]));
+    let ident_token = rust_to_meta[&Ident::reflect().id];
+    let text_escape_token = rust_to_meta[&TextEscape::reflect().id];
+    // Map a token's S-expression to the source text it covers.
+    let token_to_str = move |token: Value| {
+        let range = token_code_range(&token, base);
+        code[range].to_owned().into_boxed_str()
+    };
+    // Each mapper needs to own its closure, so `token_to_str` is cloned per registration.
+    let token_to_str_ = token_to_str.clone();
+    to_s_expr.mapper(ident_token, move |token| Value::symbol(token_to_str_(token)));
+    for token in stringish_tokens {
+        let token_to_str_ = token_to_str.clone();
+        to_s_expr.mapper(token, move |token| Value::string(token_to_str_(token)));
+    }
+    // Replace a list by its first element.
+    let into_car = |cons| match cons {
+        Value::Cons(cons) => cons.into_pair().0,
+        _ => panic!(),
+    };
+    // After hiding span fields, drop the first and third entries of the list and keep the second
+    // entry followed by everything after the third.
+    // NOTE(review): the dropped entries are presumably the `case` and `of` keywords -- confirm
+    // against the field order of `tree::CaseOf`.
+    let simplify_case = |list| {
+        let list = strip_hidden_fields(list);
+        let (_, list) = match list {
+            Value::Cons(cons) => cons.into_pair(),
+            _ => panic!(),
+        };
+        let (expression, list) = match list {
+            Value::Cons(cons) => cons.into_pair(),
+            _ => panic!(),
+        };
+        let (_, list) = match list {
+            Value::Cons(cons) => cons.into_pair(),
+            _ => panic!(),
+        };
+        Value::cons(expression, list)
+    };
+    // Replace a list by its last element; panics on an empty list.
+    let simplify_escape = |mut list| {
+        let mut last = None;
+        while let Value::Cons(cons) = list {
+            let (car, cdr) = cons.into_pair();
+            last = Some(car);
+            list = cdr;
+        }
+        last.unwrap()
+    };
+    // Keep only the first element, as a one-element list.
+    let strip_invalid = |list| {
+        let Value::Cons(cons) = list else { unreachable!() };
+        let (car, _) = cons.into_pair();
+        Value::cons(car, Value::Null)
+    };
+    let line = rust_to_meta[&tree::block::Line::reflect().id];
+    let operator_line = rust_to_meta[&tree::block::OperatorLine::reflect().id];
+    let case = rust_to_meta[&tree::CaseOf::reflect().id];
+    let invalid = rust_to_meta[&tree::Invalid::reflect().id];
+    to_s_expr.mapper(line, into_car);
+    to_s_expr.mapper(operator_line, into_car);
+    to_s_expr.mapper(case, simplify_case);
+    to_s_expr.mapper(invalid, strip_invalid);
+    to_s_expr.mapper(text_escape_token, simplify_escape);
+    tuplify(to_s_expr.value(ast_ty, &value))
+}
+
+/// Strip fields that are not useful to a human reader, like source-code offsets.
+fn strip_hidden_fields(tree: Value) -> Value {
+    let hidden_tree_fields = [
+        ":spanLeftOffsetVisible",
+        ":spanLeftOffsetCodeReprBegin",
+        ":spanLeftOffsetCodeReprLen",
+        ":spanLeftOffsetCodeUtf16",
+        ":spanCodeLengthUtf8",
+        ":spanCodeLengthUtf16",
+    ];
+    let hidden_tree_fields: HashSet<_> = hidden_tree_fields.into_iter().collect();
+    // Entries that are not cons cells are always kept. A cons cell is dropped when its head symbol
+    // names a hidden field; a cons cell whose head is not a symbol is treated as a bug (panic).
+    // Also panics if `tree` cannot be converted to a vector of list elements.
+    Value::list(tree.to_vec().unwrap().into_iter().filter(|val| match val {
+        Value::Cons(cons) => match cons.car() {
+            Value::Symbol(symbol) => !hidden_tree_fields.contains(symbol.as_ref()),
+            _ => panic!(),
+        },
+        _ => true,
+    }))
+}
+
+/// Given an S-expression representation of a [`Token`] and the base address for `Code` `Cow`s,
+/// return the range of the input code the token references.
+///
+/// Only 32 bits of the token's start address are read from `:codeReprBegin`; the remaining upper
+/// bits are taken from `base`. The returned range is relative to `base`.
+///
+/// # Panics
+///
+/// Panics if `:codeReprBegin` or `:codeReprLen` is missing from `token` or is not an unsigned
+/// integer.
+fn token_code_range(token: &Value, base: usize) -> std::ops::Range<usize> {
+    let get_u32 =
+        |field| fields(token).find(|(name, _)| *name == field).unwrap().1.as_u64().unwrap() as u32;
+    let begin = get_u32(":codeReprBegin");
+    let len = get_u32(":codeReprLen");
+    // Splice the upper bits of `base` onto the 32-bit value to rebuild a full address.
+    let begin = (begin as u64) | (base as u64 & !(u32::MAX as u64));
+    // A result below `base` means the low 32 bits wrapped around between `base` and the token, so
+    // the address lies in the next 4 GiB window. The parentheses matter: `+` binds tighter than
+    // `<<`, so `begin + 1 << 32` would be evaluated as `(begin + 1) << 32`.
+    let begin = if begin < (base as u64) { begin + (1_u64 << 32) } else { begin };
+    let begin = begin as usize - base;
+    let len = len as usize;
+    begin..(begin + len)
+}
+
+/// Iterate the field `(name, value)` pairs of the S-expression of a struct with named fields.
+///
+/// Elements that are not a cons cell headed by a symbol are skipped. Panics if `value` cannot be
+/// iterated as a list.
+fn fields(value: &'_ Value) -> impl Iterator<Item = (&'_ str, &'_ Value)> {
+    value.list_iter().unwrap().filter_map(|value| match value {
+        Value::Cons(cons) => match cons.car() {
+            Value::Symbol(symbol) => Some((&symbol[..], cons.cdr())),
+            _ => None,
+        },
+        _ => None,
+    })
+}
+
+/// Strip field names from struct representations, so that they are printed more concisely, as if
+/// they were tuple-structs.
+fn tuplify(value: Value) -> Value { + let (car, cdr) = match value { + Value::Cons(cons) => cons.into_pair(), + Value::Vector(mut vector) => { + for value in vector.iter_mut() { + let original = std::mem::replace(value, Value::Nil); + *value = tuplify(original); + } + return Value::Vector(vector); + } + value => return value, + }; + if let Value::Symbol(symbol) = &car { + if let Some(':') = symbol.chars().next() { + return tuplify(cdr); + } + } + let car = tuplify(car); + let cdr = tuplify(cdr); + Value::Cons(lexpr::Cons::new(car, cdr)) +} diff --git a/lib/rust/parser/debug/src/main.rs b/lib/rust/parser/debug/src/main.rs new file mode 100644 index 00000000000..8ad50177d35 --- /dev/null +++ b/lib/rust/parser/debug/src/main.rs @@ -0,0 +1,48 @@ +//! Show debug-representation of AST of input sources. + +// === Features === +#![feature(exact_size_is_empty)] +#![feature(let_chains)] +#![feature(if_let_guard)] +// === Standard Linter Configuration === +#![deny(non_ascii_idents)] +#![warn(unsafe_code)] +#![allow(clippy::bool_to_int_with_if)] +#![allow(clippy::let_and_return)] +// === Non-Standard Linter Configuration === +#![allow(clippy::option_map_unit_fn)] +#![allow(clippy::precedence)] +#![allow(dead_code)] +#![deny(unconditional_recursion)] +#![warn(missing_copy_implementations)] +#![warn(missing_debug_implementations)] +#![warn(missing_docs)] +#![warn(trivial_casts)] +#![warn(trivial_numeric_casts)] +#![warn(unused_import_braces)] +#![warn(unused_qualifications)] + + + +// =================== +// === Debug-parse === +// =================== + +fn main() { + use std::io::Read; + let mut input = String::new(); + std::io::stdin().read_to_string(&mut input).unwrap(); + check_file("", input.as_str()); +} + +fn check_file(path: &str, mut code: &str) { + if let Some((_meta, code_)) = enso_parser::metadata::parse(code) { + code = code_; + } + let ast = enso_parser::Parser::new().run(code); + for (parsed, original) in ast.code().lines().zip(code.lines()) { + 
assert_eq!(parsed, original, "Bug: dropped tokens, while parsing: {}", path); + } + let s_expr = enso_parser_debug::to_s_expr(&ast, code); + println!("{}", s_expr); +} diff --git a/lib/rust/parser/tests/metadata/mod.rs b/lib/rust/parser/debug/tests/metadata/mod.rs similarity index 100% rename from lib/rust/parser/tests/metadata/mod.rs rename to lib/rust/parser/debug/tests/metadata/mod.rs diff --git a/lib/rust/parser/tests/parse.rs b/lib/rust/parser/debug/tests/parse.rs similarity index 75% rename from lib/rust/parser/tests/parse.rs rename to lib/rust/parser/debug/tests/parse.rs index 0737ca2984e..63c967b80a6 100644 --- a/lib/rust/parser/tests/parse.rs +++ b/lib/rust/parser/debug/tests/parse.rs @@ -19,8 +19,7 @@ mod metadata; -use lexpr::sexp; -use lexpr::Value; +use enso_parser_debug::to_s_expr; @@ -31,7 +30,7 @@ use lexpr::Value; /// Parses input as a sequence of S-expressions, and wraps it in a `BodyBlock`. macro_rules! block { ( $($statements:tt)* ) => { - sexp![(BodyBlock #( $( $statements )* ) )] + lexpr::sexp![(BodyBlock #( $( $statements )* ) )] } } @@ -77,6 +76,31 @@ fn comments() { test("# a b c", block![()()]); } +#[test] +fn inline_if() { + #[rustfmt::skip] + test("if True then True else False", block![ + (MultiSegmentApp #(((Ident if) (Ident True)) + ((Ident then) (Ident True)) + ((Ident else) (Ident False))))]); +} + +#[test] +fn then_block() { + #[rustfmt::skip] + test("if True then\n True", block![ + (MultiSegmentApp #(((Ident if) (Ident True)) ((Ident then) (BodyBlock #((Ident True))))))]); +} + +#[test] +fn else_block() { + #[rustfmt::skip] + test("if True then True else\n False", block![ + (MultiSegmentApp #(((Ident if) (Ident True)) + ((Ident then) (Ident True)) + ((Ident else) (BodyBlock #((Ident False))))))]); +} + // === Type Definitions === @@ -122,8 +146,9 @@ fn type_methods() { #[rustfmt::skip] let expected = block![ (TypeDef type Geo #() #() - #((Function number #() "=" (BodyBlock #((Ident x)))) - (Function area #((() (Ident self) () ())) 
"=" (OprApp (Ident x) (Ok "+") (Ident x))))) + #((Function (Ident number) #() "=" (BodyBlock #((Ident x)))) + (Function (Ident area) #((() (Ident self) () ())) "=" + (OprApp (Ident x) (Ok "+") (Ident x))))) ]; test(&code.join("\n"), expected); } @@ -139,9 +164,9 @@ fn type_operator_methods() { #[rustfmt::skip] let expected = block![ (TypeDef type Foo #() #() - #((TypeSignature #"+" ":" + #((TypeSignature (Ident #"+") ":" (OprApp (Ident Foo) (Ok "->") (OprApp (Ident Foo) (Ok "->") (Ident Foo)))) - (Function #"+" #((() (Ident self) () ()) (() (Ident b) () ())) + (Function (Ident #"+") #((() (Ident self) () ()) (() (Ident b) () ())) "=" (Ident b))))]; test(&code.join("\n"), expected); } @@ -169,8 +194,8 @@ fn type_def_full() { ((Rectangle #((() (Ident width) () ()) (() (Ident height) () ())) #())) ((Point #() #())) (())) - #((Function number #() "=" (BodyBlock #((Ident x)))) - (Function area #((() (Ident self) () ())) "=" (OprApp (Ident x) (Ok "+") (Ident x))))) + #((Function (Ident number) #() "=" (BodyBlock #((Ident x)))) + (Function (Ident area) #((() (Ident self) () ())) "=" (OprApp (Ident x) (Ok "+") (Ident x))))) ]; test(&code.join("\n"), expected); } @@ -219,37 +244,41 @@ fn assignment_simple() { #[test] fn function_inline_simple_args() { - test(" foo a = x", block![(Function foo #((() (Ident a) () ())) "=" (Ident x))]); + test(" foo a = x", block![(Function (Ident foo) #((() (Ident a) () ())) "=" (Ident x))]); #[rustfmt::skip] test("foo a b = x", - block![(Function foo #((() (Ident a) () ()) (() (Ident b) () ())) "=" (Ident x))]); + block![(Function (Ident foo) #((() (Ident a) () ()) (() (Ident b) () ())) "=" (Ident x))]); #[rustfmt::skip] test( "foo a b c = x", block![ - (Function foo + (Function (Ident foo) #((() (Ident a) () ()) (() (Ident b) () ()) (() (Ident c) () ())) "=" (Ident x))], ); - test(" foo _ = x", block![(Function foo #((() (Wildcard -1) () ())) "=" (Ident x))]); + test(" foo _ = x", block![(Function (Ident foo) #((() (Wildcard -1) () ())) "=" 
(Ident x))]); } #[test] fn function_block_noargs() { - test("foo =", block![(Function foo #() "=" ())]); + test("foo =", block![(Function (Ident foo) #() "=" ())]); } #[test] fn function_block_simple_args() { - test("foo a =", block![(Function foo #((() (Ident a) () ())) "=" ())]); - test("foo a b =", block![(Function foo #((() (Ident a) () ()) - (() (Ident b) () ())) "=" ())]); + test("foo a =", block![(Function (Ident foo) #((() (Ident a) () ())) "=" ())]); #[rustfmt::skip] - test( - "foo a b c =", block![ - (Function foo - #((() (Ident a) () ()) (() (Ident b) () ()) (() (Ident c) () ())) - "=" ())], - ); + test("foo a b =", block![(Function (Ident foo) #((() (Ident a) () ()) + (() (Ident b) () ())) "=" ())]); + #[rustfmt::skip] + test("foo a b c =", block![ + (Function (Ident foo) #((() (Ident a) () ()) (() (Ident b) () ()) (() (Ident c) () ())) "=" + ())]); +} + +#[test] +fn function_qualified() { + test("Id.id x = x", block![ + (Function (OprApp (Ident Id) (Ok ".") (Ident id)) #((() (Ident x) () ())) "=" (Ident x))]); } @@ -277,17 +306,17 @@ fn default_app() { fn default_arguments() { #[rustfmt::skip] let cases = [ - ("f x=1 = x", - block![(Function f #((() (Ident x) () ("=" (Number () "1" ())))) "=" (Ident x))]), - ("f (x = 1) = x", - block![(Function f #((() (Ident x) () ("=" (Number () "1" ())))) "=" (Ident x))]), + ("f x=1 = x", block![ + (Function (Ident f) #((() (Ident x) () ("=" (Number () "1" ())))) "=" (Ident x))]), + ("f (x = 1) = x", block![ + (Function (Ident f) #((() (Ident x) () ("=" (Number () "1" ())))) "=" (Ident x))]), // Pattern in LHS: ("f ~x=1 = x", block![ - (Function f + (Function (Ident f) #(("~" (Ident x) () ("=" (Number () "1" ())))) "=" (Ident x))]), ("f (~x = 1) = x", block![ - (Function f + (Function (Ident f) #(("~" (Ident x) () ("=" (Number () "1" ())))) "=" (Ident x))]), ]; @@ -300,15 +329,15 @@ fn default_arguments() { #[test] fn code_block_body() { let code = ["main =", " x"]; - test(&code.join("\n"), block![(Function main #() 
"=" (BodyBlock #((Ident x))))]); + test(&code.join("\n"), block![(Function (Ident main) #() "=" (BodyBlock #((Ident x))))]); let code = ["main =", " ", " x"]; - test(&code.join("\n"), block![(Function main #() "=" (BodyBlock #(() (Ident x))))]); + test(&code.join("\n"), block![(Function (Ident main) #() "=" (BodyBlock #(() (Ident x))))]); let code = ["main =", " ", " x"]; - test(&code.join("\n"), block![(Function main #() "=" (BodyBlock #(() (Ident x))))]); + test(&code.join("\n"), block![(Function (Ident main) #() "=" (BodyBlock #(() (Ident x))))]); let code = ["main =", " ", " x"]; - test(&code.join("\n"), block![(Function main #() "=" (BodyBlock #(() (Ident x))))]); + test(&code.join("\n"), block![(Function (Ident main) #() "=" (BodyBlock #(() (Ident x))))]); let code = ["main =", "", " x"]; - test(&code.join("\n"), block![(Function main #() "=" (BodyBlock #(() (Ident x))))]); + test(&code.join("\n"), block![(Function (Ident main) #() "=" (BodyBlock #(() (Ident x))))]); #[rustfmt::skip] let code = [ @@ -318,7 +347,7 @@ fn code_block_body() { ]; #[rustfmt::skip] let expect = block![ - (Function main #() "=" (BodyBlock #( + (Function (Ident main) #() "=" (BodyBlock #( (OprSectionBoundary 1 (OprApp () (Ok "+") (Ident x))) (App (Ident print) (Ident x))))) ]; @@ -386,18 +415,18 @@ fn code_block_empty() { // No input would parse as an empty `ArgumentBlock` or `OperatorBlock`, because those types are // distinguished from a body continuation by the presence of non-empty indented lines. let code = ["foo =", "bar"]; - test(&code.join("\n"), block![(Function foo #() "=" ()) (Ident bar)]); + test(&code.join("\n"), block![(Function (Ident foo) #() "=" ()) (Ident bar)]); // This parses similarly to above; a line with no non-whitespace content does not create a code // block. 
let code = ["foo =", " ", "bar"]; - test(&code.join("\n"), block![(Function foo #() "=" ()) () (Ident bar)]); + test(&code.join("\n"), block![(Function (Ident foo) #() "=" ()) () (Ident bar)]); } #[test] fn code_block_bad_indents1() { let code = ["main =", " foo", " bar", " baz"]; let expected = block![ - (Function main #() "=" (BodyBlock #((Ident foo) (Ident bar) (Ident baz)))) + (Function (Ident main) #() "=" (BodyBlock #((Ident foo) (Ident bar) (Ident baz)))) ]; test(&code.join("\n"), expected); } @@ -406,7 +435,7 @@ fn code_block_bad_indents1() { fn code_block_bad_indents2() { let code = ["main =", " foo", " bar", "baz"]; let expected = block![ - (Function main #() "=" (BodyBlock #((Ident foo) (Ident bar)))) + (Function (Ident main) #() "=" (BodyBlock #((Ident foo) (Ident bar)))) (Ident baz) ]; test(&code.join("\n"), expected); @@ -416,7 +445,7 @@ fn code_block_bad_indents2() { fn code_block_with_following_statement() { let code = ["main =", " foo", "bar"]; let expected = block![ - (Function main #() "=" (BodyBlock #((Ident foo)))) + (Function (Ident main) #() "=" (BodyBlock #((Ident foo)))) (Ident bar) ]; test(&code.join("\n"), expected); @@ -546,7 +575,7 @@ fn template_functions() { fn unevaluated_argument() { let code = ["main ~foo = x"]; let expected = block![ - (Function main #(("~" (Ident foo) () ())) "=" (Ident x)) + (Function (Ident main) #(("~" (Ident foo) () ())) "=" (Ident x)) ]; test(&code.join("\n"), expected); } @@ -555,7 +584,7 @@ fn unevaluated_argument() { fn unary_operator_missing_operand() { let code = ["main ~ = x"]; let expected = block![ - (Function main #((() (UnaryOprApp "~" ()) () ())) "=" (Ident x)) + (Function (Ident main) #((() (UnaryOprApp "~" ()) () ())) "=" (Ident x)) ]; test(&code.join("\n"), expected); } @@ -727,8 +756,8 @@ fn metadata_parsing() { #[test] fn type_signatures() { let cases = [ - ("val : Bool", block![(TypeSignature val ":" (Ident Bool))]), - ("val : List Int", block![(TypeSignature val ":" (App (Ident List) (Ident 
Int)))]), + ("val : Bool", block![(TypeSignature (Ident val) ":" (Ident Bool))]), + ("val : List Int", block![(TypeSignature (Ident val) ":" (App (Ident List) (Ident Int)))]), ]; cases.into_iter().for_each(|(code, expected)| test(code, expected)); } @@ -746,7 +775,8 @@ fn type_annotations() { ("(x : My_Type _)", block![ (Group (TypeAnnotated (Ident x) ":" (App (Ident My_Type) (Wildcard -1))))]), ("x : List Int -> Int", block![ - (TypeSignature x ":" (OprApp (App (Ident List) (Ident Int)) (Ok "->") (Ident Int)))]), + (TypeSignature (Ident x) ":" + (OprApp (App (Ident List) (Ident Int)) (Ok "->") (Ident Int)))]), ]; cases.into_iter().for_each(|(code, expected)| test(code, expected)); } @@ -1012,7 +1042,7 @@ fn trailing_whitespace() { let cases = [ ("a ", block![(Ident a) ()]), ("a \n", block![(Ident a) ()]), - ("a = \n x", block![(Function a #() "=" (BodyBlock #((Ident x))))]), + ("a = \n x", block![(Function (Ident a) #() "=" (BodyBlock #((Ident x))))]), ]; cases.into_iter().for_each(|(code, expected)| test(code, expected)); } @@ -1059,10 +1089,6 @@ fn multiline_annotations() { // === Test Support === // ==================== -use enso_metamodel_lexpr::ToSExpr; -use enso_reflect::Reflect; -use std::collections::HashSet; - /// Given a block of input Enso code, test that: /// - The given code parses to the AST represented by the given S-expression. /// - The AST pretty-prints back to the original code. @@ -1075,7 +1101,7 @@ use std::collections::HashSet; /// - Most token types are represented as their contents, rather than as a token struct. For /// example, a `token::Number` may be represented like: `sexp![10]`, and a `token::Ident` may look /// like `sexp![foo]`. 
-fn test(code: &str, expect: Value) { +fn test(code: &str, expect: lexpr::Value) { let ast = enso_parser::Parser::new().run(code); let ast_s_expr = to_s_expr(&ast, code); assert_eq!(ast_s_expr.to_string(), expect.to_string(), "{:?}", &ast); @@ -1084,171 +1110,3 @@ fn test(code: &str, expect: Value) { let deserialized = enso_parser::serialization::deserialize_tree(&serialized); deserialized.unwrap(); } - - - -// ===================== -// === S-expressions === -// ===================== - -/// Produce an S-expression representation of the input AST type. -pub fn to_s_expr(value: &T, code: &str) -> Value -where T: serde::Serialize + Reflect { - use enso_parser::syntax::token; - use enso_parser::syntax::tree; - let (graph, rust_to_meta) = enso_metamodel::rust::to_meta(value.reflect_type()); - let ast_ty = rust_to_meta[&value.reflect_type().id]; - let base = code.as_bytes().as_ptr() as usize; - let code: Box = Box::from(code); - let mut to_s_expr = ToSExpr::new(&graph); - to_s_expr.mapper(ast_ty, strip_hidden_fields); - let ident_token = rust_to_meta[&token::variant::Ident::reflect().id]; - let operator_token = rust_to_meta[&token::variant::Operator::reflect().id]; - let open_symbol_token = rust_to_meta[&token::variant::OpenSymbol::reflect().id]; - let close_symbol_token = rust_to_meta[&token::variant::CloseSymbol::reflect().id]; - let number_token = rust_to_meta[&token::variant::Digits::reflect().id]; - let number_base_token = rust_to_meta[&token::variant::NumberBase::reflect().id]; - let newline_token = rust_to_meta[&token::variant::Newline::reflect().id]; - let text_start_token = rust_to_meta[&token::variant::TextStart::reflect().id]; - let text_end_token = rust_to_meta[&token::variant::TextEnd::reflect().id]; - let text_section_token = rust_to_meta[&token::variant::TextSection::reflect().id]; - let text_escape_token = rust_to_meta[&token::variant::TextEscape::reflect().id]; - let wildcard_token = rust_to_meta[&token::variant::Wildcard::reflect().id]; - let 
autoscope_token = rust_to_meta[&token::variant::AutoScope::reflect().id]; - // TODO: Implement `#[reflect(flag = "enso::concrete")]`, which just attaches user data to the - // type info; then filter by flag here instead of hard-coding these simplifications. - let token_to_str = move |token: Value| { - let range = token_code_range(&token, base); - code[range].to_owned().into_boxed_str() - }; - let token_to_str_ = token_to_str.clone(); - to_s_expr.mapper(ident_token, move |token| Value::symbol(token_to_str_(token))); - let token_to_str_ = token_to_str.clone(); - to_s_expr.mapper(operator_token, move |token| Value::string(token_to_str_(token))); - let token_to_str_ = token_to_str.clone(); - to_s_expr.mapper(text_section_token, move |token| Value::string(token_to_str_(token))); - let token_to_str_ = token_to_str.clone(); - to_s_expr.mapper(number_token, move |token| Value::string(token_to_str_(token))); - let token_to_str_ = token_to_str; - to_s_expr.mapper(number_base_token, move |token| Value::string(token_to_str_(token))); - let into_car = |cons| match cons { - Value::Cons(cons) => cons.into_pair().0, - _ => panic!(), - }; - let simplify_case = |list| { - let list = strip_hidden_fields(list); - let (_, list) = match list { - Value::Cons(cons) => cons.into_pair(), - _ => panic!(), - }; - let (expression, list) = match list { - Value::Cons(cons) => cons.into_pair(), - _ => panic!(), - }; - let (_, list) = match list { - Value::Cons(cons) => cons.into_pair(), - _ => panic!(), - }; - Value::cons(expression, list) - }; - let simplify_escape = |mut list| { - let mut last = None; - while let Value::Cons(cons) = list { - let (car, cdr) = cons.into_pair(); - last = Some(car); - list = cdr; - } - last.unwrap() - }; - let strip_invalid = |list| { - let Value::Cons(cons) = list else { unreachable!() }; - let (car, _) = cons.into_pair(); - Value::cons(car, Value::Null) - }; - let line = rust_to_meta[&tree::block::Line::reflect().id]; - let operator_line = 
rust_to_meta[&tree::block::OperatorLine::reflect().id]; - let case = rust_to_meta[&tree::CaseOf::reflect().id]; - let invalid = rust_to_meta[&tree::Invalid::reflect().id]; - to_s_expr.mapper(line, into_car); - to_s_expr.mapper(operator_line, into_car); - to_s_expr.mapper(case, simplify_case); - to_s_expr.mapper(invalid, strip_invalid); - to_s_expr.mapper(text_escape_token, simplify_escape); - to_s_expr.skip(newline_token); - to_s_expr.skip(wildcard_token); - to_s_expr.skip(autoscope_token); - to_s_expr.skip(text_start_token); - to_s_expr.skip(text_end_token); - to_s_expr.skip(open_symbol_token); - to_s_expr.skip(close_symbol_token); - tuplify(to_s_expr.value(ast_ty, &value)) -} - -/// Strip certain fields that should be excluded from output. -fn strip_hidden_fields(tree: Value) -> Value { - let hidden_tree_fields = [ - ":spanLeftOffsetVisible", - ":spanLeftOffsetCodeReprBegin", - ":spanLeftOffsetCodeReprLen", - ":spanLeftOffsetCodeUtf16", - ":spanCodeLengthUtf8", - ":spanCodeLengthUtf16", - ]; - let hidden_tree_fields: HashSet<_> = hidden_tree_fields.into_iter().collect(); - Value::list(tree.to_vec().unwrap().into_iter().filter(|val| match val { - Value::Cons(cons) => match cons.car() { - Value::Symbol(symbol) => !hidden_tree_fields.contains(symbol.as_ref()), - _ => panic!(), - }, - _ => true, - })) -} - -/// Given an S-expression representation of a [`Token`] and the base address for `Code` `Cow`s, -/// return the range of the input code the token references. 
-fn token_code_range(token: &Value, base: usize) -> std::ops::Range { - let get_u32 = - |field| fields(token).find(|(name, _)| *name == field).unwrap().1.as_u64().unwrap() as u32; - let begin = get_u32(":codeReprBegin"); - let len = get_u32(":codeReprLen"); - let begin = (begin as u64) | (base as u64 & !0xFFFF_FFFF); - let begin = if begin < (base as u64) { begin + 0x1_0000_0000 } else { begin }; - let begin = begin as usize - base; - let len = len as usize; - begin..(begin + len) -} - -/// Iterate the field `(name, value)` pairs of the S-expression of a struct with named fields. -fn fields(value: &'_ Value) -> impl Iterator { - value.list_iter().unwrap().filter_map(|value| match value { - Value::Cons(cons) => match cons.car() { - Value::Symbol(symbol) => Some((&symbol[..], cons.cdr())), - _ => None, - }, - _ => None, - }) -} - -/// Strip field names from struct representations, so that they are printed more concisely, as if -/// they were tuple-structs. -fn tuplify(value: Value) -> Value { - let (car, cdr) = match value { - Value::Cons(cons) => cons.into_pair(), - Value::Vector(mut vector) => { - for value in vector.iter_mut() { - let original = std::mem::replace(value, Value::Nil); - *value = tuplify(original); - } - return Value::Vector(vector); - } - value => return value, - }; - if let Value::Symbol(symbol) = &car { - if let Some(':') = symbol.chars().next() { - return tuplify(cdr); - } - } - let car = tuplify(car); - let cdr = tuplify(cdr); - Value::Cons(lexpr::Cons::new(car, cdr)) -} diff --git a/lib/rust/parser/src/lexer.rs b/lib/rust/parser/src/lexer.rs index dbefaf2a1c7..72b49b02a84 100644 --- a/lib/rust/parser/src/lexer.rs +++ b/lib/rust/parser/src/lexer.rs @@ -672,7 +672,8 @@ fn analyze_operator(token: &str) -> token::OperatorProperties { .with_binary_infix_precedence(1) .as_compile_time_operation() .as_sequence(), - "." => return operator.with_binary_infix_precedence(21).with_decimal_interpretation(), + "." 
=> + return operator.with_binary_infix_precedence(21).with_decimal_interpretation().as_dot(), _ => (), } // "The precedence of all other operators is determined by the operator's Precedence Character:" diff --git a/lib/rust/parser/src/lib.rs b/lib/rust/parser/src/lib.rs index 60e0cbf55b3..89a25eafae8 100644 --- a/lib/rust/parser/src/lib.rs +++ b/lib/rust/parser/src/lib.rs @@ -210,17 +210,12 @@ fn expression_to_statement(mut tree: syntax::Tree<'_>) -> syntax::Tree<'_> { use syntax::tree::*; let mut left_offset = source::span::Offset::default(); if let Tree { variant: box Variant::TypeAnnotated(annotated), span } = tree { - if let Tree { variant: box Variant::Ident(ident), span: _ } = annotated.expression { - let operator = annotated.operator; - let type_ = annotated.type_; - let variable = ident.token; - let variant = TypeSignature { variable, operator, type_ }; - let variant = Box::new(Variant::TypeSignature(variant)); - return Tree { variant, span }; - } - let err = Error::new("Expected identifier in left-hand operand of type signature."); - let variant = Box::new(Variant::TypeAnnotated(annotated)); - return Tree::invalid(err, Tree { variant, span }); + let operator = annotated.operator; + let type_ = annotated.type_; + let variable = annotated.expression; + let mut tree = Tree::type_signature(variable, operator, type_); + tree.span.left_offset += span.left_offset; + return tree; } let tree_ = &mut tree; let opr_app = match tree_ { @@ -231,7 +226,7 @@ fn expression_to_statement(mut tree: syntax::Tree<'_>) -> syntax::Tree<'_> { _ => return tree, }; if let OprApp { lhs: Some(lhs), opr: Ok(opr), rhs } = opr_app && opr.properties.is_assignment() { - let (mut leftmost, args) = collect_arguments(lhs.clone()); + let (leftmost, args) = collect_arguments(lhs.clone()); if let Some(rhs) = rhs { if let Variant::Ident(ident) = &*leftmost.variant && ident.token.variant.is_type { // If the LHS is a type, this is a (destructuring) assignment. 
@@ -248,10 +243,10 @@ fn expression_to_statement(mut tree: syntax::Tree<'_>) -> syntax::Tree<'_> { return result; } } - if let Variant::Ident(Ident { token }) = &mut *leftmost.variant { + if is_qualified_name(&leftmost) { // If this is not a variable assignment, and the leftmost leaf of the `App` tree is - // an identifier, this is a function definition. - let mut result = Tree::function(mem::take(token), args, mem::take(opr), mem::take(rhs)); + // a qualified name, this is a function definition. + let mut result = Tree::function(leftmost, args, mem::take(opr), mem::take(rhs)); result.span.left_offset += left_offset; return result; } @@ -259,6 +254,17 @@ fn expression_to_statement(mut tree: syntax::Tree<'_>) -> syntax::Tree<'_> { tree } +fn is_qualified_name(tree: &syntax::Tree) -> bool { + use syntax::tree::*; + match &*tree.variant { + Variant::Ident(_) => true, + Variant::OprApp(OprApp { lhs: Some(lhs), opr: Ok(opr), rhs: Some(rhs) }) + if matches!(&*rhs.variant, Variant::Ident(_)) && opr.properties.is_dot() => + is_qualified_name(lhs), + _ => false, + } +} + fn expression_to_type(mut input: syntax::Tree<'_>) -> syntax::Tree<'_> { use syntax::tree::*; if let Variant::Wildcard(wildcard) = &mut *input.variant { diff --git a/lib/rust/parser/src/macros/built_in.rs b/lib/rust/parser/src/macros/built_in.rs index a30976ceef6..302d52bd7ee 100644 --- a/lib/rust/parser/src/macros/built_in.rs +++ b/lib/rust/parser/src/macros/built_in.rs @@ -131,12 +131,38 @@ fn export_body(segments: NonEmptyVec) -> syntax::Tree { /// If-then-else macro definition. pub fn if_then_else<'s>() -> Definition<'s> { - crate::macro_definition! {("if", everything(), "then", everything(), "else", everything())} + crate::macro_definition! { + ("if", everything(), "then", everything(), "else", everything()) if_body} } /// If-then macro definition. pub fn if_then<'s>() -> Definition<'s> { - crate::macro_definition! {("if", everything(), "then", everything())} + crate::macro_definition! 
{("if", everything(), "then", everything()) if_body} +} + +fn if_body(segments: NonEmptyVec) -> syntax::Tree { + use syntax::tree::*; + let segments = segments.mapped(|s| { + let header = s.header; + let body = s.result.tokens(); + let body = match operator::resolve_operator_precedence_if_non_empty(body) { + Some(Tree { + variant: + box Variant::ArgumentBlockApplication(ArgumentBlockApplication { + lhs: None, + arguments, + }), + span, + }) => { + let mut block = block::body_from_lines(arguments); + block.span.left_offset += span.left_offset; + Some(block) + } + e => e, + }; + MultiSegmentAppSegment { header, body } + }); + Tree::multi_segment_app(segments) } /// Group macro definition. @@ -339,12 +365,12 @@ fn case_body(segments: NonEmptyVec) -> syntax::Tree { match body.variant { box Variant::ArgumentBlockApplication(ArgumentBlockApplication { lhs, arguments }) => { if let Some(lhs) = lhs { - case_lines.push(CaseLine { case: Some(lhs.into()), ..default() }); + case_lines.push(CaseLine { case: Some(parse_case(lhs)), ..default() }); } case_lines.extend(arguments.into_iter().map( |block::Line { newline, expression }| CaseLine { newline: newline.into(), - case: expression.map(Case::from), + case: expression.map(parse_case), }, )); if let Some(left_offset) = @@ -353,12 +379,25 @@ fn case_body(segments: NonEmptyVec) -> syntax::Tree { *left_offset += body.span.left_offset; } } - _ => case_lines.push(CaseLine { case: Some(body.into()), ..default() }), + _ => case_lines.push(CaseLine { case: Some(parse_case(body)), ..default() }), } } Tree::case_of(case_, expression, of_, case_lines) } +fn parse_case(tree: syntax::tree::Tree) -> syntax::tree::Case { + use syntax::tree::*; + match tree.variant { + box Variant::OprApp(OprApp { lhs, opr: Ok(opr), rhs }) if opr.properties.is_arrow() => { + let pattern = lhs.map(crate::expression_to_pattern); + let mut case = Case { pattern, arrow: opr.into(), expression: rhs }; + *case.left_offset_mut().unwrap() += tree.span.left_offset; + 
case + } + _ => Case { expression: tree.into(), ..default() }, + } +} + /// Array literal. pub fn array<'s>() -> Definition<'s> { crate::macro_definition! {("[", everything(), "]", nothing()) array_body} diff --git a/lib/rust/parser/src/main.rs b/lib/rust/parser/src/main.rs index 95d905e1bcc..8ee0b7ba2f7 100644 --- a/lib/rust/parser/src/main.rs +++ b/lib/rust/parser/src/main.rs @@ -3,10 +3,12 @@ #![recursion_limit = "256"] // === Features === #![allow(incomplete_features)] +#![feature(assert_matches)] #![feature(allocator_api)] #![feature(exact_size_is_empty)] #![feature(test)] #![feature(specialization)] +#![feature(let_chains)] #![feature(if_let_guard)] // === Standard Linter Configuration === #![deny(non_ascii_idents)] @@ -60,21 +62,25 @@ fn check_file(path: &str, mut code: &str) { }); for (error, span) in &*errors.borrow() { let whitespace = &span.left_offset.code.repr; - let start = whitespace.as_ptr() as usize + whitespace.len() - code.as_ptr() as usize; - let mut line = 1; - let mut char = 0; - for (i, c) in code.char_indices() { - if i >= start { - break; + if matches!(whitespace, Cow::Borrowed(_)) { + let start = whitespace.as_ptr() as usize + whitespace.len() - code.as_ptr() as usize; + let mut line = 1; + let mut char = 0; + for (i, c) in code.char_indices() { + if i >= start { + break; + } + if c == '\n' { + line += 1; + char = 0; + } else { + char += 1; + } } - if c == '\n' { - line += 1; - char = 0; - } else { - char += 1; - } - } - eprintln!("{path}:{line}:{char}: {}", &error.error.message); + eprintln!("{path}:{line}:{char}: {}", &error.error.message); + } else { + eprintln!("{path}:?:?: {}", &error.error.message); + }; } for (parsed, original) in ast.code().lines().zip(code.lines()) { assert_eq!(parsed, original, "Bug: dropped tokens, while parsing: {}", path); diff --git a/lib/rust/parser/src/syntax/token.rs b/lib/rust/parser/src/syntax/token.rs index 181f64fde06..a9385ba21dd 100644 --- a/lib/rust/parser/src/syntax/token.rs +++ 
b/lib/rust/parser/src/syntax/token.rs @@ -338,6 +338,7 @@ pub struct OperatorProperties { is_sequence: bool, is_suspension: bool, is_annotation: bool, + is_dot: bool, } impl OperatorProperties { @@ -403,6 +404,11 @@ impl OperatorProperties { Self { is_suspension: true, ..self } } + /// Return a copy of this operator, modified to be flagged as the dot operator. + pub fn as_dot(self) -> Self { + Self { is_dot: true, ..self } + } + /// Return a copy of this operator, modified to allow an interpretion as a decmial point. pub fn with_decimal_interpretation(self) -> Self { Self { can_be_decimal_operator: true, ..self } @@ -458,6 +464,11 @@ impl OperatorProperties { self.is_annotation } + /// Return whether this operator is the dot operator. + pub fn is_dot(&self) -> bool { + self.is_dot + } + /// Return this operator's associativity. pub fn associativity(&self) -> Associativity { match self.is_right_associative { diff --git a/lib/rust/parser/src/syntax/tree.rs b/lib/rust/parser/src/syntax/tree.rs index 1c3fb8a40a7..b193c987bc6 100644 --- a/lib/rust/parser/src/syntax/tree.rs +++ b/lib/rust/parser/src/syntax/tree.rs @@ -4,7 +4,6 @@ use crate::prelude::*; use crate::source::*; use crate::syntax::*; -use crate::expression_to_pattern; use crate::span_builder; use enso_parser_syntax_tree_visitor::Visitor; @@ -218,8 +217,8 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args) }, /// A function definition, like `add x y = x + y`. Function { - /// The identifier to which the function should be bound. - pub name: token::Ident<'s>, + /// The (qualified) name to which the function should be bound. + pub name: Tree<'s>, /// The argument patterns. pub args: Vec>, /// The `=` token. @@ -255,13 +254,13 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args) }, /// Statement declaring the type of a variable. TypeSignature { - /// The variable whose type is being declared. 
- pub variable: token::Ident<'s>, + /// (Qualified) name of the item whose type is being declared. + pub variable: Tree<'s>, /// The `:` token. pub operator: token::Operator<'s>, /// The variable's type. #[reflect(rename = "type")] - pub type_: Tree<'s>, + pub type_: Tree<'s>, }, /// An expression with explicit type information attached. TypeAnnotated { @@ -626,20 +625,6 @@ impl<'s> span::Builder<'s> for Case<'s> { } } -impl<'s> From> for Case<'s> { - fn from(tree: Tree<'s>) -> Self { - match tree.variant { - box Variant::OprApp(OprApp { lhs, opr: Ok(opr), rhs }) if opr.properties.is_arrow() => { - let pattern = lhs.map(expression_to_pattern); - let mut case = Case { pattern, arrow: opr.into(), expression: rhs }; - *case.left_offset_mut().unwrap() += tree.span.left_offset; - case - } - _ => Case { expression: tree.into(), ..default() }, - } - } -} - // === OprApp === @@ -976,12 +961,10 @@ pub fn recurse_left_mut_while<'s>( | Variant::UnaryOprApp(_) | Variant::MultiSegmentApp(_) | Variant::TypeDef(_) - | Variant::Function(_) | Variant::Import(_) | Variant::Export(_) | Variant::Group(_) | Variant::CaseOf(_) - | Variant::TypeSignature(_) | Variant::Lambda(_) | Variant::Array(_) | Variant::Annotated(_) @@ -1002,6 +985,8 @@ pub fn recurse_left_mut_while<'s>( | Variant::TemplateFunction(TemplateFunction { ast: lhs, .. }) | Variant::DefaultApp(DefaultApp { func: lhs, .. }) | Variant::Assignment(Assignment { pattern: lhs, .. }) + | Variant::TypeSignature(TypeSignature { variable: lhs, .. }) + | Variant::Function(Function { name: lhs, .. }) | Variant::TypeAnnotated(TypeAnnotated { expression: lhs, .. }) => lhs, } }