diff --git a/CHANGELOG.md b/CHANGELOG.md index e1756056d3..ff95b6fbff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,11 +18,13 @@ - [Renaming launcher executable to ensoup][10535] - [Space-precedence does not apply to value-level operators][10597] - [Must specify `--repl` to enable debug server][10709] +- [Improved parser error reporting and performance][10734] [10468]: https://github.com/enso-org/enso/pull/10468 [10535]: https://github.com/enso-org/enso/pull/10535 [10597]: https://github.com/enso-org/enso/pull/10597 [10709]: https://github.com/enso-org/enso/pull/10709 +[10734]: https://github.com/enso-org/enso/pull/10734 #### Enso IDE diff --git a/Cargo.lock b/Cargo.lock index be38ece552..c188bbcd86 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -27,6 +27,18 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "afl" +version = "0.15.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c21e10b6947189c5ff61343b5354e9ad1c1722bd47b69cd0a6b49e5fa7f7ecf6" +dependencies = [ + "home", + "libc", + "rustc_version", + "xdg", +] + [[package]] name = "ahash" version = "0.7.8" @@ -1619,6 +1631,7 @@ dependencies = [ name = "enso-parser-debug" version = "0.1.0" dependencies = [ + "clap 4.5.4", "enso-metamodel", "enso-metamodel-lexpr", "enso-parser", @@ -1628,6 +1641,14 @@ dependencies = [ "serde_json", ] +[[package]] +name = "enso-parser-fuzz" +version = "0.1.0" +dependencies = [ + "afl", + "enso-parser", +] + [[package]] name = "enso-parser-generate-java" version = "0.1.0" @@ -5480,6 +5501,12 @@ dependencies = [ "rustix", ] +[[package]] +name = "xdg" +version = "2.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "213b7324336b53d2414b2db8537e56544d981803139155afa84f76eeebb7a546" + [[package]] name = "xmlparser" version = "0.13.6" diff --git a/Cargo.toml b/Cargo.toml index 
97d9e2e3eb..af211e4b9c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,7 @@ members = [ "lib/rust/parser/generate-java", "lib/rust/parser/schema", "lib/rust/parser/debug", + "lib/rust/parser/debug/fuzz", "tools/language-server/logstat", "tools/language-server/wstest", ] @@ -47,6 +48,11 @@ incremental = true debug = false debug-assertions = false +[profile.fuzz] +inherits = "release" +debug-assertions = true +overflow-checks = true + [profile.bench] opt-level = 3 lto = true diff --git a/app/gui2/rust-ffi/src/lib.rs b/app/gui2/rust-ffi/src/lib.rs index 55ece57b99..6e90794342 100644 --- a/app/gui2/rust-ffi/src/lib.rs +++ b/app/gui2/rust-ffi/src/lib.rs @@ -40,14 +40,14 @@ pub fn is_ident_or_operator(code: &str) -> u32 { #[wasm_bindgen] pub fn is_numeric_literal(code: &str) -> bool { let parsed = PARSER.with(|parser| parser.run(code)); - let enso_parser::syntax::tree::Variant::BodyBlock(body) = *parsed.variant else { return false }; + let enso_parser::syntax::tree::Variant::BodyBlock(body) = parsed.variant else { return false }; let [stmt] = &body.statements[..] 
else { return false }; - stmt.expression.as_ref().map_or(false, |expr| match &*expr.variant { + stmt.expression.as_ref().map_or(false, |expr| match &expr.variant { enso_parser::syntax::tree::Variant::Number(_) => true, enso_parser::syntax::tree::Variant::UnaryOprApp(app) => app.opr.code == "-" && app.rhs.as_ref().map_or(false, |rhs| { - matches!(*rhs.variant, enso_parser::syntax::tree::Variant::Number(_)) + matches!(rhs.variant, enso_parser::syntax::tree::Variant::Number(_)) }), _ => false, }) diff --git a/app/gui2/src/components/ComponentBrowser/__tests__/input.test.ts b/app/gui2/src/components/ComponentBrowser/__tests__/input.test.ts index 62b7c6e5fe..3c7445be00 100644 --- a/app/gui2/src/components/ComponentBrowser/__tests__/input.test.ts +++ b/app/gui2/src/components/ComponentBrowser/__tests__/input.test.ts @@ -209,11 +209,6 @@ const baseCases: ApplySuggestionCase[] = [ suggestion: makeStaticMethod('Standard.Base.Data.Vector.new'), expected: 'Data.Vector.new ', }, - { - code: 'Dat . V .', - suggestion: makeStaticMethod('Standard.Base.Data.Vector.new'), - expected: 'Data . Vector . new ', - }, { code: '(type_method some_arg).Vector.', suggestion: makeStaticMethod('Standard.Base.Data.Vector.new'), @@ -240,6 +235,15 @@ const baseCases: ApplySuggestionCase[] = [ expected: 'a -> a.get ', }, ] +const simpleCases: ApplySuggestionCase[] = [ + ...baseCases, + // This case would cause a syntax error if a spaced-operator suffix were added. + { + code: 'Dat . V .', + suggestion: makeStaticMethod('Standard.Base.Data.Vector.new'), + expected: 'Data . Vector . 
new ', + }, +] function makeComplexCase(prefix: string, suffix: string): ApplySuggestionCase[] { return Array.from(baseCases, (aCase) => { diff --git a/app/gui2/src/util/__tests__/qualifiedName.test.ts b/app/gui2/src/util/__tests__/qualifiedName.test.ts index 0cb4b2feb8..abf17034ed 100644 --- a/app/gui2/src/util/__tests__/qualifiedName.test.ts +++ b/app/gui2/src/util/__tests__/qualifiedName.test.ts @@ -33,11 +33,10 @@ const validIdentifiers = [ '+', '<=>', '*', - '.', '!=', ] const invalidIdentifiers = ['', '1', '1Abc', '1_', 'abA!', '$a', 'a$'] -// These are not valid identifiers but currently pass the qualified name regex: ['_', '.*'] +// These are not valid identifiers but currently pass the qualified name regex: ['_', '.*', '.'] test.each(validIdentifiers)("'%s' is a valid identifier", (name) => expect(unwrap(tryIdentifierOrOperatorIdentifier(name))).toStrictEqual( diff --git a/app/gui2/src/util/ast/__tests__/__snapshots__/raw.test.ts.snap b/app/gui2/src/util/ast/__tests__/__snapshots__/raw.test.ts.snap index e38ae52569..c99f75b007 100644 --- a/app/gui2/src/util/ast/__tests__/__snapshots__/raw.test.ts.snap +++ b/app/gui2/src/util/ast/__tests__/__snapshots__/raw.test.ts.snap @@ -1373,7 +1373,7 @@ exports[`Parsing 'foo bar=baz' 1`] = ` "leftOffsetCodeStartUtf8": 7, "lengthInCodeBuffer": 1, "startInCodeBuffer": 7, - "type": "Operator", + "type": "AssignmentOperator", "whitespaceLengthInCodeBuffer": 0, "whitespaceStartInCodeBuffer": 7, }, diff --git a/app/gui2/src/util/ast/__tests__/raw.test.ts b/app/gui2/src/util/ast/__tests__/raw.test.ts index a10f2cff26..0cd7bde258 100644 --- a/app/gui2/src/util/ast/__tests__/raw.test.ts +++ b/app/gui2/src/util/ast/__tests__/raw.test.ts @@ -122,14 +122,9 @@ test.each([ { type: Tree.Type.Ident, repr: 'foo' }, ], ], - ['(', [{ type: Tree.Type.Invalid, repr: '(' }]], - [ - '(foo', - [ - { type: Tree.Type.Invalid, repr: '(' }, - { type: Tree.Type.Ident, repr: 'foo' }, - ], - ], + // These are Invalid nodes, so the child is a subtree 
containing the whole expression. + ['(', [{ type: Tree.Type.Group, repr: '(' }]], + ['(foo', [{ type: Tree.Type.Group, repr: '(foo' }]], ])("Reading children of '%s'", (code, expected) => { const ast = parseEnsoLine(code) const children = Array.from(childrenAstNodes(ast)) diff --git a/app/gui2/src/util/ast/raw.ts b/app/gui2/src/util/ast/raw.ts index e611172191..59b7ad3213 100644 --- a/app/gui2/src/util/ast/raw.ts +++ b/app/gui2/src/util/ast/raw.ts @@ -17,7 +17,6 @@ export type HasAstRange = SourceRange | RawAst.Tree | RawAst.Token */ export function parseEnsoLine(code: string): RawAst.Tree { const block = parseEnso(code) - assert(block.type === RawAst.Tree.Type.BodyBlock) const soleExpression = tryGetSoleValue(block.statements)?.expression assertDefined(soleExpression) return soleExpression diff --git a/build.sbt b/build.sbt index 8874af612d..b2969ab6e3 100644 --- a/build.sbt +++ b/build.sbt @@ -639,10 +639,7 @@ lazy val rustParserTargetDirectory = SettingKey[File]("target directory for the Rust parser") (`syntax-rust-definition` / rustParserTargetDirectory) := { - // setting "debug" for release, because it isn't yet safely integrated into - // the parser definition - val versionName = if (BuildInfo.isReleaseMode) "debug" else "debug" - target.value / "rust" / versionName + target.value / "rust" / "parser-jni" } val generateRustParserLib = @@ -668,10 +665,13 @@ val generateRustParserLib = target.foreach { t => Cargo.rustUp(t, log) } - val baseArguments = Seq( + val profile = if (BuildInfo.isReleaseMode) "release" else "fuzz" + val arguments = Seq( "build", "-p", "enso-parser-jni", + "--profile", + profile, "-Z", "unstable-options" ) ++ target.map(t => Seq("--target", t)).getOrElse(Seq()) ++ @@ -679,20 +679,18 @@ val generateRustParserLib = "--out-dir", (`syntax-rust-definition` / rustParserTargetDirectory).value.toString ) - val adjustedArguments = baseArguments ++ - (if (BuildInfo.isReleaseMode) - Seq("--release") - else Seq()) val envVars = target .map(_ => 
Seq(("RUSTFLAGS", "-C target-feature=-crt-static"))) .getOrElse(Seq()) - Cargo.run(adjustedArguments, log, envVars) + Cargo.run(arguments, log, envVars) } FileTreeView.default.list(Seq(libGlob)).map(_._1.toFile) } `syntax-rust-definition` / generateRustParserLib / fileInputs += - (`syntax-rust-definition` / baseDirectory).value.toGlob / "jni" / "src" / "*.rs" + (`syntax-rust-definition` / baseDirectory).value.toGlob / "jni" / "src" / ** / "*.rs" +`syntax-rust-definition` / generateRustParserLib / fileInputs += + (`syntax-rust-definition` / baseDirectory).value.toGlob / "src" / ** / "*.rs" val generateParserJavaSources = TaskKey[Seq[File]]( "generateParserJavaSources", diff --git a/build/build/src/engine.rs b/build/build/src/engine.rs index 3c1ee2e949..1fe5564ebc 100644 --- a/build/build/src/engine.rs +++ b/build/build/src/engine.rs @@ -247,7 +247,7 @@ impl Default for BuildConfigurationFlags { build_project_manager_package: false, build_launcher_bundle: false, build_project_manager_bundle: false, - generate_java_from_rust: true, + generate_java_from_rust: false, test_java_generated_from_rust: false, verify_packages: false, } diff --git a/build/build/src/enso.rs b/build/build/src/enso.rs index 8929434ceb..be25805be2 100644 --- a/build/build/src/enso.rs +++ b/build/build/src/enso.rs @@ -1,6 +1,6 @@ -use crate::engine::StandardLibraryTestsSelection; use crate::prelude::*; +use crate::engine::StandardLibraryTestsSelection; use crate::paths::Paths; use crate::paths::ENSO_ENABLE_ASSERTIONS; use crate::paths::ENSO_META_TEST_ARGS; diff --git a/build/build/src/project/backend.rs b/build/build/src/project/backend.rs index 28b8f0e8ae..a67436d0ec 100644 --- a/build/build/src/project/backend.rs +++ b/build/build/src/project/backend.rs @@ -155,11 +155,8 @@ impl IsTarget for Backend { target_os == TARGET_OS, "Enso Project Manager cannot be built on '{target_os}' for target '{TARGET_OS}'.", ); - let config = BuildConfigurationFlags { - build_project_manager_bundle: true, - 
generate_java_from_rust: true, - ..default() - }; + let config = + BuildConfigurationFlags { build_project_manager_bundle: true, ..default() }; let context = inner.prepare_context(context, config)?; let artifacts = context.build().await?; let project_manager = diff --git a/build/build/src/project/runtime.rs b/build/build/src/project/runtime.rs index bc76ada698..45a6952a9d 100644 --- a/build/build/src/project/runtime.rs +++ b/build/build/src/project/runtime.rs @@ -43,11 +43,7 @@ impl IsTarget for Runtime { context: Context, job: WithDestination, ) -> BoxFuture<'static, Result> { - let config = BuildConfigurationFlags { - build_engine_package: true, - generate_java_from_rust: true, - ..default() - }; + let config = BuildConfigurationFlags { build_engine_package: true, ..default() }; let this = *self; let WithDestination { inner, destination } = job; let triple = TargetTriple::new(inner.versions); diff --git a/build/build/src/rust/enso_linter.rs b/build/build/src/rust/enso_linter.rs index 3dbf38b11d..2f8644a7bf 100644 --- a/build/build/src/rust/enso_linter.rs +++ b/build/build/src/rust/enso_linter.rs @@ -1,9 +1,11 @@ use super::*; +use crate::paths::generated::RepoRoot; + use ide_ci::programs::cargo; use ide_ci::programs::Cargo; -use crate::paths::generated::RepoRoot; + const LINTER_CRATE_NAME: &str = "enso-parser-debug"; const LINTER_BIN_NAME: &str = "check_syntax"; diff --git a/build/build/src/rust/parser.rs b/build/build/src/rust/parser.rs index d5fc536680..76b3041bae 100644 --- a/build/build/src/rust/parser.rs +++ b/build/build/src/rust/parser.rs @@ -12,7 +12,6 @@ use ide_ci::programs::Javac; const GENERATOR_CRATE_NAME: &str = "enso-parser-generate-java"; -const PARSER_JNI_CRATE_NAME: &str = "enso-parser-jni"; const GENERATOR_BIN_NAME: &str = GENERATOR_CRATE_NAME; const TEST_GENERATOR_BIN_NAME: &str = "java-tests"; const GENERATED_CODE_NAMESPACE: [&str; 3] = ["org", "enso", "syntax2"]; @@ -47,17 +46,8 @@ pub async fn generate_java(repo_root: &RepoRoot) -> Result { 
generate_java_to(repo_root, &output_path).await } -fn cargo_build_parser_jni(repo_root: &Path) -> Result { - let mut ret = Cargo.cmd()?; - ret.current_dir(repo_root) - .apply(&cargo::Command::Build) - .apply(&cargo::Options::Package(PARSER_JNI_CRATE_NAME.into())); - Ok(ret) -} - #[context("Running self-tests for the generated Java sources failed.")] pub async fn run_self_tests(repo_root: &RepoRoot) -> Result { - cargo_build_parser_jni(repo_root)?.run_ok().await?; let base = &repo_root.target.generated_java; let lib = &repo_root.lib.rust.parser.generate_java.java; let package = repo_root.target.generated_java.join_iter(GENERATED_CODE_NAMESPACE); diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Numbers.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Numbers.enso index be15943666..2d76018ab9 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Numbers.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Numbers.enso @@ -1119,9 +1119,9 @@ type Integer of bits in the operands. > Example - Computing the bitwise conjunction of 2_01101101 and 2_11110000. + Computing the bitwise conjunction of 0b01101101 and 0b11110000. - 2_01101101.bit_and 2_11110000 + 0b01101101.bit_and 0b11110000 bit_and self that:Integer -> Integer = integer_bit_and self that ## GROUP Bitwise @@ -1131,9 +1131,9 @@ type Integer The bitwise compliment negates the value of each bit in the operand. > Example - Bitwise negation of 2_0110. + Bitwise negation of 0b0110. - 2_0110.bit_not + 0b0110.bit_not bit_not self -> Integer = integer_bit_not self ## GROUP Bitwise @@ -1148,9 +1148,9 @@ type Integer bits in the operands. > Example - Computing the bitwise disjunction of 2_01101101 and 2_11110000. + Computing the bitwise disjunction of 0b01101101 and 0b11110000. - 2_01101101.bit_or 2_11110000 + 0b01101101.bit_or 0b11110000 bit_or self that:Integer -> Integer = integer_bit_or self that ## GROUP Bitwise @@ -1164,9 +1164,9 @@ type Integer corresponding bits in the operands. 
> Example - Computing the bitwise exclusive or of 2_01101101 and 2_11110000. + Computing the bitwise exclusive or of 0b01101101 and 0b11110000. - 2_01101101.bit_xor 2_11110000 + 0b01101101.bit_xor 0b11110000 bit_xor self that:Integer -> Integer = integer_bit_xor self that ## GROUP Bitwise diff --git a/engine/runtime-integration-tests/src/test/java/org/enso/compiler/ErrorCompilerTest.java b/engine/runtime-integration-tests/src/test/java/org/enso/compiler/ErrorCompilerTest.java index 2fd86a4d03..d81ecf2a57 100644 --- a/engine/runtime-integration-tests/src/test/java/org/enso/compiler/ErrorCompilerTest.java +++ b/engine/runtime-integration-tests/src/test/java/org/enso/compiler/ErrorCompilerTest.java @@ -44,17 +44,6 @@ public class ErrorCompilerTest extends CompilerTest { ir, Syntax.UnexpectedExpression$.MODULE$, "Unexpected expression", 14, 16); } - @Test - public void unaryMinus() throws Exception { - var ir = parse(""" - from Standard.Base import all - - main = Date.new day=- - """); - - assertSingleSyntaxError(ir, Syntax.UnrecognizedToken$.MODULE$, "Unrecognized token", 51, 52); - } - @Test public void dotUnderscore2() throws Exception { var ir = parse(""" @@ -404,21 +393,21 @@ public class ErrorCompilerTest extends CompilerTest { public void malformedExport9() throws Exception { var ir = parse("from export all"); assertSingleSyntaxError( - ir, invalidExport("`all` not allowed in `export` statement"), null, 0, 15); + ir, Syntax.UnexpectedExpression$.MODULE$, "Unexpected expression", 0, 15); } @Test public void malformedExport10() throws Exception { var ir = parse("from Foo export all hiding"); assertSingleSyntaxError( - ir, invalidExport("`hiding` not allowed in `export` statement"), null, 0, 26); + ir, Syntax.UnexpectedExpression$.MODULE$, "Unexpected expression", 0, 26); } @Test public void malformedExport11() throws Exception { var ir = parse("from Foo export all hiding X.Y"); assertSingleSyntaxError( - ir, invalidExport("`hiding` not allowed in `export` 
statement"), null, 0, 30); + ir, Syntax.UnexpectedExpression$.MODULE$, "Unexpected expression", 0, 30); } @Test @@ -596,8 +585,8 @@ public class ErrorCompilerTest extends CompilerTest { var ir = parse(""" from project.Module export all """); - var expectedReason = new Syntax.InvalidExport("`all` not allowed in `export` statement"); - assertSingleSyntaxError(ir, expectedReason, null, 0, 30); + assertSingleSyntaxError( + ir, Syntax.UnexpectedExpression$.MODULE$, "Unexpected expression", 0, 30); } @Test @@ -605,8 +594,8 @@ public class ErrorCompilerTest extends CompilerTest { var ir = parse(""" from project.Module export all hiding Foo """); - var expectedReason = new Syntax.InvalidExport("`hiding` not allowed in `export` statement"); - assertSingleSyntaxError(ir, expectedReason, null, 0, 41); + assertSingleSyntaxError( + ir, Syntax.UnexpectedExpression$.MODULE$, "Unexpected expression", 0, 41); } private void assertSingleSyntaxError( diff --git a/engine/runtime-integration-tests/src/test/scala/org/enso/interpreter/test/semantic/DataflowErrorsTest.scala b/engine/runtime-integration-tests/src/test/scala/org/enso/interpreter/test/semantic/DataflowErrorsTest.scala index f9a6043b4d..705dcfadd7 100644 --- a/engine/runtime-integration-tests/src/test/scala/org/enso/interpreter/test/semantic/DataflowErrorsTest.scala +++ b/engine/runtime-integration-tests/src/test/scala/org/enso/interpreter/test/semantic/DataflowErrorsTest.scala @@ -93,7 +93,7 @@ class DataflowErrorsTest extends InterpreterTest { | |main = | myErr = Error.throw (My_Error.Mk_My_Error 20) - | IO.println(myErr.catch_primitive .recover) + | IO.println (myErr.catch_primitive .recover) |""".stripMargin eval(code) consumeOut shouldEqual List("(Mk_My_Recovered 20)") diff --git a/engine/runtime-parser/src/main/java/org/enso/compiler/core/TreeToIr.java b/engine/runtime-parser/src/main/java/org/enso/compiler/core/TreeToIr.java index 5695743d9c..79d2c6a237 100644 --- 
a/engine/runtime-parser/src/main/java/org/enso/compiler/core/TreeToIr.java +++ b/engine/runtime-parser/src/main/java/org/enso/compiler/core/TreeToIr.java @@ -97,17 +97,18 @@ final class TreeToIr { case Tree.Import x -> null; case Tree.Invalid x -> null; case Tree.TypeSignature sig -> { - Expression methodReference; - try { - methodReference = translateMethodReference(sig.getVariable(), true); - } catch (SyntaxException ex) { - methodReference = translateExpression(sig.getVariable()); - } + Expression methodReference; + try { + methodReference = translateMethodReference(sig.getVariable(), true); + } catch (SyntaxException ex) { + methodReference = ex.toError(); + } var signature = translateType(sig.getType()); var ascription = new Type.Ascription(methodReference, signature, Option.empty(), getIdentifiedLocation(sig), meta(), diag()); yield ascription; } + case Tree.TypeAnnotated anno -> translateTypeAnnotated(anno); default -> translateExpression(exprTree); }; if (expr != null) { @@ -1361,12 +1362,24 @@ final class TreeToIr { case Tree.Group group -> translateType(group.getBody()); case Tree.UnaryOprApp un -> translateType(un.getRhs()); case Tree.Wildcard wild -> new Name.Blank(getIdentifiedLocation(wild), meta(), diag()); - case Tree.TypeAnnotated anno -> translateTypeAnnotated(anno); + case Tree.TypeAnnotated anno -> translateTypeAnnotatedToOperator(anno); default -> translateSyntaxError(tree, new Syntax.UnsupportedSyntax("translateType")); }; } + /** + * Translate a type-annotated expression. + */ Expression translateTypeAnnotated(Tree.TypeAnnotated anno) { + var type = translateType(anno.getType()); + var expr = translateExpression(anno.getExpression()); + return new Type.Ascription(expr, type, Option.empty(), getIdentifiedLocation(anno), meta(), diag()); + } + + /** + * Translate a type-annotated expression in a context where the IR is a generic binary operator. 
+ */ + Expression translateTypeAnnotatedToOperator(Tree.TypeAnnotated anno) { var type = translateTypeCallArgument(anno.getType()); var expr = translateCallArgument(anno.getExpression()); var opName = new Name.Literal(anno.getOperator().codeRepr(), true, Option.empty(), @@ -1835,12 +1848,6 @@ final class TreeToIr { @SuppressWarnings("unchecked") Export translateExport(Tree.Export exp) { try { - if (exp.getHiding() != null) { - return translateSyntaxError(exp, invalidExportReason("`hiding` not allowed in `export` statement")); - } - if (exp.getAll() != null) { - return translateSyntaxError(exp, invalidExportReason("`all` not allowed in `export` statement")); - } Option rename; if (exp.getAs() == null) { rename = Option.empty(); diff --git a/lib/rust/parser/debug/Cargo.toml b/lib/rust/parser/debug/Cargo.toml index 8974de608c..addb4bf854 100644 --- a/lib/rust/parser/debug/Cargo.toml +++ b/lib/rust/parser/debug/Cargo.toml @@ -17,6 +17,7 @@ enso-reflect = { path = "../../reflect" } lexpr = "0.2.6" serde = { workspace = true } serde_json = { workspace = true } +clap = { workspace = true } [lints] workspace = true diff --git a/lib/rust/parser/debug/fuzz/Cargo.toml b/lib/rust/parser/debug/fuzz/Cargo.toml new file mode 100644 index 0000000000..48f65fbe3f --- /dev/null +++ b/lib/rust/parser/debug/fuzz/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "enso-parser-fuzz" +version = "0.1.0" +authors = ["Enso Team "] +edition = "2021" +description = "Binary for fuzzing Enso parser with AFL" +readme = "README.md" +homepage = "https://github.com/enso-org/enso" +repository = "https://github.com/enso-org/enso" +license-file = "../../../LICENSE" + +[dependencies] +afl = "0.15" +enso-parser = { path = "../../", features = ["debug"] } + +[lints] +workspace = true diff --git a/lib/rust/parser/debug/fuzz/src/main.rs b/lib/rust/parser/debug/fuzz/src/main.rs new file mode 100644 index 0000000000..fdeae74847 --- /dev/null +++ b/lib/rust/parser/debug/fuzz/src/main.rs @@ -0,0 +1,19 @@ +//! 
Build: +//! `cargo afl build --profile=fuzz -p enso-parser-fuzz` +//! +//! Run: +//! `cargo afl fuzz -i inputs/ -o outputs/ target/rust/fuzz/enso-parser-fuzz` + +use afl::fuzz; + + + +fn main() { + fuzz!(|code: &[u8]| { + if let Ok(code) = std::str::from_utf8(code) { + let parser = enso_parser::Parser::new(); + let ast = parser.run(code); + assert_eq!(ast.code(), code); + } + }); +} diff --git a/lib/rust/parser/debug/src/bin/bench_parse.rs b/lib/rust/parser/debug/src/bin/bench_parse.rs index 5554db8d97..7945681b4e 100644 --- a/lib/rust/parser/debug/src/bin/bench_parse.rs +++ b/lib/rust/parser/debug/src/bin/bench_parse.rs @@ -1,5 +1,6 @@ //! Parses Enso sources, measuring time spent in the parser. +// === Features === #![feature(test)] // === Non-Standard Linter Configuration === #![allow(clippy::option_map_unit_fn)] @@ -79,6 +80,7 @@ fn bench_std_lib(b: &mut test::Bencher) { } }) .unwrap(); + sources.sort_unstable(); let parser = enso_parser::Parser::new(); b.bytes = sources.iter().map(|s| s.len() as u64).sum(); b.iter(|| { diff --git a/lib/rust/parser/debug/src/bin/check_syntax.rs b/lib/rust/parser/debug/src/bin/check_syntax.rs index d6155b1d62..e31eef0db9 100644 --- a/lib/rust/parser/debug/src/bin/check_syntax.rs +++ b/lib/rust/parser/debug/src/bin/check_syntax.rs @@ -3,6 +3,8 @@ //! Source files may be specified as command line arguments; if none are provided, source code will //! be read from standard input. +// === Features === +#![feature(box_patterns)] // === Non-Standard Linter Configuration === #![allow(clippy::option_map_unit_fn)] #![allow(clippy::precedence)] @@ -14,24 +16,41 @@ use enso_parser::prelude::*; +use clap::Parser; + + + +use std::path::Path; +use std::path::PathBuf; + struct WithSourcePath { - path: String, + path: PathBuf, value: T, } +#[derive(Parser)] +struct Cli { + /// Files to check. If none specified, code will be read from standard input. + files: Vec, + + /// Only check if the parser fails to parse the input. 
+ #[arg(short, long)] + smoke_test: bool, +} + fn main() -> Result<(), Box> { - let args = std::env::args().skip(1); + let cli = Cli::parse(); let mut to_read = vec![]; let mut to_parse = vec![]; - if args.len() == 0 { + if cli.files.is_empty() { use std::io::Read; let mut data = String::new(); std::io::stdin().read_to_string(&mut data).unwrap(); to_parse.push(WithSourcePath { path: "".into(), value: data }); } else { - to_read.extend(args); + to_read.extend(cli.files); }; let cores = std::thread::available_parallelism() .unwrap_or(std::num::NonZeroUsize::new(1).unwrap()) @@ -79,7 +98,7 @@ fn main() -> Result<(), Box> { None => break, } }; - let results = check_file(source, &mut parser); + let results = check_file(source, &mut parser, cli.smoke_test); to_print.lock().unwrap().push(results); } })); @@ -109,21 +128,33 @@ fn main() -> Result<(), Box> { fn check_file( file: WithSourcePath, parser: &mut enso_parser::Parser, + smoke_test: bool, ) -> WithSourcePath> { let mut code = file.value.as_str(); if let Some((_meta, code_)) = enso_parser::metadata::parse(code) { code = code_; } let ast = parser.run(code); + let mut messages = if smoke_test { vec![] } else { collect_messages(&ast, &file.path) }; + if ast.code() != code { + messages.push(format!( + "Internal error: AST does not match source code. File: {}", + file.path.display() + )); + } + WithSourcePath { path: file.path, value: messages } +} + +fn collect_messages(ast: &enso_parser::syntax::Tree, path: impl AsRef) -> Vec { let errors = RefCell::new(vec![]); let warnings = RefCell::new(vec![]); ast.visit_trees(|tree| { - match &*tree.variant { + match &tree.variant { enso_parser::syntax::tree::Variant::Invalid(err) => { let error = format!("{}: {}", err.error.message, tree.code()); errors.borrow_mut().push((error, tree.span.clone())); } - enso_parser::syntax::tree::Variant::OprApp(enso_parser::syntax::tree::OprApp { + enso_parser::syntax::tree::Variant::OprApp(box enso_parser::syntax::tree::OprApp { opr: Err(e), .. 
}) => { @@ -156,15 +187,19 @@ fn check_file( warnings.borrow_mut().sort_unstable_by_key(|(_, span)| sort_key(span)); let mut messages = vec![]; for (message, span) in &*errors.borrow() { - messages.push(format!("E {}: {}", fmt_location(&file.path, span), &message)); + messages.push(format!("E {}: {}", fmt_location(path.as_ref().display(), span), &message)); } for (warning, span) in &*warnings.borrow() { - messages.push(format!("W {}: {}", fmt_location(&file.path, span), warning.message())); + messages.push(format!( + "W {}: {}", + fmt_location(path.as_ref().display(), span), + warning.message() + )); } - WithSourcePath { path: file.path, value: messages } + messages } -fn fmt_location(path: &str, span: &enso_parser::source::Span) -> String { +fn fmt_location(path: impl Display, span: &enso_parser::source::Span) -> String { let start = span.left_offset.code.position_after().start; let end = start + span.code_length; format!("{path} {}:{}-{}:{}", start.line + 1, start.col16, end.line + 1, end.col16) diff --git a/lib/rust/parser/debug/src/lib.rs b/lib/rust/parser/debug/src/lib.rs index 34ee776529..7203965996 100644 --- a/lib/rust/parser/debug/src/lib.rs +++ b/lib/rust/parser/debug/src/lib.rs @@ -36,8 +36,19 @@ where T: serde::Serialize + Reflect { let code: Box = Box::from(code); let mut to_s_expr = ToSExpr::new(&graph); to_s_expr.mapper(ast_ty, strip_hidden_fields); - let stringish_tokens = - vec![Digits::reflect(), NumberBase::reflect(), Operator::reflect(), TextSection::reflect()]; + let stringish_tokens = vec![ + Digits::reflect(), + NumberBase::reflect(), + TextSection::reflect(), + Operator::reflect(), + TypeAnnotationOperator::reflect(), + ArrowOperator::reflect(), + AutoscopeOperator::reflect(), + UnaryOperator::reflect(), + LambdaOperator::reflect(), + DotOperator::reflect(), + SuspensionOperator::reflect(), + ]; let stringish_tokens = stringish_tokens.into_iter().map(|t| rust_to_meta[&t.id]); let skip_tokens = vec![ SuspendedDefaultArguments::reflect(), @@ 
-48,16 +59,25 @@ where T: serde::Serialize + Reflect { TextStart::reflect(), Wildcard::reflect(), Private::reflect(), + TypeKeyword::reflect(), + ForeignKeyword::reflect(), + CaseKeyword::reflect(), + OfKeyword::reflect(), + AnnotationOperator::reflect(), + AssignmentOperator::reflect(), ]; skip_tokens.into_iter().for_each(|token| to_s_expr.skip(rust_to_meta[&token.id])); - let ident_token = rust_to_meta[&Ident::reflect().id]; + let identish_tokens = vec![Ident::reflect(), AllKeyword::reflect()]; + let identish_tokens = identish_tokens.into_iter().map(|t| rust_to_meta[&t.id]); let text_escape_token = rust_to_meta[&TextEscape::reflect().id]; let token_to_str = move |token: Value| { let range = token_code_range(&token, base); code[range].to_owned().into_boxed_str() }; - let token_to_str_ = token_to_str.clone(); - to_s_expr.mapper(ident_token, move |token| Value::symbol(token_to_str_(token))); + for token in identish_tokens { + let token_to_str_ = token_to_str.clone(); + to_s_expr.mapper(token, move |token| Value::symbol(token_to_str_(token))); + } for token in stringish_tokens { let token_to_str_ = token_to_str.clone(); to_s_expr.mapper(token, move |token| Value::string(token_to_str_(token))); @@ -66,22 +86,6 @@ where T: serde::Serialize + Reflect { Value::Cons(cons) => cons.into_pair().0, _ => panic!(), }; - let simplify_case = |list| { - let list = strip_hidden_fields(list); - let (_, list) = match list { - Value::Cons(cons) => cons.into_pair(), - _ => panic!(), - }; - let (expression, list) = match list { - Value::Cons(cons) => cons.into_pair(), - _ => panic!(), - }; - let (_, list) = match list { - Value::Cons(cons) => cons.into_pair(), - _ => panic!(), - }; - Value::cons(expression, list) - }; let simplify_escape = |mut list| { let mut last = None; while let Value::Cons(cons) = list { @@ -98,11 +102,9 @@ where T: serde::Serialize + Reflect { }; let line = rust_to_meta[&tree::block::Line::reflect().id]; let operator_line = 
rust_to_meta[&tree::block::OperatorLine::reflect().id]; - let case = rust_to_meta[&tree::CaseOf::reflect().id]; let invalid = rust_to_meta[&tree::Invalid::reflect().id]; to_s_expr.mapper(line, into_car); to_s_expr.mapper(operator_line, into_car); - to_s_expr.mapper(case, simplify_case); to_s_expr.mapper(invalid, strip_invalid); to_s_expr.mapper(text_escape_token, simplify_escape); tuplify(to_s_expr.value(ast_ty, &value)) @@ -199,26 +201,28 @@ pub fn validate_spans( tree: &enso_parser::syntax::tree::Tree, expected_span: std::ops::Range, locations: &mut LocationCheck, -) { +) -> Result<(), String> { let mut sum_span = None; fn concat( a: &Option>, b: &std::ops::Range, - ) -> std::ops::Range { - match a { + ) -> Result, String> { + Ok(match a { Some(a) => { - assert_eq!(a.end, b.start); + if a.end != b.start { + return Err(format!("{:?} != {:?}", &a.end, b.start)); + } a.start..b.end } None => b.clone(), - } + }) } - sum_span = Some(concat(&sum_span, &tree.span.left_offset.code.range())); + sum_span = Some(concat(&sum_span, &tree.span.left_offset.code.range())?); tree.visit_items(|item| match item { enso_parser::syntax::item::Ref::Token(token) => { if !(token.left_offset.is_empty() && token.code.is_empty()) { - sum_span = Some(concat(&sum_span, &token.left_offset.code.range())); - sum_span = Some(concat(&sum_span, &token.code.range())); + sum_span = Some(concat(&sum_span, &token.left_offset.code.range()).unwrap()); + sum_span = Some(concat(&sum_span, &token.code.range()).unwrap()); } let left_offset = token.left_offset.code.range(); let code = token.code.range(); @@ -226,10 +230,10 @@ pub fn validate_spans( } enso_parser::syntax::item::Ref::Tree(tree) => { let children_span = - concat(&Some(tree.span.left_offset.code.range()), &tree.span.range()); + concat(&Some(tree.span.left_offset.code.range()), &tree.span.range()).unwrap(); let children_span_ = children_span.start.utf16..children_span.end.utf16; - validate_spans(tree, children_span_, locations); - sum_span = 
Some(concat(&sum_span, &children_span)); + validate_spans(tree, children_span_, locations).unwrap(); + sum_span = Some(concat(&sum_span, &children_span).unwrap()); let left_offset = tree.span.left_offset.code.range(); let code = tree.span.range(); locations.extend(&[left_offset.start, left_offset.end, code.start, code.end]); @@ -242,4 +246,5 @@ pub fn validate_spans( let sum_span = sum_span.start.utf16..sum_span.end.utf16; assert_eq!(sum_span, expected_span); } + Ok(()) } diff --git a/lib/rust/parser/debug/src/main.rs b/lib/rust/parser/debug/src/main.rs index 2862437511..dd525981b0 100644 --- a/lib/rust/parser/debug/src/main.rs +++ b/lib/rust/parser/debug/src/main.rs @@ -33,7 +33,9 @@ fn check_file(path: &str, mut code: &str) { let ast = enso_parser::Parser::new().run(code); let expected_span = 0..(code.encode_utf16().count() as u32); let mut locations = enso_parser::source::code::debug::LocationCheck::new(); - enso_parser_debug::validate_spans(&ast, expected_span, &mut locations); + enso_parser_debug::validate_spans(&ast, expected_span, &mut locations) + .map_err(|e| format!("{e} in {path}")) + .unwrap(); for (parsed, original) in ast.code().lines().zip(code.lines()) { assert_eq!(parsed, original, "Bug: dropped tokens, while parsing: {path}"); } diff --git a/lib/rust/parser/debug/tests/parse.rs b/lib/rust/parser/debug/tests/parse.rs index 226696a6c8..4a4e452101 100644 --- a/lib/rust/parser/debug/tests/parse.rs +++ b/lib/rust/parser/debug/tests/parse.rs @@ -38,7 +38,7 @@ macro_rules! block { macro_rules! 
test { ( $code:expr, $($statements:tt)* ) => { - test($code, block![$( $statements )*]); + test($code, block![$( $statements )*]) } } @@ -60,104 +60,83 @@ fn application() { #[test] fn parentheses() { - test("(a b)", block![(Group (App (Ident a) (Ident b)))]); - test("x)", block![(App (Ident x) (Invalid))]); - test("(x", block![(App (Invalid) (Ident x))]); - test("(a) (b)", block![(App (Group (Ident a)) (Group (Ident b)))]); - #[rustfmt::skip] - test("((a b) c)", block![ + test!("(a b)", (Group (App (Ident a) (Ident b)))); + expect_invalid_node("x)"); + test!("(x", (Invalid)); + test!("(a) (b)", (App (Group (Ident a)) (Group (Ident b)))); + test!("((a b) c)", (Group (App (Group (App (Ident a) (Ident b))) - (Ident c)))]); + (Ident c)))); + test!("(a).b", (OprApp (Group (Ident a)) (Ok ".") (Ident b))); } #[test] fn section_simple() { - let expected_lhs = block![(OprSectionBoundary 1 (OprApp () (Ok "+") (Ident a)))]; - test("+ a", expected_lhs); - let expected_rhs = block![(OprSectionBoundary 1 (OprApp (Ident a) (Ok "+") ()))]; - test("a +", expected_rhs); + test!("+ a", (OprSectionBoundary 1 (OprApp () (Ok "+") (Ident a)))); + test!("a +", (OprSectionBoundary 1 (OprApp (Ident a) (Ok "+") ()))); } #[test] fn inline_if() { - #[rustfmt::skip] - test("if True then True else False", block![ + test!("if True then True else False", (MultiSegmentApp #(((Ident if) (Ident True)) ((Ident then) (Ident True)) - ((Ident else) (Ident False))))]); + ((Ident else) (Ident False))))); } #[test] fn then_block() { - #[rustfmt::skip] - test("if True then\n True", block![ - (MultiSegmentApp #(((Ident if) (Ident True)) ((Ident then) (BodyBlock #((Ident True))))))]); + test!("if True then\n True", + (MultiSegmentApp #(((Ident if) (Ident True)) ((Ident then) (BodyBlock #((Ident True))))))); } #[test] fn else_block() { - #[rustfmt::skip] - test("if True then True else\n False", block![ + test!("if True then True else\n False", (MultiSegmentApp #(((Ident if) (Ident True)) ((Ident then) (Ident 
True)) - ((Ident else) (BodyBlock #((Ident False))))))]); + ((Ident else) (BodyBlock #((Ident False))))))); } #[test] fn if_then_else_chained_block() { - #[rustfmt::skip] - test("if True then True else False\n . to_text", block![ - (OperatorBlockApplication - (MultiSegmentApp #(((Ident if) (Ident True)) - ((Ident then) (Ident True)) - ((Ident else) (Ident False) - ))) - #(((Ok ".") (Ident to_text))) - #() - )]); -} - -#[test] -fn if_then_else_chained_block_with_group() { - #[rustfmt::skip] - test("(if True then True else False)\n . to_text", block![ - (OperatorBlockApplication - (Group (MultiSegmentApp #(((Ident if) (Ident True)) - ((Ident then) (Ident True)) - ((Ident else) (Ident False) - )))) - #(((Ok ".") (Ident to_text))) - #() - )]); -} - -#[test] -fn if_then_else_chained_block_multi2() { - #[rustfmt::skip] - test("if True then True else False\n . to_text\n . as_value", block![ - (OperatorBlockApplication - (MultiSegmentApp #(((Ident if) (Ident True)) - ((Ident then) (Ident True)) - ((Ident else) (Ident False) - ))) - #(((Ok ".") (Ident to_text)) ((Ok ".") (Ident as_value))) - #() - )]); -} - -#[test] -fn if_then_else_chained_block_multi3() { - #[rustfmt::skip] - test("if True then True else False\n . to_text\n . as_value\n . done 42", block![ - (OperatorBlockApplication - (MultiSegmentApp #(((Ident if) (Ident True)) - ((Ident then) (Ident True)) - ((Ident else) (Ident False) - ))) - #(((Ok ".") (Ident to_text)) ((Ok ".") (Ident as_value)) ((Ok ".") (App (Ident done) (Number () "42" ())))) - #() - )]); + test!("if True then True else False\n . to_text", + (OperatorBlockApplication + (MultiSegmentApp #(((Ident if) (Ident True)) + ((Ident then) (Ident True)) + ((Ident else) (Ident False) + ))) + #(((Ok ".") (Ident to_text))) + #() + )); + test!("(if True then True else False)\n . 
to_text", + (OperatorBlockApplication + (Group (MultiSegmentApp #(((Ident if) (Ident True)) + ((Ident then) (Ident True)) + ((Ident else) (Ident False) + )))) + #(((Ok ".") (Ident to_text))) + #() + )); + test!("if True then True else False\n . to_text\n . as_value", + (OperatorBlockApplication + (MultiSegmentApp #(((Ident if) (Ident True)) + ((Ident then) (Ident True)) + ((Ident else) (Ident False) + ))) + #(((Ok ".") (Ident to_text)) ((Ok ".") (Ident as_value))) + #() + )); + test!("if True then True else False\n . to_text\n . as_value\n . done 42", + (OperatorBlockApplication + (MultiSegmentApp #(((Ident if) (Ident True)) + ((Ident then) (Ident True)) + ((Ident else) (Ident False) + ))) + #(((Ok ".") (Ident to_text)) ((Ok ".") (Ident as_value)) ((Ok ".") (App (Ident done) (Number () "42" ())))) + #() + )); } // === Comments === @@ -166,12 +145,11 @@ fn if_then_else_chained_block_multi3() { fn plain_comments() { test!("# a b c", ()()); test!("main = # define main\n 4", - (Function (Ident main) #() () "=" (BodyBlock #(() (Number () "4" ()))))); + (Function (Ident main) #() () (BodyBlock #(() (Number () "4" ()))))); } #[test] fn doc_comments() { - #[rustfmt::skip] let lines = [ "## The Identity Function", "", @@ -179,28 +157,20 @@ fn doc_comments() { " - x: value to do nothing to", "id x = x", ]; - #[rustfmt::skip] - test(&lines.join("\n"), block![ + test!(lines.join("\n"), (Documented (#((Section " The Identity Function") (Newline) (Newline) (Section "Arguments:") (Newline) (Section "- x: value to do nothing to")) #(())) - (Function (Ident id) #((() (Ident x) () ())) () "=" (Ident x)))]); - #[rustfmt::skip] - let lines = [ - "type Foo", - " ## Test indent handling", - " ", - " foo", - ]; - #[rustfmt::skip] + (Function (Ident id) #((() (Ident x) () ())) () (Ident x)))); + let lines = ["type Foo", " ## Test indent handling", " ", " foo bar = foo"]; test!(&lines.join("\n"), - (TypeDef type Foo #() #( + (TypeDef Foo #() #( (Documented (#((Section " Test indent 
handling")) #(() ())) - (Ident foo))))); + (Function (Ident foo) #((() (Ident bar) () ())) () (Ident foo)))))); } @@ -208,14 +178,15 @@ fn doc_comments() { #[test] fn type_definition_no_body() { - test!("type Bool", (TypeDef type Bool #() #())); - test!("type Option a", (TypeDef type Option #((() (Ident a) () ())) #())); - test!("type Option (a)", (TypeDef type Option #((() (Ident a) () ())) #())); - test!("type Foo (a : Int)", (TypeDef type Foo #((() (Ident a) (":" (Ident Int)) ())) #())); - test!("type A a=0", (TypeDef type A #((() (Ident a) () ("=" (Number () "0" ())))) #())); + test!("type Bool", (TypeDef Bool #() #())); + test!("type Option a", (TypeDef Option #((() (Ident a) () ())) #())); + test!("type Option (a)", (TypeDef Option #((() (Ident a) () ())) #())); + test!("type Foo (a : Int)", (TypeDef Foo #((() (Ident a) (":" (Ident Int)) ())) #())); + test!("type A a=0", (TypeDef A #((() (Ident a) () ((Number () "0" ())))) #())); test!("type Existing_Headers (column_names : Vector Text)", - (TypeDef type Existing_Headers #( + (TypeDef Existing_Headers #( (() (Ident column_names) (":" (App (Ident Vector) (Ident Text))) ())) #())); + test!("type 1", (Invalid)); } #[test] @@ -228,32 +199,23 @@ fn type_constructors() { " Rectangle width height", " Point", ]; - #[rustfmt::skip] - let expected = block![ - (TypeDef type Geo #() + test!(code.join("\n"), + (TypeDef Geo #() #((ConstructorDefinition Circle #() #(((() (Ident radius) () ())) ((() (Ident x) () ())))) (ConstructorDefinition Rectangle #((() (Ident width) () ()) (() (Ident height) () ())) #()) - (ConstructorDefinition Point #() #())))]; - test(&code.join("\n"), expected); - let code = "type Foo\n Bar (a : B = C.D)"; - #[rustfmt::skip] - let expected = block![ - (TypeDef type Foo #() #((ConstructorDefinition + (ConstructorDefinition Point #() #())))); + test!("type Foo\n Bar (a : B = C.D)", (TypeDef Foo #() #( + (ConstructorDefinition Bar - #((() (Ident a) (":" (Ident B)) ("=" (OprApp (Ident C) (Ok ".") (Ident 
D))))) - #())))]; - test(code, expected); - let code = "type Foo\n ## Bar\n Baz"; - let expected = block![(TypeDef type Foo #() #( - (Documented (#((Section " Bar")) #(())) (ConstructorDefinition Baz #() #()))))]; - test(code, expected); + #((() (Ident a) (":" (Ident B)) ((OprApp (Ident C) (Ok ".") (Ident D))))) + #())))); + test!("type Foo\n ## Bar\n Baz", (TypeDef Foo #() #( + (Documented (#((Section " Bar")) #(())) (ConstructorDefinition Baz #() #()))))); let code = ["type A", " Foo (a : Integer, b : Integer)"]; - #[rustfmt::skip] - let expected = block![(TypeDef type A #() #( - (ConstructorDefinition Foo #((() (Invalid) () ())) #())))]; - test(&code.join("\n"), expected); + test!(code.join("\n"), (TypeDef A #() + #((ConstructorDefinition Foo #((() (Ident a) (":" (Invalid)) ())) #())))); } #[test] @@ -263,11 +225,7 @@ fn type_constructor_private() { "type Foo", " private Bar" ]; - #[rustfmt::skip] - let expected = block![ - (TypeDef type Foo #() - #((Private (ConstructorDefinition Bar #() #()))))]; - test(&code.join("\n"), expected); + test!(code.join("\n"), (TypeDef Foo #() #((Private (ConstructorDefinition Bar #() #()))))); #[rustfmt::skip] let code = [ @@ -275,14 +233,9 @@ fn type_constructor_private() { " private Bar", " Foo" ]; - #[rustfmt::skip] - let expected = block![ - (TypeDef type Foo #() - #((Private (ConstructorDefinition Bar #() #())) - (ConstructorDefinition Foo #() #())) - ) - ]; - test(&code.join("\n"), expected); + test!(code.join("\n"), (TypeDef Foo #() + #((Private (ConstructorDefinition Bar #() #())) + (ConstructorDefinition Foo #() #())))); #[rustfmt::skip] let code = [ @@ -293,27 +246,23 @@ fn type_constructor_private() { " Rectangle width height", " Point", ]; - #[rustfmt::skip] - let expected = block![ - (TypeDef type Geo #() + test!(code.join("\n"), + (TypeDef Geo #() #((Private(ConstructorDefinition Circle #() #(((() (Ident radius) () ())) ((() (Ident x) () ()))))) (ConstructorDefinition Rectangle #((() (Ident width) () ()) (() (Ident 
height) () ())) #()) - (ConstructorDefinition Point #() #())))]; - test(&code.join("\n"), expected); + (ConstructorDefinition Point #() #())))); #[rustfmt::skip] let code = [ "type My_Type", " private Value a b c" ]; - let expected = block![ - (TypeDef type My_Type #() - #((Private (ConstructorDefinition Value #((() (Ident a) () ()) (() (Ident b) () ()) (() (Ident c) () ())) #()))) - ) - ]; - test(&code.join("\n"), expected); + test!(code.join("\n"), (TypeDef My_Type #() + #((Private + (ConstructorDefinition Value + #((() (Ident a) () ()) (() (Ident b) () ()) (() (Ident c) () ())) #()))))); } #[test] @@ -321,11 +270,11 @@ fn type_methods() { let code = ["type Geo", " number =", " x", " area self = x + x"]; #[rustfmt::skip] let expected = block![ - (TypeDef type Geo #() - #((Function (Ident number) #() () "=" (BodyBlock #((Ident x)))) + (TypeDef Geo #() + #((Function (Ident number) #() () (BodyBlock #((Ident x)))) (Function (Ident area) #((() (Ident self) () ())) () - "=" (OprApp (Ident x) (Ok "+") (Ident x)))))]; - test(&code.join("\n"), expected); + (OprApp (Ident x) (Ok "+") (Ident x)))))]; + test(code.join("\n"), expected); let code = [ "type Problem_Builder", " ## Returns a vector containing all reported problems, aggregated.", @@ -335,14 +284,14 @@ fn type_methods() { ]; #[rustfmt::skip] let expected = block![ - (TypeDef type Problem_Builder #() #( + (TypeDef Problem_Builder #() #( (Documented (#((Section " Returns a vector containing all reported problems, aggregated.")) #(())) (TypeSignature (Ident build_problemset) ":" (Ident Vector))) (Function (Ident build_problemset) #((() (Ident self) () ())) () - "=" (BodyBlock #((Ident self)))))) + (BodyBlock #((Ident self)))))) ]; - test(&code.join("\n"), expected); + test(code.join("\n"), expected); test!("[foo., bar.]", (Array (OprSectionBoundary 1 (OprApp (Ident foo) (Ok ".") ())) #(("," (OprSectionBoundary 1 (OprApp (Ident bar) (Ok ".") ())))))); @@ -358,16 +307,14 @@ fn type_operator_methods() { " Foo.+ : Foo", 
" Foo.+ self b = b", ]; - #[rustfmt::skip] - let expected = block![ - (TypeDef type Foo #() + test!(code.join("\n"), + (TypeDef Foo #() #((TypeSignature (Ident #"+") ":" (OprApp (Ident Foo) (Ok "->") (OprApp (Ident Foo) (Ok "->") (Ident Foo)))) - (Function (Ident #"+") #((() (Ident self) () ()) (() (Ident b) () ())) () "=" (Ident b)) + (Function (Ident #"+") #((() (Ident self) () ()) (() (Ident b) () ())) () (Ident b)) (TypeSignature (OprApp (Ident Foo) (Ok ".") (Ident #"+")) ":" (Ident Foo)) (Function (OprApp (Ident Foo) (Ok ".") (Ident #"+")) - #((() (Ident self) () ()) (() (Ident b) () ())) () "=" (Ident b))))]; - test(&code.join("\n"), expected); + #((() (Ident self) () ()) (() (Ident b) () ())) () (Ident b))))); test!("Any.==", (OprApp (Ident Any) (Ok ".") (Ident #"=="))); expect_invalid_node("x.-y"); expect_invalid_node("x.-1"); @@ -379,13 +326,6 @@ fn type_operator_methods() { test!("x.~1", (OprApp (Ident x) (Ok ".") (UnaryOprApp "~" (Number () "1" ())))); } -#[test] -fn unspaced_app() { - test!("js_set_zone arr.at(0)", (App (Ident js_set_zone) - (App (OprApp (Ident arr) (Ok ".") (Ident at)) - (Group (Number () "0" ()))))); -} - #[test] fn type_def_full() { let code = [ @@ -402,7 +342,7 @@ fn type_def_full() { ]; #[rustfmt::skip] let expected = block![ - (TypeDef type Geo #() + (TypeDef Geo #() #((ConstructorDefinition Circle #() #( ((() (Ident radius) (":" (Ident float)) ())) ((() (Ident x) () ())))) @@ -410,39 +350,29 @@ fn type_def_full() { Rectangle #((() (Ident width) () ()) (() (Ident height) () ())) #()) (ConstructorDefinition Point #() #()) () - (Function (Ident number) #() () "=" (BodyBlock #((Ident x)))) + (Function (Ident number) #() () (BodyBlock #((Ident x)))) (Function (Ident area) #((() (Ident self) () ())) () - "=" (OprApp (Ident x) (Ok "+") (Ident x)))))]; - test(&code.join("\n"), expected); + (OprApp (Ident x) (Ok "+") (Ident x)))))]; + test(code.join("\n"), expected); } #[test] fn type_def_defaults() { let code = ["type Result error 
ok=Nothing", " Ok value:ok = Nothing"]; - #[rustfmt::skip] - let expected = block![ - (TypeDef type Result #((() (Ident error) () ()) - (() (Ident ok) () ("=" (Ident Nothing)))) + test!(code.join("\n"), + (TypeDef Result #((() (Ident error) () ()) + (() (Ident ok) () ((Ident Nothing)))) #((ConstructorDefinition Ok - #((() (Ident value) (":" (Ident ok)) ("=" (Ident Nothing)))) #())))]; - test(&code.join("\n"), expected); + #((() (Ident value) (":" (Ident ok)) ((Ident Nothing)))) #())))); } #[test] fn type_def_nested() { - #[rustfmt::skip] - let code = [ - "type Foo", - " type Bar", - " type Baz", - ]; - #[rustfmt::skip] - let expected = block![ - (TypeDef type Foo #() - #((TypeDef type Bar #() #()) - (TypeDef type Baz #() #()))) - ]; - test(&code.join("\n"), expected); + let code = ["type Foo", " type Bar", " type Baz"]; + test!(code.join("\n"), + (TypeDef Foo #() + #((TypeDef Bar #() #()) + (TypeDef Baz #() #())))); } @@ -450,7 +380,10 @@ fn type_def_nested() { #[test] fn assignment_simple() { - test("foo = x", block![(Assignment (Ident foo) "=" (Ident x))]); + test!("foo = x", (Assignment (Ident foo) (Ident x))); + test!("foo=x", (Assignment (Ident foo) (Ident x))); + test!("foo= x", (Assignment (Ident foo) (Ident x))); + expect_invalid_node("foo =x"); } @@ -458,59 +391,71 @@ fn assignment_simple() { #[test] fn function_inline_simple_args() { - test!("foo a = x", (Function (Ident foo) #((() (Ident a) () ())) () "=" (Ident x))); + test!("foo a = x", (Function (Ident foo) #((() (Ident a) () ())) () (Ident x))); test!("foo a b = x", - (Function (Ident foo) #((() (Ident a) () ()) (() (Ident b) () ())) () "=" (Ident x))); - #[rustfmt::skip] + (Function (Ident foo) #((() (Ident a) () ()) (() (Ident b) () ())) () (Ident x))); test!( "foo a b c = x", (Function (Ident foo) #((() (Ident a) () ()) (() (Ident b) () ()) (() (Ident c) () ())) - () "=" (Ident x)) + () (Ident x)) ); - test!("foo _ = x", (Function (Ident foo) #((() (Wildcard -1) () ())) () "=" (Ident x))); + 
test!("foo _ = x", (Function (Ident foo) #((() (Wildcard -1) () ())) () (Ident x))); + expect_invalid_node("foo a =x"); } #[test] fn function_block_noargs() { - test("foo =", block![(Function (Ident foo) #() () "=" ())]); + test("foo =", block![(Function (Ident foo) #() () ())]); } #[test] -fn function_block_simple_args() { - test("foo a =", block![(Function (Ident foo) #((() (Ident a) () ())) () "=" ())]); - #[rustfmt::skip] - test("foo a b =", block![(Function (Ident foo) #((() (Ident a) () ()) - (() (Ident b) () ())) () "=" ())]); - #[rustfmt::skip] +fn function_no_body() { + test!("foo a =", (Function (Ident foo) #((() (Ident a) () ())) () ())); + test!("foo a b =", (Function (Ident foo) #((() (Ident a) () ()) + (() (Ident b) () ())) () ())); test!("foo a b c =", (Function (Ident foo) #((() (Ident a) () ()) (() (Ident b) () ()) (() (Ident c) () ())) () - "=" ())); } +#[test] +fn function_block_body() { + test!("foo a =\n a", + (Function (Ident foo) #((() (Ident a) () ())) () (BodyBlock #((Ident a))))); + test!("foo a b =\n a", + (Function (Ident foo) #((() (Ident a) () ()) (() (Ident b) () ())) () + (BodyBlock #((Ident a))))); + test!("foo a b c =\n a", + (Function (Ident foo) #((() (Ident a) () ()) (() (Ident b) () ()) (() (Ident c) () ())) () + (BodyBlock #((Ident a))))); +} + #[test] fn function_qualified() { test!("Id.id x = x", (Function (OprApp (Ident Id) (Ok ".") (Ident id)) #((() (Ident x) () ())) - () "=" (Ident x))); + () (Ident x))); } #[test] fn ignored_arguments() { - test!("f ~_ = x", (Function (Ident f) #(("~" (Wildcard -1) () ())) () "=" (Ident x))); + test!("f _ = x", (Function (Ident f) #((() (Wildcard -1) () ())) () (Ident x))); + test!("f ~_ = x", (Function (Ident f) #(("~" (Wildcard -1) () ())) () (Ident x))); } #[test] fn foreign_functions() { test!("foreign python my_method a b = \"42\"", - (ForeignFunction foreign python my_method + (ForeignFunction python my_method #((() (Ident a) () ()) (() (Ident b) () ())) - "=" (TextLiteral 
#((Section "42"))))); + test!("foreign python my_method = \"42\"", + (ForeignFunction python my_method #() (TextLiteral #((Section "42"))))); + expect_invalid_node("private foreign python my_method = \"42\""); } #[test] @@ -520,13 +465,12 @@ fn function_inline_return_specification() { (Function (Ident id) #((() (Ident self) () ()) (() (Ident that) (":" (Ident Integer)) ())) ("->" (Ident Integer)) - "=" (Ident that))); + (Ident that))); // Edge case test!("number -> Integer = 23", (Function (Ident number) #() ("->" (Ident Integer)) - "=" (Number () "23" ()))); - // Edge case: Not an inline return specification + (Number () "23" ()))); expect_invalid_node("f x : Integer -> Integer = 23"); } @@ -535,11 +479,35 @@ fn function_inline_return_specification() { #[test] fn named_arguments() { - let cases = [ - ("f x=y", block![(NamedApp (Ident f) x "=" (Ident y))]), - ("f (x = y)", block![(NamedApp (Ident f) x "=" (Ident y))]), - ]; - cases.into_iter().for_each(|(code, expected)| test(code, expected)); + test!("f x=y", (NamedApp (Ident f) x (Ident y))); + test!("f (x = y)", (NamedApp (Ident f) x (Ident y))); + test!("(x a=b)", (Group (NamedApp (Ident x) a (Ident b)))); + test!("(x a=b.c)", (Group (NamedApp (Ident x) a (OprApp (Ident b) (Ok ".") (Ident c))))); + test!("catch handler=exc->\n throw", + (NamedApp (Ident catch) handler + (OprApp (Ident exc) (Ok "->") (BodyBlock #((Ident throw)))))); + test!("sort by=x-> y-> compare x y", + (NamedApp (Ident sort) by + (OprApp (Ident x) (Ok "->") + (OprApp (Ident y) (Ok "->") (App (App (Ident compare) (Ident x)) (Ident y)))))); + test!("sort by=(<) xs", + (App + (NamedApp (Ident sort) by (Group (OprSectionBoundary 2 (OprApp () (Ok "<") ())))) + (Ident xs))); + test!("sort by=(x-> x) y-> compare x y", + (App + (NamedApp (Ident sort) by (Group (OprApp (Ident x) (Ok "->") (Ident x)))) + (OprApp (Ident y) (Ok "->") (App (App (Ident compare) (Ident x)) (Ident y))))); + test!("sort by=(x-> x) 1", + (App + (NamedApp (Ident sort) by 
(Group (OprApp (Ident x) (Ok "->") (Ident x)))) + (Number () "1" ()))); + test!("foo to=", (App (Ident foo) (Invalid))); + test!("(foo to=)", (Group (App (Ident foo) (Invalid)))); + test!("filter (foo to=(1))", + (App (Ident filter) (Group (NamedApp (Ident foo) to (Group (Number () "1" ())))))); + test!("foo . bar baz=quux", + (NamedApp (OprApp (Ident foo) (Ok ".") (Ident bar)) baz (Ident quux))); } @@ -553,32 +521,68 @@ fn default_app() { #[test] fn argument_named_default() { test!("f default x = x", - (Function (Ident f) #((() (Ident default) () ()) (() (Ident x) () ())) () "=" (Ident x))); + (Function (Ident f) #((() (Ident default) () ()) (() (Ident x) () ())) () (Ident x))); test!("f x default = x", - (Function (Ident f) #((() (Ident x) () ()) (() (Ident default) () ())) () "=" (Ident x))); + (Function (Ident f) #((() (Ident x) () ()) (() (Ident default) () ())) () (Ident x))); } #[test] -fn default_arguments() { - #[rustfmt::skip] - let cases = [ - ("f x=1 = x", block![ - (Function (Ident f) #((() (Ident x) () ("=" (Number () "1" ())))) () "=" (Ident x))]), - ("f (x = 1) = x", block![ - (Function (Ident f) #((() (Ident x) () ("=" (Number () "1" ())))) () "=" (Ident x))]), - // Pattern in LHS: - ("f ~x=1 = x", block![ - (Function (Ident f) - #(("~" (Ident x) () ("=" (Number () "1" ())))) - () - "=" (Ident x))]), - ("f (~x = 1) = x", block![ - (Function (Ident f) - #(("~" (Ident x) () ("=" (Number () "1" ())))) - () - "=" (Ident x))]), - ]; - cases.into_iter().for_each(|(code, expected)| test(code, expected)); +fn complex_arguments() { + test!("f x=1 = x", + (Function (Ident f) #((() (Ident x) () ((Number () "1" ())))) () (Ident x))); + test!("f (x : Number) = x", + (Function (Ident f) #((() (Ident x) (":" (Ident Number)) ())) () (Ident x))); + test!("f (x = 1) = x", + (Function (Ident f) #((() (Ident x) () ((Number () "1" ())))) () (Ident x))); + test!("f ((x = 1) : Number) = x", + (Function (Ident f) #((() (Invalid) (":" (Ident Number)) ())) () (Ident x))); + 
test!("f (x=1 : Number) = x", + (Function (Ident f) #((() (Invalid) (":" (Ident Number)) ())) () (Ident x))); + test!("f (x : Number = 1) = x", + (Function (Ident f) + #((() (Ident x) (":" (Ident Number)) ((Number () "1" ())))) + () (Ident x))); + test!("f (x y) = x", (Function (Ident f) #((() (Invalid) () ())) () (Ident x))); + test!("f ((x : Number) = 1) = x", + (Function (Ident f) + #((() (Ident x) (":" (Ident Number)) ((Number () "1" ())))) + () (Ident x))); + test!("f ((x : Array Number) = 1) = x", + (Function (Ident f) + #((() (Ident x) (":" (App (Ident Array) (Ident Number))) ((Number () "1" ())))) + () (Ident x))); + test!("f (x):Number=1 = x", + (Function (Ident f) + #((() (Invalid) (":" (Ident Number)) ((Number () "1" ())))) + () (Ident x))); + test!("f ((x:Number=1)) = x", (Function (Ident f) #((() (Invalid) () ())) () (Ident x))); + test!("f (x : Number)=1 = x", + (Function (Ident f) + #((() (Ident x) (":" (Ident Number)) ((Number () "1" ())))) + () (Ident x))); + test!("f (x:Number = 1) = x", + (Function (Ident f) + #((() (Ident x) (":" (Ident Number)) ((Number () "1" ())))) + () (Ident x))); + test!("f (x:Number=1) = x", + (Function (Ident f) + #((() (Ident x) (":" (Ident Number)) ((Number () "1" ())))) + () (Ident x))); + test!("f x:Number=1 = x", + (Function (Ident f) + #((() (Ident x) (":" (Ident Number)) ((Number () "1" ())))) + () (Ident x))); + // Pattern in LHS: + test!("f ~x=1 = x", + (Function (Ident f) + #(("~" (Ident x) () ((Number () "1" ())))) + () + (Ident x))); + test!("f (~x = 1) = x", + (Function (Ident f) + #(("~" (Ident x) () ((Number () "1" ())))) + () + (Ident x))); } @@ -587,15 +591,15 @@ fn default_arguments() { #[test] fn code_block_body() { let code = ["main =", " x"]; - test(&code.join("\n"), block![(Function (Ident main) #() () "=" (BodyBlock #((Ident x))))]); + test!(code.join("\n"), (Function (Ident main) #() () (BodyBlock #((Ident x))))); let code = ["main =", " ", " x"]; - test(&code.join("\n"), block![(Function (Ident 
main) #() () "=" (BodyBlock #(() (Ident x))))]); + test!(code.join("\n"), (Function (Ident main) #() () (BodyBlock #(() (Ident x))))); let code = ["main =", " ", " x"]; - test(&code.join("\n"), block![(Function (Ident main) #() () "=" (BodyBlock #(() (Ident x))))]); + test!(code.join("\n"), (Function (Ident main) #() () (BodyBlock #(() (Ident x))))); let code = ["main =", " ", " x"]; - test(&code.join("\n"), block![(Function (Ident main) #() () "=" (BodyBlock #(() (Ident x))))]); + test!(code.join("\n"), (Function (Ident main) #() () (BodyBlock #(() (Ident x))))); let code = ["main =", "", " x"]; - test(&code.join("\n"), block![(Function (Ident main) #() () "=" (BodyBlock #(() (Ident x))))]); + test!(code.join("\n"), (Function (Ident main) #() () (BodyBlock #(() (Ident x))))); #[rustfmt::skip] let code = [ @@ -605,36 +609,34 @@ fn code_block_body() { ]; #[rustfmt::skip] let expect = block![ - (Function (Ident main) #() () "=" (BodyBlock #( + (Function (Ident main) #() () (BodyBlock #( (OprSectionBoundary 1 (OprApp () (Ok "+") (Ident x))) (App (Ident print) (Ident x))))) ]; - test(&code.join("\n"), expect); + test(code.join("\n"), expect); } #[test] fn code_block_operator() { let code = ["value = nums", " * each random", " + constant"]; let expect = block![ - (Assignment (Ident value) "=" + (Assignment (Ident value) (OperatorBlockApplication (Ident nums) #(((Ok "*") (App (Ident each) (Ident random))) ((Ok "+") (Ident constant))) #())) ]; - test(&code.join("\n"), expect); + test(code.join("\n"), expect); } #[test] fn dot_operator_blocks() { let code = ["rect1", " . width = 7", " . 
center", " + x"]; - #[rustfmt::skip] - let expected = block![ + test!(code.join("\n"), (OperatorBlockApplication (Ident rect1) #(((Ok ".") (OprApp (Ident width) (Ok "=") (Number () "7" ()))) ((Ok ".") (OperatorBlockApplication (Ident center) - #(((Ok "+") (Ident x))) #()))) #())]; - test(&code.join("\n"), expected); + #(((Ok "+") (Ident x))) #()))) #())); } #[test] @@ -644,7 +646,7 @@ fn code_block_argument_list() { "foo", " bar", ]; - test!(&code.join("\n"), (ArgumentBlockApplication (Ident foo) #((Ident bar)))); + test!(code.join("\n"), (ArgumentBlockApplication (Ident foo) #((Ident bar)))); #[rustfmt::skip] let code = [ @@ -652,9 +654,9 @@ fn code_block_argument_list() { " bar", ]; let expect = block![ - (Assignment (Ident value) "=" (ArgumentBlockApplication (Ident foo) #((Ident bar)))) + (Assignment (Ident value) (ArgumentBlockApplication (Ident foo) #((Ident bar)))) ]; - test(&code.join("\n"), expect); + test(code.join("\n"), expect); #[rustfmt::skip] let code = [ @@ -664,12 +666,12 @@ fn code_block_argument_list() { ]; #[rustfmt::skip] let expect = block![ - (Assignment (Ident value) "=" + (Assignment (Ident value) (ArgumentBlockApplication (Ident foo) #( (OprSectionBoundary 1 (OprApp () (Ok "+") (Ident x))) (Ident bar)))) ]; - test(&code.join("\n"), expect); + test(code.join("\n"), expect); } #[test] @@ -678,40 +680,37 @@ fn code_block_empty() { // No input would parse as an empty `ArgumentBlock` or `OperatorBlock`, because those types are // distinguished from a body continuation by the presence of non-empty indented lines. let code = ["foo =", "bar"]; - test(&code.join("\n"), block![(Function (Ident foo) #() () "=" ()) (Ident bar)]); + test(code.join("\n"), block![(Function (Ident foo) #() () ()) (Ident bar)]); // This parses similarly to above; a line with no non-whitespace content does not create a code // block. 
let code = ["foo =", " ", "bar"]; - test(&code.join("\n"), block![(Function (Ident foo) #() () "=" ()) () (Ident bar)]); + test(code.join("\n"), block![(Function (Ident foo) #() () ()) () (Ident bar)]); } #[test] fn code_block_bad_indents1() { - let code = ["main =", " foo", " bar", " baz"]; - let expected = block![ - (Function (Ident main) #() () "=" (BodyBlock #((Ident foo) (Ident bar) (Ident baz)))) - ]; - test(&code.join("\n"), expected); + test!(["main =", " foo", " bar", " baz"].join("\n"), + (Function (Ident main) #() () (BodyBlock #((Ident foo) (Ident bar) (Ident baz))))); } #[test] fn code_block_bad_indents2() { let code = ["main =", " foo", " bar", "baz"]; let expected = block![ - (Function (Ident main) #() () "=" (BodyBlock #((Ident foo) (Ident bar)))) + (Function (Ident main) #() () (BodyBlock #((Ident foo) (Ident bar)))) (Ident baz) ]; - test(&code.join("\n"), expected); + test(code.join("\n"), expected); } #[test] fn code_block_with_following_statement() { let code = ["main =", " foo", "bar"]; let expected = block![ - (Function (Ident main) #() () "=" (BodyBlock #((Ident foo)))) + (Function (Ident main) #() () (BodyBlock #((Ident foo)))) (Ident bar) ]; - test(&code.join("\n"), expected); + test(code.join("\n"), expected); } #[test] @@ -722,7 +721,7 @@ fn operator_block_nested() { (OperatorBlockApplication (Ident foo) #(((Ok "+") (OperatorBlockApplication (Ident bar) #(((Ok "-") (Ident baz))) #()))) #())]; - test(&code.join("\n"), expected); + test(code.join("\n"), expected); } #[test] @@ -733,12 +732,12 @@ fn operator_section_in_operator_block() { (OperatorBlockApplication (Ident foo) #(((Ok "+") (OprSectionBoundary 1 (OprApp (Ident bar) (Ok "+") ())))) #())]; - test(&code.join("\n"), expected); + test(code.join("\n"), expected); } #[test] fn first_line_indented() { - expect_invalid_node(" a"); + test!(" a", (BodyBlock #((Ident a)))); } @@ -757,17 +756,14 @@ fn multiple_operator_error() { #[test] fn precedence() { - #[rustfmt::skip] - let cases = [ - 
("x * y + z", block![(OprApp (OprApp (Ident x) (Ok "*") (Ident y)) (Ok "+") (Ident z))]), - ("x + y * z", block![(OprApp (Ident x) (Ok "+") (OprApp (Ident y) (Ok "*") (Ident z)))]), - ("w + x + y * z", block![ - (OprApp (OprApp (Ident w) (Ok "+") (Ident x)) (Ok "+") - (OprApp (Ident y) (Ok "*") (Ident z)))]), - ]; - cases.into_iter().for_each(|(code, expected)| test(code, expected)); + test!("x * y + z", (OprApp (OprApp (Ident x) (Ok "*") (Ident y)) (Ok "+") (Ident z))); + test!("x + y * z", (OprApp (Ident x) (Ok "+") (OprApp (Ident y) (Ok "*") (Ident z)))); + test!("w + x + y * z", + (OprApp (OprApp (Ident w) (Ok "+") (Ident x)) (Ok "+") + (OprApp (Ident y) (Ok "*") (Ident z)))); test!("x - 1 + 2", (OprApp (OprApp (Ident x) (Ok "-") (Number () "1" ())) (Ok "+") (Number () "2" ()))); + test!("x+y * z", (OprApp (Ident x) (Ok "+") (OprApp (Ident y) (Ok "*") (Ident z)))); } #[test] @@ -819,7 +815,7 @@ fn operator_sections() { (Ok "+") (Ident x)))]); #[rustfmt::skip] test("increment = 1 +", block![ - (Assignment (Ident increment) "=" + (Assignment (Ident increment) (OprSectionBoundary 1 (OprApp (Number () "1" ()) (Ok "+") ())))]); test!("1+ << 2*", (OprSectionBoundary 1 @@ -875,9 +871,9 @@ fn template_functions() { fn unevaluated_argument() { let code = ["main ~foo = x"]; let expected = block![ - (Function (Ident main) #(("~" (Ident foo) () ())) () "=" (Ident x)) + (Function (Ident main) #(("~" (Ident foo) () ())) () (Ident x)) ]; - test(&code.join("\n"), expected); + test(code.join("\n"), expected); } #[test] @@ -894,25 +890,24 @@ fn unary_operator_at_end_of_expression() { fn unspaced_operator_sequence() { // Add a negated value. test!("x = y+-z", - (Assignment (Ident x) "=" (OprApp (Ident y) (Ok "+") (UnaryOprApp "-" (Ident z))))); + (Assignment (Ident x) (OprApp (Ident y) (Ok "+") (UnaryOprApp "-" (Ident z))))); // Create an operator section that adds a negated value to its input. 
test!("x = +-z", - (Assignment (Ident x) "=" (OprSectionBoundary 1 + (Assignment (Ident x) (OprSectionBoundary 1 (OprApp () (Ok "+") (UnaryOprApp "-" (Ident z)))))); - // Create an operator section that adds its input, negated, to a value. - test!("x = y+-", - (Assignment (Ident x) "=" (OprSectionBoundary 1 - (OprApp (Ident y) (Ok "+") (UnaryOprApp "-" ()))))); + // The `-` can only be lexed as a unary operator, and unary operators cannot form sections. + expect_invalid_node("x = y+-"); // Assign a negative number to x. - test!("x=-1", (Assignment (Ident x) "=" (UnaryOprApp "-" (Number () "1" ())))); + test!("x=-1", (Assignment (Ident x) (UnaryOprApp "-" (Number () "1" ())))); // Assign a negated value to x. - test!("x=-y", (Assignment (Ident x) "=" (UnaryOprApp "-" (Ident y)))); + test!("x=-y", (Assignment (Ident x) (UnaryOprApp "-" (Ident y)))); } #[test] fn minus_binary() { test!("x - x", (OprApp (Ident x) (Ok "-") (Ident x))); test!("x-x", (OprApp (Ident x) (Ok "-") (Ident x))); + test!("x-1", (OprApp (Ident x) (Ok "-") (Number () "1" ()))); } #[test] @@ -934,7 +929,7 @@ fn minus_unary() { test!("-x", (UnaryOprApp "-" (Ident x))); test!("(-x)", (Group (UnaryOprApp "-" (Ident x)))); test!("-(x * x)", (UnaryOprApp "-" (Group (OprApp (Ident x) (Ok "*") (Ident x))))); - test!("x=-x", (Assignment (Ident x) "=" (UnaryOprApp "-" (Ident x)))); + test!("x=-x", (Assignment (Ident x) (UnaryOprApp "-" (Ident x)))); test!("-x+x", (OprApp (UnaryOprApp "-" (Ident x)) (Ok "+") (Ident x))); test!("-x*x", (OprApp (UnaryOprApp "-" (Ident x)) (Ok "*") (Ident x))); } @@ -961,22 +956,22 @@ fn method_app_in_minus_unary() { #[test] fn autoscope_operator() { test!("x : ..True", (TypeSignature (Ident x) ":" (AutoscopedIdentifier ".." True))); - test!("x = ..True", (Assignment (Ident x) "=" (AutoscopedIdentifier ".." True))); + test!("x = ..True", (Assignment (Ident x) (AutoscopedIdentifier ".." 
True))); test!("x = f ..True", - (Assignment (Ident x) "=" (App (Ident f) (AutoscopedIdentifier ".." True)))); + (Assignment (Ident x) (App (Ident f) (AutoscopedIdentifier ".." True)))); expect_invalid_node("x = ..not_a_constructor"); expect_invalid_node("x = case a of ..True -> True"); expect_invalid_node("x = ..4"); expect_invalid_node("x = ..Foo.Bar"); expect_invalid_node("x = f .. True"); - expect_invalid_node("x = f(.. ..)"); - expect_invalid_node("x = f(.. *)"); - expect_invalid_node("x = f(.. True)"); + expect_invalid_node("x = f (.. ..)"); + expect_invalid_node("x = f (.. *)"); + expect_invalid_node("x = f (.. True)"); expect_invalid_node("x = True.."); expect_invalid_node("x = True..True"); - expect_multiple_operator_error("x = .."); - expect_multiple_operator_error("x = .. True"); - expect_multiple_operator_error("x : .. True"); + expect_invalid_node("x = .."); + expect_invalid_node("x = .. True"); + expect_invalid_node("x : .. True"); } @@ -1034,29 +1029,20 @@ fn import() { #[test] fn export() { - #[rustfmt::skip] - let cases = [ - ("export prj.Data.Foo", block![ - (Export () - ((Ident export) - (OprApp (OprApp (Ident prj) (Ok ".") (Ident Data)) (Ok ".") (Ident Foo))) - () () ())]), - ("export Foo as Bar", block![ - (Export () ((Ident export) (Ident Foo)) () ((Ident as) (Ident Bar)) ())]), - ("from Foo export Bar, Baz", block![ - (Export - ((Ident from) (Ident Foo)) - ((Ident export) (OprApp (Ident Bar) (Ok ",") (Ident Baz))) - () () ())]), - ("from Foo export all hiding Bar, Baz", block![ - (Export - ((Ident from) (Ident Foo)) - ((Ident export) ()) - all - () - ((Ident hiding) (OprApp (Ident Bar) (Ok ",") (Ident Baz))))]), - ]; - cases.into_iter().for_each(|(code, expected)| test(code, expected)); + test!("export prj.Data.Foo", + (Export () + ((Ident export) + (OprApp (OprApp (Ident prj) (Ok ".") (Ident Data)) (Ok ".") (Ident Foo))) + ())); + test!("export Foo as Bar", + (Export () ((Ident export) (Ident Foo)) ((Ident as) (Ident Bar)))); + test!("from 
Foo export Bar, Baz", + (Export + ((Ident from) (Ident Foo)) + ((Ident export) (OprApp (Ident Bar) (Ok ",") (Ident Baz))) + ())); + expect_invalid_node("from Foo export all hiding Bar, Baz"); + test!("from Foo export all", (Invalid)); } @@ -1095,39 +1081,42 @@ fn metadata_parsing() { #[test] fn type_signatures() { - #[rustfmt::skip] - let cases = [ - ("val : Bool", block![(TypeSignature (Ident val) ":" (Ident Bool))]), - ("val : List Int", block![(TypeSignature (Ident val) ":" (App (Ident List) (Ident Int)))]), - ("foo : [Integer | Text] -> (Integer | Text)", block![ - (TypeSignature (Ident foo) ":" - (OprApp (Array (OprApp (Ident Integer) (Ok "|") (Ident Text)) #()) - (Ok "->") - (Group (OprApp (Ident Integer) (Ok "|") (Ident Text)))))]), - ]; - cases.into_iter().for_each(|(code, expected)| test(code, expected)); + test!("val : Bool", (TypeSignature (Ident val) ":" (Ident Bool))); + test!("val : List Int", (TypeSignature (Ident val) ":" (App (Ident List) (Ident Int)))); + test!("foo : [Integer | Text] -> (Integer | Text)", + (TypeSignature (Ident foo) ":" + (OprApp (Array (OprApp (Ident Integer) (Ok "|") (Ident Text)) #()) + (Ok "->") + (Group (OprApp (Ident Integer) (Ok "|") (Ident Text)))))); + test!("f a (b : Int) : Double", + (TypeAnnotated + (App (App (Ident f) (Ident a)) (Group (TypeAnnotated (Ident b) ":" (Ident Int)))) + ":" (Ident Double))); + test!("f a (b = 1 : Int) : Double", + (TypeAnnotated + (NamedApp (App (Ident f) (Ident a)) b + (TypeAnnotated (Number () "1" ()) ":" (Ident Int))) ":" (Ident Double))); } #[test] fn type_annotations() { - #[rustfmt::skip] - let cases = [ - ("val = x : Int", block![ - (Assignment (Ident val) "=" (TypeAnnotated (Ident x) ":" (Ident Int)))]), - ("val = foo (x : Int)", block![ - (Assignment (Ident val) "=" - (App (Ident foo) - (Group (TypeAnnotated (Ident x) ":" (Ident Int)))))]), - ("(x : My_Type _)", block![ - (Group - (TypeAnnotated (Ident x) - ":" - (App (Ident My_Type) (TemplateFunction 1 (Wildcard 0)))))]), - ("x 
: List Int -> Int", block![ - (TypeSignature (Ident x) ":" - (OprApp (App (Ident List) (Ident Int)) (Ok "->") (Ident Int)))]), - ]; - cases.into_iter().for_each(|(code, expected)| test(code, expected)); + test!("val = x : Int", + (Assignment (Ident val) (TypeAnnotated (Ident x) ":" (Ident Int)))); + test!("val = foo (x : Int)", + (Assignment (Ident val) + (App (Ident foo) + (Group (TypeAnnotated (Ident x) ":" (Ident Int)))))); + test!("(x : My_Type _)", + (Group + (TypeAnnotated (Ident x) + ":" + (App (Ident My_Type) (TemplateFunction 1 (Wildcard 0)))))); + test!("x : List Int -> Int", + (TypeSignature (Ident x) ":" + (OprApp (App (Ident List) (Ident Int)) (Ok "->") (Ident Int)))); + test!("p:Plus + m:Plus", + (OprApp (TypeAnnotated (Ident p) ":" (Ident Plus)) + (Ok "+") (TypeAnnotated (Ident m) ":" (Ident Plus)))); } @@ -1136,10 +1125,10 @@ fn type_annotations() { #[test] fn inline_text_literals() { test!(r#""I'm an inline raw text!""#, (TextLiteral #((Section "I'm an inline raw text!")))); - test!(r#"zero_length = """#, (Assignment (Ident zero_length) "=" (TextLiteral #()))); + test!(r#"zero_length = """#, (Assignment (Ident zero_length) (TextLiteral #()))); test!(r#""type""#, (TextLiteral #((Section "type")))); - test!(r#"unclosed = ""#, (Assignment (Ident unclosed) "=" (TextLiteral #()))); - test!(r#"unclosed = "a"#, (Assignment (Ident unclosed) "=" (TextLiteral #((Section "a"))))); + test!(r#"unclosed = ""#, (Assignment (Ident unclosed) (TextLiteral #()))); + test!(r#"unclosed = "a"#, (Assignment (Ident unclosed) (TextLiteral #((Section "a"))))); test!(r#"'Other quote type'"#, (TextLiteral #((Section "Other quote type")))); test!(r#""Non-escape: \n""#, (TextLiteral #((Section "Non-escape: \\n")))); test!(r#""Non-escape: \""#, (TextLiteral #((Section "Non-escape: \\")))); @@ -1190,7 +1179,7 @@ x"#; let code = "x = \"\"\"\n Indented multiline\nx"; #[rustfmt::skip] let expected = block![ - (Assignment (Ident x) "=" (TextLiteral #((Section "Indented multiline")))) 
+ (Assignment (Ident x) (TextLiteral #((Section "Indented multiline")))) (Ident x) ]; test(code, expected); @@ -1199,13 +1188,13 @@ x"#; let code = "x =\n x = '''\n x\nx"; #[rustfmt::skip] let expected = block![ - (Function (Ident x) #() () "=" - (BodyBlock #((Assignment (Ident x) "=" (TextLiteral #((Section "x"))))))) + (Function (Ident x) #() () + (BodyBlock #((Assignment (Ident x) (TextLiteral #((Section "x"))))))) (Ident x) ]; test(code, expected); test!("foo = bar '''\n baz", - (Assignment (Ident foo) "=" (App (Ident bar) (TextLiteral #((Section "baz")))))); + (Assignment (Ident foo) (App (Ident bar) (TextLiteral #((Section "baz")))))); test!("'''\n \\t'", (TextLiteral #((Escape 0x09) (Section "'")))); test!("'''\n x\n \\t'", (TextLiteral #((Section "x") (Newline) (Escape 0x09) (Section "'")))); @@ -1253,25 +1242,26 @@ fn interpolated_literals_in_multiline_text() { #[test] fn new_lambdas() { - let cases = [ - (r#"\v -> v"#, block![(Lambda "\\" (OprApp (Ident v) (Ok "->") (Ident v)))]), - (r#"\a b -> x"#, block![ - (Lambda "\\" (OprApp (App (Ident a) (Ident b)) (Ok "->") (Ident x)))]), - ]; - cases.into_iter().for_each(|(code, expected)| test(code, expected)); + test!(r#"\v -> v"#, (Lambda "\\" (OprApp (Ident v) (Ok "->") (Ident v)))); + test!(r#"\a b -> x"#, (Lambda "\\" (OprApp (App (Ident a) (Ident b)) (Ok "->") (Ident x)))); } #[test] fn old_lambdas() { - test("x -> y", block![(OprApp (Ident x) (Ok "->") (Ident y))]); - test("x->y", block![(OprApp (Ident x) (Ok "->") (Ident y))]); - test("x-> y", block![(OprApp (Ident x) (Ok "->") (Ident y))]); - test("x-> x + y", block![(OprApp (Ident x) (Ok "->") (OprApp (Ident x) (Ok "+") (Ident y)))]); - test("x->\n y", block![(OprApp (Ident x) (Ok "->") (BodyBlock #((Ident y))))]); - test("x ->\n y", block![(OprApp (Ident x) (Ok "->") (BodyBlock #((Ident y))))]); - test("f x->\n y", block![ - (App (Ident f) (OprApp (Ident x) (Ok "->") (BodyBlock #((Ident y)))))]); - test("x->y-> z", block![(OprApp (Ident x) (Ok "->") 
(OprApp (Ident y) (Ok "->") (Ident z)))]); + test!("x -> y", (OprApp (Ident x) (Ok "->") (Ident y))); + test!("x->y", (OprApp (Ident x) (Ok "->") (Ident y))); + test!("x-> y", (OprApp (Ident x) (Ok "->") (Ident y))); + test!("x-> x + y", (OprApp (Ident x) (Ok "->") (OprApp (Ident x) (Ok "+") (Ident y)))); + test!("x->\n y", (OprApp (Ident x) (Ok "->") (BodyBlock #((Ident y))))); + test!("x ->\n y", (OprApp (Ident x) (Ok "->") (BodyBlock #((Ident y))))); + test!("f x->\n y", + (App (Ident f) (OprApp (Ident x) (Ok "->") (BodyBlock #((Ident y)))))); + test!("x->y-> z", (OprApp (Ident x) (Ok "->") (OprApp (Ident y) (Ok "->") (Ident z)))); + test!("foo = x -> (y = bar x) -> x + y", + (Assignment (Ident foo) + (OprApp (Ident x) (Ok "->") + (OprApp (Group (OprApp (Ident y) (Ok "=") (App (Ident bar) (Ident x)))) (Ok "->") + (OprApp (Ident x) (Ok "+") (Ident y)))))); } @@ -1279,11 +1269,16 @@ fn old_lambdas() { #[test] fn pattern_irrefutable() { - let code = "Point x_val = my_point"; - let expected = block![(Assignment (App (Ident Point) (Ident x_val)) "=" (Ident my_point))]; - test(code, expected); + test!("Point x_val = my_point", + (Assignment (App (Ident Point) (Ident x_val)) (Ident my_point))); + test!("Vector _ = x", (Assignment (App (Ident Vector) (Wildcard -1)) (Ident x))); + test!("X.y = z", (Function (OprApp (Ident X) (Ok ".") (Ident y)) #() () (Ident z))); +} - test("Vector _ = x", block![(Assignment (App (Ident Vector) (Wildcard -1)) "=" (Ident x))]); +#[test] +fn pattern_invalid() { + expect_invalid_node("x + y = z"); + expect_invalid_node("(x y) = z"); } #[test] @@ -1294,24 +1289,15 @@ fn case_expression() { " Some -> x", " Int -> x", ]; - #[rustfmt::skip] - let expected = block![ + test!(code.join("\n"), (CaseOf (Ident a) #( ((() (Ident Some) "->" (Ident x))) - ((() (Ident Int) "->" (Ident x))))) - ]; - test(&code.join("\n"), expected); + ((() (Ident Int) "->" (Ident x)))))); - #[rustfmt::skip] - let code = [ - "case a of", - " Vector_2d x y -> x", - ]; - 
#[rustfmt::skip] - let expected = block![ + let code = ["case a of", " Vector_2d x y -> x"]; + test!(code.join("\n"), (CaseOf (Ident a) #( - ((() (App (App (Ident Vector_2d) (Ident x)) (Ident y)) "->" (Ident x)))))]; - test(&code.join("\n"), expected); + ((() (App (App (Ident Vector_2d) (Ident x)) (Ident y)) "->" (Ident x)))))); #[rustfmt::skip] let code = [ @@ -1319,12 +1305,10 @@ fn case_expression() { " Vector_2d -> x", " _ -> x", ]; - #[rustfmt::skip] - let expected = block![ + test!(code.join("\n"), (CaseOf (Ident self) #( ((() (Ident Vector_2d) "->" (Ident x))) - ((() (Wildcard -1) "->" (Ident x)))))]; - test(&code.join("\n"), expected); + ((() (Wildcard -1) "->" (Ident x)))))); #[rustfmt::skip] let code = [ @@ -1332,8 +1316,7 @@ fn case_expression() { " v:My_Type -> x", " v:(My_Type _ _) -> x", ]; - #[rustfmt::skip] - let expected = block![ + test!(code.join("\n"), (CaseOf (Ident foo) #( ((() (TypeAnnotated (Ident v) ":" (Ident My_Type)) "->" (Ident x))) ((() (TypeAnnotated (Ident v) ":" @@ -1342,9 +1325,7 @@ fn case_expression() { (Ident My_Type) (TemplateFunction 1 (Wildcard 0))) (TemplateFunction 1 (Wildcard 0))))) - "->" (Ident x))))) - ]; - test(&code.join("\n"), expected); + "->" (Ident x)))))); } #[test] @@ -1365,7 +1346,7 @@ fn case_documentation() { (((#((Section " The Int case")) #()) () () ())) ((() (Ident Int) "->" (Ident x))))) ]; - test(&code.join("\n"), expected); + test(code.join("\n"), expected); } #[test] @@ -1392,80 +1373,46 @@ fn case_by_type() { } #[test] -fn pattern_match_suspended_default_arguments() { - #[rustfmt::skip] - let code = [ - "case self of", - " Vector_2d ... -> x", - ]; - #[rustfmt::skip] - let expected = block![ - (CaseOf (Ident self) #(((() (App (Ident Vector_2d) (SuspendedDefaultArguments)) "->" (Ident x)))))]; - test(&code.join("\n"), expected); +fn suspended_default_arguments_in_pattern() { + test!("case self of\n Vector_2d ... 
-> x", + (CaseOf (Ident self) + #(((() (App (Ident Vector_2d) (SuspendedDefaultArguments)) "->" (Ident x)))))) +} + +#[test] +fn suspended_default_arguments_in_expression() { + test!("c = self.value ...", + (Assignment (Ident c) + (App (OprApp (Ident self) (Ok ".") (Ident value)) (SuspendedDefaultArguments)))); + test!("c = self.value...", + (Assignment (Ident c) + (App (OprApp (Ident self) (Ok ".") (Ident value)) (SuspendedDefaultArguments)))); } // === Private (project-private) keyword === + #[test] fn private_keyword() { - test("private", block![(Private())]); + test!("private", (Private())); expect_invalid_node("private func"); - // Private binding is not supported. expect_invalid_node("private var = 42"); - expect_invalid_node("private ConstructorOutsideType"); - - #[rustfmt::skip] - let code = [ - "type My_Type", - " private" - ]; - expect_invalid_node(&code.join("\n")); - - #[rustfmt::skip] - let code = [ - "private type My_Type", - " Ctor" - ]; - expect_invalid_node(&code.join("\n")); + expect_invalid_node("type My_Type\n private"); + expect_invalid_node("private type My_Type\n Ctor"); } #[test] fn private_methods() { - #[rustfmt::skip] - let code = "private method x = x"; - #[rustfmt::skip] - let expected = block![ - (Private - (Function (Ident method) #((() (Ident x) () ())) () "=" (Ident x))) - ]; - test(code, expected); - - #[rustfmt::skip] - let code = [ - "private method =", - " 42" - ]; - #[rustfmt::skip] - let expected = block![ - (Private (Function (Ident method) #() () "=" - (BodyBlock #((Number () "42" ()))))) - ]; - test(&code.join("\n"), expected); - - #[rustfmt::skip] - let code = [ - "type T", - " private method x = x" - ]; - #[rustfmt::skip] - let expected = block![ - (TypeDef type T #() #( - (Private - (Function (Ident method) #((() (Ident x) () ())) () "=" (Ident x))) - )) - ]; - test(&code.join("\n"), expected); + test!("private method x = x", + (Private (Function (Ident method) #((() (Ident x) () ())) () (Ident x)))); + test!("private 
method =\n 42", + (Private (Function (Ident method) #() () + (BodyBlock #((Number () "42" ())))))); + test!("type T\n private method x = x", + (TypeDef T #() #( + (Private + (Function (Ident method) #((() (Ident x) () ())) () (Ident x)))))); } @@ -1480,7 +1427,7 @@ fn private_is_first_statement() { "", "private" ]; - test(&lines.join("\n"), block![()()()(Private)]); + test(lines.join("\n"), block![()()()(Private)]); #[rustfmt::skip] let lines = [ @@ -1525,13 +1472,33 @@ mod numbers { #[test] fn with_decimal() { + test!("pi = 3.14", (Assignment (Ident pi) (Number () "3" ("." "14")))); + } + + #[test] + fn digits_spaced_dot() { test!("1 . 0", (OprApp (Number () "1" ()) (Ok ".") (Number () "0" ()))); test!("1 .0", (App (Number () "1" ()) (OprSectionBoundary 1 (OprApp () (Ok ".") (Number () "0" ()))))); test!("1. 0", (OprSectionBoundary 1 (App (OprApp (Number () "1" ()) (Ok ".") ()) (Number () "0" ())))); - test!("pi = 3.14", (Assignment (Ident pi) "=" (Number () "3" ("." "14")))); + } + + #[test] + fn non_digits_dot_digits() { + test!("x.0", (OprApp (Ident x) (Ok ".") (Number () "0" ()))); + } + + #[test] + fn digits_dot_non_digits() { test!("0.0.x", (OprApp (Number () "0" ("." "0")) (Ok ".") (Ident x))); + test!("1.0.0", (OprApp (Number () "1" ("." "0")) (Ok ".") (Number () "0" ()))); + test!("1.0x", (OprApp (Number () "1" ()) (Ok ".") (Number "0x" () ()))); + test!("876543.is_even.should_be_false", + (OprApp + (OprApp (Number () "876543" ()) (Ok ".") (Ident is_even)) + (Ok ".") + (Ident should_be_false))); } #[test] @@ -1549,19 +1516,14 @@ mod numbers { } #[test] - // This syntax cannot be used until we remove old-nondecimal number support, which is - // needed for compatibility until the old parser is fully replaced. - #[ignore] - fn new_delimited() { + fn delimited() { test!("100_000", (Number () "100_000" ())); test!("10_000.99", (Number () "10_000" ("." 
"99"))); } #[test] - fn old_nondecimal() { - test!("2_01101101", (Number "2_" "01101101" ())); - test!("-2_01101101", (UnaryOprApp "-" (Number "2_" "01101101" ()))); - test!("16_17ffffffffffffffa", (Number "16_" "17ffffffffffffffa" ())); + fn old_hex() { + expect_invalid_node("16_17ffffffffffffffa"); } } @@ -1570,12 +1532,9 @@ mod numbers { #[test] fn trailing_whitespace() { - let cases = [ - ("a ", block![(Ident a) ()]), - ("a \n", block![(Ident a) ()]), - ("a = \n x", block![(Function (Ident a) #() () "=" (BodyBlock #((Ident x))))]), - ]; - cases.into_iter().for_each(|(code, expected)| test(code, expected)); + test("a ", block![(Ident a) ()]); + test("a \n", block![(Ident a) ()]); + test("a = \n x", block![(Function (Ident a) #() () (BodyBlock #((Ident x))))]); } @@ -1590,35 +1549,42 @@ fn at_operator() { #[test] fn attributes() { test!("@on_problems P.g\nTable.select_columns : Text -> Table", - (Annotated "@" on_problems + (Annotated on_problems (OprApp (Ident P) (Ok ".") (Ident g)) #(()) (TypeSignature (OprApp (Ident Table) (Ok ".") (Ident select_columns)) ":" (OprApp (Ident Text) (Ok "->") (Ident Table))))); - test!("@a z\n@b\nx", (Annotated "@" a (Ident z) #(()) (Annotated "@" b () #(()) (Ident x)))); - test!("@a\n@b\nx", (Annotated "@" a () #(()) (Annotated "@" b () #(()) (Ident x)))); + test!("@a z\n@b\nx", (Annotated a (Ident z) #(()) (Annotated b () #(()) (Ident x)))); + test!("@a\n@b\nx", (Annotated a () #(()) (Annotated b () #(()) (Ident x)))); } #[test] fn attributes_in_types() { - test!("type A\n @a z\n @b\n x", - (TypeDef type A #() #( - (Annotated "@" a (Ident z) #(()) (Annotated "@" b () #(()) (Ident x)))))); + test!("type A\n @a z\n @b\n x y = x", + (TypeDef A #() #( + (Annotated a (Ident z) #(()) + (Annotated b () #(()) + (Function (Ident x) #((() (Ident y) () ())) () (Ident x))))))); } #[test] fn inline_builtin_annotations() { - test!("@Tail_Call go t", (AnnotatedBuiltin "@" Tail_Call #() (App (Ident go) (Ident t)))); + test!("@Tail_Call go t", 
(AnnotatedBuiltin Tail_Call #() (App (Ident go) (Ident t)))); + test!("@Tail_Call go (x = y)", + (AnnotatedBuiltin Tail_Call #() (NamedApp (Ident go) x (Ident y)))); test!("@Tail_Call go\n a\n b", - (AnnotatedBuiltin "@" Tail_Call #() + (AnnotatedBuiltin Tail_Call #() (ArgumentBlockApplication (Ident go) #((Ident a) (Ident b))))); + test!("map _-> @Tail_Call f", + (App (Ident map) + (OprApp (Wildcard 0) (Ok "->") (AnnotatedBuiltin Tail_Call #() (Ident f))))); } #[test] fn multiline_builtin_annotations() { test!("@Builtin_Type\ntype Date", - (AnnotatedBuiltin "@" Builtin_Type #(()) (TypeDef type Date #() #()))); + (AnnotatedBuiltin Builtin_Type #(()) (TypeDef Date #() #()))); } @@ -1749,9 +1715,11 @@ fn invalid_token() { #[test] fn illegal_foreign_body() { - expect_invalid_node("foreign 4"); - expect_invalid_node("foreign 4 * 4"); + // Foreign is only a keyword on the LHS of an assignment operator. + test!("foreign 4", (App (Ident foreign) (Number () "4" ()))); + // Missing name expect_invalid_node("foreign foo = \"4\""); + // Body must be a type expect_invalid_node("foreign js foo = 4"); } @@ -1769,7 +1737,7 @@ fn invalid_unspaced_operator_sequence() { // // Due to this special case, there is no reasonable way to interpret this type of expression as // valid when spaces are added in the following way: - expect_multiple_operator_error("x = y +- z"); + expect_invalid_node("x = y +- z"); expect_multiple_operator_error("x =- y"); // // Treating the `-` as a unary operator applied to `z` would be confusing, as it would be in @@ -1778,18 +1746,34 @@ fn invalid_unspaced_operator_sequence() { // However, it would also be confusing to lex a sequence of characters like `+-` as a single // operator in spaced expressions, but as two operators in unspaced expressions. // - // Lacking any reasonable valid interpretation, we treat this case as a multiple-operator error. 
- // This is the only case in which we yield a multiple-operator error when there are no spaces - // between the operators. + // Lacking any reasonable valid interpretation, we treat this case as an error. // // Similar expressions with missing operands should be treated likewise: - expect_multiple_operator_error("x = y +-"); - expect_multiple_operator_error("x = +- z"); + expect_invalid_node("x = y +-"); + expect_invalid_node("x = +- z"); expect_multiple_operator_error("x =-"); expect_multiple_operator_error("=- y"); expect_multiple_operator_error("=-"); } +#[test] +fn nonsense_inputs() { + expect_invalid_node("`a (b = 1).`"); + expect_invalid_node("type M = B F(M<'a>) -> S>;"); + expect_invalid_node("'`'\nx `y`\nz"); + expect_invalid_node("if (asGuestValue\n a"); + expect_invalid_node("foo(\n a"); + expect_invalid_node("(Vector(), true)"); +} + +#[test] +#[ignore] +fn nonsense_inputs_broken() { + // FIXME + expect_invalid_node("'`\n"); + expect_invalid_node(".'\\\n"); +} + // ==================== @@ -1825,7 +1809,8 @@ fn expect_tree_representing_code(code: &str, ast: &enso_parser::syntax::Tree) { /// - Most token types are represented as their contents, rather than as a token struct. For /// example, a `token::Number` may be represented like: `sexp![10]`, and a `token::Ident` may look /// like `sexp![foo]`. 
-fn test(code: &str, expect: lexpr::Value) { +fn test>(code: T, expect: lexpr::Value) { + let code = code.as_ref(); let ast = parse(code); let ast_s_expr = to_s_expr(&ast, code); assert_eq!(ast_s_expr.to_string(), expect.to_string(), "{:?}", &ast); @@ -1836,7 +1821,7 @@ fn parse(code: &str) -> enso_parser::syntax::tree::Tree { let ast = enso_parser::Parser::new().run(code); let expected_span = 0..(code.encode_utf16().count() as u32); let mut locations = enso_parser::source::code::debug::LocationCheck::new(); - enso_parser_debug::validate_spans(&ast, expected_span, &mut locations); + enso_parser_debug::validate_spans(&ast, expected_span, &mut locations).unwrap(); locations.check(code); ast } @@ -1854,7 +1839,7 @@ impl Errors { fn collect(ast: &enso_parser::syntax::Tree, code: &str) -> Self { expect_tree_representing_code(code, ast); let errors = core::cell::Cell::new(Errors::default()); - ast.visit_trees(|tree| match &*tree.variant { + ast.visit_trees(|tree| match &tree.variant { enso_parser::syntax::tree::Variant::Invalid(_) => { errors.update(|e| Self { invalid_node: true, ..e }); } diff --git a/lib/rust/parser/doc-parser/src/main.rs b/lib/rust/parser/doc-parser/src/main.rs index 5897ddd8a5..5e714c523a 100644 --- a/lib/rust/parser/doc-parser/src/main.rs +++ b/lib/rust/parser/doc-parser/src/main.rs @@ -51,7 +51,7 @@ fn extract_docs(_filename: &str, mut code: &str) -> Vec { } let ast = enso_parser::Parser::new().run(code); let docs = RefCell::new(vec![]); - ast.visit_trees(|tree| match &*tree.variant { + ast.visit_trees(|tree| match &tree.variant { enso_parser::syntax::tree::Variant::Documented(doc) => { docs.borrow_mut().push(doc.documentation.clone()); } diff --git a/lib/rust/parser/generate-java/java/org/enso/syntax2/Parser.java b/lib/rust/parser/generate-java/java/org/enso/syntax2/Parser.java index e277391ddd..2c375ee840 100644 --- a/lib/rust/parser/generate-java/java/org/enso/syntax2/Parser.java +++ 
b/lib/rust/parser/generate-java/java/org/enso/syntax2/Parser.java @@ -44,10 +44,11 @@ public final class Parser implements AutoCloseable { } System.load(path.getAbsolutePath()); } catch (NullPointerException | IllegalArgumentException | LinkageError e) { - if (searchFromDirToTop(e, root, "target", "rust", "debug", name)) { + if (searchFromDirToTop(e, root, "target", "rust", "parser-jni", name)) { return; } - if (searchFromDirToTop(e, new File(".").getAbsoluteFile(), "target", "rust", "debug", name)) { + if (searchFromDirToTop( + e, new File(".").getAbsoluteFile(), "target", "rust", "parser-jni", name)) { return; } throw new IllegalStateException("Cannot load parser from " + root, e); diff --git a/lib/rust/parser/src/lexer.rs b/lib/rust/parser/src/lexer.rs index 924eb0bd59..a8a5cc321c 100644 --- a/lib/rust/parser/src/lexer.rs +++ b/lib/rust/parser/src/lexer.rs @@ -11,6 +11,7 @@ use crate::syntax::*; use crate::source::code::Length; use crate::source::code::Location; use crate::syntax::token::Codepoint; +use crate::syntax::token::OperatorProperties; use std::str; @@ -82,15 +83,15 @@ pattern_impl_for_char_slice!(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); /// syntax errors. #[derive(Debug, Deref, DerefMut)] #[allow(missing_docs)] -pub struct Lexer<'s> { +pub struct Lexer<'s, Inner> { #[deref] #[deref_mut] state: LexerState, input: &'s str, iterator: str::CharIndices<'s>, - output: Vec>, /// Memory for storing tokens, reused as an optimization. - token_storage: VecAllocation>, + token_storage: VecAllocation>, + inner: Inner, } /// Internal state of the [`Lexer`]. @@ -103,7 +104,6 @@ pub struct LexerState { last_spaces_visible_offset: VisibleOffset, current_block_indent: VisibleOffset, block_indent_stack: Vec, - internal_error: Option, stack: Vec, } @@ -127,15 +127,13 @@ struct Mark<'s> { offset: Offset<'s>, } -impl<'s> Lexer<'s> { +impl<'s, Inner: TokenConsumer<'s>> Lexer<'s, Inner> { /// Constructor. 
- pub fn new(input: &'s str) -> Self { + pub fn new(input: &'s str, inner: Inner) -> Self { let iterator = input.char_indices(); - let capacity = input.len() / AVERAGE_TOKEN_LEN; - let output = Vec::with_capacity(capacity); let state = default(); let token_storage = default(); - Self { input, iterator, output, state, token_storage }.init() + Self { input, iterator, state, token_storage, inner }.init() } fn init(mut self) -> Self { @@ -230,18 +228,6 @@ impl<'s> Lexer<'s> { Token(offset, Code::empty(start), elem) } - /// Push the [`token`] to the result stream. - #[inline(always)] - fn submit_token(&mut self, token: Token<'s>) { - self.output.push(token); - } - - /// Push the [`tokens`] to the result stream. - #[inline(always)] - fn submit_tokens>>(&mut self, tokens: T) { - self.output.extend(tokens); - } - /// Start a new block. #[inline(always)] fn start_block(&mut self, new_indent: VisibleOffset) { @@ -267,7 +253,7 @@ impl<'s> Lexer<'s> { // === Basic Parsers === // ===================== -impl<'s> Lexer<'s> { +impl<'s, Inner: TokenConsumer<'s>> Lexer<'s, Inner> { /// Consume the next character, unconditionally. #[inline(always)] fn take_next(&mut self) -> bool { @@ -361,7 +347,7 @@ fn is_space_char(t: char) -> bool { space_char_visible_size(t).is_some() } -impl<'s> Lexer<'s> { +impl<'s, Inner: TokenConsumer<'s>> Lexer<'s, Inner> { /// Consume a visible space character and return its visible offset. #[inline(always)] fn space(&mut self) -> Option { @@ -442,7 +428,7 @@ fn decode_hexadecimal_digit(c: char) -> Option { }) } -impl<'s> Lexer<'s> { +impl<'s, Inner: TokenConsumer<'s>> Lexer<'s, Inner> { #[inline(always)] fn take_rest_of_line(&mut self) { self.take_while(|t| !is_newline_char(t)) @@ -598,7 +584,7 @@ impl token::Variant { } } -impl<'s> Lexer<'s> { +impl<'s, Inner: TokenConsumer<'s>> Lexer<'s, Inner> { /// Parse an identifier. 
fn ident(&mut self) { if let Some(token) = self.token(|this| { @@ -606,12 +592,14 @@ impl<'s> Lexer<'s> { this.take_while_1(is_ident_char); } }) { - if token.left_offset.is_empty() { - self.unspaced_term(); + if token.code == "private" { + let token = token.with_variant(token::Variant::private()); + self.inner.push_token(token); + return; } let tp = token::Variant::new_ident_or_wildcard_unchecked(&token.code); let token = token.with_variant(tp); - self.submit_token(token); + self.inner.push_token(token); } } @@ -635,7 +623,7 @@ impl<'s> Lexer<'s> { // === Operator === // ================ -impl<'s> Lexer<'s> { +impl<'s, Inner: TokenConsumer<'s>> Lexer<'s, Inner> { /// Parse an operator. fn operator(&mut self) { let token = self.token(|this| { @@ -656,171 +644,73 @@ impl<'s> Lexer<'s> { "+-" => { let (left, right) = token.split_at(Length::of("+")); let lhs = analyze_operator(&left.code); - self.submit_token(left.with_variant(token::Variant::operator(lhs))); + self.inner.push_token(left.with_variant(lhs)); // The `-` in this case is not identical to a free `-`: It is only allowed a // unary interpretation. - let rhs = token::OperatorProperties::new() - .with_unary_prefix_mode(token::Precedence::unary_minus()); - self.submit_token(right.with_variant(token::Variant::operator(rhs))); + self.inner.push_token(right.with_variant(token::Variant::negation_operator())); } // Composed of operator characters, but not an operator node. "..." => { let token = token.with_variant(token::Variant::suspended_default_arguments()); - self.submit_token(token); - } - // Decimal vs. method-application must be distinguished before parsing because they - // have different precedences; this is a special case here because the distinction - // requires lookahead. - "." 
if self.last_spaces_visible_offset.width_in_spaces == 0 - && let Some(char) = self.current_char - && char.is_ascii_digit() => - { - let opr = token::OperatorProperties::new() - .with_binary_infix_precedence(81) - .as_decimal(); - let token = token.with_variant(token::Variant::operator(opr)); - self.submit_token(token); - } - // Operator-identifiers. - _ if self.prev_token_is_dot_operator() => { - let properties = analyze_operator(&token.code); - if properties.is_compile_time_operation() { - self.submit_token(token.with_variant(token::Variant::operator(properties))); - } else { - self.submit_token( - token.with_variant(token::Variant::operator_ident().into()), - ); - } - } - // The unary-negation operator binds tighter to numeric literals than other - // expressions. - "-" if self.last_spaces_visible_offset.width_in_spaces == 0 - && let Some(char) = self.current_char - && char.is_ascii_digit() => - { - let opr = token::OperatorProperties::new() - .with_unary_prefix_mode(token::Precedence::unary_minus_numeric_literal()) - .with_binary_infix_precedence(15); - let token = token.with_variant(token::Variant::operator(opr)); - self.submit_token(token); + self.inner.push_token(token); } // Normally-structured operator. _ => { - let tp = token::Variant::operator(analyze_operator(&token.code)); + let tp = analyze_operator(&token.code); let token = token.with_variant(tp); - self.submit_token(token); + self.inner.push_token(token); } } } } - - fn prev_token_is_dot_operator(&self) -> bool { - match self.output.last() { - Some(Token { variant: token::Variant::Operator(operator), .. }) => - operator.properties.is_dot(), - _ => false, - } - } - - fn unspaced_term(&mut self) { - if let Some(Token { - variant: - variant @ token::Variant::Ident(token::variant::Ident { - is_operator_lexically: true, - .. - }), - .. 
- }) = self.output.last_mut() - { - *variant = token::Variant::invalid(); - } - } } // === Precedence === -fn analyze_operator(token: &str) -> token::OperatorProperties { - let mut operator = token::OperatorProperties::new(); +fn analyze_operator(token: &str) -> token::Variant { + match token { + "\\" => token::Variant::lambda_operator(), + "~" => token::Variant::suspension_operator(), + ".." => token::Variant::autoscope_operator(), + "@" => token::Variant::annotation_operator(), + "=" => token::Variant::assignment_operator(), + ":" => token::Variant::type_annotation_operator(), + "->" => token::Variant::arrow_operator(), + "," => token::Variant::comma_operator(), + "." => token::Variant::dot_operator(), + _ => token::Variant::operator(), + } +} + +/// Analyze an operator that has been determined not to be any syntactically-special operator. +pub fn analyze_non_syntactic_operator(token: &str) -> OperatorProperties { + match token { + "-" => OperatorProperties::value() + .with_unary_prefix_mode(token::Precedence::unary_minus()) + .with_binary_infix_precedence(15), + "!" 
=> OperatorProperties::value().with_binary_infix_precedence(3), + "||" | "\\\\" | "&&" => OperatorProperties::value().with_binary_infix_precedence(4), + ">>" | "<<" => OperatorProperties::functional().with_binary_infix_precedence(5), + "|>" | "|>>" => OperatorProperties::functional().with_binary_infix_precedence(6), + "<|" | "<<|" => + OperatorProperties::functional().with_binary_infix_precedence(6).as_right_associative(), + "<=" | ">=" => OperatorProperties::value().with_binary_infix_precedence(14), + "==" | "!=" => OperatorProperties::value().with_binary_infix_precedence(5), + _ => analyze_user_operator(token), + } +} + +fn analyze_user_operator(token: &str) -> OperatorProperties { + let mut operator = OperatorProperties::new(); let has_right_arrow = token.ends_with("->"); let has_left_arrow = token.starts_with("<-"); if has_right_arrow && !has_left_arrow { operator = operator.as_right_associative(); } if token.ends_with('=') && !token.bytes().all(|c| c == b'=') { - match token { - // Inclusive comparison operators are not modifiers. - ">=" | "<=" => (), - // Any other operator ending with "=" is a modifier. - _ => operator = operator.as_modifier(), - } - } - match token { - // Operators that can be unary. - "\\" => - return operator - .with_unary_prefix_mode(token::Precedence::min_valid()) - .as_compile_time_operation(), - "~" => - return operator - .with_unary_prefix_mode(token::Precedence::max()) - .as_compile_time_operation() - .as_suspension(), - ".." => - return operator - .with_unary_prefix_mode(token::Precedence::min_valid()) - .as_compile_time_operation() - .as_autoscope(), - "@" => - return operator - .with_unary_prefix_mode(token::Precedence::max()) - .as_compile_time_operation() - .as_annotation(), - "-" => - return operator - .as_value_operation() - .with_unary_prefix_mode(token::Precedence::unary_minus()) - .with_binary_infix_precedence(15), - // "There are a few operators with the lowest precedence possible." 
- // - These 3 "consume everything to the right". - "=" => - return operator - .with_binary_infix_precedence(1) - .as_right_associative() - .with_lhs_section_termination(operator::SectionTermination::Unwrap) - .as_assignment(), - ":" => - return operator - .with_binary_infix_precedence(2) - .as_right_associative() - .with_lhs_section_termination(operator::SectionTermination::Reify) - .as_compile_time_operation() - .as_type_annotation(), - "->" => - return operator - .with_binary_infix_precedence(2) - .as_right_associative() - .with_lhs_section_termination(operator::SectionTermination::Unwrap) - .as_compile_time_operation() - .as_arrow(), - - "!" => return operator.with_binary_infix_precedence(3).as_value_operation(), - "||" | "\\\\" | "&&" => - return operator.with_binary_infix_precedence(4).as_value_operation(), - ">>" | "<<" => return operator.with_binary_infix_precedence(5), - "|>" | "|>>" => return operator.with_binary_infix_precedence(6), - "<|" | "<<|" => return operator.with_binary_infix_precedence(6).as_right_associative(), - // Other special operators. - "<=" | ">=" => return operator.with_binary_infix_precedence(14).as_value_operation(), - "==" | "!=" => return operator.with_binary_infix_precedence(5).as_value_operation(), - "," => - return operator - .with_binary_infix_precedence(1) - .as_compile_time_operation() - .as_special() - .as_sequence(), - "." => return operator.with_binary_infix_precedence(80).as_dot(), - _ => (), + // Note that inclusive comparison operators (matched above) are not modifiers. 
+ operator = operator.as_modifier(); } // "The precedence of all other operators is determined by the operator's Precedence Character:" let mut precedence_char = None; @@ -845,7 +735,7 @@ fn analyze_operator(token: &str) -> token::OperatorProperties { '^' => 17, _ => 18, }; - let operator = operator.with_binary_infix_precedence(binary); + operator = operator.with_binary_infix_precedence(binary); if !has_right_arrow && !has_left_arrow { operator.as_value_operation() } else { @@ -859,14 +749,22 @@ fn analyze_operator(token: &str) -> token::OperatorProperties { // === Symbols === // =============== -impl<'s> Lexer<'s> { +impl<'s, Inner: TokenConsumer<'s> + GroupHierarchyConsumer<'s>> Lexer<'s, Inner> { /// Parse a symbol. fn symbol(&mut self) { - if let Some(token) = self.token(|this| this.take_1(&['(', '{', '['])) { - self.submit_token(token.with_variant(token::Variant::open_symbol())); + if let Some(token) = self.token(|this| this.take_1('(')) { + self.inner.start_group(token.with_variant(token::variant::OpenSymbol())); + return; } - if let Some(token) = self.token(|this| this.take_1(&[')', '}', ']'])) { - self.submit_token(token.with_variant(token::Variant::close_symbol())); + if let Some(token) = self.token(|this| this.take_1(')')) { + self.inner.end_group(token.with_variant(token::variant::CloseSymbol())); + return; + } + if let Some(token) = self.token(|this| this.take_1(&['{', '['])) { + self.inner.push_token(token.with_variant(token::Variant::open_symbol())); + } + if let Some(token) = self.token(|this| this.take_1(&['}', ']'])) { + self.inner.push_token(token.with_variant(token::Variant::close_symbol())); } } } @@ -877,30 +775,22 @@ impl<'s> Lexer<'s> { // === Number === // ============== -impl<'s> Lexer<'s> { +impl<'s, Inner: TokenConsumer<'s>> Lexer<'s, Inner> { /// Parse a number. 
fn number(&mut self) { let mut base = None; let token = self.token(|this| { - let mut old_hex_chars_matched = 0; - let mut old_bin_chars_matched = 0; - let mut new_based_chars_matched = 0; + let mut base_chars_matched = 0; match this.current_char { - Some('0') => new_based_chars_matched = 1, - Some('1') => old_hex_chars_matched = 1, - Some('2') => old_bin_chars_matched = 1, + Some('0') => base_chars_matched = 1, Some(d) if is_decimal_digit(d) => (), _ => return, } this.next_input_char(); - let mut prev_was_underscore = false; match this.current_char { - Some('_') if old_bin_chars_matched == 1 => base = Some(token::Base::Binary), - Some('_') => prev_was_underscore = true, - Some('b') if new_based_chars_matched == 1 => base = Some(token::Base::Binary), - Some('o') if new_based_chars_matched == 1 => base = Some(token::Base::Octal), - Some('x') if new_based_chars_matched == 1 => base = Some(token::Base::Hexadecimal), - Some('6') if old_hex_chars_matched == 1 => old_hex_chars_matched = 2, + Some('b') if base_chars_matched == 1 => base = Some(token::Base::Binary), + Some('o') if base_chars_matched == 1 => base = Some(token::Base::Octal), + Some('x') if base_chars_matched == 1 => base = Some(token::Base::Hexadecimal), Some(d) if is_decimal_digit(d) => (), _ => return, } @@ -908,19 +798,7 @@ impl<'s> Lexer<'s> { if base.is_some() { return; } - let mut was_underscore = false; - match this.current_char { - Some('_') if old_hex_chars_matched == 2 => { - base = Some(token::Base::Hexadecimal); - this.next_input_char(); - return; - } - Some('_') if !prev_was_underscore => was_underscore = true, - Some(d) if is_decimal_digit(d) => (), - _ => return, - } - prev_was_underscore = was_underscore; - this.next_input_char(); + let mut prev_was_underscore = false; loop { let mut was_underscore = false; match this.current_char { @@ -933,33 +811,18 @@ impl<'s> Lexer<'s> { } }); if let Some(token) = token { - if token.left_offset.is_empty() { - self.unspaced_term(); - } if let Some(base) = 
base { - self.submit_token(token.with_variant(token::Variant::number_base())); - let after_base = self.current_offset; + self.inner.push_token(token.with_variant(token::Variant::number_base())); if let Some(digits) = match base { token::Base::Binary => self.token(|this| this.take_while(is_binary_digit)), token::Base::Octal => self.token(|this| this.take_while(is_octal_digit)), token::Base::Hexadecimal => self.token(|this| this.take_while(is_hexadecimal_digit)), } { - // The base and the digits are separate tokens so that they can have separate - // spans. A pseudo-token binds them together tightly so that the parser can - // assemble them into one number node. - let joiner = token::OperatorProperties::new() - .with_binary_infix_precedence(u32::MAX) - .as_token_joiner(); - self.submit_token(Token( - Code::empty(after_base), - Code::empty(after_base), - token::Variant::operator(joiner), - )); - self.submit_token(digits.with_variant(token::Variant::digits(Some(base)))); + self.inner.push_token(digits.with_variant(token::Variant::digits(Some(base)))); } } else { - self.submit_token(token.with_variant(token::Variant::digits(None))); + self.inner.push_token(token.with_variant(token::Variant::digits(None))); } } } @@ -971,7 +834,9 @@ impl<'s> Lexer<'s> { // === Text === // ============ -impl<'s> Lexer<'s> { +impl<'s, Inner> Lexer<'s, Inner> +where Inner: TokenConsumer<'s> + BlockHierarchyConsumer + NewlineConsumer<'s> +{ /// Read a text literal. 
fn text(&mut self) { let (quote_char, text_type) = match self.current_char { @@ -982,15 +847,12 @@ impl<'s> Lexer<'s> { self.end_splice(state); } else { let token = self.token(|this| this.take_next()).unwrap(); - self.submit_token(token.with_variant(token::Variant::invalid())); + self.inner.push_token(token.with_variant(token::Variant::invalid())); } return; } _ => return, }; - if self.last_spaces_visible_offset == VisibleOffset(0) { - self.unspaced_term(); - } let indent = self.current_block_indent; let open_quote_start = self.mark(); self.take_next(); @@ -1021,17 +883,17 @@ impl<'s> Lexer<'s> { close_quote_start.clone(), token::Variant::text_start(), ); - self.submit_token(token); + self.inner.push_token(token); let token = self.make_token(close_quote_start, close_quote_end, token::Variant::text_end()); - self.submit_token(token); + self.inner.push_token(token); } } else { // One quote followed by non-quote character: Inline quote. let open_quote_end = self.mark_without_whitespace(); let token = self.make_token(open_quote_start, open_quote_end, token::Variant::text_start()); - self.submit_token(token); + self.inner.push_token(token); self.inline_quote(quote_char, text_type); } self.spaces_after_lexeme(); @@ -1045,12 +907,12 @@ impl<'s> Lexer<'s> { ) { let open_quote_end = self.mark_without_whitespace(); let token = self.make_token(open_quote_start, open_quote_end, token::Variant::text_start()); - self.submit_token(token); + self.inner.push_token(token); let mut initial_indent = None; if text_type.expects_initial_newline() && let Some(newline) = self.line_break() { - self.submit_token(newline.with_variant(token::Variant::text_initial_newline())); + self.inner.push_token(newline.with_variant(token::Variant::text_initial_newline())); if self.last_spaces_visible_offset > block_indent { initial_indent = self.last_spaces_visible_offset.into(); } @@ -1072,7 +934,7 @@ impl<'s> Lexer<'s> { let splice_quote_end = self.mark_without_whitespace(); let token = 
self.make_token(splice_quote_start, splice_quote_end, token::Variant::close_symbol()); - self.submit_token(token); + self.inner.push_token(token); match state { State::InlineText => self.inline_quote('\'', TextType::Interpolated), State::MultilineText { .. } => { @@ -1120,14 +982,13 @@ impl<'s> Lexer<'s> { // If `token.code.is_empty()`, we ignore the `token.left_offset` here even if // it is non-empty, because it will be attached to the newline token. if !token.code.is_empty() { - self.submit_token(token); + self.inner.push_token(token); } else { before_newline = text_start; } self.advance_line_pos(); let newline_end = self.mark_without_whitespace(); - let token = - self.make_token(before_newline, newline_end, token::Variant::newline()); + let token = self.make_newline(before_newline, newline_end); newlines.push(token); if let Some(initial) = *initial_indent { let trim = std::cmp::max(initial, *block_indent + MIN_TEXT_TRIM); @@ -1155,19 +1016,19 @@ impl<'s> Lexer<'s> { let offset = Offset(VisibleOffset(0), location.clone()); Token(offset, location, token::Variant::text_end()) }; - self.submit_token(text_end); - self.end_blocks(indent, newlines.first().as_ref().unwrap()); - self.submit_tokens(newlines); + self.inner.push_token(text_end); + self.end_blocks(indent); + newlines.into_iter().for_each(|newline| self.inner.push_newline(newline)); if self.current_offset == text_start.location { self.last_spaces_visible_offset = text_start.offset.visible; self.last_spaces_offset = text_start.offset.code.range().start; } return TextEndedAt::End; } - let newlines = newlines + newlines .into_iter() - .map(|token| token.with_variant(token::Variant::text_newline())); - self.submit_tokens(newlines); + .map(|token| token.with_variant(token::Variant::text_newline())) + .for_each(|newline| self.inner.push_token(newline)); continue; } } @@ -1183,7 +1044,7 @@ impl<'s> Lexer<'s> { if token.code.is_empty() { backslash_start = text_start.clone(); } else { - self.submit_token(token); + 
self.inner.push_token(token); } self.last_spaces_offset = self.current_offset; text_start = self.text_escape(backslash_start, char); @@ -1202,7 +1063,7 @@ impl<'s> Lexer<'s> { if token.code.is_empty() { splice_quote_start = text_start; } else { - self.submit_token(token); + self.inner.push_token(token); } self.take_next(); let splice_quote_end = self.mark_without_whitespace(); @@ -1211,7 +1072,7 @@ impl<'s> Lexer<'s> { splice_quote_end, token::Variant::open_symbol(), ); - self.submit_token(token); + self.inner.push_token(token); self.stack.push(state); self.last_spaces_offset = self.current_offset; return TextEndedAt::Splice; @@ -1221,7 +1082,7 @@ impl<'s> Lexer<'s> { let text_end = self.mark_without_whitespace(); let token = self.make_token(text_start, text_end.clone(), token::Variant::text_section()); if !(token.code.is_empty() && token.left_offset.code.is_empty()) { - self.submit_token(token); + self.inner.push_token(token); } let end_token = if self.current_char == closing_char { self.take_next(); @@ -1233,7 +1094,7 @@ impl<'s> Lexer<'s> { Code::empty(self.current_offset), )) }; - self.submit_token(end_token); + self.inner.push_token(end_token); TextEndedAt::End } @@ -1271,7 +1132,7 @@ impl<'s> Lexer<'s> { sequence_end.clone(), token::Variant::text_escape(value.map(Codepoint::from_u32).unwrap_or_default()), ); - self.submit_token(token); + self.inner.push_token(token); sequence_end } else { let value = match char { @@ -1297,7 +1158,7 @@ impl<'s> Lexer<'s> { escape_end.clone(), token::Variant::text_escape(value.map(Codepoint::from_char).unwrap_or_default()), ); - self.submit_token(token); + self.inner.push_token(token); escape_end } } @@ -1329,6 +1190,18 @@ impl<'s> Lexer<'s> { let end8 = usize_from(end.utf8); Token(offset, Code::from_str_at_location(&self.input[start8..end8], start), variant) } + + fn make_newline(&self, from: Mark<'s>, to: Mark<'s>) -> token::Newline<'s> { + let Mark { location: start, offset } = from; + let end = to.location; + let start8 = 
usize_from(start.utf8); + let end8 = usize_from(end.utf8); + Token( + offset, + Code::from_str_at_location(&self.input[start8..end8], start), + token::variant::Newline(), + ) + } } #[derive(PartialEq, Eq)] @@ -1360,12 +1233,14 @@ impl TextType { // === Comments === // ================ -impl<'s> Lexer<'s> { +impl<'s, Inner> Lexer<'s, Inner> +where Inner: TokenConsumer<'s> + BlockHierarchyConsumer + NewlineConsumer<'s> +{ #[inline(always)] fn submit_line_as(&mut self, kind: token::Variant) { let token = self.token(|this| this.take_rest_of_line()); if let Some(token) = token { - self.submit_token(token.with_variant(kind)); + self.inner.push_token(token.with_variant(kind)); } } @@ -1380,7 +1255,7 @@ impl<'s> Lexer<'s> { } else { self.take_rest_of_line(); let end_line = self.mark(); - let token = self.make_token(start, end_line, token::Variant::newline()); + let token = self.make_newline(start, end_line); self.newlines_starting_with(token.into()); } } @@ -1393,8 +1268,13 @@ impl<'s> Lexer<'s> { // === Block === // ============= -impl<'s> Lexer<'s> { +impl<'s, Inner> Lexer<'s, Inner> +where Inner: TokenConsumer<'s> + BlockHierarchyConsumer + NewlineConsumer<'s> +{ fn line_break(&mut self) -> Option> { + if let Some(state) = self.stack.pop() { + self.end_splice(state); + } let token = self.token(|this| { let matched = if this.take_1('\n') { true @@ -1420,30 +1300,25 @@ impl<'s> Lexer<'s> { self.newlines_starting_with(None); } - fn newlines_starting_with(&mut self, first: Option>) { + fn newlines_starting_with(&mut self, first: Option>) { let mut newlines = self.token_storage.take(); newlines.extend(first); while let Some(token) = self.line_break() { - newlines.push(token.with_variant(token::Variant::newline())); + newlines.push(token.with_variant(token::variant::Newline())); } - if let Some(first) = newlines.first() { + if !newlines.is_empty() { let block_indent = self.last_spaces_visible_offset; if block_indent > self.current_block_indent { - let block_start = { - let 
location = first.left_offset.code.position_before(); - let offset = Offset(VisibleOffset(0), location.clone()); - Token(offset, location, token::Variant::block_start()) - }; - self.submit_token(block_start); + self.inner.start_block(); self.start_block(block_indent); } - self.end_blocks(block_indent, newlines.first().as_ref().unwrap()); - newlines.drain(..).for_each(|token| self.submit_token(token)); + self.end_blocks(block_indent); + newlines.drain(..).for_each(|token| self.inner.push_newline(token)); } self.token_storage.set_from(newlines); } - fn end_blocks(&mut self, block_indent: VisibleOffset, newline: &Token<'s>) { + fn end_blocks(&mut self, block_indent: VisibleOffset) { while block_indent < self.current_block_indent { let Some(previous_indent) = self.block_indent_stack.last().copied() else { // If the file starts at indent > 0, we treat that as the root indent level @@ -1458,12 +1333,7 @@ impl<'s> Lexer<'s> { break; } self.end_block(); - let block_end = { - let location = newline.left_offset.code.position_before(); - let offset = Offset(VisibleOffset(0), location.clone()); - Token(offset, location, token::Variant::block_end()) - }; - self.submit_token(block_end); + self.inner.end_block(); } } } @@ -1474,39 +1344,56 @@ impl<'s> Lexer<'s> { // === Glue === // ============ -/// All defined parsers in order they should be fired. The order is determined by two factors: -/// 1. The most common parsers should be first in order to minimize comparison for each new char. -/// 2. Some parsers could consume input even if it should be qualified as something else. Thus, some -/// parsers should be run first in order to make the token consuming process correct. 
-const PARSERS: &[for<'r> fn(&'r mut Lexer<'_>)] = &[ - |t| t.number(), - |t| t.ident(), - |t| t.operator(), - |t| t.newlines(), - |t| t.symbol(), - |t| t.comment(), - |t| t.text(), -]; +impl<'s, Inner> Lexer<'s, Inner> +where Inner: TokenConsumer<'s> + + Debug + + BlockHierarchyConsumer + + GroupHierarchyConsumer<'s> + + NewlineConsumer<'s> +{ + /// Run all defined parsers. The order is determined by two factors: + /// 1. The most common parsers should be first in order to minimize comparison for each new + /// char. + /// 2. Some parsers could consume input even if it should be qualified as something else. Thus, + /// some parsers should be run first in order to make the token consuming process correct. + fn parse_token(&mut self) -> bool { + self.run_and_check_if_progressed(|this| this.number()) + || self.run_and_check_if_progressed(|this| this.ident()) + || self.run_and_check_if_progressed(|this| this.operator()) + || self.run_and_check_if_progressed(|this| this.newlines()) + || self.run_and_check_if_progressed(|this| this.symbol()) + || self.run_and_check_if_progressed(|this| this.comment()) + || self.run_and_check_if_progressed(|this| this.text()) + } +} + +impl<'s, Inner> Finish for Lexer<'s, Inner> +where Inner: TokenConsumer<'s> + + Finish + + Debug + + BlockHierarchyConsumer + + GroupHierarchyConsumer<'s> + + NewlineConsumer<'s> +{ + type Result = ParseResult; -impl<'s> Lexer<'s> { /// Run the lexer. Return non-hierarchical list of tokens (the token groups will be represented /// as start and end tokens). - pub fn run(mut self) -> ParseResult>> { + fn finish(&mut self) -> Self::Result { // If the first line is indented, open a block for it. 
self.spaces_after_lexeme(); let first_block_indent = self.last_spaces_visible_offset; if first_block_indent.width_in_spaces != 0 { let start = Location::default(); - self.submit_token(token::block_start(Code::empty(start), Code::empty(start)).into()); + self.inner.start_block(); self.start_block(first_block_indent); - self.submit_token(token::newline(Code::empty(start), Code::empty(start)).into()); + self.inner.push_newline(token::newline(Code::empty(start), Code::empty(start))); } // Main parsing loop. - while PARSERS.iter().any(|f| self.run_and_check_if_progressed(f)) {} + while self.parse_token() {} // If any blocks were still open at EOF, close them. while self.end_block().is_some() { - let block_end = self.marker_token(token::Variant::block_end()); - self.submit_token(block_end); + self.inner.end_block(); } // If the last line ended in whitespace, ensure it is represented; we'll attach it to a // phantom newline token. @@ -1517,17 +1404,17 @@ impl<'s> Lexer<'s> { let visible_offset = self.last_spaces_visible_offset; let offset = Offset(visible_offset, Code::from_str_at_location(offset_code, left_offset_start)); - let eof = token::variant::Variant::Newline(token::variant::Newline()); - self.submit_token(Token(offset, Code::empty(self.current_offset), eof)); - } - // Sanity check. - let mut internal_error = self.internal_error.take(); - if self.current_char.is_some() { - let message = format!("Lexer did not consume all input. State: {self:?}"); - internal_error.get_or_insert(message); + let eof = token::variant::Newline(); + self.inner.push_newline(Token(offset, Code::empty(self.current_offset), eof)); } + let internal_error = if self.current_char.is_some() { + format!("Lexer did not consume all input. State: {self:?}").into() + } else { + None + }; + debug_assert!(internal_error.is_none()); - let value = self.output; + let value = self.inner.finish(); ParseResult { value, internal_error } } } @@ -1535,11 +1422,10 @@ impl<'s> Lexer<'s> { /// Run the lexer. 
Return non-hierarchical list of tokens (the token groups will be represented /// as start and end tokens). pub fn run(input: &'_ str) -> ParseResult>> { - Lexer::new(input).run() + Lexer::new(input, vec![]).finish() } - // ============= // === Tests === // ============= @@ -1583,7 +1469,7 @@ pub mod test { /// Constructor. pub fn operator_<'s>(left_offset: &'s str, code: &'s str) -> Token<'s> { - let variant = Variant::operator(analyze_operator(code)); + let variant = analyze_operator(code); let left_offset = test_code(left_offset); let code = test_code(code); Token(left_offset, code, variant) @@ -1617,6 +1503,13 @@ pub mod debug { } let mut locations = code::debug::LocationCheck::new(); for token in &tokens { + if matches!( + token.variant, + // Not a token; only constructed as a debug representation. + token::Variant::BlockStart(_) | token::Variant::BlockEnd(_) + ) { + continue; + } let left_offset = token.left_offset.code.range(); let code = token.code.range(); sum_span = Some(concat(&sum_span, &left_offset)); @@ -2062,8 +1955,7 @@ mod benches { let str = &str[..str.len() - 1]; b.iter(move || { - let lexer = Lexer::new(str); - assert_eq!(lexer.run().unwrap().len(), reps); + assert_eq!(run(str).unwrap().len(), reps); }); } } diff --git a/lib/rust/parser/src/lib.rs b/lib/rust/parser/src/lib.rs index 3af3ce96bf..917e00fefd 100644 --- a/lib/rust/parser/src/lib.rs +++ b/lib/rust/parser/src/lib.rs @@ -93,6 +93,12 @@ use crate::prelude::*; +use crate::lexer::Lexer; +use crate::source::Code; +use crate::syntax::token; +use crate::syntax::tree::SyntaxError; +use crate::syntax::Finish; + // ============== // === Export === @@ -163,11 +169,9 @@ impl Parser { /// Main entry point. 
pub fn run<'s>(&self, code: &'s str) -> syntax::Tree<'s> { - let tokens = lexer::run(code); - let mut resolver = macros::resolver::Resolver::new_statement(); - let result = tokens.map(|tokens| resolver.run(&self.macros, tokens)); - let value = result.value; - if let Some(error) = result.internal_error { + let resolver = macros::resolver::Resolver::new(&self.macros); + let ParseResult { value, internal_error } = Lexer::new(code, resolver).finish(); + if let Some(error) = internal_error { return value.with_error(format!("Internal error: {error}")); } value @@ -183,271 +187,89 @@ impl Default for Parser { // == Parsing helpers == -/// Reinterpret an expression in a statement context (i.e. as a top level member of a block). -/// -/// In statement context, an expression that has an assignment operator at its top level is -/// interpreted as a variable assignment or method definition. -fn expression_to_statement(mut tree: syntax::Tree<'_>) -> syntax::Tree<'_> { - use syntax::tree::*; - match &mut *tree.variant { - Variant::Annotated(annotated) => { - annotated.expression = annotated.expression.take().map(expression_to_statement); - } - Variant::AnnotatedBuiltin(annotated) => { - annotated.expression = annotated.expression.take().map(expression_to_statement); - } - Variant::Documented(documented) => { - documented.expression = documented.expression.take().map(expression_to_statement); - } - Variant::ArgumentBlockApplication(ArgumentBlockApplication { lhs: None, .. 
}) => { - return tree.with_error("Expected expression before indented block."); - } - Variant::TypeAnnotated(typed) => { - tree.variant = Box::new(Variant::TypeSignature(TypeSignature { - variable: mem::take(&mut typed.expression), - operator: mem::take(&mut typed.operator), - type_: mem::take(&mut typed.type_), - })); - } - Variant::OprApp(OprApp { lhs: Some(lhs), opr: Ok(opr), rhs }) - if opr.properties.is_assignment() => - { - let (lhs, return_spec) = match &mut *lhs.variant { - Variant::OprApp(OprApp { lhs: Some(lhs), opr: Ok(opr), rhs: Some(rhs) }) - if opr.properties.is_arrow() => - ( - lhs, - Some(ReturnSpecification { - arrow: mem::take(opr), - r#type: mem::take(rhs), - }), - ), - _ => (lhs, None), - }; - let (leftmost, args) = collect_arguments(lhs.clone()); - if return_spec.is_none() { - if let Some(rhs) = rhs { - if let Variant::Ident(ident) = &*leftmost.variant - && ident.token.variant.is_type - { - // If the LHS is a type, this is a (destructuring) assignment. - let lhs = expression_to_pattern(mem::take(lhs)); - tree.variant = Box::new(Variant::Assignment(Assignment { - pattern: lhs, - equals: mem::take(opr), - expr: mem::take(rhs), - })); - return tree; - } - if !is_invalid_pattern(&leftmost) && args.is_empty() && !is_body_block(rhs) { - // If the LHS has no arguments, and there is a RHS, and the RHS is not a - // body block, this is a variable assignment. - tree.variant = Box::new(Variant::Assignment(Assignment { - pattern: leftmost, - equals: mem::take(opr), - expr: mem::take(rhs), - })); - return tree; - } - } - } - if is_qualified_name(&leftmost) { - // If this is not a variable assignment, and the leftmost leaf of the `App` tree is - // a qualified name, this is a function definition. 
- tree.variant = Box::new(Variant::Function(Function { - name: leftmost, - args, - returns: return_spec, - equals: mem::take(opr), - body: mem::take(rhs), - })); - return tree; - } - return tree.with_error("Invalid use of assignment operator `=`."); - } - _ => (), - } - tree -} - -/// If this function returns `true`, the input is not valid where a pattern is expected. -fn is_invalid_pattern(tree: &syntax::Tree) -> bool { - use syntax::tree::*; - match &*tree.variant { - Variant::App(App { func: Tree { variant: box Variant::Ident(ident), .. }, arg }) => - !ident.token.is_type || is_invalid_pattern(arg), - Variant::App(App { func, arg }) => is_invalid_pattern(func) || is_invalid_pattern(arg), - Variant::TypeAnnotated(TypeAnnotated { expression, .. }) => is_invalid_pattern(expression), - _ => false, - } -} - fn is_qualified_name(tree: &syntax::Tree) -> bool { use syntax::tree::*; - match &*tree.variant { + match &tree.variant { Variant::Ident(_) => true, - Variant::OprApp(OprApp { lhs: Some(lhs), opr: Ok(opr), rhs: Some(rhs) }) - if matches!(&*rhs.variant, Variant::Ident(_)) && opr.properties.is_dot() => + Variant::OprApp(box OprApp { lhs: Some(lhs), opr: Ok(opr), rhs: Some(rhs) }) + if matches!(rhs.variant, Variant::Ident(_)) && opr.code.repr.0 == "." 
=> is_qualified_name(lhs), _ => false, } } +fn expect_qualified_name(tree: syntax::Tree) -> syntax::Tree { + if is_qualified_name(&tree) { + tree + } else { + tree.with_error(SyntaxError::ExpectedQualifiedName) + } +} + +fn empty_tree(location: Code) -> syntax::Tree { + syntax::Tree::ident(token::ident(location.clone(), location, false, 0, false, false, false)) +} + fn expression_to_pattern(mut input: syntax::Tree<'_>) -> syntax::Tree<'_> { use syntax::tree::*; - if let Variant::Wildcard(wildcard) = &mut *input.variant { + if let Variant::Wildcard(wildcard) = &mut input.variant { wildcard.de_bruijn_index = None; return input; } - let mut out = match input.variant { - box Variant::TemplateFunction(TemplateFunction { ast, .. }) => expression_to_pattern(ast), - box Variant::Group(Group { open, body: Some(body), close }) => - Tree::group(open, Some(expression_to_pattern(body)), close), - box Variant::App(App { func, arg }) => - Tree::app(expression_to_pattern(func), expression_to_pattern(arg)), - box Variant::TypeAnnotated(TypeAnnotated { expression, operator, type_ }) => - Tree::type_annotated(expression_to_pattern(expression), operator, type_), - box Variant::AutoscopedIdentifier(_) => - return input.with_error("The autoscope operator (..) cannot be used in a pattern."), - _ => return input, + let mut error = None; + match input.variant { + // === Special-case errors === + Variant::App(box App { func: Tree { variant: Variant::Ident(ref ident), .. }, .. }) + if !ident.token.is_type => + error = Some(SyntaxError::PatternUnexpectedExpression), + + // === Recursions === + Variant::Group(box Group { body: Some(ref mut body), .. }) => + transform_tree(body, expression_to_pattern), + Variant::App(box App { ref mut func, ref mut arg }) => { + transform_tree(func, expression_to_pattern); + transform_tree(arg, expression_to_pattern); + } + Variant::TypeAnnotated(box TypeAnnotated { ref mut expression, .. 
}) => + transform_tree(expression, expression_to_pattern), + Variant::OprApp(box OprApp { opr: Ok(ref opr), .. }) if opr.code == "." => + if !is_qualified_name(&input) { + error = Some(SyntaxError::PatternUnexpectedDot); + }, + + // === Transformations === + Variant::TemplateFunction(box TemplateFunction { ast, .. }) => { + let mut out = expression_to_pattern(ast); + out.span.left_offset += input.span.left_offset; + return out; + } + + // === Unconditional and fallthrough errors === + Variant::AutoscopedIdentifier(_) => error = Some(SyntaxError::PatternUnexpectedExpression), + Variant::OprApp(_) => error = Some(SyntaxError::PatternUnexpectedExpression), + + // === Unhandled === + _ => {} }; - out.span.left_offset += input.span.left_offset; - out + maybe_with_error(input, error) } -fn collect_arguments(tree: syntax::Tree) -> (syntax::Tree, Vec) { - let mut args = vec![]; - let tree = unroll_arguments(tree, &mut args); - args.reverse(); - (tree, args) +thread_local! { + static DEFAULT_TREE: RefCell>> = default(); } -fn collect_arguments_inclusive(tree: syntax::Tree) -> Vec { - let mut args = vec![]; - let first = unroll_arguments(tree, &mut args); - args.push(parse_argument_definition(first)); - args.reverse(); - args -} - -fn unroll_arguments<'s>( - mut tree: syntax::Tree<'s>, - args: &mut Vec>, -) -> syntax::Tree<'s> { - while let Some(arg) = parse_argument_application(&mut tree) { - args.push(arg); - } - tree -} - -/// Try to parse the expression as an application of a function to an `ArgumentDefinition`. If it -/// matches, replace the expression with its LHS, and return the `ArgumentDefinition` node. 
-pub fn parse_argument_application<'s>( - expression: &'_ mut syntax::Tree<'s>, -) -> Option> { - use syntax::tree::*; - match &mut expression.variant { - box Variant::App(App { func, arg }) => { - let arg = parse_argument_definition(arg.clone()); - func.span.left_offset += expression.span.left_offset.take_as_prefix(); - *expression = func.clone(); - Some(arg) - } - box Variant::NamedApp(NamedApp { func, open, name, equals, arg, close }) => { - let open = mem::take(open); - let close = mem::take(close); - let equals = equals.clone(); - let pattern = Tree::ident(name.clone()); - let open2 = default(); - let suspension = default(); - let close2 = default(); - let type_ = default(); - let default = Some(ArgumentDefault { equals, expression: arg.clone() }); - func.span.left_offset += expression.span.left_offset.take_as_prefix(); - *expression = func.clone(); - Some(ArgumentDefinition { - open, - open2, - pattern, - suspension, - default, - close2, - type_, - close, - }) - } - _ => None, - } -} - -/// Interpret the expression as an element of an argument definition sequence. 
-pub fn parse_argument_definition(mut pattern: syntax::Tree) -> syntax::tree::ArgumentDefinition { - use syntax::tree::*; - let mut open1 = default(); - let mut close1 = default(); - if let box Variant::Group(Group { mut open, body: Some(mut body), close }) = pattern.variant { - *(if let Some(open) = open.as_mut() { - &mut open.left_offset - } else { - &mut body.span.left_offset - }) += pattern.span.left_offset; - open1 = open; - close1 = close; - pattern = body; - } - let mut default_ = default(); - if let Variant::OprApp(OprApp { lhs: Some(lhs), opr: Ok(opr), rhs: Some(rhs) }) = - &*pattern.variant - && opr.properties.is_assignment() - { - let left_offset = pattern.span.left_offset; - default_ = Some(ArgumentDefault { equals: opr.clone(), expression: rhs.clone() }); - pattern = lhs.clone(); - pattern.span.left_offset += left_offset; - } - let mut open2 = default(); - let mut close2 = default(); - if let box Variant::Group(Group { mut open, body: Some(mut body), close }) = pattern.variant { - *(if let Some(open) = open.as_mut() { - &mut open.left_offset - } else { - &mut body.span.left_offset - }) += pattern.span.left_offset; - open2 = open; - close2 = close; - pattern = body; - } - let mut type__ = default(); - if let box Variant::TypeAnnotated(TypeAnnotated { mut expression, operator, type_ }) = - pattern.variant - { - expression.span.left_offset += pattern.span.left_offset; - type__ = Some(ArgumentType { operator, type_ }); - pattern = expression; - } - let mut suspension = default(); - if let box Variant::TemplateFunction(TemplateFunction { mut ast, .. 
}) = pattern.variant { - ast.span.left_offset += pattern.span.left_offset; - pattern = ast; - } - if let Variant::UnaryOprApp(UnaryOprApp { opr, rhs: Some(rhs) }) = &*pattern.variant - && opr.properties.is_suspension() - { - let mut opr = opr.clone(); - opr.left_offset += pattern.span.left_offset; - suspension = Some(opr); - pattern = rhs.clone(); - } - let pattern = expression_to_pattern(pattern); - let open = open1; - let close = close1; - let type_ = type__; - ArgumentDefinition { open, open2, pattern, suspension, default: default_, close2, type_, close } -} - -/// Return whether the expression is a body block. -fn is_body_block(expression: &syntax::tree::Tree<'_>) -> bool { - matches!(&*expression.variant, syntax::tree::Variant::BodyBlock { .. }) +fn transform_tree(tree: &mut syntax::Tree, f: impl FnOnce(syntax::Tree) -> syntax::Tree) { + let default: syntax::Tree<'static> = + DEFAULT_TREE.with(|default| default.borrow_mut().take()).unwrap_or_default(); + let original = mem::replace(tree, default); + let transformed = f(original); + let default_returned = mem::replace(tree, transformed); + // This lifetime cast is sound because this is the same value as `default` above; its lifetime + // was narrowed by the type system when it was stored in the `tree` reference. 
+ #[allow(unsafe_code)] + let default_returned = + unsafe { mem::transmute::, syntax::Tree<'static>>(default_returned) }; + DEFAULT_TREE.with(|default| *default.borrow_mut() = Some(default_returned)); } diff --git a/lib/rust/parser/src/macros/built_in.rs b/lib/rust/parser/src/macros/built_in.rs index 20bd9d6d2a..2743c752d3 100644 --- a/lib/rust/parser/src/macros/built_in.rs +++ b/lib/rust/parser/src/macros/built_in.rs @@ -3,8 +3,14 @@ use crate::macros::pattern::*; use crate::macros::*; +use crate::empty_tree; +use crate::expect_qualified_name; use crate::source::Code; use crate::syntax::operator; +use crate::syntax::token; +use crate::syntax::tree::SyntaxError; +use crate::syntax::Item; +use crate::syntax::Token; @@ -22,7 +28,6 @@ fn expression() -> resolver::SegmentMap<'static> { let mut macro_map = resolver::SegmentMap::default(); macro_map.register(if_then()); macro_map.register(if_then_else()); - macro_map.register(group()); macro_map.register(lambda()); macro_map.register(case()); macro_map.register(array()); @@ -38,9 +43,6 @@ fn statement() -> resolver::SegmentMap<'static> { let mut macro_map = resolver::SegmentMap::default(); register_import_macros(&mut macro_map); register_export_macros(&mut macro_map); - macro_map.register(type_def()); - macro_map.register(private()); - macro_map.register(foreign()); macro_map } @@ -93,7 +95,7 @@ fn import_body<'s>( body = Some( precedence .resolve(tokens) - .map(expect_qualified) + .map(expect_qualified_name) .unwrap_or_else(|| expected_nonempty(header.code.position_after())), ); &mut from @@ -101,7 +103,7 @@ fn import_body<'s>( "import" => { let expect = match from { Some(_) => expect_ident, - None => expect_qualified, + None => expect_qualified_name, }; body = sequence_tree(precedence, tokens, expect); incomplete_import = body.is_none(); @@ -109,7 +111,7 @@ fn import_body<'s>( } "all" => { debug_assert!(tokens.is_empty()); - all = Some(into_ident(header)); + all = 
Some(header.with_variant(token::variant::AllKeyword())); incomplete_import = false; continue; } @@ -180,7 +182,7 @@ fn export_body<'s>( body = Some( precedence .resolve(tokens) - .map(expect_qualified) + .map(expect_qualified_name) .unwrap_or_else(|| expected_nonempty(header.code.position_after())), ); &mut from @@ -188,7 +190,7 @@ fn export_body<'s>( "export" => { let expect = match from { Some(_) => expect_ident, - None => expect_qualified, + None => expect_qualified_name, }; body = sequence_tree(precedence, tokens, expect); incomplete_export = body.is_none(); @@ -196,9 +198,9 @@ fn export_body<'s>( } "all" => { debug_assert!(tokens.is_empty()); - all = Some(into_ident(header)); + body = None; incomplete_export = false; - continue; + &mut all } "as" => { body = Some( @@ -220,11 +222,23 @@ fn export_body<'s>( }; *field = Some(syntax::tree::MultiSegmentAppSegment { header, body }); } - let export = syntax::Tree::export(from, export.unwrap(), all, as_, hiding); - if incomplete_export { - return export.with_error("Expected name or `all` keyword following `export` keyword."); - } - export + let export = export.unwrap(); + let error = if all.is_some() { + SyntaxError::ImportsNoAllInExport + } else if hiding.is_some() { + SyntaxError::ImportsNoHidingInExport + } else if incomplete_export { + SyntaxError::ImportsExpectedNameInExport + } else { + return syntax::Tree::export(from, export, as_); + }; + let mut segments = vec![]; + segments.extend(from); + segments.push(export); + segments.extend(all); + segments.extend(as_); + segments.extend(hiding); + return syntax::Tree::multi_segment_app(segments.try_into().unwrap()).with_error(error); } /// If-then-else macro definition. 
@@ -242,200 +256,7 @@ fn if_body<'s>( segments: NonEmptyVec>, precedence: &mut operator::Precedence<'s>, ) -> syntax::Tree<'s> { - use syntax::tree::*; - let segments = segments.mapped(|s| { - let header = s.header; - let body = s.result.tokens(); - let body = match precedence.resolve(body) { - Some(Tree { - variant: - box Variant::ArgumentBlockApplication(ArgumentBlockApplication { - lhs: None, - arguments, - }), - span, - .. - }) => { - let mut block = block::body_from_lines(arguments); - block.span.left_offset += span.left_offset; - Some(block) - } - e => e, - }; - MultiSegmentAppSegment { header, body } - }); - Tree::multi_segment_app(segments) -} - -/// Group macro definition. -pub fn group<'s>() -> Definition<'s> { - crate::macro_definition! {("(", everything(), ")", nothing()) group_body} -} - -fn group_body<'s>( - segments: NonEmptyVec>, - precedence: &mut operator::Precedence<'s>, -) -> syntax::Tree<'s> { - let (close, mut segments) = segments.pop(); - let close = into_close_symbol(close.header); - let segment = segments.pop().unwrap(); - let open = into_open_symbol(segment.header); - let body = segment.result.tokens(); - let body = precedence.resolve(body); - syntax::Tree::group(Some(open), body, Some(close)) -} - -/// Type definitions. -fn type_def<'s>() -> Definition<'s> { - crate::macro_definition! 
{("type", everything()) type_def_body} -} - -fn type_def_body<'s>( - matched_segments: NonEmptyVec>, - precedence: &mut operator::Precedence<'s>, -) -> syntax::Tree<'s> { - use syntax::tree::*; - let segment = matched_segments.pop().0; - let header = into_ident(segment.header); - let mut tokens = segment.result.tokens(); - let mut block = vec![]; - if let Some(syntax::Item::Block(lines)) = tokens.last_mut() { - block = mem::take(lines); - tokens.pop(); - } - let mut tokens = tokens.into_iter(); - let name = match tokens.next() { - Some(syntax::Item::Token(syntax::Token { - left_offset, - code, - variant: syntax::token::Variant::Ident(ident), - })) => syntax::Token(left_offset, code, ident), - _ => return Tree::ident(header).with_error("Expected identifier after `type` keyword."), - }; - let params = precedence - .resolve_non_section(tokens) - .map(crate::collect_arguments_inclusive) - .unwrap_or_default(); - for line in &mut block { - if let Some(syntax::Item::Token(syntax::Token { variant, .. })) = line.items.first_mut() - && let syntax::token::Variant::Operator(operator) = variant - && !operator.properties.is_annotation() - { - let opr_ident = - syntax::token::variant::Ident { is_operator_lexically: true, ..default() }; - *variant = syntax::token::Variant::Ident(opr_ident); - } - } - let parse_line = |syntax::item::Line { newline, items }| block::Line { - newline, - expression: precedence.resolve(items), - }; - let body = block::compound_lines(block.into_iter().map(parse_line)) - .map(|line| line.map_expression(to_body_statement)) - .collect(); - Tree::type_def(header, name, params, body) -} - -fn to_body_statement(mut line_expression: syntax::Tree<'_>) -> syntax::Tree<'_> { - use syntax::tree::*; - - // Unwrap `Private` tree from any `Invalid` added in expression context; it will be revalidated - // in the new context. - if let Tree { - variant: - box Variant::Invalid(Invalid { - ast: mut inner @ Tree { variant: box Variant::Private(_), .. }, - .. 
- }), - span, - .. - } = line_expression - { - inner.span = span; - return to_body_statement(inner); - } - // Recurse into body of `Private` keyword; validate usage of the keyword in type-body context. - if let Tree { variant: box Variant::Private(ref mut private), .. } = &mut line_expression { - let body_statement = private.body.take().map(to_body_statement); - let error = match body_statement.as_ref().map(|tree| &*tree.variant) { - Some(Variant::ConstructorDefinition(_)) => None, - Some(Variant::Function(_)) => None, - None => Some("Expected declaration after `private` keyword in type definition."), - _ => Some("The `private` keyword inside a type definition may only be applied to a constructor definition or a method."), - }; - private.body = body_statement; - return match error { - Some(error) => line_expression.with_error(error), - None => line_expression, - }; - } - if let Tree { variant: box Variant::Documented(Documented { expression, .. }), .. } = - &mut line_expression - { - *expression = expression.take().map(to_body_statement); - return line_expression; - } - if let Tree { variant: box Variant::Annotated(Annotated { expression, .. }), .. } = - &mut line_expression - { - *expression = expression.take().map(to_body_statement); - return line_expression; - } - let mut last_argument_default = default(); - let mut left_offset = line_expression.span.left_offset.position_before(); - let lhs = match &line_expression { - Tree { - variant: box Variant::OprApp(OprApp { lhs: Some(lhs), opr: Ok(opr), rhs: Some(rhs) }), - span, - .. - } if opr.properties.is_assignment() => { - left_offset = span.left_offset.clone(); - last_argument_default = Some((opr.clone(), rhs.clone())); - lhs - } - Tree { - variant: - box Variant::ArgumentBlockApplication(ArgumentBlockApplication { - lhs: Some(Tree { variant: box Variant::Ident(ident), span: span_, .. }), - arguments, - }), - span, - .. 
- } => { - let mut constructor = ident.token.clone(); - constructor.left_offset += &span.left_offset; - constructor.left_offset += &span_.left_offset; - let block = arguments - .iter() - .cloned() - .map(|block::Line { newline, expression }| ArgumentDefinitionLine { - newline, - argument: expression.map(crate::parse_argument_definition), - }) - .collect(); - let arguments = default(); - return Tree::constructor_definition(constructor, arguments, block); - } - _ => &line_expression, - }; - let (constructor, mut arguments) = crate::collect_arguments(lhs.clone()); - if let Tree { variant: box Variant::Ident(Ident { token }), span, .. } = constructor - && token.is_type - { - let mut constructor = token; - constructor.left_offset += left_offset; - constructor.left_offset += span.left_offset; - if let Some((equals, expression)) = last_argument_default - && let Some(ArgumentDefinition { open: None, default, close: None, .. }) = - arguments.last_mut() - && default.is_none() - { - *default = Some(ArgumentDefault { equals, expression }); - } - let block = default(); - return Tree::constructor_definition(constructor, arguments, block); - } - crate::expression_to_statement(line_expression) + capture_expressions(segments, precedence) } /// Lambda expression. @@ -452,9 +273,8 @@ fn lambda_body<'s>( ) -> syntax::Tree<'s> { let (segment, _) = segments.pop(); let operator = segment.header; - let syntax::token::Token { left_offset, code, .. } = operator; - let properties = syntax::token::OperatorProperties::default(); - let operator = syntax::token::operator(left_offset, code, properties); + let Token { left_offset, code, .. 
} = operator; + let operator = token::lambda_operator(left_offset, code); let arrow = segment.result.tokens(); let arrow = precedence.resolve(arrow); syntax::Tree::lambda(operator, arrow) @@ -472,25 +292,25 @@ fn case_body<'s>( use syntax::tree::*; let (of, mut rest) = segments.pop(); let case = rest.pop().unwrap(); - let case_ = into_ident(case.header); + let case_ = case.header.with_variant(token::variant::CaseKeyword()); let expression = case.result.tokens(); let expression = precedence.resolve(expression); - let of_ = into_ident(of.header); + let of_ = of.header.with_variant(token::variant::OfKeyword()); let mut case_builder = CaseBuilder::default(); let mut initial_case = vec![]; let mut block = default(); for item in of.result.tokens() { match item { - syntax::Item::Block(lines) => block = lines, + Item::Block(lines) => block = lines, _ => initial_case.push(item), } } if !initial_case.is_empty() { let location = of_.code.position_after(); - let newline = syntax::token::newline(location.clone(), location); + let newline = token::newline(location.clone(), location); case_builder.push(syntax::item::Line { newline, items: initial_case }); } - block.into_iter().for_each(|line| case_builder.push(line)); + block.into_vec().into_iter().for_each(|line| case_builder.push(line)); let (case_lines, any_invalid) = case_builder.finish(); let tree = Tree::case_of(case_, expression, of_, case_lines); if any_invalid { @@ -504,10 +324,10 @@ struct CaseBuilder<'s> { // Case components documentation: Option>, pattern: Option>, - arrow: Option>, + arrow: Option>, // Within-case state spaces: bool, - tokens: Vec>, + tokens: Vec>, resolver: operator::Precedence<'s>, // Output case_lines: Vec>, @@ -519,22 +339,16 @@ impl<'s> CaseBuilder<'s> { let syntax::item::Line { newline, items } = line; self.case_lines.push(syntax::tree::CaseLine { newline: newline.into(), ..default() }); for token in items { - if self.arrow.is_none() - && let syntax::Item::Token(syntax::Token { - left_offset, - 
code, - variant: syntax::token::Variant::Operator(op), - }) = &token - && op.properties.is_arrow() - && !left_offset.is_empty() - { - self.resolver.extend(self.tokens.drain(..)); - self.arrow = - Some(syntax::token::operator(left_offset.clone(), code.clone(), op.properties)); - self.pattern = self.resolver.finish().map(crate::expression_to_pattern); - continue; - } - if let syntax::Item::Token(syntax::Token { left_offset, .. }) = &token { + if let Item::Token(token @ Token { left_offset, variant, .. }) = &token { + if self.arrow.is_none() + && let token::Variant::ArrowOperator(arrow_op) = variant + && !left_offset.is_empty() + { + self.resolver.extend(self.tokens.drain(..)); + self.arrow = Some(token.clone().with_variant(*arrow_op)); + self.pattern = self.resolver.finish().map(crate::expression_to_pattern); + continue; + } self.spaces = self.spaces || (!left_offset.is_empty() && !self.tokens.is_empty()); } self.tokens.push(token); @@ -545,18 +359,11 @@ impl<'s> CaseBuilder<'s> { fn finish_line(&mut self) { if self.arrow.is_none() && !self.spaces { for (i, token) in self.tokens.iter().enumerate() { - if let syntax::Item::Token(syntax::Token { - left_offset, - code, - variant: syntax::token::Variant::Operator(op), - }) = &token - && op.properties.is_arrow() + if let Item::Token( + token @ Token { variant: token::Variant::ArrowOperator(arrow_op), .. 
}, + ) = token { - self.arrow = Some(syntax::token::operator( - left_offset.clone(), - code.clone(), - op.properties, - )); + self.arrow = Some(token.clone().with_variant(*arrow_op)); let including_arrow = self.tokens.drain(..=i); self.resolver.extend(including_arrow.take(i)); self.pattern = self.resolver.finish().map(crate::expression_to_pattern); @@ -572,7 +379,7 @@ impl<'s> CaseBuilder<'s> { Some(syntax::Tree { span, variant: - box syntax::tree::Variant::Documented(syntax::tree::Documented { + syntax::tree::Variant::Documented(box syntax::tree::Documented { mut documentation, expression: None, }), @@ -586,18 +393,6 @@ impl<'s> CaseBuilder<'s> { case.documentation = documentation.into(); return; } - Some(syntax::Tree { - span, - variant: - box syntax::tree::Variant::ArgumentBlockApplication( - syntax::tree::ArgumentBlockApplication { lhs: None, arguments }, - ), - .. - }) => { - let mut block = syntax::tree::block::body_from_lines(arguments); - block.span.left_offset += span.left_offset; - Some(block) - } e => e, }; if pattern.is_none() && arrow.is_none() && expression.is_none() { @@ -647,10 +442,10 @@ fn tuple_body<'s>( } struct GroupedSequence<'s> { - left: syntax::token::OpenSymbol<'s>, + left: token::OpenSymbol<'s>, first: Option>, rest: Vec>, - right: syntax::token::CloseSymbol<'s>, + right: token::CloseSymbol<'s>, } fn grouped_sequence<'s>( @@ -667,23 +462,19 @@ fn grouped_sequence<'s>( fn sequence<'s>( precedence: &mut operator::Precedence<'s>, - tokens: impl IntoIterator>, + tokens: impl IntoIterator>, ) -> (Option>, Vec>) { use syntax::tree::*; let mut first = None; let mut rest: Vec> = default(); for token in tokens { match token { - syntax::Item::Token(syntax::Token { - left_offset, - code, - variant: syntax::token::Variant::Operator(op), - }) if op.properties.is_sequence() => { + Item::Token(Token { left_offset, code, variant: token::Variant::CommaOperator(_) }) => { *(match rest.last_mut() { Some(rest) => &mut rest.body, None => &mut first, }) = 
precedence.finish(); - let operator = syntax::Token(left_offset, code, op); + let operator = Token(left_offset, code, token::variant::Operator()); rest.push(OperatorDelimitedTree { operator, body: default() }); } _ => { @@ -700,7 +491,7 @@ fn sequence<'s>( fn sequence_tree<'s>( precedence: &mut operator::Precedence<'s>, - tokens: impl IntoIterator>, + tokens: impl IntoIterator>, mut f: impl FnMut(syntax::Tree<'s>) -> syntax::Tree<'s>, ) -> Option> { use syntax::tree::*; @@ -735,14 +526,6 @@ fn splice_body<'s>( syntax::Tree::text_literal(default(), default(), vec![splice], default()) } -fn foreign<'s>() -> Definition<'s> { - crate::macro_definition! {("foreign", everything()) foreign_body} -} - -fn private<'s>() -> Definition<'s> { - crate::macro_definition! {("private", everything()) private_keyword} -} - fn skip<'s>() -> Definition<'s> { crate::macro_definition! {("SKIP", everything()) capture_expressions} } @@ -751,35 +534,6 @@ fn freeze<'s>() -> Definition<'s> { crate::macro_definition! {("FREEZE", everything()) capture_expressions} } -/// private can be either specified as the very first statement in the module, marking the -/// whole module as private. Or it can be prepended to some definitions. For example it can -/// be prepended to atom constructor definition and a method. 
-fn private_keyword<'s>( - segments: NonEmptyVec>, - precedence: &mut operator::Precedence<'s>, -) -> syntax::Tree<'s> { - use syntax::tree::*; - let segment = segments.pop().0; - let keyword = into_private(segment.header); - let body_opt = precedence.resolve(segment.result.tokens()); - match body_opt { - Some(body) => { - let statement = crate::expression_to_statement(body); - match statement.variant { - box Variant::ConstructorDefinition(_) => Tree::private(keyword, Some(statement)), - box Variant::Function(_) => Tree::private(keyword, Some(statement)), - _ => Tree::private(keyword, Some(statement)) - .with_error("The 'private' keyword cannot be applied to this expression"), - } - } - None => { - // Just a private keyword without a body. This is valid as the first statement in the - // module, to declare the module as private. - Tree::private(keyword, None) - } - } -} - /// Macro body builder that just parses the tokens of each segment as expressions, and places them /// in a [`MultiSegmentApp`]. 
fn capture_expressions<'s>( @@ -795,131 +549,57 @@ fn capture_expressions<'s>( })) } -fn foreign_body<'s>( - segments: NonEmptyVec>, - precedence: &mut operator::Precedence<'s>, -) -> syntax::Tree<'s> { - let segment = segments.pop().0; - let keyword = into_ident(segment.header); - let tokens = segment.result.tokens().into_iter(); - match try_foreign_body(keyword.clone(), tokens.clone(), precedence) { - Ok(foreign) => foreign, - Err(error) => (match precedence.resolve(tokens) { - Some(rhs) => syntax::Tree::app(keyword.into(), rhs), - None => keyword.into(), - }) - .with_error(error), - } -} - -fn try_foreign_body<'s>( - keyword: syntax::token::Ident<'s>, - tokens: impl IntoIterator>, - precedence: &mut operator::Precedence<'s>, -) -> Result, &'static str> { - let mut tokens = tokens.into_iter(); - let language = tokens - .next() - .and_then(try_into_token) - .and_then(try_token_into_ident) - .ok_or("Expected an identifier specifying foreign method's language.")?; - let expected_name = "Expected an identifier specifying foreign function's name."; - let function = precedence.resolve(tokens).ok_or(expected_name)?; - let expected_function = "Expected a function definition after foreign declaration."; - let box syntax::tree::Variant::OprApp(syntax::tree::OprApp { - lhs: Some(lhs), - opr: Ok(equals), - rhs: Some(body), - }) = function.variant - else { - return Err(expected_function); - }; - if !equals.properties.is_assignment() { - return Err(expected_function); - }; - if !matches!(body.variant, box syntax::tree::Variant::TextLiteral(_)) { - return Err("Expected a text literal as body of `foreign` declaration."); - } - let (name, args) = crate::collect_arguments(lhs); - let mut name = try_tree_into_ident(name).ok_or(expected_name)?; - name.left_offset += function.span.left_offset; - Ok(syntax::Tree::foreign_function(keyword, language, name, args, equals, body)) -} - // === Token conversions === -fn try_into_token(item: syntax::Item) -> Option { +fn try_into_token(item: 
Item) -> Option { match item { - syntax::Item::Token(token) => Some(token), + Item::Token(token) => Some(token), _ => None, } } -fn try_token_into_ident(token: syntax::Token) -> Option { +fn try_token_into_ident(token: Token) -> Option { match token.variant { - syntax::token::Variant::Ident(ident) => { - let syntax::token::Token { left_offset, code, .. } = token; - Some(syntax::Token(left_offset, code, ident)) + token::Variant::Ident(ident) => { + let Token { left_offset, code, .. } = token; + Some(Token(left_offset, code, ident)) } _ => None, } } -fn try_tree_into_ident(tree: syntax::Tree) -> Option { +fn try_tree_into_ident(tree: syntax::Tree) -> Option { match tree.variant { - box syntax::tree::Variant::Ident(syntax::tree::Ident { token }) => Some(token), + syntax::tree::Variant::Ident(box syntax::tree::Ident { token }) => Some(token), _ => None, } } -fn into_open_symbol(token: syntax::token::Token) -> syntax::token::OpenSymbol { - let syntax::token::Token { left_offset, code, .. } = token; - syntax::token::open_symbol(left_offset, code) +fn into_open_symbol(token: Token) -> token::OpenSymbol { + let Token { left_offset, code, .. } = token; + token::open_symbol(left_offset, code) } -fn into_close_symbol(token: syntax::token::Token) -> syntax::token::CloseSymbol { - let syntax::token::Token { left_offset, code, .. } = token; - syntax::token::close_symbol(left_offset, code) +fn into_close_symbol(token: Token) -> token::CloseSymbol { + let Token { left_offset, code, .. } = token; + token::close_symbol(left_offset, code) } -fn into_ident(token: syntax::token::Token) -> syntax::token::Ident { - let syntax::token::Token { left_offset, code, .. } = token; - syntax::token::ident(left_offset, code, false, 0, false, false, false) -} - -fn into_private(token: syntax::token::Token) -> syntax::token::Private { - let syntax::token::Token { left_offset, code, .. 
} = token; - syntax::token::private(left_offset, code) +fn into_ident(token: Token) -> token::Ident { + token.with_variant(token::variant::Ident(false, 0, false, false, false)) } // === Validators === fn expect_ident(tree: syntax::Tree) -> syntax::Tree { - if matches!(&*tree.variant, syntax::tree::Variant::Ident(_)) { + if matches!(tree.variant, syntax::tree::Variant::Ident(_)) { tree } else { tree.with_error("Expected identifier.") } } -fn expect_qualified(tree: syntax::Tree) -> syntax::Tree { - if crate::is_qualified_name(&tree) { - tree - } else { - tree.with_error("Expected qualified name.") - } -} - fn expected_nonempty(location: Code) -> syntax::Tree { - let empty = syntax::Tree::ident(syntax::token::ident( - location.clone(), - location, - false, - 0, - false, - false, - false, - )); - empty.with_error("Expected tokens.") + empty_tree(location).with_error("Expected tokens.") } diff --git a/lib/rust/parser/src/macros/pattern.rs b/lib/rust/parser/src/macros/pattern.rs index 350134c2e4..2b8291d7e8 100644 --- a/lib/rust/parser/src/macros/pattern.rs +++ b/lib/rust/parser/src/macros/pattern.rs @@ -185,7 +185,7 @@ pub enum Match<'s> { Identifier(syntax::Item<'s>), Expected(String, Box>), Named(String, Box>), - Block(Vec>), + Block(Box<[syntax::item::Line<'s>]>), NotBlock(syntax::Item<'s>), } diff --git a/lib/rust/parser/src/macros/resolver.rs b/lib/rust/parser/src/macros/resolver.rs index c93cae864a..24fb44316e 100644 --- a/lib/rust/parser/src/macros/resolver.rs +++ b/lib/rust/parser/src/macros/resolver.rs @@ -30,6 +30,12 @@ use crate::source::Code; use crate::syntax; use crate::syntax::token; use crate::syntax::token::Token; +use crate::syntax::BlockHierarchyConsumer; +use crate::syntax::Finish; +use crate::syntax::GroupHierarchyConsumer; +use crate::syntax::Item; +use crate::syntax::NewlineConsumer; +use crate::syntax::TokenConsumer; use enso_data_structures::im_list::List; use std::collections::HashMap; @@ -122,16 +128,17 @@ impl<'a> SegmentMap<'a> { /// Macro 
resolver capable of resolving nested macro usages. See the docs of the main parser module /// to learn more about the macro resolution steps. #[derive(Debug)] -pub struct Resolver<'s> { +struct ResolverState<'s> { blocks: Vec, /// The lines of all currently-open blocks. This is partitioned by `blocks`. lines: Vec>, + groups: Vec>, /// All currently-open macros. These are partitioned into scopes by `blocks`. macros: Vec>, /// Segments of all currently-open macros. These are partitioned by `macros`. segments: Vec>, /// Items of all segments of all currently-open macros. These are partitioned by `segments`. - items: Vec>, + items: Vec>, context: Context, precedence: syntax::operator::Precedence<'s>, } @@ -139,46 +146,101 @@ pub struct Resolver<'s> { // === Public API === -impl<'s> Resolver<'s> { +impl<'s> ResolverState<'s> { /// Create a new resolver, in statement context. - pub fn new_statement() -> Self { + fn new_statement() -> Self { Self { context: Context::Statement, precedence: syntax::operator::Precedence::new(), blocks: default(), - lines: default(), + lines: vec![initial_line()], + groups: default(), macros: default(), segments: default(), items: default(), } } +} - /// Run the resolver. Returns the resolved AST. 
- pub fn run( - &mut self, - root_macro_map: &MacroMap, - tokens: impl IntoIterator>, - ) -> syntax::Tree<'s> { - let start = crate::source::code::Location::default(); - self.lines.push(syntax::item::Line { - newline: token::newline(Code::empty(start), Code::empty(start)), - items: default(), - }); - tokens.into_iter().for_each(|t| self.push(root_macro_map, t)); +fn initial_line<'s>() -> syntax::item::Line<'s> { + syntax::item::Line { + newline: token::newline(Code::empty(default()), Code::empty(default())), + items: default(), + } +} + +impl<'s> Finish for ResolverState<'s> { + type Result = syntax::Tree<'s>; + + fn finish(&mut self) -> Self::Result { self.finish_current_line(); - let lines = self.lines.drain(..).map(|syntax::item::Line { newline, items }| { - syntax::tree::block::Line { newline, expression: self.precedence.resolve(items) } - }); - let tree = syntax::tree::block::body_from_lines(lines); + let tree = syntax::tree::block::parse_module(self.lines.drain(..), &mut self.precedence); debug_assert!(self.blocks.is_empty()); debug_assert!(self.lines.is_empty()); + debug_assert!(self.groups.is_empty()); debug_assert!(self.macros.is_empty()); debug_assert!(self.segments.is_empty()); debug_assert!(self.items.is_empty()); + self.context = Context::Statement; + self.lines.push(initial_line()); tree } } +/// Resolves macros. +#[derive(Debug)] +pub struct Resolver<'s, 'macros> { + resolver: ResolverState<'s>, + root_macro_map: &'macros MacroMap, +} + +impl<'s, 'macros> Resolver<'s, 'macros> { + /// Creates a macro resolver to use with the given macro map. 
+ pub fn new(root_macro_map: &'macros MacroMap) -> Self { + Self { resolver: ResolverState::new_statement(), root_macro_map } + } +} + +impl<'s, 'macros> TokenConsumer<'s> for Resolver<'s, 'macros> { + fn push_token(&mut self, token: Token<'s>) { + self.resolver.push(self.root_macro_map, token); + } +} + +impl<'s, 'macros> NewlineConsumer<'s> for Resolver<'s, 'macros> { + fn push_newline(&mut self, newline: token::Newline<'s>) { + self.resolver.push_newline(newline); + } +} + +impl<'s, 'macros> BlockHierarchyConsumer for Resolver<'s, 'macros> { + fn start_block(&mut self) { + self.resolver.start_block() + } + + fn end_block(&mut self) { + self.resolver.end_block() + } +} + +impl<'s, 'macros> GroupHierarchyConsumer<'s> for Resolver<'s, 'macros> { + fn start_group(&mut self, open: token::OpenSymbol<'s>) { + self.resolver.start_group(open); + } + + fn end_group(&mut self, close: token::CloseSymbol<'s>) { + self.resolver.close_group(close); + } +} + +impl<'s, 'macros> Finish for Resolver<'s, 'macros> { + type Result = syntax::Tree<'s>; + + fn finish(&mut self) -> Self::Result { + self.resolver.finish() + } +} + // === Implementation === @@ -186,8 +248,8 @@ impl<'s> Resolver<'s> { #[derive(Clone, Debug)] enum Step<'s> { StartSegment(Token<'s>), - NormalToken(syntax::Item<'s>), - MacroStackPop(syntax::Item<'s>), + NormalToken(Item<'s>), + MacroStackPop(Item<'s>), } /// Information about macro resolution state that is stored while processing a deeper indentation @@ -204,16 +266,33 @@ struct Block { items: usize, } -impl<'s> Resolver<'s> { +#[derive(Debug)] +struct OpenGroup<'s> { + open: token::OpenSymbol<'s>, + /// Index in `macro_stack` after the last element in the enclosing scope. + macros_start: usize, + /// Index in `items` after the last element in the enclosing scope. + items: usize, +} + +impl<'s> ResolverState<'s> { /// Returns the index of the first element in `self.macro_stack` that is active in the current /// scope. 
Any macros before that index are active in some block that contains the current /// block, so they will not match tokens within this block. fn macro_scope_start(&self) -> usize { - self.blocks.last().map(|scope| scope.macros_start).unwrap_or_default() + self.groups + .last() + .map(|scope| scope.macros_start) + .or_else(|| self.blocks.last().map(|scope| scope.macros_start)) + .unwrap_or_default() } fn items_start(&self) -> usize { - self.blocks.last().map(|scope| scope.items).unwrap_or_default() + self.groups + .last() + .map(|scope| scope.items) + .or_else(|| self.blocks.last().map(|scope| scope.items)) + .unwrap_or_default() } /// Pop the macro stack if the current token is reserved. For example, when matching the @@ -225,61 +304,86 @@ impl<'s> Resolver<'s> { reserved.and_option_from(|| self.macros.pop()) } + fn start_block(&mut self) { + while let Some(group) = self.groups.pop() { + self.end_group(group, None); + } + let macros_start = self.macros.len(); + let outputs_start = self.lines.len(); + let items = self.items.len(); + self.blocks.push(Block { macros_start, outputs_start, items }); + self.context = Context::Statement; + } + + fn end_block(&mut self) { + self.finish_current_line(); + if let Some(Block { macros_start, outputs_start, items }) = self.blocks.pop() { + debug_assert_eq!(macros_start, self.macros.len()); + debug_assert_eq!(items, self.items.len()); + let block = self.lines.drain(outputs_start..).collect(); + self.items.push(Item::Block(block)); + } + } + + fn start_group(&mut self, open: token::OpenSymbol<'s>) { + let macros_start = self.macros.len(); + let items = self.items.len(); + self.groups.push(OpenGroup { open, macros_start, items }); + self.context = Context::Expression; + } + + fn close_group(&mut self, close: token::CloseSymbol<'s>) { + match self.groups.pop() { + Some(group) => self.end_group(group, close.into()), + None => self.items.push(Item::Token(close.into())), + } + } + + fn end_group(&mut self, group: OpenGroup<'s>, close: 
Option>) { + let OpenGroup { open, macros_start, items } = group; + while self.macros.len() > macros_start { + let mac = self.macros.pop().unwrap(); + self.resolve(mac); + } + let body = self.items.drain(items..).collect(); + self.items.push(syntax::item::Group { open, body, close }.into()); + } + + fn push_newline(&mut self, newline: token::Newline<'s>) { + self.finish_current_line(); + self.lines.push(syntax::item::Line { newline, items: default() }); + self.context = Context::Statement; + } + /// Append a token to the state. - fn push(&mut self, root_macro_map: &MacroMap, token: Token<'s>) { - match token.variant { - token::Variant::Newline(newline) => { - if !self.lines.is_empty() { - self.finish_current_line(); + fn push(&mut self, root_macro_map: &MacroMap, mut token: Token<'s>) { + debug_assert!(!matches!(token.variant, token::Variant::Newline(_))); + loop { + token = match self.process_token(root_macro_map, token, self.context) { + Step::MacroStackPop(Item::Token(t)) => t, + Step::MacroStackPop(item) => { + self.items.push(item); + break; } - let newline = token.with_variant(newline); - self.lines.push(syntax::item::Line { newline, items: default() }); - self.context = Context::Statement; - } - token::Variant::BlockStart(_) => { - let macros_start = self.macros.len(); - let outputs_start = self.lines.len(); - let items = self.items.len(); - let scope = Block { macros_start, outputs_start, items }; - self.blocks.push(scope); - self.context = Context::Statement; - } - token::Variant::BlockEnd(_) => { - self.finish_current_line(); - if let Some(Block { macros_start, outputs_start, items }) = self.blocks.pop() { - debug_assert_eq!(macros_start, self.macros.len()); - debug_assert_eq!(items, self.items.len()); - let block = self.lines.drain(outputs_start..).collect(); - self.items.push(syntax::Item::Block(block)); + Step::StartSegment(header) => { + let items_start = self.items.len(); + self.segments.push(MatchedSegment { header, items_start }); + self.context = 
Context::Expression; + break; } - } - _ => { - let mut token = token; - loop { - token = match self.process_token(root_macro_map, token, self.context) { - Step::MacroStackPop(syntax::Item::Token(t)) => t, - Step::MacroStackPop(item) => { - self.items.push(item); - break; - } - Step::StartSegment(header) => { - let items_start = self.items.len(); - self.segments.push(MatchedSegment { header, items_start }); - self.context = Context::Expression; - break; - } - Step::NormalToken(item) => { - self.items.push(item); - self.context = Context::Expression; - break; - } - } + Step::NormalToken(item) => { + self.items.push(item); + self.context = Context::Expression; + break; } } } } fn finish_current_line(&mut self) { + while let Some(group) = self.groups.pop() { + self.end_group(group, None); + } let macros_start = self.macro_scope_start(); let items_start = self.items_start(); while self.macros.len() > macros_start { @@ -365,8 +469,7 @@ impl<'s> Resolver<'s> { }); let out = if all_tokens_consumed { let unwrap_match = |(header, match_result)| { - let match_result: Result> = - match_result; + let match_result: Result> = match_result; pattern::MatchedSegment::new(header, match_result.unwrap().matched) }; let parser = &mut self.precedence; diff --git a/lib/rust/parser/src/syntax.rs b/lib/rust/parser/src/syntax.rs index 5d66b7136b..99bb6acbda 100644 --- a/lib/rust/parser/src/syntax.rs +++ b/lib/rust/parser/src/syntax.rs @@ -8,14 +8,19 @@ pub mod item; pub mod operator; +pub mod statement; pub mod token; pub mod tree; +mod consumer; mod treebuilding; +pub use consumer::*; pub use item::Item; pub use token::Token; +pub use tree::maybe_with_error; pub use tree::Tree; pub use tree::WARNINGS; +pub use treebuilding::TokenOrTree; diff --git a/lib/rust/parser/src/syntax/consumer.rs b/lib/rust/parser/src/syntax/consumer.rs new file mode 100644 index 0000000000..013fcb7a98 --- /dev/null +++ b/lib/rust/parser/src/syntax/consumer.rs @@ -0,0 +1,234 @@ +use crate::prelude::*; + +use 
crate::syntax::token; +use crate::syntax::treebuilding::Spacing; +use crate::syntax::treebuilding::SpacingLookaheadTokenConsumer; +use crate::syntax::treebuilding::SpacingLookaheadTreeConsumer; +use crate::syntax::Item; +use crate::syntax::Token; +use crate::syntax::Tree; + +/// Item consumer. +pub trait ItemConsumer<'s> { + /// Push an item. + fn push_item(&mut self, tree: Item<'s>); +} + +/// Tree consumer. +pub trait TreeConsumer<'s> { + /// Push a tree. + fn push_tree(&mut self, tree: Tree<'s>); +} + +/// Token consumer. +pub trait TokenConsumer<'s> { + /// Push a token. + fn push_token(&mut self, token: Token<'s>); +} + +/// Newline consumer. +pub trait NewlineConsumer<'s> { + /// Push a newline. + fn push_newline(&mut self, newline: token::Newline<'s>); +} + + +/// Block hierarchy consumer. +pub trait BlockHierarchyConsumer { + /// Start a block. + fn start_block(&mut self); + /// End the block. + fn end_block(&mut self); +} + +/// Parenthesized-group hierarchy consumer. +pub trait GroupHierarchyConsumer<'s> { + /// Start a parenthesized group. + fn start_group(&mut self, open: token::OpenSymbol<'s>); + /// End the parenthesized group. + fn end_group(&mut self, close: token::CloseSymbol<'s>); +} + +/// Trait for a token consumer to enter a scope that will be handled independently. +pub trait ScopeHierarchyConsumer { + /// The result of the scope ending. + type Result; + /// Start a scope. + fn start_scope(&mut self); + /// End the scope. + fn end_scope(&mut self) -> Self::Result; +} + +/// An operation that can be finished. +pub trait Finish { + /// The output. + type Result; + + /// Indicates end of input. + fn finish(&mut self) -> Self::Result; +} + +/// Trait for a type that wraps another type, and exposes it. +pub trait HasInner { + /// The inner type. + type Inner; + + /// Access the inner type. + fn inner_mut(&mut self) -> &mut Self::Inner; +} + +/// Process all retained state. +pub trait Flush { + /// Process all retained state. 
+ fn flush(&mut self); +} + + +// ================ +// === Adapters === +// ================ + +/// Adapts a parser that consumes only tokens to fit into more complex pipeline stages. +#[derive(Debug, Default)] +pub struct TokenOnlyParser { + parser: Parser, +} + +impl<'s, Inner, Parser> SpacingLookaheadTokenConsumer<'s> for TokenOnlyParser +where + Parser: HasInner + SpacingLookaheadTokenConsumer<'s>, + Inner: SpacingLookaheadTokenConsumer<'s>, +{ + fn push_token(&mut self, token: Token<'s>, following_spacing: Option) { + self.parser.push_token(token, following_spacing); + } +} + +impl<'s, Parser, Inner> SpacingLookaheadTreeConsumer<'s> for TokenOnlyParser +where + Parser: HasInner + Flush, + Inner: SpacingLookaheadTreeConsumer<'s>, +{ + fn push_tree(&mut self, tree: Tree<'s>, following_spacing: Option) { + self.parser.flush(); + self.parser.inner_mut().push_tree(tree, following_spacing) + } +} + +impl<'s, Parser, Inner> GroupHierarchyConsumer<'s> for TokenOnlyParser +where + Parser: HasInner + Flush, + Inner: GroupHierarchyConsumer<'s>, +{ + fn start_group(&mut self, open: token::OpenSymbol<'s>) { + self.parser.flush(); + self.parser.inner_mut().start_group(open) + } + + fn end_group(&mut self, close: token::CloseSymbol<'s>) { + self.parser.flush(); + self.parser.inner_mut().end_group(close) + } +} + +impl Finish for TokenOnlyParser +where Inner: Finish +{ + type Result = Inner::Result; + + fn finish(&mut self) -> Self::Result { + self.parser.finish() + } +} + + +// ================= +// === Debugging === +// ================= + +/// Debugging tool. Can be inserted into parsing pipeline at different stages to debug components.
+#[derive(Debug, Default)] +pub struct Inspect(pub(crate) Inner); + +impl Inspect { + pub(crate) fn observe(&self, event: &impl Debug) { + eprintln!("-> {:?}", event); + } + + pub(crate) fn observe_received(&self, event: &impl Debug) { + eprintln!("<- {:?}", event); + } + + pub(crate) fn observe_token<'a: 'b, 'b, T: Into>>(&self, token: T) { + eprintln!("-> Token({})", token.into().code.repr.0); + } +} + +impl> ScopeHierarchyConsumer + for Inspect +{ + type Result = Inner::Result; + + fn start_scope(&mut self) { + self.observe(&"StartScope"); + self.0.start_scope(); + } + + fn end_scope(&mut self) -> Self::Result { + self.observe(&"EndScope"); + let result = self.0.end_scope(); + self.observe_received(&result); + result + } +} + +impl<'s, Inner: GroupHierarchyConsumer<'s>> GroupHierarchyConsumer<'s> for Inspect { + fn start_group(&mut self, open: token::OpenSymbol<'s>) { + self.observe_token(&open); + self.0.start_group(open); + } + + fn end_group(&mut self, close: token::CloseSymbol<'s>) { + self.observe_token(&close); + self.0.end_group(close); + } +} + +impl<'s, Inner: SpacingLookaheadTokenConsumer<'s>> SpacingLookaheadTokenConsumer<'s> + for Inspect +{ + fn push_token(&mut self, token: Token<'s>, following_spacing: Option) { + self.observe_token(&token); + self.0.push_token(token, following_spacing); + } +} + +impl<'s, Inner: SpacingLookaheadTreeConsumer<'s>> SpacingLookaheadTreeConsumer<'s> + for Inspect +{ + fn push_tree(&mut self, tree: Tree<'s>, following_spacing: Option) { + self.observe(&tree); + self.0.push_tree(tree, following_spacing); + } +} + +impl<'s, Inner: ItemConsumer<'s>> ItemConsumer<'s> for Inspect { + fn push_item(&mut self, item: Item<'s>) { + match &item { + Item::Token(token) => self.observe_token(token), + _ => self.observe(&item), + } + self.0.push_item(item); + } +} + +impl> Finish for Inspect { + type Result = Inner::Result; + + fn finish(&mut self) -> Self::Result { + self.observe(&"Finish"); + let result = self.0.finish(); + 
self.observe_received(&result); + result + } +} diff --git a/lib/rust/parser/src/syntax/item.rs b/lib/rust/parser/src/syntax/item.rs index c1bf531a43..c89867956f 100644 --- a/lib/rust/parser/src/syntax/item.rs +++ b/lib/rust/parser/src/syntax/item.rs @@ -18,8 +18,20 @@ use crate::syntax::*; #[allow(missing_docs)] pub enum Item<'s> { Token(Token<'s>), - Block(Vec>), + Block(Box<[Line<'s>]>), Tree(Tree<'s>), + Group(Group<'s>), +} + +/// A parenthesized subtree. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Group<'s> { + /// The opening parenthesis. + pub open: token::OpenSymbol<'s>, + /// The parenthesized subtree. + pub body: Box<[Item<'s>]>, + /// The closing parenthesis. + pub close: Option>, } /// A line. @@ -46,6 +58,7 @@ impl<'s> Item<'s> { match self { Self::Token(t) => t.span().left_offset.visible, Self::Tree(t) => t.span.left_offset.visible, + Self::Group(t) => t.open.left_offset.visible, Self::Block(_) => default(), } } @@ -73,18 +86,16 @@ impl<'s> From> for Item<'s> { } } - -/// Given a sequence of [`Line`]s belonging to one block, create an AST block node, of a type -/// determined by the syntax of the lines in the block. 
-pub fn build_block<'s>( - lines: impl IntoIterator>, - parser: &mut operator::Precedence<'s>, -) -> Tree<'s> { - let mut block_builder = tree::block::Builder::new(); - for Line { newline, items } in lines { - block_builder.push(newline, items, parser); +impl<'s> From> for Item<'s> { + fn from(group: Group<'s>) -> Self { + Item::Group(group) + } +} + +impl<'s> AsRef> for Item<'s> { + fn as_ref(&self) -> &Self { + self } - block_builder.build() } diff --git a/lib/rust/parser/src/syntax/operator.rs b/lib/rust/parser/src/syntax/operator.rs index 9949b1bc00..2e85629897 100644 --- a/lib/rust/parser/src/syntax/operator.rs +++ b/lib/rust/parser/src/syntax/operator.rs @@ -2,15 +2,16 @@ +mod annotations; mod application; mod apply; mod arity; -mod operand; +mod group; +mod named_app; mod precedence_resolver; mod reducer; +mod section; mod types; - - // =============== // === Exports === // =============== diff --git a/lib/rust/parser/src/syntax/operator/annotations.rs b/lib/rust/parser/src/syntax/operator/annotations.rs new file mode 100644 index 0000000000..4e1af123c3 --- /dev/null +++ b/lib/rust/parser/src/syntax/operator/annotations.rs @@ -0,0 +1,134 @@ +use crate::lexer::test::Precedence; +use crate::prelude::*; + +use crate::syntax::operator::reducer::ApplyToOperand; +use crate::syntax::operator::section::MaybeSection; +use crate::syntax::operator::types::Arity; +use crate::syntax::operator::types::ModifiedPrecedence; +use crate::syntax::operator::types::Operator; +use crate::syntax::operator::types::OperatorConsumer; +use crate::syntax::token::AnnotationOperator; +use crate::syntax::token::Associativity; +use crate::syntax::token::Ident; +use crate::syntax::token::Variant; +use crate::syntax::tree; +use crate::syntax::tree::SyntaxError; +use crate::syntax::treebuilding::Spacing; +use crate::syntax::treebuilding::SpacingLookaheadTokenConsumer; +use crate::syntax::treebuilding::SpacingLookaheadTreeConsumer; +use crate::syntax::Finish; +use crate::syntax::Flush; +use 
crate::syntax::HasInner; +use crate::syntax::Token; +use crate::syntax::TokenOnlyParser; +use crate::syntax::Tree; + + +// =================== +// === Annotations === +// =================== + +pub type ParseAnnotations<'s, Inner> = TokenOnlyParser>; + +#[derive(Debug, Default)] +pub struct AnnotationParser<'s, Inner> { + operator: Option>, + inner: Inner, +} + +#[derive(Debug)] +pub struct Annotation<'s> { + operator: AnnotationOperator<'s>, + ident: Ident<'s>, +} + +impl<'s> Annotation<'s> { + fn apply(self, operand: Option>>) -> Tree<'s> { + let Self { operator, ident } = self; + let operand = operand.map(Tree::from); + if ident.is_type { + Tree::annotated_builtin(operator, ident, default(), operand) + } else { + Tree::annotated(operator, ident, operand, default(), default()) + } + } +} + +impl<'s> ApplyToOperand<'s> for Annotation<'s> { + fn apply_to_operand(self, operand: Option>>) -> MaybeSection> { + self.apply(operand).into() + } +} + +impl<'s> Annotation<'s> { + pub fn spacing(&self) -> Spacing { + Spacing::of_token(&self.operator) + } +} + +impl<'s, Inner> SpacingLookaheadTokenConsumer<'s> for AnnotationParser<'s, Inner> +where Inner: + SpacingLookaheadTokenConsumer<'s> + SpacingLookaheadTreeConsumer<'s> + OperatorConsumer<'s> +{ + fn push_token(&mut self, token: Token<'s>, following_spacing: Option) { + match (self.operator.as_mut(), token.variant) { + (None, Variant::AnnotationOperator(variant)) + if following_spacing == Some(Spacing::Unspaced) => + self.operator = token.with_variant(variant).into(), + (Some(_), Variant::Ident(variant)) => { + let operator = self.operator.take().unwrap(); + let ident = token.with_variant(variant); + let annotation = Annotation { operator, ident }; + if following_spacing.is_some() { + self.inner.push_operator(Operator { + left_precedence: None, + right_precedence: ModifiedPrecedence::new( + following_spacing.unwrap_or_default(), + Precedence::annotation(), + false, + ), + associativity: Associativity::Left, + arity: 
Arity::Annotation(annotation), + }); + } else { + self.inner.push_tree(annotation.apply(None), following_spacing); + } + } + _ => { + self.flush(); + self.inner.push_token(token, following_spacing); + } + } + } +} + +impl<'s, Inner> Flush for AnnotationParser<'s, Inner> +where Inner: SpacingLookaheadTreeConsumer<'s> +{ + fn flush(&mut self) { + if let Some(operator) = self.operator.take() { + let tree = tree::to_ast(operator.into()) + .with_error(SyntaxError::AnnotationOpMustBeAppliedToIdent); + self.inner.push_tree(tree, Some(Spacing::Unspaced)); + } + } +} + +impl<'s, Inner> Finish for AnnotationParser<'s, Inner> +where Inner: Finish + SpacingLookaheadTreeConsumer<'s> +{ + type Result = Inner::Result; + + fn finish(&mut self) -> Self::Result { + self.flush(); + self.inner.finish() + } +} + +impl<'s, Inner> HasInner for AnnotationParser<'s, Inner> { + type Inner = Inner; + + fn inner_mut(&mut self) -> &mut Self::Inner { + &mut self.inner + } +} diff --git a/lib/rust/parser/src/syntax/operator/application.rs b/lib/rust/parser/src/syntax/operator/application.rs index fcbcd3236f..e876196b0e 100644 --- a/lib/rust/parser/src/syntax/operator/application.rs +++ b/lib/rust/parser/src/syntax/operator/application.rs @@ -1,10 +1,12 @@ use crate::syntax::operator::types::*; use enso_prelude::*; -use crate::syntax::operator::operand::Operand; +use crate::syntax::operator::named_app::NamedApp; use crate::syntax::token; -use crate::syntax::treebuilding::Finish; use crate::syntax::treebuilding::Spacing; +use crate::syntax::Finish; +use crate::syntax::GroupHierarchyConsumer; +use crate::syntax::ScopeHierarchyConsumer; use crate::syntax::Tree; @@ -17,17 +19,61 @@ use crate::syntax::Tree; #[derive(Default, Debug)] pub struct InsertApps { prev_applicable: bool, + stack: Vec, inner: Inner, } -impl<'s, Inner: OperatorConsumer<'s> + OperandConsumer<'s>> OperandConsumer<'s> - for InsertApps +impl<'s, Inner> NamedOperandConsumer<'s> for InsertApps +where Inner: OperatorConsumer<'s> + 
OperandConsumer<'s> { - fn push_operand(&mut self, operand: Operand>) { - if mem::replace(&mut self.prev_applicable, true) { - self.inner.push_operator(application(Spacing::of_tree(&operand.value))); + fn push_maybe_named_operand(&mut self, operand: OperandMaybeNamed<'s>) { + match operand { + OperandMaybeNamed::Unnamed(operand) => { + if mem::replace(&mut self.prev_applicable, true) { + self.inner.push_operator(application(Spacing::of_tree(&operand.value))); + } + self.inner.push_operand(operand) + } + OperandMaybeNamed::Named { parens, name, equals, expression } => { + if mem::replace(&mut self.prev_applicable, true) { + let spacing = if let Some((open, _)) = &parens { + Spacing::of_token(open) + } else { + Spacing::of_token(&name) + }; + let precedence = + ModifiedPrecedence::new(spacing, token::Precedence::application(), false); + let right_precedence = ModifiedPrecedence::new( + // Named applications always have unspaced right-precedence; if it reads + // from left to right as a named application, a following operator can't + // cause the interpretation to change. + Spacing::Unspaced, + token::Precedence::application(), + false, + ); + let operator = Operator { + left_precedence: Some(precedence), + right_precedence, + associativity: token::Associativity::Left, + arity: Arity::NamedApp( + NamedApp { parens, name, equals, expression }.into(), + ), + }; + self.inner.push_operator(operator); + } else { + let mut tree = Tree::opr_app( + Tree::ident(name).into(), + Ok(equals.with_variant(token::variant::Operator())), + expression.into(), + ); + if let Some((open, close)) = parens { + tree = Tree::group(Some(open), tree.into(), close); + } + // After removing support for old lambdas, we can make this an error. 
+ self.inner.push_operand(tree.into()) + } + } } - self.inner.push_operand(operand) } } @@ -59,19 +105,45 @@ impl Finish for InsertApps { } fn application<'s>(spacing: Spacing) -> Operator<'s> { - let precedence = ModifiedPrecedence { - spacing, - precedence: token::Precedence::application(), - is_value_operation: false, - }; + let precedence = ModifiedPrecedence::new(spacing, token::Precedence::application(), false); Operator { left_precedence: Some(precedence), right_precedence: precedence, associativity: token::Associativity::Left, - arity: Arity::Binary { - tokens: default(), - missing: None, - reify_rhs_section: true, - }, + arity: Arity::App, + } +} + +impl<'s, Inner> GroupHierarchyConsumer<'s> for InsertApps +where Inner: OperatorConsumer<'s> + GroupHierarchyConsumer<'s> +{ + fn start_group(&mut self, open: token::OpenSymbol<'s>) { + if mem::replace(&mut self.prev_applicable, false) { + self.inner.push_operator(application(Spacing::of_token(&open))); + } + self.inner.start_group(open); + } + + fn end_group(&mut self, close: token::CloseSymbol<'s>) { + self.prev_applicable = true; + self.inner.end_group(close); + } +} + +impl<'s, Inner> ScopeHierarchyConsumer for InsertApps +where Inner: OperandConsumer<'s> + OperatorConsumer<'s> + ScopeHierarchyConsumer +{ + type Result = Inner::Result; + + fn start_scope(&mut self) { + let state = mem::replace(&mut self.prev_applicable, false); + self.stack.push(state); + self.inner.start_scope(); + } + + fn end_scope(&mut self) -> Self::Result { + let state = self.stack.pop().unwrap(); + self.prev_applicable = state; + self.inner.end_scope() } } diff --git a/lib/rust/parser/src/syntax/operator/apply.rs b/lib/rust/parser/src/syntax/operator/apply.rs index cc95d31485..16dd723661 100644 --- a/lib/rust/parser/src/syntax/operator/apply.rs +++ b/lib/rust/parser/src/syntax/operator/apply.rs @@ -2,8 +2,11 @@ use crate::prelude::*; use crate::syntax::operator::types::*; use crate::syntax; -use 
crate::syntax::operator::operand::Operand; +use crate::syntax::maybe_with_error; +use crate::syntax::operator::section::MaybeSection; use crate::syntax::token; +use crate::syntax::token::TokenOperatorProperties; +use crate::syntax::Token; use crate::syntax::Tree; @@ -17,15 +20,15 @@ use crate::syntax::Tree; #[derive(Debug)] pub struct ApplyOperator<'s> { - tokens: Vec>, - lhs: Option>>, - rhs: Option>>, + tokens: Vec>, + lhs: Option>>, + rhs: Option>>, reify_rhs_section: bool, warnings: Option, } impl<'s> ApplyOperator<'s> { - pub fn tokens(tokens: Vec>) -> Self { + pub fn tokens(tokens: Vec>) -> Self { Self { tokens, lhs: default(), @@ -35,15 +38,15 @@ impl<'s> ApplyOperator<'s> { } } - pub fn token(token: token::Operator<'s>) -> Self { + pub fn token(token: Token<'s>) -> Self { Self::tokens(vec![token]) } - pub fn with_lhs(self, lhs: Option>>) -> Self { + pub fn with_lhs(self, lhs: Option>>) -> Self { Self { lhs, ..self } } - pub fn with_rhs(self, rhs: Option>>, reify_rhs_section: bool) -> Self { + pub fn with_rhs(self, rhs: Option>>, reify_rhs_section: bool) -> Self { Self { rhs, reify_rhs_section, ..self } } @@ -51,10 +54,11 @@ impl<'s> ApplyOperator<'s> { Self { warnings: Some(warnings), ..self } } - pub fn finish(self) -> Operand> { + pub fn finish(self) -> MaybeSection> { let Self { tokens, lhs, rhs: rhs_, reify_rhs_section, warnings } = self; - let mut operand = if let Some(lhs_termination) = - tokens.first().and_then(|token| token.properties.lhs_section_termination()) + let mut operand = if let Some(lhs_termination) = tokens + .first() + .and_then(|token| token.operator_properties().unwrap().lhs_section_termination()) { let lhs = match lhs_termination { SectionTermination::Reify => lhs.map(Tree::from), @@ -62,10 +66,10 @@ impl<'s> ApplyOperator<'s> { }; let rhs = rhs_.map(Tree::from); let ast = syntax::tree::apply_operator(lhs, tokens, rhs); - Operand::from(ast) + MaybeSection::from(ast) } else if tokens.len() < 2 && let Some(opr) = tokens.first() - && 
opr.properties.can_form_section() + && !opr.is_syntactic_binary_operator() { let mut rhs = None; let mut elided = 0; @@ -81,18 +85,18 @@ impl<'s> ApplyOperator<'s> { } elided += lhs.is_none() as u32 + rhs.is_none() as u32; let mut operand = - Operand::from(lhs).map(|lhs| syntax::tree::apply_operator(lhs, tokens, rhs)); + MaybeSection::from(lhs).map(|lhs| syntax::tree::apply_operator(lhs, tokens, rhs)); operand.elided += elided; operand.wildcards += wildcards; operand } else { let rhs = rhs_.map(Tree::from); let mut elided = 0; - if tokens.len() != 1 || tokens[0].properties.can_form_section() { + if tokens.len() != 1 || !tokens[0].is_syntactic_binary_operator() { elided += lhs.is_none() as u32 + rhs.is_none() as u32; } let mut operand = - Operand::from(lhs).map(|lhs| syntax::tree::apply_operator(lhs, tokens, rhs)); + MaybeSection::from(lhs).map(|lhs| syntax::tree::apply_operator(lhs, tokens, rhs)); operand.elided += elided; operand }; @@ -108,18 +112,18 @@ impl<'s> ApplyOperator<'s> { #[derive(Debug)] pub struct ApplyUnaryOperator<'s> { - token: token::Operator<'s>, - rhs: Option>>, + token: token::UnaryOperator<'s>, + rhs: Option>>, error: Option>, warnings: Option, } impl<'s> ApplyUnaryOperator<'s> { - pub fn token(token: token::Operator<'s>) -> Self { + pub fn token(token: token::UnaryOperator<'s>) -> Self { Self { token, rhs: default(), error: default(), warnings: default() } } - pub fn with_rhs(self, rhs: Option>>) -> Self { + pub fn with_rhs(self, rhs: Option>>) -> Self { Self { rhs, ..self } } @@ -131,17 +135,19 @@ impl<'s> ApplyUnaryOperator<'s> { Self { warnings: Some(warnings), ..self } } - pub fn finish(self) -> Operand> { + pub fn finish(self) -> MaybeSection> { let Self { token, rhs, error, warnings } = self; - Operand::new(rhs).map(|rhs| { - let mut tree = syntax::tree::apply_unary_operator(token, rhs); + MaybeSection::new(rhs).map(|rhs| { + let mut tree = match rhs { + Some(rhs) => Tree::unary_opr_app(token, Some(rhs)), + None => + Tree::opr_app(None, 
Ok(token.with_variant(token::variant::Operator())), None) + .with_error("Operator must be applied to an operand."), + }; if let Some(warnings) = warnings { warnings.apply(&mut tree); } - match error { - None => tree, - Some(error) => tree.with_error(error), - } + maybe_with_error(tree, error) }) } } diff --git a/lib/rust/parser/src/syntax/operator/arity.rs b/lib/rust/parser/src/syntax/operator/arity.rs index 1bf1411ce8..28e338d8c0 100644 --- a/lib/rust/parser/src/syntax/operator/arity.rs +++ b/lib/rust/parser/src/syntax/operator/arity.rs @@ -2,15 +2,16 @@ use crate::syntax::operator::apply::*; use crate::syntax::operator::types::*; use enso_prelude::*; -use crate::syntax::operator::operand::Operand; use crate::syntax::token; +use crate::syntax::token::OperatorProperties; +use crate::syntax::token::TokenOperatorProperties; use crate::syntax::tree; -use crate::syntax::treebuilding::Finish; use crate::syntax::treebuilding::Spacing; use crate::syntax::treebuilding::SpacingLookaheadTokenConsumer; -use crate::syntax::treebuilding::TreeConsumer; +use crate::syntax::Finish; +use crate::syntax::GroupHierarchyConsumer; +use crate::syntax::ScopeHierarchyConsumer; use crate::syntax::Token; -use crate::syntax::Tree; @@ -23,65 +24,64 @@ use crate::syntax::Tree; pub struct ClassifyArity<'s, Inner> { /// Next item that will be emitted. If it is an operator, it may still be extended with /// additional operators to become a multiple-operator error. 
- lhs_item: Option>, + lhs_item: Option>, inner: Inner, } -impl<'s, Inner: OperandConsumer<'s> + OperatorConsumer<'s>> SpacingLookaheadTokenConsumer<'s> - for ClassifyArity<'s, Inner> +impl<'s, Inner> SpacingLookaheadTokenConsumer<'s> for ClassifyArity<'s, Inner> +where Inner: NamedOperandConsumer<'s> + OperatorConsumer<'s> { - fn push_token(&mut self, tt: Token<'s>, rhs: Option) { - match tt { - Token { variant: token::Variant::Operator(opr), left_offset, code } => - self.operator(Token(left_offset, code, opr), rhs), - token => self.push_tree(tree::to_ast(token)), + fn push_token(&mut self, token: Token<'s>, rhs: Option) { + let properties = token.operator_properties(); + match properties { + Some(properties) => self.operator(token, properties, rhs), + None => self.push_operand(tree::to_ast(token).into()), } } } -impl<'s, Inner: OperandConsumer<'s> + OperatorConsumer<'s>> TreeConsumer<'s> - for ClassifyArity<'s, Inner> +impl<'s, Inner> NamedOperandConsumer<'s> for ClassifyArity<'s, Inner> +where Inner: NamedOperandConsumer<'s> + OperatorConsumer<'s> { - fn push_tree(&mut self, tree: Tree<'s>) { - self.emit(Operand::from(tree)) + fn push_maybe_named_operand(&mut self, operand: OperandMaybeNamed<'s>) { + self.emit(MaybeOperator::Operand); + self.inner.push_maybe_named_operand(operand); } } -impl<'s, Inner: OperandConsumer<'s> + OperatorConsumer<'s> + Finish> Finish - for ClassifyArity<'s, Inner> +impl<'s, Inner> Finish for ClassifyArity<'s, Inner> +where Inner: NamedOperandConsumer<'s> + OperatorConsumer<'s> + Finish { type Result = Inner::Result; fn finish(&mut self) -> Self::Result { - self.step(None); + self.flush(); self.inner.finish() } } -impl<'s, Inner: OperandConsumer<'s> + OperatorConsumer<'s>> ClassifyArity<'s, Inner> { - fn emit>>(&mut self, item: T) { +impl<'s, Inner> ClassifyArity<'s, Inner> +where Inner: NamedOperandConsumer<'s> + OperatorConsumer<'s> +{ + fn emit>>(&mut self, item: T) { self.step(Some(item.into())); } - fn step(&mut self, item: 
Option>) { - match mem::replace(&mut self.lhs_item, item) { - Some(OperatorOrOperand::Operand(item)) => self.inner.push_operand(item), - Some(OperatorOrOperand::Operator(item)) => self.inner.push_operator(item), - None => (), + fn flush(&mut self) { + self.step(None); + } + + fn step(&mut self, item: Option>) { + if let Some(MaybeOperator::Operator(item)) = mem::replace(&mut self.lhs_item, item) { + self.inner.push_operator(item) } } - fn operator(&mut self, token: token::Operator<'s>, rhs: Option) { - let properties = &token.variant.properties; - let lhs = match self.lhs_item { - Some( - OperatorOrOperand::Operand(_) - | OperatorOrOperand::Operator(Operator { - arity: Arity::Binary { missing: Some(BinaryOperand::Right), .. }, - .. - }), - ) => Some(Spacing::of_token(&token)), - _ => None, + fn operator(&mut self, token: Token<'s>, properties: OperatorProperties, rhs: Option) { + let lhs = if self.lhs_item.as_ref().is_some_and(|item| !item.expects_rhs()) { + Some(Spacing::of_token(&token)) + } else { + None }; // Asymmetric whitespace creates operator sections. 
// Exception: If an operator cannot form sections, and its LHS is unspaced, a spaced RHS is @@ -99,7 +99,7 @@ impl<'s, Inner: OperandConsumer<'s> + OperatorConsumer<'s>> ClassifyArity<'s, In (_, Some(unary), None, Some(Spacing::Unspaced)) => self.unary_operator_applied(unary, assoc, token), (Some(binary), _, _, _) => self.binary_operator(binary, assoc, token, lhs, rhs), - (_, Some(_), _, _) => self.unary_operator_section(token, rhs), + (_, Some(_), _, _) => self.unary_operator_section(token), (None, None, _, _) => unreachable!(), } } @@ -108,87 +108,74 @@ impl<'s, Inner: OperandConsumer<'s> + OperatorConsumer<'s>> ClassifyArity<'s, In &mut self, precedence: token::Precedence, associativity: token::Associativity, - token: token::Operator<'s>, + token: Token<'s>, ) { - let error = match self.lhs_item { - Some(OperatorOrOperand::Operand(_)) - if token.left_offset.visible.width_in_spaces == 0 => - Some("Space required between term and unary-operator expression.".into()), - _ => None, - }; - let is_value_operation = token.properties.is_value_operation(); + let is_value_operation = token.operator_properties().unwrap().is_value_operation(); self.emit(Operator { left_precedence: None, - right_precedence: ModifiedPrecedence { - spacing: Spacing::Unspaced, + right_precedence: ModifiedPrecedence::new( + Spacing::Unspaced, precedence, is_value_operation, - }, + ), associativity, - arity: Arity::Unary { token, error }, + arity: Arity::Unary(token.with_variant(token::variant::UnaryOperator())), }); } - fn unary_operator_section(&mut self, token: token::Operator<'s>, rhs: Option) { - match &mut self.lhs_item { - Some(OperatorOrOperand::Operator(Operator { - arity: Arity::Binary { tokens, .. }, - .. 
- })) if !(tokens.first().unwrap().left_offset.visible.width_in_spaces == 0 - && token.left_offset.visible.width_in_spaces == 0) => - self.multiple_operator_error(token, rhs), - _ => self.emit(ApplyUnaryOperator::token(token).finish()), - } + fn unary_operator_section(&mut self, token: Token<'s>) { + self.emit(MaybeOperator::Operand); + self.inner.push_maybe_named_operand(OperandMaybeNamed::Unnamed( + ApplyUnaryOperator::token(token.with_variant(token::variant::UnaryOperator())).finish(), + )); } fn binary_operator( &mut self, precedence: token::Precedence, associativity: token::Associativity, - token: token::Operator<'s>, + token: Token<'s>, lhs: Option, rhs: Option, ) { - if let Some(OperatorOrOperand::Operator(Operator { - arity: Arity::Binary { missing: None | Some(BinaryOperand::Left), .. }, - .. - })) = &self.lhs_item - && !matches!(rhs, Some(Spacing::Unspaced)) + if self.lhs_item.as_ref().is_some_and(|item| item.expects_rhs()) + && rhs != Some(Spacing::Unspaced) { self.multiple_operator_error(token, rhs); return; } let missing = match (lhs, rhs) { (None, None) => { - self.emit(ApplyOperator::token(token).finish()); + self.inner.push_maybe_named_operand(OperandMaybeNamed::Unnamed( + ApplyOperator::token(token).finish(), + )); + self.emit(MaybeOperator::Operand); return; } (Some(_), None) => Some(BinaryOperand::Right), (None, Some(_)) => Some(BinaryOperand::Left), (Some(_), Some(_)) => None, }; - let reify_rhs_section = token.properties.can_form_section() + let properties = token.operator_properties().unwrap(); + let reify_rhs_section = properties.can_form_section() && (lhs == Some(Spacing::Spaced) || rhs == Some(Spacing::Spaced)); - let is_value_operation = missing.is_none() && token.properties.is_value_operation(); + let is_value_operation = missing.is_none() && properties.is_value_operation(); self.emit(Operator { - left_precedence: lhs.map(|spacing| ModifiedPrecedence { - spacing, + left_precedence: lhs + .map(|spacing| ModifiedPrecedence::new(spacing, 
precedence, is_value_operation)), + right_precedence: ModifiedPrecedence::new( + rhs.or(lhs).unwrap(), precedence, is_value_operation, - }), - right_precedence: ModifiedPrecedence { - spacing: rhs.or(lhs).unwrap(), - precedence, - is_value_operation, - }, + ), associativity, arity: Arity::Binary { tokens: vec![token], missing, reify_rhs_section }, }); } - fn multiple_operator_error(&mut self, token: token::Operator<'s>, rhs: Option) { + fn multiple_operator_error(&mut self, token: Token<'s>, rhs: Option) { match &mut self.lhs_item { - Some(OperatorOrOperand::Operator(Operator { + Some(MaybeOperator::Operator(Operator { arity: Arity::Binary { tokens, missing, .. }, .. })) => { @@ -196,10 +183,13 @@ impl<'s, Inner: OperandConsumer<'s> + OperatorConsumer<'s>> ClassifyArity<'s, In if rhs.is_none() { match missing { None => *missing = Some(BinaryOperand::Right), - Some(BinaryOperand::Left) => - self.lhs_item = Some(OperatorOrOperand::Operand( + Some(BinaryOperand::Left) => { + let operand = OperandMaybeNamed::Unnamed( ApplyOperator::tokens(mem::take(tokens)).finish(), - )), + ); + self.inner.push_maybe_named_operand(operand); + self.lhs_item = Some(MaybeOperator::Operand); + } Some(BinaryOperand::Right) => unreachable!(), } } @@ -208,3 +198,65 @@ impl<'s, Inner: OperandConsumer<'s> + OperatorConsumer<'s>> ClassifyArity<'s, In } } } + +impl<'s, Inner> ScopeHierarchyConsumer for ClassifyArity<'s, Inner> +where Inner: NamedOperandConsumer<'s> + OperatorConsumer<'s> + ScopeHierarchyConsumer +{ + type Result = Inner::Result; + + fn start_scope(&mut self) { + self.flush(); + self.inner.start_scope() + } + + fn end_scope(&mut self) -> Self::Result { + self.flush(); + self.inner.end_scope() + } +} + +impl<'s, Inner> GroupHierarchyConsumer<'s> for ClassifyArity<'s, Inner> +where Inner: NamedOperandConsumer<'s> + OperatorConsumer<'s> + GroupHierarchyConsumer<'s> +{ + fn start_group(&mut self, open: token::OpenSymbol<'s>) { + self.flush(); + self.inner.start_group(open); + } + + 
fn end_group(&mut self, close: token::CloseSymbol<'s>) { + self.emit(MaybeOperator::Operand); + self.inner.end_group(close); + } +} + +impl<'s, Inner> OperatorConsumer<'s> for ClassifyArity<'s, Inner> +where Inner: NamedOperandConsumer<'s> + OperatorConsumer<'s> +{ + fn push_operator(&mut self, operator: Operator<'s>) { + self.emit(operator); + } +} + + +// === Operator or Operand + +#[derive(Debug)] +enum MaybeOperator<'s> { + Operand, + Operator(Operator<'s>), +} + +impl<'s> From> for MaybeOperator<'s> { + fn from(operator: Operator<'s>) -> Self { + MaybeOperator::Operator(operator) + } +} + +impl<'s> MaybeOperator<'s> { + fn expects_rhs(&self) -> bool { + match self { + MaybeOperator::Operand => false, + MaybeOperator::Operator(op) => op.arity.expects_rhs(), + } + } +} diff --git a/lib/rust/parser/src/syntax/operator/group.rs b/lib/rust/parser/src/syntax/operator/group.rs new file mode 100644 index 0000000000..08ca16ad90 --- /dev/null +++ b/lib/rust/parser/src/syntax/operator/group.rs @@ -0,0 +1,87 @@ +use crate::syntax::operator::types::*; +use enso_prelude::*; + +use crate::syntax::operator::section::MaybeSection; +use crate::syntax::token::CloseSymbol; +use crate::syntax::token::OpenSymbol; +use crate::syntax::tree::SyntaxError; +use crate::syntax::Finish; +use crate::syntax::GroupHierarchyConsumer; +use crate::syntax::ScopeHierarchyConsumer; +use crate::syntax::Tree; + + + +// ===================== +// === Group Builder === +// ===================== + +/// Constructs parenthesized groups. 
+#[derive(Default, Debug)] +pub struct BuildGroups<'s, Inner> { + open: Vec>, + inner: Inner, +} + +impl<'s, Inner: OperandConsumer<'s>> OperandConsumer<'s> for BuildGroups<'s, Inner> { + fn push_operand(&mut self, operand: MaybeSection>) { + self.inner.push_operand(operand) + } +} + +impl<'s, Inner: OperatorConsumer<'s>> OperatorConsumer<'s> for BuildGroups<'s, Inner> { + fn push_operator(&mut self, operator: Operator<'s>) { + self.inner.push_operator(operator) + } +} + +impl<'s, ScopeResult, Inner> Finish for BuildGroups<'s, Inner> +where + ScopeResult: Into>>, + Inner: Finish + ScopeHierarchyConsumer + OperandConsumer<'s>, +{ + type Result = ::Result; + + fn finish(&mut self) -> Self::Result { + for open in self.open.drain(..).rev() { + let expression = self.inner.end_scope().into(); + self.inner.push_operand( + Tree::group(Some(open), expression, None) + .with_error(SyntaxError::ExprUnclosedParen) + .into(), + ); + } + self.inner.finish() + } +} + +impl<'s, ScopeResult, Inner> GroupHierarchyConsumer<'s> for BuildGroups<'s, Inner> +where + ScopeResult: Into>>, + Inner: ScopeHierarchyConsumer + OperandConsumer<'s>, +{ + fn start_group(&mut self, open: OpenSymbol<'s>) { + self.open.push(open); + self.inner.start_scope(); + } + + fn end_group(&mut self, close: CloseSymbol<'s>) { + let open = self.open.pop().unwrap(); + let expression = self.inner.end_scope().into(); + self.inner.push_operand(Tree::group(Some(open), expression, Some(close)).into()); + } +} + +impl<'s, Inner> ScopeHierarchyConsumer for BuildGroups<'s, Inner> +where Inner: ScopeHierarchyConsumer +{ + type Result = Inner::Result; + + fn start_scope(&mut self) { + self.inner.start_scope() + } + + fn end_scope(&mut self) -> Self::Result { + self.inner.end_scope() + } +} diff --git a/lib/rust/parser/src/syntax/operator/named_app.rs b/lib/rust/parser/src/syntax/operator/named_app.rs new file mode 100644 index 0000000000..1d57e2ce82 --- /dev/null +++ b/lib/rust/parser/src/syntax/operator/named_app.rs @@ 
-0,0 +1,287 @@ +use crate::prelude::*; + +use crate::syntax::operator::reducer::ApplyToOperand; +use crate::syntax::operator::section::MaybeSection; +use crate::syntax::operator::types::NamedOperandConsumer; +use crate::syntax::operator::types::OperandMaybeNamed; +use crate::syntax::operator::types::Operator; +use crate::syntax::operator::types::OperatorConsumer; +use crate::syntax::token; +use crate::syntax::treebuilding::Spacing; +use crate::syntax::treebuilding::SpacingLookaheadTokenConsumer; +use crate::syntax::treebuilding::SpacingLookaheadTreeConsumer; +use crate::syntax::Finish; +use crate::syntax::GroupHierarchyConsumer; +use crate::syntax::ScopeHierarchyConsumer; +use crate::syntax::Token; +use crate::syntax::Tree; + + + +// ======================== +// === Named-App Parser === +// ======================== + +/// Parses named-application syntax. +#[derive(Default, Debug)] +pub struct ParseAppNames<'s, Inner> { + inner: Inner, + partial: Option>, + stack: Vec>, +} + +#[derive(Debug)] +pub struct NamedApp<'s> { + pub parens: Option<(token::OpenSymbol<'s>, Option>)>, + pub name: token::Ident<'s>, + pub equals: token::AssignmentOperator<'s>, + pub expression: Tree<'s>, +} + +impl<'s> ApplyToOperand<'s> for NamedApp<'s> { + fn apply_to_operand(self, operand: Option>>) -> MaybeSection> { + let NamedApp { parens, name, equals, expression } = self; + let func = operand.unwrap(); + let (open, close) = match parens { + None => (None, None), + Some((open, close)) => (Some(open), close), + }; + func.map(|func| Tree::named_app(func, open, name, equals, expression, close)) + } +} + +#[derive(Debug)] +enum Partial<'s> { + ExpectingName { open: token::OpenSymbol<'s> }, + ExpectingEquals { open: Option>, name: token::Ident<'s> }, +} + +#[derive(Debug, Default)] +struct AppName<'s> { + open: Option>, + name: token::Ident<'s>, + equals: token::AssignmentOperator<'s>, + spaceproof: bool, + inner_parens: u32, +} + +impl<'s> AppName<'s> { + fn finish( + self, + expression: 
Option>, + close: &mut Option>, + ) -> OperandMaybeNamed<'s> { + let Self { open, name, equals, inner_parens: _, spaceproof: _ } = self; + // An `OuterAppName` is only constructed when lookahead indicates there's a + // token after the `=`. + let expression = expression.unwrap(); + OperandMaybeNamed::Named { + parens: open.map(|open| (open, close.take())), + name, + equals, + expression, + } + } +} + +impl<'s, Inner> ParseAppNames<'s, Inner> +where Inner: NamedOperandConsumer<'s> + + ScopeHierarchyConsumer>> + + GroupHierarchyConsumer<'s> + + SpacingLookaheadTokenConsumer<'s> +{ + fn maybe_end_unspaced_expression( + &mut self, + following_spacing: Option, + is_syntactic_binary_operator: bool, + ) { + if let Some(last) = self.stack.last_mut() { + if !last.spaceproof + && last.inner_parens == 0 + && last.open.is_none() + && following_spacing != Some(Spacing::Unspaced) + { + if is_syntactic_binary_operator { + last.spaceproof = true; + } else { + self.flush_complete(None); + } + } + } + } + + fn flush_paren(&mut self, open: token::OpenSymbol<'s>) { + self.inner.start_group(open); + if let Some(last) = self.stack.last_mut() { + last.inner_parens += 1; + } + } + + fn flush_paren_and_ident( + &mut self, + open: Option>, + name: token::Ident<'s>, + following_spacing: Option, + ) { + if let Some(open) = open { + self.flush_paren(open); + } + self.inner.push_token(name.into(), following_spacing); + } + + fn flush_partial(&mut self, following: impl FnOnce() -> Option) { + match self.partial.take() { + None => {} + Some(Partial::ExpectingName { open }) => self.flush_paren(open), + Some(Partial::ExpectingEquals { open, name }) => + self.flush_paren_and_ident(open, name, following()), + }; + } + + fn flush_complete(&mut self, mut close: Option>) { + let expression = self.inner.end_scope(); + let operand = self.stack.pop().unwrap().finish(expression, &mut close); + self.inner.push_maybe_named_operand(operand); + if let Some(close) = close { + self.inner.end_group(close); + } + } 
+} + +impl<'s, Inner> SpacingLookaheadTokenConsumer<'s> for ParseAppNames<'s, Inner> +where Inner: SpacingLookaheadTokenConsumer<'s> + + NamedOperandConsumer<'s> + + ScopeHierarchyConsumer>> + + GroupHierarchyConsumer<'s> +{ + fn push_token(&mut self, token: Token<'s>, following_spacing: Option) { + self.partial = loop { + self.maybe_end_unspaced_expression(Some(Spacing::of_token(&token)), false); + break match (token.variant, (self.partial.take(), following_spacing)) { + (token::Variant::Ident(variant), (None, Some(Spacing::Unspaced))) + if !variant.is_type && token.is_spaced() => + { + let name = token.with_variant(variant); + Some(Partial::ExpectingEquals { open: None, name }) + } + (token::Variant::Ident(variant), (Some(Partial::ExpectingName { open }), _)) + if !variant.is_type => + { + let name = token.with_variant(variant); + Some(Partial::ExpectingEquals { open: Some(open), name }) + } + ( + token::Variant::AssignmentOperator(variant), + (Some(Partial::ExpectingEquals { open, name }), Some(Spacing::Unspaced)) + | (Some(Partial::ExpectingEquals { open: open @ Some(_), name }), _), + ) => { + let equals = token.with_variant(variant); + self.stack.push(AppName { + open, + name, + equals, + inner_parens: 0, + spaceproof: false, + }); + self.inner.start_scope(); + None + } + (_, (None, _)) => { + let is_syntactic_binary_operator = token.is_syntactic_binary_operator(); + self.inner.push_token(token, following_spacing); + self.maybe_end_unspaced_expression( + following_spacing, + is_syntactic_binary_operator, + ); + None + } + (_, (Some(Partial::ExpectingName { open }), _)) => { + self.flush_paren(open); + self.inner.push_token(token, following_spacing); + None + } + (_, (Some(Partial::ExpectingEquals { open, name }), _)) => { + self.flush_paren_and_ident(open, name, Spacing::of_token(&token).into()); + continue; + } + }; + } + } +} + +impl<'s, Inner> SpacingLookaheadTreeConsumer<'s> for ParseAppNames<'s, Inner> +where Inner: SpacingLookaheadTokenConsumer<'s> + + 
NamedOperandConsumer<'s> + + ScopeHierarchyConsumer>> + + GroupHierarchyConsumer<'s> +{ + fn push_tree(&mut self, tree: Tree<'s>, following_spacing: Option) { + self.flush_partial(|| Spacing::of_tree(&tree).into()); + self.maybe_end_unspaced_expression(Some(Spacing::of_tree(&tree)), false); + self.inner.push_maybe_named_operand(OperandMaybeNamed::Unnamed(MaybeSection::from(tree))); + self.maybe_end_unspaced_expression(following_spacing, false); + } +} + +impl<'s, Inner> GroupHierarchyConsumer<'s> for ParseAppNames<'s, Inner> +where Inner: GroupHierarchyConsumer<'s> + + SpacingLookaheadTokenConsumer<'s> + + NamedOperandConsumer<'s> + + ScopeHierarchyConsumer>> +{ + fn start_group(&mut self, open: token::OpenSymbol<'s>) { + self.flush_partial(|| Spacing::of_token(&open).into()); + self.partial = if open.is_spaced() { + Some(Partial::ExpectingName { open }) + } else { + self.flush_paren(open); + None + } + } + + fn end_group(&mut self, close: token::CloseSymbol<'s>) { + self.flush_partial(|| Spacing::of_token(&close).into()); + if let Some(last) = self.stack.last_mut() { + if last.inner_parens > 0 { + self.inner.end_group(close); + last.inner_parens -= 1; + } else { + self.flush_complete(close.into()); + } + } else { + self.inner.end_group(close); + } + } +} + +impl<'s, Inner: Finish> Finish for ParseAppNames<'s, Inner> +where Inner: Finish + + SpacingLookaheadTokenConsumer<'s> + + NamedOperandConsumer<'s> + + ScopeHierarchyConsumer>> + + GroupHierarchyConsumer<'s> +{ + type Result = ::Result; + + fn finish(&mut self) -> Self::Result { + self.flush_partial(|| None); + while !self.stack.is_empty() { + self.flush_complete(None); + } + self.inner.finish() + } +} + +impl<'s, Inner> OperatorConsumer<'s> for ParseAppNames<'s, Inner> +where Inner: OperatorConsumer<'s> + + NamedOperandConsumer<'s> + + ScopeHierarchyConsumer>> + + GroupHierarchyConsumer<'s> + + SpacingLookaheadTokenConsumer<'s> +{ + fn push_operator(&mut self, operator: Operator<'s>) { + self.flush_partial(|| 
None); + self.maybe_end_unspaced_expression(Some(operator.spacing()), false); + self.inner.push_operator(operator); + } +} diff --git a/lib/rust/parser/src/syntax/operator/precedence_resolver.rs b/lib/rust/parser/src/syntax/operator/precedence_resolver.rs index 90e67540e1..31f4cbadbc 100644 --- a/lib/rust/parser/src/syntax/operator/precedence_resolver.rs +++ b/lib/rust/parser/src/syntax/operator/precedence_resolver.rs @@ -1,12 +1,18 @@ use crate::prelude::*; use crate::syntax; +use crate::syntax::operator::annotations::ParseAnnotations; use crate::syntax::operator::application::InsertApps; use crate::syntax::operator::arity::ClassifyArity; +use crate::syntax::operator::group::BuildGroups; +use crate::syntax::operator::named_app::ParseAppNames; use crate::syntax::operator::reducer::Reduce; -use crate::syntax::treebuilding; -use crate::syntax::treebuilding::Finish; -use crate::syntax::treebuilding::ItemConsumer; +use crate::syntax::treebuilding::CompoundTokens; +use crate::syntax::treebuilding::FlattenBlockTrees; +use crate::syntax::treebuilding::ParseNumbers; +use crate::syntax::treebuilding::PeekSpacing; +use crate::syntax::Finish; +use crate::syntax::ItemConsumer; use crate::syntax::Tree; @@ -15,23 +21,33 @@ use crate::syntax::Tree; // === Precedence === // ================== +macro_rules! compose_types { + ($ty:ident<'s>) => { + $ty<'s> + }; + ($ty:ident<'s, _>, $($tail:tt)*) => { + $ty<'s, compose_types!($($tail)*)> + }; + ($ty:ident<_>, $($tail:tt)*) => { + $ty + }; +} + /// Operator precedence resolver. 
#[derive(Debug, Default)] pub struct Precedence<'s> { - #[rustfmt::skip] - resolver: - // Items -> Tokens/Trees - treebuilding::FlattenBlockTrees<'s, - // Tokens/Trees -> Tokens/Trees (proper tokens only) - treebuilding::AssembleCompoundTokens<'s, - // Tokens/Trees -> Tokens/Trees + Spacing-lookahead - treebuilding::PeekSpacing<'s, - // Tokens/Trees + Spacing-lookahead -> Operators/Operands - ClassifyArity<'s, - // Operators/Operands -> Operators/Operands (balanced) - InsertApps< - // Operators/Operands -> Tree - Reduce<'s>>>>>>, + resolver: compose_types![ + FlattenBlockTrees<'s, _>, // Items -> Tokens/Trees/Groups + CompoundTokens<'s, _>, + ParseNumbers<'s, _>, + PeekSpacing<'s, _>, // Tokens/Trees/Groups -> Tokens/Trees/Groups + Spacing-lookahead + ParseAnnotations<'s, _>, // Tokens/Trees/Groups + S -> T/T/Operators/Groups + S + ParseAppNames<'s, _>, + ClassifyArity<'s, _>, // Tokens/Trees/Groups + Spacing-lookahead -> Oper*s/Groups + InsertApps<_>, // Operators/Operands/Groups -> Oper*s/Groups/Applications + BuildGroups<'s, _>, // Operators/Operands/Groups/Applications -> Oper*s/Applications + Reduce<'s> // Operators/Operands/Applications -> Tree + ], } impl<'s> Precedence<'s> { diff --git a/lib/rust/parser/src/syntax/operator/reducer.rs b/lib/rust/parser/src/syntax/operator/reducer.rs index 24d611ca24..1385c20f18 100644 --- a/lib/rust/parser/src/syntax/operator/reducer.rs +++ b/lib/rust/parser/src/syntax/operator/reducer.rs @@ -1,14 +1,14 @@ +use crate::prelude::*; use crate::syntax::operator::apply::*; use crate::syntax::operator::types::*; -use crate::syntax::operator::operand::Operand; +use crate::syntax::operator::section::MaybeSection; use crate::syntax::token; -use crate::syntax::treebuilding::Finish; -use crate::syntax::treebuilding::Spacing; +use crate::syntax::tree::apply; +use crate::syntax::Finish; +use crate::syntax::ScopeHierarchyConsumer; use crate::syntax::Tree; -use enso_prelude::VecOps; - // =============== @@ -25,12 +25,13 @@ use 
enso_prelude::VecOps; /// [^2](https://en.wikipedia.org/wiki/Shunting_yard_algorithm) #[derive(Default, Debug)] pub struct Reduce<'s> { - output: Vec>>, + output: Vec>>, operator_stack: Vec>, + scope_stack: Vec<(u32, u32)>, } impl<'s> OperandConsumer<'s> for Reduce<'s> { - fn push_operand(&mut self, operand: Operand>) { + fn push_operand(&mut self, operand: MaybeSection>) { self.output.push(operand) } } @@ -53,14 +54,10 @@ impl<'s> OperatorConsumer<'s> for Reduce<'s> { } impl<'s> Finish for Reduce<'s> { - type Result = Option>>; + type Result = Option>>; fn finish(&mut self) -> Self::Result { - self.reduce(ModifiedPrecedence { - spacing: Spacing::Spaced, - precedence: token::Precedence::min(), - is_value_operation: false, - }); + self.reduce(ModifiedPrecedence::min()); let out = self.output.pop(); debug_assert!(self.operator_stack.is_empty()); debug_assert_eq!( @@ -72,14 +69,37 @@ impl<'s> Finish for Reduce<'s> { } } +impl<'s> ScopeHierarchyConsumer for Reduce<'s> { + type Result = Option>; + + fn start_scope(&mut self) { + let operators = self.operator_stack.len() as u32; + let operands = self.output.len() as u32; + self.scope_stack.push((operators, operands)); + } + + fn end_scope(&mut self) -> Self::Result { + let result = if self.output.len() > self.scope_start().1 { + self.reduce(ModifiedPrecedence::min()); + self.output.pop().map(Tree::from) + } else { + None + }; + self.scope_stack.pop(); + result + } +} + impl<'s> Reduce<'s> { /// Given a starting value, replace it with the result of successively applying to it all /// operators in the `operator_stack` that have precedence greater than or equal to the /// specified value, consuming LHS values from the `output` stack as needed. 
fn reduce(&mut self, right_op_precedence: ModifiedPrecedence) -> Warnings { - let mut rhs = self.output.pop(); + let mut operand = self.output.pop(); let mut right_op_warnings = Warnings::default(); - while let Some(opr) = self.operator_stack.pop_if_mut(|opr| { + let scope_start = self.scope_start().0; + while self.operator_stack.len() > scope_start { + let opr = self.operator_stack.last_mut().unwrap(); let ModifiedPrecedenceComparisonResult { is_greater, inconsistent_spacing } = opr .right_precedence .compare(&right_op_precedence, opr.associativity == token::Associativity::Left); @@ -87,46 +107,82 @@ impl<'s> Reduce<'s> { if is_greater { &mut right_op_warnings } else { &mut opr.warnings } .set_inconsistent_spacing(); } - is_greater - }) { + if !is_greater { + break; + } + let opr = self.operator_stack.pop().unwrap(); let StackOperator { right_precedence: _, associativity: _, arity, warnings } = opr; - match arity { - Arity::Unary { token, error } => { - let rhs_ = rhs.take(); - debug_assert_ne!(rhs_, None); - rhs = ApplyUnaryOperator::token(token) - .with_rhs(rhs_) - .with_error(error) - .with_warnings(warnings) - .finish() - .into(); - } - Arity::Binary { tokens, missing, reify_rhs_section } => { - let operand = rhs.take(); - debug_assert_ne!(operand, None); - let (lhs, rhs_) = match missing { - Some(BinaryOperand::Left) => (None, operand), - Some(BinaryOperand::Right) => (operand, None), - None => { - let lhs = self.output.pop(); - debug_assert_ne!(lhs, None); - (lhs, operand) - } - }; - rhs = ApplyOperator::tokens(tokens) - .with_lhs(lhs) - .with_rhs(rhs_, reify_rhs_section) - .with_warnings(warnings) - .finish() - .into(); - } - }; + operand = reduce_step(arity, operand.take(), &mut self.output).into(); + if let Some(operand) = operand.as_mut() { + warnings.apply(&mut operand.value); + } } - if let Some(rhs) = rhs { + if let Some(rhs) = operand { self.output.push(rhs); } right_op_warnings } + + fn scope_start(&self) -> (usize, usize) { + let (operators, 
operands) = self.scope_stack.last().copied().unwrap_or_default(); + (operators as usize, operands as usize) + } +} + +pub trait ApplyToOperands<'s> { + fn apply_to_operands( + self, + operand: Option>>, + additional_operands: &mut Vec>>, + ) -> MaybeSection>; +} + +pub trait ApplyToOperand<'s> { + fn apply_to_operand(self, operand: Option>>) -> MaybeSection>; +} + +impl<'s, T: ApplyToOperand<'s>> ApplyToOperands<'s> for T { + fn apply_to_operands( + self, + operand: Option>>, + _: &mut Vec>>, + ) -> MaybeSection> { + self.apply_to_operand(operand) + } +} + +fn reduce_step<'s>( + arity: Arity<'s>, + operand: Option>>, + additional_operands: &mut Vec>>, +) -> MaybeSection> { + match arity { + Arity::Unary(token) => { + let rhs = operand; + debug_assert_ne!(rhs, None); + ApplyUnaryOperator::token(token).with_rhs(rhs).finish() + } + Arity::Binary { tokens, missing, reify_rhs_section } => { + let op1 = operand; + debug_assert_ne!(op1, None); + let (lhs, rhs) = match missing { + Some(BinaryOperand::Left) => (None, op1), + Some(BinaryOperand::Right) => (op1, None), + None => { + let lhs = additional_operands.pop(); + debug_assert_ne!(lhs, None); + (lhs, op1) + } + }; + ApplyOperator::tokens(tokens).with_lhs(lhs).with_rhs(rhs, reify_rhs_section).finish() + } + Arity::App => { + let (lhs, rhs) = (additional_operands.pop().unwrap(), operand); + lhs.map(|lhs| apply(lhs, rhs.unwrap().into())) + } + Arity::NamedApp(app) => app.apply_to_operand(operand), + Arity::Annotation(annotation) => annotation.apply_to_operand(operand), + } } diff --git a/lib/rust/parser/src/syntax/operator/operand.rs b/lib/rust/parser/src/syntax/operator/section.rs similarity index 64% rename from lib/rust/parser/src/syntax/operator/operand.rs rename to lib/rust/parser/src/syntax/operator/section.rs index 9853d8a4c8..3d3c3abd44 100644 --- a/lib/rust/parser/src/syntax/operator/operand.rs +++ b/lib/rust/parser/src/syntax/operator/section.rs @@ -1,17 +1,17 @@ +use enso_prelude::*; + use crate::syntax::tree; 
use crate::syntax::Tree; -use enso_prelude::default; - -// =============== -// === Operand === -// =============== +// ==================== +// === MaybeSection === +// ==================== /// Wraps a value, tracking the number of wildcards or elided operands within it. #[derive(Default, Debug, PartialEq, Eq)] -pub struct Operand { +pub struct MaybeSection { pub value: T, /// Number of elided operands in the subtree, potentially forming an *operator section*. pub elided: u32, @@ -20,23 +20,23 @@ pub struct Operand { } /// Transpose. Note that an absent input will not be treated as an elided value; for that -/// conversion, use [`Operand::new`]. -impl From>> for Operand> { - fn from(operand: Option>) -> Self { +/// conversion, use [`MaybeSection::new`]. +impl From>> for MaybeSection> { + fn from(operand: Option>) -> Self { match operand { - Some(Operand { value, elided, wildcards }) => + Some(MaybeSection { value, elided, wildcards }) => Self { value: Some(value), elided, wildcards }, None => default(), } } } -/// Unit. Creates an Operand from a node. -impl<'s> From> for Operand> { +/// Unit. Creates a MaybeSection from a node. +impl<'s> From> for MaybeSection> { fn from(mut value: Tree<'s>) -> Self { let elided = 0; let wildcards = if let Tree { - variant: box tree::Variant::Wildcard(tree::Wildcard { de_bruijn_index, .. }), + variant: tree::Variant::Wildcard(box tree::Wildcard { de_bruijn_index, .. }), .. } = &mut value { @@ -51,9 +51,9 @@ impl<'s> From> for Operand> { } /// Counit. Bakes any information about elided operands into the tree. 
-impl<'s> From>> for Tree<'s> { - fn from(operand: Operand>) -> Self { - let Operand { mut value, elided, wildcards } = operand; +impl<'s> From>> for Tree<'s> { + fn from(operand: MaybeSection>) -> Self { + let MaybeSection { mut value, elided, wildcards } = operand; if elided != 0 { value = Tree::opr_section_boundary(elided, value); } @@ -64,24 +64,24 @@ impl<'s> From>> for Tree<'s> { } } -impl Operand> { +impl MaybeSection> { /// Lift an option value to a potentially-elided operand. - pub fn new(value: Option>) -> Self { + pub fn new(value: Option>) -> Self { match value { None => Self { value: None, elided: 1, wildcards: default() }, Some(value) => { - let Operand { value, elided, wildcards } = value; + let MaybeSection { value, elided, wildcards } = value; Self { value: Some(value), elided, wildcards } } } } } -impl Operand { +impl MaybeSection { /// Operate on the contained value without altering the elided-operand information. - pub fn map(self, f: impl FnOnce(T) -> U) -> Operand { + pub fn map(self, f: impl FnOnce(T) -> U) -> MaybeSection { let Self { value, elided, wildcards } = self; let value = f(value); - Operand { value, elided, wildcards } + MaybeSection { value, elided, wildcards } } } diff --git a/lib/rust/parser/src/syntax/operator/types.rs b/lib/rust/parser/src/syntax/operator/types.rs index 8a577300dd..6448ee8ce7 100644 --- a/lib/rust/parser/src/syntax/operator/types.rs +++ b/lib/rust/parser/src/syntax/operator/types.rs @@ -1,12 +1,15 @@ -use crate::syntax::operator::operand::Operand; +use crate::syntax::operator::section::MaybeSection; use crate::syntax::token; use crate::syntax::tree; use crate::syntax::treebuilding::Spacing; +use crate::syntax::Inspect; +use crate::syntax::Token; use crate::syntax::Tree; +use crate::syntax::TreeConsumer; -use std::borrow::Cow; -use std::cmp::Ordering; - +use crate::syntax::operator::annotations::Annotation; +use crate::syntax::operator::named_app::NamedApp; +use std::fmt::Debug; // ================ @@ -22,26 
+25,40 @@ pub struct Operator<'s> { pub arity: Arity<'s>, } +impl<'s> Operator<'s> { + pub(crate) fn spacing(&self) -> Spacing { + match &self.arity { + Arity::Unary(token) => Spacing::of_token(token), + Arity::Binary { tokens, .. } => Spacing::of_token(tokens.first().unwrap()), + Arity::App => Spacing::Spaced, + Arity::NamedApp(_) => Spacing::Spaced, + Arity::Annotation(annotation) => annotation.spacing(), + } + } +} // === Arity === /// Classifies the role of an operator. #[derive(Debug)] pub enum Arity<'s> { - Unary { - token: token::Operator<'s>, - error: Option>, - }, + Unary(token::UnaryOperator<'s>), Binary { - tokens: Vec>, + tokens: Vec>, missing: Option, reify_rhs_section: bool, }, + App, + NamedApp(Box>), + Annotation(Annotation<'s>), } impl<'s> Arity<'s> { - fn unary(token: token::Operator<'s>) -> Self { - Self::Unary { token, error: None } + pub fn expects_rhs(&self) -> bool { + matches!( + self, + Arity::Unary(_) | Arity::Binary { missing: None | Some(BinaryOperand::Left), .. 
} + ) } } @@ -59,9 +76,8 @@ pub enum BinaryOperand { #[derive(Debug, Copy, Clone)] pub struct ModifiedPrecedence { - pub spacing: Spacing, - pub precedence: token::Precedence, - pub is_value_operation: bool, + value: u8, + mask: u8, } pub struct ModifiedPrecedenceComparisonResult { @@ -70,30 +86,28 @@ pub struct ModifiedPrecedenceComparisonResult { } impl ModifiedPrecedence { + pub fn new(spacing: Spacing, precedence: token::Precedence, is_value_operation: bool) -> Self { + let unspaced_bit = match spacing { + Spacing::Spaced => 0, + Spacing::Unspaced => 0x80, + }; + let value = precedence.into_u8() | unspaced_bit; + let mask = if is_value_operation { 0x7f } else { 0xff }; + Self { value, mask } + } + pub fn compare(&self, other: &Self, include_eq: bool) -> ModifiedPrecedenceComparisonResult { - let spacing_ordering = match (self.spacing, other.spacing) { - (Spacing::Spaced, Spacing::Unspaced) => Some(Ordering::Less), - (Spacing::Unspaced, Spacing::Spaced) => Some(Ordering::Greater), - _ => None, - }; - let use_spacing = !(self.is_value_operation && other.is_value_operation); - let natural_ordering = self.precedence.cmp(&other.precedence); - let natural_is_greater = natural_ordering == Ordering::Greater - || (include_eq && natural_ordering == Ordering::Equal); - let (is_greater, inconsistent_spacing) = match spacing_ordering { - Some(spacing_ordering) => { - let spacing_is_greater = spacing_ordering == Ordering::Greater - || (include_eq && spacing_ordering == Ordering::Equal); - if use_spacing { - (spacing_is_greater, false) - } else { - (natural_is_greater, natural_is_greater != spacing_is_greater) - } - } - None => (natural_is_greater, false), - }; + let adjusted_self = self.value + include_eq as u8; + let mask = self.mask | other.mask; + let is_greater = adjusted_self & mask > other.value & mask; + let is_greater_including_space = adjusted_self > other.value; + let inconsistent_spacing = is_greater != is_greater_including_space; 
ModifiedPrecedenceComparisonResult { is_greater, inconsistent_spacing } } + + pub fn min() -> Self { + Self { value: 0, mask: 0xff } + } } @@ -129,37 +143,71 @@ impl Warnings { // ====================================== pub trait OperandConsumer<'s> { - fn push_operand(&mut self, operand: Operand>); + fn push_operand(&mut self, operand: MaybeSection>); } pub trait OperatorConsumer<'s> { fn push_operator(&mut self, operator: Operator<'s>); } - -// =========================== -// === Operator or Operand === -// =========================== - -#[derive(Debug)] -pub enum OperatorOrOperand<'s> { - Operand(Operand>), - Operator(Operator<'s>), +pub trait NamedOperandConsumer<'s> { + fn push_maybe_named_operand(&mut self, operand: OperandMaybeNamed<'s>); } -impl<'s> From>> for OperatorOrOperand<'s> { - fn from(operand: Operand>) -> Self { - OperatorOrOperand::Operand(operand) + +// === Debugging === + +impl<'s, Inner: NamedOperandConsumer<'s>> NamedOperandConsumer<'s> for Inspect { + fn push_maybe_named_operand(&mut self, operand: OperandMaybeNamed<'s>) { + self.observe(&operand); + self.0.push_maybe_named_operand(operand); } } -impl<'s> From> for OperatorOrOperand<'s> { - fn from(operator: Operator<'s>) -> Self { - OperatorOrOperand::Operator(operator) +impl<'s, Inner: OperatorConsumer<'s>> OperatorConsumer<'s> for Inspect { + fn push_operator(&mut self, operator: Operator<'s>) { + self.observe(&operator); + self.0.push_operator(operator); } } +// === Conversions === + +impl<'s, T> OperandConsumer<'s> for T +where T: NamedOperandConsumer<'s> +{ + fn push_operand(&mut self, operand: MaybeSection>) { + self.push_maybe_named_operand(OperandMaybeNamed::Unnamed(operand)); + } +} + +impl<'s, T> TreeConsumer<'s> for T +where T: OperandConsumer<'s> +{ + fn push_tree(&mut self, tree: Tree<'s>) { + self.push_operand(tree.into()); + } +} + + +// ====================== +// === Named Operands === +// ====================== + +#[derive(Debug, PartialEq, Eq)] 
+#[allow(clippy::large_enum_variant)] // Clippy considers the `Unnamed` is "at least 0 bytes". +pub enum OperandMaybeNamed<'s> { + Unnamed(MaybeSection>), + Named { + parens: Option<(token::OpenSymbol<'s>, Option>)>, + name: token::Ident<'s>, + equals: token::AssignmentOperator<'s>, + expression: Tree<'s>, + }, +} + + // ========================== // === SectionTermination === // ========================== diff --git a/lib/rust/parser/src/syntax/statement.rs b/lib/rust/parser/src/syntax/statement.rs new file mode 100644 index 0000000000..7f58c2e80d --- /dev/null +++ b/lib/rust/parser/src/syntax/statement.rs @@ -0,0 +1,383 @@ +//! Parses statements in module, body blocks, and type blocks. + + + +mod function_def; +mod type_def; + +use crate::empty_tree; +use crate::expression_to_pattern; +use crate::is_qualified_name; +use crate::prelude::*; +use crate::syntax::item; +use crate::syntax::maybe_with_error; +use crate::syntax::operator::Precedence; +use crate::syntax::statement::function_def::parse_function_decl; +use crate::syntax::statement::function_def::try_parse_foreign_function; +use crate::syntax::statement::type_def::try_parse_type_def; +use crate::syntax::token; +use crate::syntax::tree; +use crate::syntax::tree::block; +use crate::syntax::tree::ArgumentDefinition; +use crate::syntax::tree::SyntaxError; +use crate::syntax::treebuilding::Spacing; +use crate::syntax::Item; +use crate::syntax::Token; +use crate::syntax::Tree; + +/// Parses normal statements. +#[derive(Debug, Default)] +pub struct BodyBlockParser<'s> { + statement_parser: StatementParser<'s>, +} + +impl<'s> BodyBlockParser<'s> { + /// Parse the statements in a block. 
+ pub fn parse_body_block( + &mut self, + lines: impl IntoIterator>, + precedence: &mut Precedence<'s>, + ) -> Tree<'s> { + let lines = lines.into_iter().map(|item::Line { newline, mut items }| block::Line { + newline, + expression: self.statement_parser.parse_body_block_statement(&mut items, 0, precedence), + }); + Tree::body_block(block::compound_lines(lines).collect()) + } + + /// Parse the declarations and statements at the top level of a module. + pub fn parse_module( + &mut self, + lines: impl IntoIterator>, + precedence: &mut Precedence<'s>, + ) -> Tree<'s> { + let lines = lines.into_iter().map(|item::Line { newline, mut items }| block::Line { + newline, + expression: self.statement_parser.parse_module_statement(&mut items, 0, precedence), + }); + Tree::body_block(block::compound_lines(lines).collect()) + } +} + +#[derive(Debug, Default)] +struct StatementParser<'s> { + args_buffer: Vec>, +} + +impl<'s> StatementParser<'s> { + fn parse_body_block_statement( + &mut self, + items: &mut Vec>, + start: usize, + precedence: &mut Precedence<'s>, + ) -> Option> { + let private_keywords = scan_private_keywords(&*items); + let mut statement = parse_body_block_statement( + items, + start + private_keywords, + precedence, + &mut self.args_buffer, + ); + for _ in 0..private_keywords { + let Item::Token(keyword) = items.pop().unwrap() else { unreachable!() }; + let token::Variant::Private(variant) = keyword.variant else { unreachable!() }; + let keyword = keyword.with_variant(variant); + let error = match statement.as_ref().map(|tree| &tree.variant) { + Some(tree::Variant::Invalid(_) | tree::Variant::Function(_)) => None, + _ => SyntaxError::StmtUnexpectedPrivateUsage.into(), + }; + let private_stmt = Tree::private(keyword, statement.take()); + statement = maybe_with_error(private_stmt, error).into(); + } + statement + } + + fn parse_module_statement( + &mut self, + items: &mut Vec>, + start: usize, + precedence: &mut Precedence<'s>, + ) -> Option> { + let 
private_keywords = scan_private_keywords(&*items); + let mut statement = parse_body_block_statement( + items, + start + private_keywords, + precedence, + &mut self.args_buffer, + ); + for _ in 0..private_keywords { + let Item::Token(keyword) = items.pop().unwrap() else { unreachable!() }; + let token::Variant::Private(variant) = keyword.variant else { unreachable!() }; + let keyword = keyword.with_variant(variant); + let error = match statement.as_ref().map(|tree| &tree.variant) { + Some(tree::Variant::Invalid(_) | tree::Variant::Function(_)) | None => None, + _ => SyntaxError::StmtUnexpectedPrivateUsage.into(), + }; + let private_stmt = Tree::private(keyword, statement.take()); + statement = maybe_with_error(private_stmt, error).into(); + } + statement + } +} + +fn scan_private_keywords<'s>(items: impl IntoIterator>>) -> usize { + items + .into_iter() + .take_while(|item| { + matches!(item.as_ref(), Item::Token(Token { variant: token::Variant::Private(_), .. })) + }) + .count() +} + +fn parse_body_block_statement<'s>( + items: &mut Vec>, + start: usize, + precedence: &mut Precedence<'s>, + args_buffer: &mut Vec>, +) -> Option> { + use token::Variant; + if let Some(type_def) = try_parse_type_def(items, start, precedence, args_buffer) { + return Some(type_def); + } + let top_level_operator = match find_top_level_operator(&items[start..]) { + Ok(top_level_operator) => top_level_operator.map(|(i, t)| (i + start, t)), + Err(e) => + return precedence + .resolve_non_section(items.drain(start..)) + .unwrap() + .with_error(e) + .into(), + }; + let statement = match top_level_operator { + Some((i, Token { variant: Variant::AssignmentOperator(_), .. })) => + parse_assignment_like_statement(items, start, i, precedence, args_buffer).into(), + Some((i, Token { variant: Variant::TypeAnnotationOperator(_), .. 
})) => { + let type_ = precedence.resolve_non_section(items.drain(i + 1..)); + let Some(Item::Token(operator)) = items.pop() else { unreachable!() }; + let Variant::TypeAnnotationOperator(variant) = operator.variant else { unreachable!() }; + let operator = operator.with_variant(variant); + let lhs = precedence.resolve_non_section(items.drain(start..)); + let type_ = type_.unwrap_or_else(|| { + empty_tree(operator.code.position_after()).with_error(SyntaxError::ExpectedType) + }); + if lhs.as_ref().is_some_and(is_qualified_name) { + Tree::type_signature(lhs.unwrap(), operator, type_).into() + } else { + let lhs = lhs.unwrap_or_else(|| { + empty_tree(operator.left_offset.code.position_before()) + .with_error(SyntaxError::ExpectedExpression) + }); + Tree::type_annotated(lhs, operator, type_).into() + } + } + Some(_) => unreachable!(), + None => precedence.resolve(items.drain(start..)), + }; + debug_assert_eq!(items.len(), start); + statement +} + +fn parse_assignment_like_statement<'s>( + items: &mut Vec>, + start: usize, + operator: usize, + precedence: &mut Precedence<'s>, + args_buffer: &mut Vec>, +) -> Tree<'s> { + if operator == start { + return precedence + .resolve_non_section(items.drain(start..)) + .unwrap() + .with_error(SyntaxError::StmtInvalidAssignmentOrMethod); + } + + let mut expression = precedence.resolve(items.drain(operator + 1..)); + + let Some(Item::Token(operator)) = items.pop() else { unreachable!() }; + let token::Variant::AssignmentOperator(variant) = operator.variant else { unreachable!() }; + let operator = operator.with_variant(variant); + + let qn_len = scan_qn(&items[start..]); + + let mut operator = Some(operator); + if let Some(function) = try_parse_foreign_function( + items, + start, + &mut operator, + &mut expression, + precedence, + args_buffer, + ) { + return function; + } + let operator = operator.unwrap(); + + match (expression, qn_len) { + (Some(e), Some(qn_len)) if matches!(e.variant, tree::Variant::BodyBlock(_)) => { + let (qn, 
args, return_) = + parse_function_decl(items, start, qn_len, precedence, args_buffer); + Tree::function(qn, args, return_, operator, Some(e)) + } + (Some(expression), None) => + parse_assignment(items.drain(start..), operator, expression, precedence), + (Some(expression), Some(1)) if items.len() == start + 1 => + parse_assignment(items.drain(start..), operator, expression, precedence), + (e, Some(qn_len)) => { + let (qn, args, return_) = + parse_function_decl(items, start, qn_len, precedence, args_buffer); + Tree::function(qn, args, return_, operator, e) + } + (None, None) => Tree::opr_app( + precedence.resolve_non_section(items.drain(start..)), + Ok(operator.with_variant(token::variant::Operator())), + None, + ) + .with_error(SyntaxError::StmtInvalidAssignmentOrMethod), + } +} + +fn parse_assignment<'s>( + items: impl IntoIterator>, + operator: token::AssignmentOperator<'s>, + expression: Tree<'s>, + precedence: &mut Precedence<'s>, +) -> Tree<'s> { + let pattern = expression_to_pattern(precedence.resolve_non_section(items).unwrap()); + Tree::assignment(pattern, operator, expression) +} + +fn parse_pattern<'s>( + items: &mut Vec>, + arg_start: usize, + precedence: &mut Precedence<'s>, +) -> (Option>, Option>) { + let have_suspension = matches!( + items.get(arg_start), + Some(Item::Token(Token { variant: token::Variant::SuspensionOperator(_), .. 
})) + ); + let pattern_start = arg_start + have_suspension as usize; + let pattern = if items.len() - pattern_start == 1 { + Some(match items.pop().unwrap() { + Item::Token(token) => match token.variant { + token::Variant::Ident(variant) => Tree::ident(token.with_variant(variant)), + token::Variant::Wildcard(variant) => + Tree::wildcard(token.with_variant(variant), None), + _ => tree::to_ast(token).with_error(SyntaxError::ArgDefExpectedPattern), + }, + item => precedence + .resolve_non_section(Some(item)) + .map(|tree| tree.with_error(SyntaxError::ArgDefExpectedPattern)) + .unwrap(), + }) + } else { + precedence + .resolve_non_section(items.drain(pattern_start..)) + .map(|tree| tree.with_error(SyntaxError::ArgDefExpectedPattern)) + }; + let suspension = have_suspension.then(|| { + let Item::Token(token) = items.pop().unwrap() else { unreachable!() }; + let token::Variant::SuspensionOperator(variant) = token.variant else { unreachable!() }; + token.with_variant(variant) + }); + (suspension, pattern) +} + +fn find_top_level_operator<'a, 's>( + items: &'a [Item<'s>], +) -> Result)>, SyntaxError> { + use token::Variant; + let mut candidate: Option<(usize, &'a Token<'s>, bool)> = None; + let mut after_first_space = false; + for (i, item) in items.iter().enumerate() { + let next_is_after_space = + i != 0 && (after_first_space || matches!(Spacing::of_item(item), Spacing::Spaced)); + if let Item::Token(token) = item { + let is_spaced = token.is_spaced(); + if !after_first_space || is_spaced { + match &token.variant { + Variant::AssignmentOperator(_) => { + if is_spaced + && items + .get(i + 1) + .is_some_and(|item| Spacing::of_item(item) == Spacing::Unspaced) + { + return Err(SyntaxError::StmtLhsInvalidOperatorSpacing); + } + if is_spaced { + return Ok(Some((i, token))); + } + if candidate.is_none() + || (is_spaced && !candidate.unwrap().2) + || !matches!( + candidate.unwrap().1.variant, + Variant::AssignmentOperator(_) + ) + { + candidate = Some((i, token, is_spaced)); + 
} + } + Variant::TypeAnnotationOperator(_) => { + if is_spaced + && items + .get(i + 1) + .is_some_and(|item| Spacing::of_item(item) == Spacing::Unspaced) + { + return Err(SyntaxError::StmtLhsInvalidOperatorSpacing); + } + if candidate.is_none() || (is_spaced && !candidate.unwrap().2) { + candidate = Some((i, token, is_spaced)); + } + } + Variant::Operator(_) + | Variant::DotOperator(_) + | Variant::ArrowOperator(_) + | Variant::CommaOperator(_) => + if is_spaced && candidate.is_some_and(|(_, _, is_spaced)| !is_spaced) { + candidate = None; + }, + _ => {} + } + } + } + after_first_space = next_is_after_space; + } + Ok(candidate.map(|(i, t, _)| (i, t))) +} + +fn next_spaced(items: &[Item]) -> Option { + for (i, item) in items.iter().enumerate().skip(1) { + if matches!(Spacing::of_item(item), Spacing::Spaced) { + return Some(i); + } + } + None +} + +/// Returns length of the QN. +fn scan_qn<'s>(items: impl IntoIterator>>) -> Option { + enum State { + ExpectingDot, + ExpectingIdent, + } + use token::Variant::*; + use Item::*; + use State::*; + let mut state = ExpectingIdent; + for (i, item) in items.into_iter().enumerate() { + match item.as_ref() { + Token(token) if i != 0 && token.is_spaced() => break, + Token(token) => match (state, &token.variant) { + (ExpectingDot, DotOperator(_)) => state = ExpectingIdent, + (ExpectingIdent, Ident(ident)) if ident.is_type => state = ExpectingDot, + ( + ExpectingIdent, + Ident(_) | Operator(_) | NegationOperator(_) | UnaryOperator(_), + ) => return Some(i + 1), + _ => break, + }, + Group(_) | Tree(_) => break, + Block(_) => unreachable!(), + } + } + None +} diff --git a/lib/rust/parser/src/syntax/statement/function_def.rs b/lib/rust/parser/src/syntax/statement/function_def.rs new file mode 100644 index 0000000000..613e0b4295 --- /dev/null +++ b/lib/rust/parser/src/syntax/statement/function_def.rs @@ -0,0 +1,376 @@ +use crate::prelude::*; + +use crate::empty_tree; +use crate::syntax::item; +use crate::syntax::maybe_with_error; +use 
crate::syntax::operator::Precedence; +use crate::syntax::statement::find_top_level_operator; +use crate::syntax::statement::parse_pattern; +use crate::syntax::token; +use crate::syntax::tree; +use crate::syntax::tree::ArgumentDefault; +use crate::syntax::tree::ArgumentDefinition; +use crate::syntax::tree::ArgumentDefinitionLine; +use crate::syntax::tree::ArgumentType; +use crate::syntax::tree::ReturnSpecification; +use crate::syntax::tree::SyntaxError; +use crate::syntax::Item; +use crate::syntax::Token; +use crate::syntax::Tree; + + + +pub fn parse_function_decl<'s>( + items: &mut Vec>, + start: usize, + qn_len: usize, + precedence: &mut Precedence<'s>, + args_buffer: &mut Vec>, +) -> (Tree<'s>, Vec>, Option>) { + let mut arg_starts = vec![]; + let mut arrow = None; + for (i, item) in items.iter().enumerate().skip(start + qn_len) { + if let Item::Token(Token { variant: token::Variant::ArrowOperator(_), .. }) = item { + arrow = Some(i); + break; + } + if i == start + qn_len || matches!(Spacing::of_item(item), Spacing::Spaced) { + arg_starts.push(i); + } + } + let return_ = arrow.map(|arrow| parse_return_spec(items, arrow, precedence)); + + args_buffer.extend( + arg_starts.drain(..).rev().map(|arg_start| parse_arg_def(items, arg_start, precedence)), + ); + let args = args_buffer.drain(..).rev().collect(); + + let qn = precedence.resolve_non_section(items.drain(start..)).unwrap(); + + (qn, args, return_) +} + +pub fn parse_constructor_definition<'s>( + items: &mut Vec>, + start: usize, + precedence: &mut Precedence<'s>, + args_buffer: &mut Vec>, +) -> Tree<'s> { + let mut block_args = vec![]; + if matches!(items.last().unwrap(), Item::Block(_)) { + let Item::Block(block) = items.pop().unwrap() else { unreachable!() }; + block_args.extend(block.into_vec().into_iter().map(|item::Line { newline, mut items }| { + let argument = (!items.is_empty()).then(|| parse_arg_def(&mut items, 0, precedence)); + ArgumentDefinitionLine { newline, argument } + })) + } + let (name, 
inline_args) = parse_constructor_decl(items, start, precedence, args_buffer); + Tree::constructor_definition(name, inline_args, block_args) +} + +fn parse_constructor_decl<'s>( + items: &mut Vec>, + start: usize, + precedence: &mut Precedence<'s>, + args_buffer: &mut Vec>, +) -> (token::Ident<'s>, Vec>) { + let args = parse_type_args(items, start + 1, precedence, args_buffer); + let Item::Token(name) = items.pop().unwrap() else { unreachable!() }; + let Token { variant: token::Variant::Ident(variant), .. } = name else { unreachable!() }; + let name = name.with_variant(variant); + debug_assert_eq!(items.len(), start); + (name, args) +} + +pub fn parse_type_args<'s>( + items: &mut Vec>, + start: usize, + precedence: &mut Precedence<'s>, + args_buffer: &mut Vec>, +) -> Vec> { + if start == items.len() { + return default(); + } + let mut arg_starts = vec![start]; + let mut expecting_rhs = false; + for (i, item) in items.iter().enumerate().skip(start + 1) { + if expecting_rhs { + expecting_rhs = false; + continue; + } + if let Item::Token(Token { variant: token::Variant::AssignmentOperator(_), .. }) = item { + expecting_rhs = true; + continue; + } + if matches!(Spacing::of_item(item), Spacing::Spaced) { + arg_starts.push(i); + } + } + args_buffer.extend( + arg_starts.drain(..).rev().map(|arg_start| parse_arg_def(items, arg_start, precedence)), + ); + debug_assert_eq!(items.len(), start); + args_buffer.drain(..).rev().collect() +} + +pub fn try_parse_foreign_function<'s>( + items: &mut Vec>, + start: usize, + operator: &mut Option>, + expression: &mut Option>, + precedence: &mut Precedence<'s>, + args_buffer: &mut Vec>, +) -> Option> { + match items.get(start) { + Some(Item::Token(token)) if token.code == "foreign" => {} + _ => return None, + } + let operator = operator.take().unwrap(); + match items.get(start + 1) { + Some(Item::Token(Token { variant: token::Variant::Ident(ident), .. 
})) + if !ident.is_type => {} + _ => { + items.push(Item::from(Token::from(operator))); + items.extend(expression.take().map(Item::from)); + return precedence + .resolve_non_section(items.drain(start..)) + .unwrap() + .with_error(SyntaxError::ForeignFnExpectedLanguage) + .into(); + } + } + match items.get(start + 2) { + Some(Item::Token(Token { variant: token::Variant::Ident(ident), .. })) + if !ident.is_type => {} + _ => { + items.push(Item::from(Token::from(operator))); + items.extend(expression.take().map(Item::from)); + return precedence + .resolve_non_section(items.drain(start..)) + .unwrap() + .with_error(SyntaxError::ForeignFnExpectedName) + .into(); + } + } + + let body = expression + .take() + .map(|body| { + let error = match &body.variant { + tree::Variant::TextLiteral(_) => None, + _ => Some(SyntaxError::ForeignFnExpectedStringBody), + }; + maybe_with_error(body, error) + }) + .unwrap_or_else(|| { + empty_tree(operator.code.position_after()) + .with_error(SyntaxError::ForeignFnExpectedStringBody) + }); + + let mut arg_starts = vec![]; + for (i, item) in items.iter().enumerate().skip(start + 3) { + if i == start + 3 || matches!(Spacing::of_item(item), Spacing::Spaced) { + arg_starts.push(i); + } + } + args_buffer.extend( + arg_starts.drain(..).rev().map(|arg_start| parse_arg_def(items, arg_start, precedence)), + ); + let args = args_buffer.drain(..).rev().collect(); + + let Item::Token(name) = items.pop().unwrap() else { unreachable!() }; + let token::Variant::Ident(variant) = name.variant else { unreachable!() }; + let name = name.with_variant(variant); + + let Item::Token(language) = items.pop().unwrap() else { unreachable!() }; + let token::Variant::Ident(variant) = language.variant else { unreachable!() }; + let language = language.with_variant(variant); + + let Item::Token(keyword) = items.pop().unwrap() else { unreachable!() }; + let keyword = keyword.with_variant(token::variant::ForeignKeyword()); + + Tree::foreign_function(keyword, language, 
name, args, operator, body).into() +} + +#[derive(Debug, PartialEq, Eq)] +enum IsParenthesized { + Parenthesized, + Unparenthesized, +} +use crate::syntax::treebuilding::Spacing; +use IsParenthesized::*; + +struct ArgDefInfo { + type_: Option<(IsParenthesized, usize)>, + default: Option, +} + +fn parse_return_spec<'s>( + items: &mut Vec>, + arrow: usize, + precedence: &mut Precedence<'s>, +) -> ReturnSpecification<'s> { + let r#type = precedence.resolve_non_section(items.drain(arrow + 1..)); + let Item::Token(arrow) = items.pop().unwrap() else { unreachable!() }; + let token::Variant::ArrowOperator(variant) = arrow.variant else { unreachable!() }; + let arrow = arrow.with_variant(variant); + let r#type = r#type.unwrap_or_else(|| { + empty_tree(arrow.code.position_after()).with_error(SyntaxError::ExpectedExpression) + }); + ReturnSpecification { arrow, r#type } +} + +fn parse_arg_def<'s>( + items: &mut Vec>, + mut start: usize, + precedence: &mut Precedence<'s>, +) -> ArgumentDefinition<'s> { + let mut open1 = None; + let mut close1 = None; + let mut parenthesized_body = None; + if matches!(items[start..], [Item::Group(_)]) { + let Some(Item::Group(item::Group { open, body, close })) = items.pop() else { + unreachable!() + }; + open1 = open.into(); + close1 = close; + parenthesized_body = body.into_vec().into(); + debug_assert_eq!(items.len(), start); + start = 0; + } + let items = parenthesized_body.as_mut().unwrap_or(items); + let ArgDefInfo { type_, default } = match analyze_arg_def(&items[start..]) { + Err(e) => { + let pattern = + precedence.resolve_non_section(items.drain(start..)).unwrap().with_error(e); + return ArgumentDefinition { + open: open1, + open2: None, + suspension: None, + pattern, + type_: None, + close2: None, + default: None, + close: close1, + }; + } + Ok(arg_def) => arg_def, + }; + let default = default.map(|default| { + let tree = precedence.resolve(items.drain(start + default + 1..)); + let Item::Token(equals) = items.pop().unwrap() else { 
unreachable!() }; + let expression = tree.unwrap_or_else(|| { + empty_tree(equals.code.position_after()).with_error(SyntaxError::ExpectedExpression) + }); + let Token { variant: token::Variant::AssignmentOperator(variant), .. } = equals else { + unreachable!() + }; + let equals = equals.with_variant(variant); + ArgumentDefault { equals, expression } + }); + let mut open2 = None; + let mut close2 = None; + let mut suspension_and_pattern = None; + let type_ = type_.map(|(parenthesized, type_)| { + let mut parenthesized_body = None; + if parenthesized == Parenthesized + && (start..items.len()).len() == 1 + && matches!(items.last(), Some(Item::Group(_))) + { + let Some(Item::Group(item::Group { open, body, close })) = items.pop() else { + unreachable!() + }; + open2 = open.into(); + close2 = close; + parenthesized_body = body.into_vec().into(); + start = 0; + } + let items = parenthesized_body.as_mut().unwrap_or(items); + let tree = precedence.resolve_non_section(items.drain(start + type_ + 1..)); + let Item::Token(operator) = items.pop().unwrap() else { unreachable!() }; + let type_ = tree.unwrap_or_else(|| { + empty_tree(operator.code.position_after()).with_error(SyntaxError::ExpectedType) + }); + let token::Variant::TypeAnnotationOperator(variant) = operator.variant else { + unreachable!() + }; + let operator = operator.with_variant(variant); + suspension_and_pattern = Some(parse_pattern(items, start, precedence)); + ArgumentType { operator, type_ } + }); + let (suspension, pattern) = + suspension_and_pattern.unwrap_or_else(|| parse_pattern(items, start, precedence)); + let pattern = pattern.unwrap_or_else(|| { + empty_tree( + suspension + .as_ref() + .map(|t| t.code.position_after()) + .or_else(|| open2.as_ref().map(|t| t.code.position_after())) + .or_else(|| open1.as_ref().map(|t| t.code.position_after())) + .or_else(|| type_.as_ref().map(|t| t.operator.left_offset.code.position_before())) + // Why does this one need a type annotation??? 
+ .or_else(|| { + close2 + .as_ref() + .map(|t: &token::CloseSymbol| t.left_offset.code.position_before()) + }) + .or_else(|| default.as_ref().map(|t| t.equals.left_offset.code.position_before())) + .or_else(|| close1.as_ref().map(|t| t.left_offset.code.position_before())) + .unwrap(), + ) + .with_error(SyntaxError::ArgDefExpectedPattern) + }); + ArgumentDefinition { + open: open1, + open2, + suspension, + pattern, + type_, + close2, + default, + close: close1, + } +} + +fn analyze_arg_def(outer: &[Item]) -> Result { + let mut default = None; + let mut type_ = None; + match find_top_level_operator(outer)? { + None => {} + Some(( + annotation_op_pos, + Token { variant: token::Variant::TypeAnnotationOperator(_), .. }, + )) => { + type_ = (Unparenthesized, annotation_op_pos).into(); + } + Some((assignment_op_pos, Token { variant: token::Variant::AssignmentOperator(_), .. })) => { + default = assignment_op_pos.into(); + match find_top_level_operator(&outer[..assignment_op_pos])? { + None => {} + Some(( + annotation_op_pos, + Token { variant: token::Variant::TypeAnnotationOperator(_), .. }, + )) => { + type_ = (Unparenthesized, annotation_op_pos).into(); + } + Some(_) => return Err(SyntaxError::ArgDefUnexpectedOpInParenClause), + } + } + Some(_) => return Err(SyntaxError::ArgDefUnexpectedOpInParenClause), + }; + if type_.is_none() { + if let Item::Group(item::Group { body: inner, .. }) = &outer[0] { + let inner_op = find_top_level_operator(inner)?; + type_ = (Parenthesized, match inner_op { + None => return Err(SyntaxError::ArgDefSpuriousParens), + Some(( + inner_op_pos, + Token { variant: token::Variant::TypeAnnotationOperator(_), .. 
}, + )) => inner_op_pos, + Some(_) => return Err(SyntaxError::ArgDefUnexpectedOpInParenClause), + }) + .into(); + } + } + Ok(ArgDefInfo { type_, default }) +} diff --git a/lib/rust/parser/src/syntax/statement/type_def.rs b/lib/rust/parser/src/syntax/statement/type_def.rs new file mode 100644 index 0000000000..92e6785da6 --- /dev/null +++ b/lib/rust/parser/src/syntax/statement/type_def.rs @@ -0,0 +1,133 @@ +use crate::prelude::*; + +use crate::syntax::item; +use crate::syntax::maybe_with_error; +use crate::syntax::operator::Precedence; +use crate::syntax::statement::function_def::parse_constructor_definition; +use crate::syntax::statement::function_def::parse_type_args; +use crate::syntax::statement::parse_body_block_statement; +use crate::syntax::statement::scan_private_keywords; +use crate::syntax::token; +use crate::syntax::tree; +use crate::syntax::tree::block; +use crate::syntax::tree::ArgumentDefinition; +use crate::syntax::tree::SyntaxError; +use crate::syntax::treebuilding::Spacing; +use crate::syntax::Item; +use crate::syntax::Token; +use crate::syntax::Tree; + + + +pub fn try_parse_type_def<'s>( + items: &mut Vec>, + start: usize, + precedence: &mut Precedence<'s>, + args_buffer: &mut Vec>, +) -> Option> { + match items.get(start) { + Some(Item::Token(token)) if token.code == "type" => {} + _ => return None, + } + match items.get(start + 1) { + Some(Item::Token(Token { variant: token::Variant::Ident(ident), .. 
})) if ident.is_type => { + } + _ => + return precedence + .resolve_non_section(items.drain(start..)) + .unwrap() + .with_error(SyntaxError::TypeDefExpectedTypeName) + .into(), + } + + let body = if let Some(Item::Block(lines)) = items.last_mut() { + let block = mem::take(lines).into_vec(); + items.pop(); + let lines = block.into_iter().map(|item::Line { newline, mut items }| block::Line { + newline, + expression: { + if let Some(Item::Token(token)) = items.first_mut() + && matches!(token.variant, token::Variant::Operator(_)) + { + let opr_ident = + token::variant::Ident { is_operator_lexically: true, ..default() }; + token.variant = token::Variant::Ident(opr_ident); + } + parse_type_body_statement(items, precedence, args_buffer) + }, + }); + block::compound_lines(lines).collect() + } else { + default() + }; + + let params = parse_type_args(items, start + 2, precedence, args_buffer); + + let name = { + let Item::Token(name) = items.pop().unwrap() else { unreachable!() }; + let token::Variant::Ident(variant) = name.variant else { unreachable!() }; + name.with_variant(variant) + }; + + let Item::Token(keyword) = items.pop().unwrap() else { unreachable!() }; + let keyword = keyword.with_variant(token::variant::TypeKeyword()); + + debug_assert_eq!(items.len(), start); + + Tree::type_def(keyword, name, params, body).into() +} + +fn parse_type_body_statement<'s>( + mut items: Vec>, + precedence: &mut Precedence<'s>, + args_buffer: &mut Vec>, +) -> Option> { + let private_keywords = scan_private_keywords(&items); + let mut statement = match items.get(private_keywords) { + Some(Item::Token(Token { variant: token::Variant::Ident(ident), .. 
})) + if ident.is_type + && !items + .get(private_keywords + 1) + .is_some_and(|item| Spacing::of_item(item) == Spacing::Unspaced) => + Some(parse_constructor_definition( + &mut items, + private_keywords, + precedence, + args_buffer, + )), + None => None, + _ => { + let tree = + parse_body_block_statement(&mut items, private_keywords, precedence, args_buffer) + .unwrap(); + let error = match &tree.variant { + tree::Variant::Function(_) + | tree::Variant::ForeignFunction(_) + | tree::Variant::Assignment(_) + | tree::Variant::Documented(_) + | tree::Variant::Annotated(_) + | tree::Variant::AnnotatedBuiltin(_) => None, + tree::Variant::TypeSignature(_) => None, + tree::Variant::TypeDef(_) => None, + _ => Some(SyntaxError::UnexpectedExpressionInTypeBody), + }; + maybe_with_error(tree, error).into() + } + }; + for _ in 0..private_keywords { + let Item::Token(keyword) = items.pop().unwrap() else { unreachable!() }; + let token::Variant::Private(variant) = keyword.variant else { unreachable!() }; + let keyword = keyword.with_variant(variant); + let error = match statement.as_ref().map(|tree| &tree.variant) { + Some( + tree::Variant::Invalid(_) + | tree::Variant::ConstructorDefinition(_) + | tree::Variant::Function(_), + ) => None, + _ => SyntaxError::TypeBodyUnexpectedPrivateUsage.into(), + }; + let private_stmt = Tree::private(keyword, statement.take()); + statement = maybe_with_error(private_stmt, error).into(); + } + statement +} diff --git a/lib/rust/parser/src/syntax/token.rs b/lib/rust/parser/src/syntax/token.rs index cc6d71b7a8..e0c765b6f9 100644 --- a/lib/rust/parser/src/syntax/token.rs +++ b/lib/rust/parser/src/syntax/token.rs @@ -93,9 +93,19 @@ //! //! See the definitions and macros below to learn more. 
+ + +mod collect; +mod operator; + use crate::prelude::*; use crate::source::*; +pub use operator::Associativity; +pub use operator::OperatorProperties; +pub use operator::Precedence; +pub use operator::TokenOperatorProperties; + // ============= @@ -159,6 +169,18 @@ impl<'s, T> Token<'s, T> { let code_length = self.code.length(); span::Ref { left_offset: &self.left_offset, code_length } } + + /// Whether this token has space characters on the left. + pub fn is_spaced(&self) -> bool { + self.left_offset.visible.width_in_spaces != 0 + } +} + +impl<'s> Token<'s, Variant> { + /// Whether this token is a syntactically-special binary operator. + pub fn is_syntactic_binary_operator(&self) -> bool { + is_syntactic_binary_operator(&self.variant) + } } impl<'s, V: Clone> Token<'s, V> { @@ -268,16 +290,31 @@ macro_rules! with_token_definition { ($f:ident ($($args:tt)*)) => { $f! { $($arg #[reflect(skip)] pub is_default: bool, }, - Operator { - #[serde(skip)] - #[reflect(skip)] - pub properties: OperatorProperties, - }, + // === Binary operators === + Operator, + AssignmentOperator, + TypeAnnotationOperator, + ArrowOperator, + DotOperator, + CommaOperator, + // === Unary operators === + UnaryOperator, + AnnotationOperator, + AutoscopeOperator, + LambdaOperator, + SuspensionOperator, + NegationOperator, + Digits { pub base: Option }, NumberBase, Private, + TypeKeyword, + ForeignKeyword, + AllKeyword, + CaseKeyword, + OfKeyword, TextStart, TextEnd, TextSection, @@ -319,285 +356,6 @@ impl Default for Variant { } -// === Operator properties === - -/// Properties of an operator that are identified when lexing. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default)] -pub struct OperatorProperties { - // Precedence - binary_infix_precedence: Option, - unary_prefix_precedence: Option, - is_value_operation: bool, - // Operator section behavior - lhs_section_termination: Option, - // Special properties - is_compile_time_operation: bool, - is_right_associative: bool, - is_modifier: bool, - // Unique operators - is_decimal: bool, - is_type_annotation: bool, - is_assignment: bool, - is_arrow: bool, - is_sequence: bool, - is_suspension: bool, - is_autoscope: bool, - is_annotation: bool, - is_dot: bool, - is_special: bool, - is_token_joiner: bool, -} - -impl OperatorProperties { - /// Construct an operator with default properties. - pub fn new() -> Self { - default() - } - - /// Return a copy of this operator, with the given binary infix precedence. - pub fn with_binary_infix_precedence(self, value: u32) -> Self { - let precedence = Precedence { value }; - debug_assert!(precedence > Precedence::min()); - Self { binary_infix_precedence: Some(precedence), ..self } - } - - /// Return a copy of this operator, with unary prefix parsing allowed. - pub fn with_unary_prefix_mode(self, precedence: Precedence) -> Self { - debug_assert!(precedence > Precedence::min()); - Self { unary_prefix_precedence: Some(precedence), ..self } - } - - /// Return a copy of this operator, modified to be flagged as a compile time operation. - pub fn as_compile_time_operation(self) -> Self { - Self { is_compile_time_operation: true, ..self } - } - - /// Return whether this operator is flagged as a compile time operation. - pub fn is_compile_time_operation(&self) -> bool { - self.is_compile_time_operation - } - - /// Mark the operator as a value-level operation, as opposed to functional. - pub fn as_value_operation(self) -> Self { - Self { is_value_operation: true, ..self } - } - - /// Return whether the operator is a value-level operation, as opposed to functional. 
- pub fn is_value_operation(&self) -> bool { - self.is_value_operation - } - - /// Return a copy of this operator, modified to be flagged as right associative. - pub fn as_right_associative(self) -> Self { - Self { is_right_associative: true, ..self } - } - - /// Return a copy of this operator, modified to be flagged as an modified-assignment operator. - pub fn as_modifier(self) -> Self { - Self { is_modifier: true, ..self } - } - - /// Return a copy of this operator, modified to be flagged as special. - pub fn as_special(self) -> Self { - Self { is_special: true, ..self } - } - - /// Return a copy of this operator, modified to be flagged as the token-joiner operator. - pub fn as_token_joiner(self) -> Self { - Self { is_token_joiner: true, ..self } - } - - /// Return a copy of this operator, modified to have the specified LHS operator-section/ - /// template-function behavior. - pub fn with_lhs_section_termination(self, lhs_section_termination: T) -> Self - where T: Into> { - Self { lhs_section_termination: lhs_section_termination.into(), ..self } - } - - /// Return a copy of this operator, modified to be flagged as a type annotation operator. - pub fn as_type_annotation(self) -> Self { - Self { is_type_annotation: true, ..self } - } - - /// Return a copy of this operator, modified to be flagged as an assignment operator. - pub fn as_assignment(self) -> Self { - Self { is_assignment: true, ..self } - } - - /// Return a copy of this operator, modified to be flagged as an arrow operator. - pub fn as_arrow(self) -> Self { - Self { is_arrow: true, ..self } - } - - /// Return a copy of this operator, modified to be flagged as the sequence operator. - pub fn as_sequence(self) -> Self { - Self { is_sequence: true, ..self } - } - - /// Return a copy of this operator, modified to be flagged as the annotation operator. 
- pub fn as_annotation(self) -> Self { - Self { is_annotation: true, ..self } - } - - /// Return a copy of this operator, modified to be flagged as the execution-suspension operator. - pub fn as_suspension(self) -> Self { - Self { is_suspension: true, ..self } - } - - /// Return a copy of this operator, modified to be flagged as the autoscope operator. - pub fn as_autoscope(self) -> Self { - Self { is_autoscope: true, ..self } - } - - /// Return a copy of this operator, modified to be flagged as the dot operator. - pub fn as_dot(self) -> Self { - Self { is_dot: true, ..self } - } - - /// Return a copy of this operator, modified to be interpreted as a decimal point. - pub fn as_decimal(self) -> Self { - Self { is_decimal: true, ..self } - } - - /// Return this operator's binary infix precedence, if it has one. - pub fn binary_infix_precedence(&self) -> Option { - self.binary_infix_precedence - } - - /// Return this operator's unary prefix precedence, if it has one. - pub fn unary_prefix_precedence(&self) -> Option { - self.unary_prefix_precedence - } - - /// Return whether this operator can form operator sections. - pub fn can_form_section(&self) -> bool { - !self.is_compile_time_operation - } - - /// Return whether this operator is the type annotation operator. - pub fn is_type_annotation(&self) -> bool { - self.is_type_annotation - } - - /// Return the LHS operator-section/template-function behavior of this operator. - pub fn lhs_section_termination(&self) -> Option { - self.lhs_section_termination - } - - /// Return whether this operator is illegal outside special uses. - pub fn is_special(&self) -> bool { - self.is_special - } - - /// Return whether this operator is the assignment operator. - pub fn is_assignment(&self) -> bool { - self.is_assignment - } - - /// Return whether this operator is a modified-assignment operator. - pub fn is_modifier(&self) -> bool { - self.is_modifier - } - - /// Return whether this operator is the arrow operator. 
- pub fn is_arrow(&self) -> bool { - self.is_arrow - } - - /// Return whether this operator is the sequence operator. - pub fn is_sequence(&self) -> bool { - self.is_sequence - } - - /// Return whether this operator is the execution-suspension operator. - pub fn is_suspension(&self) -> bool { - self.is_suspension - } - - /// Return whether this operator is the autoscope operator. - pub fn is_autoscope(&self) -> bool { - self.is_autoscope - } - - /// Return whether this operator is the annotation operator. - pub fn is_annotation(&self) -> bool { - self.is_annotation - } - - /// Return whether this operator is the dot operator. - pub fn is_dot(&self) -> bool { - self.is_dot - } - - /// Return whether this operator is the token-joiner operator. - pub fn is_token_joiner(&self) -> bool { - self.is_token_joiner - } - - /// Return this operator's associativity. - pub fn associativity(&self) -> Associativity { - match self.is_right_associative { - false => Associativity::Left, - true => Associativity::Right, - } - } - - /// Return whether this operator is a decimal point. - pub fn is_decimal(&self) -> bool { - self.is_decimal - } -} - -/// Value that can be compared to determine which operator will bind more tightly within an -/// expression. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Reflect, Deserialize, PartialOrd, Ord)] -pub struct Precedence { - /// A numeric value determining precedence order. - value: u32, -} - -impl Precedence { - /// Return a precedence that is lower than the precedence of any operator. - pub fn min() -> Self { - Precedence { value: 0 } - } - - /// Return the precedence for any operator. - pub fn min_valid() -> Self { - Precedence { value: 1 } - } - - /// Return a precedence that is not lower than any other precedence. - pub fn max() -> Self { - Precedence { value: 100 } - } - - /// Return the precedence of application. - pub fn application() -> Self { - Precedence { value: 80 } - } - - /// Return the precedence of unary minus. 
- pub fn unary_minus() -> Self { - Precedence { value: 79 } - } - - /// Return the precedence of unary minus when applied to a numeric literal. - pub fn unary_minus_numeric_literal() -> Self { - Precedence { value: 80 } - } -} - -/// Associativity (left or right). -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Associativity { - /// Left-associative. - Left, - /// Right-associative. - Right, -} - - // === Numbers === /// Alternate numeric bases (decimal is the default). @@ -721,4 +479,5 @@ macro_rules! define_token_type { } with_token_definition!(define_token_type()); +use crate::syntax::token::operator::is_syntactic_binary_operator; pub use variant::Variant; diff --git a/lib/rust/parser/src/syntax/token/collect.rs b/lib/rust/parser/src/syntax/token/collect.rs new file mode 100644 index 0000000000..98e59dc081 --- /dev/null +++ b/lib/rust/parser/src/syntax/token/collect.rs @@ -0,0 +1,55 @@ +use enso_prelude::*; + +use crate::source; +use crate::syntax::token; +use crate::syntax::BlockHierarchyConsumer; +use crate::syntax::Finish; +use crate::syntax::GroupHierarchyConsumer; +use crate::syntax::NewlineConsumer; +use crate::syntax::Token; +use crate::syntax::TokenConsumer; + + +// ========================= +// === Collecting Tokens === +// ========================= + +impl<'s> TokenConsumer<'s> for Vec> { + fn push_token(&mut self, token: Token<'s>) { + self.push(token); + } +} + +impl<'s> NewlineConsumer<'s> for Vec> { + fn push_newline(&mut self, token: token::Newline<'s>) { + self.push(token.into()); + } +} + +impl<'s> BlockHierarchyConsumer for Vec> { + fn start_block(&mut self) { + self.push(Token(source::Offset::default(), default(), token::Variant::block_start())); + } + + fn end_block(&mut self) { + self.push(Token(source::Offset::default(), default(), token::Variant::block_end())); + } +} + +impl<'s> GroupHierarchyConsumer<'s> for Vec> { + fn start_group(&mut self, open: token::OpenSymbol<'s>) { + self.push(open.into()) + } + + fn end_group(&mut self, 
close: token::CloseSymbol<'s>) { + self.push(close.into()) + } +} + +impl<'s> Finish for Vec> { + type Result = Vec>; + + fn finish(&mut self) -> Self::Result { + mem::take(self) + } +} diff --git a/lib/rust/parser/src/syntax/token/operator.rs b/lib/rust/parser/src/syntax/token/operator.rs new file mode 100644 index 0000000000..b6e83d3a1f --- /dev/null +++ b/lib/rust/parser/src/syntax/token/operator.rs @@ -0,0 +1,331 @@ +use crate::syntax::token::*; + +use crate::lexer::analyze_non_syntactic_operator; +use crate::syntax::operator::SectionTermination; + + + +/// Properties of an operator that are identified when lexing. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default)] +pub struct OperatorProperties { + // Precedence / associativity + binary_infix_precedence: Option, + unary_prefix_precedence: Option, + is_value_operation: bool, + is_right_associative: bool, + // Special properties + lhs_section_termination: Option, + is_modifier: bool, + is_compile_time: bool, + rhs_is_non_expression: bool, +} + +pub fn is_syntactic_binary_operator(variant: &Variant) -> bool { + use Variant::*; + matches!( + variant, + AssignmentOperator(_) | TypeAnnotationOperator(_) | ArrowOperator(_) | CommaOperator(_) + ) +} + +impl OperatorProperties { + /// Construct an operator with default properties. + pub fn new() -> Self { + default() + } + + /// Construct a value-level operator. + pub fn value() -> Self { + Self { is_value_operation: true, ..default() } + } + + /// Construct a functional operator. + pub fn functional() -> Self { + Self { is_value_operation: false, ..default() } + } + + /// Return a copy of this operator, with the given binary infix precedence. 
+ pub fn with_binary_infix_precedence(self, value: u8) -> Self { + let precedence = Precedence { value }; + debug_assert!(precedence > Precedence::min()); + debug_assert!(value & 0x80 == 0); + debug_assert!((value + 1) & 0x80 == 0); + Self { binary_infix_precedence: Some(precedence), ..self } + } + + /// Return a copy of this operator, with unary prefix parsing allowed. + pub fn with_unary_prefix_mode(self, precedence: Precedence) -> Self { + debug_assert!(precedence > Precedence::min()); + Self { unary_prefix_precedence: Some(precedence), ..self } + } + + /// Mark the operator as a value-level operation, as opposed to functional. + pub fn as_value_operation(self) -> Self { + Self { is_value_operation: true, ..self } + } + + /// Return whether the operator is a value-level operation, as opposed to functional. + pub fn is_value_operation(&self) -> bool { + self.is_value_operation + } + + /// Return a copy of this operator, modified to be flagged as right associative. + pub fn as_right_associative(self) -> Self { + Self { is_right_associative: true, ..self } + } + + /// Return a copy of this operator, modified to be flagged as a modified-assignment operator. + pub fn as_modifier(self) -> Self { + Self { is_modifier: true, ..self } + } + + /// Return a copy of this operator, modified to have the specified LHS operator-section/ + /// template-function behavior. + fn with_lhs_section_termination(self, lhs_section_termination: T) -> Self + where T: Into> { + Self { lhs_section_termination: lhs_section_termination.into(), ..self } + } + + /// Return this operator's binary infix precedence, if it has one. + pub fn binary_infix_precedence(&self) -> Option { + self.binary_infix_precedence + } + + /// Return this operator's unary prefix precedence, if it has one. + pub fn unary_prefix_precedence(&self) -> Option { + self.unary_prefix_precedence + } + + /// Return whether this operator can form operator sections. 
+ pub fn can_form_section(&self) -> bool { + !self.is_compile_time + } + + /// Return the LHS operator-section/template-function behavior of this operator. + pub fn lhs_section_termination(&self) -> Option { + self.lhs_section_termination + } + + /// Return whether this operator is a modified-assignment operator. + pub fn is_modifier(&self) -> bool { + self.is_modifier + } + + /// Return this operator's associativity. + pub fn associativity(&self) -> Associativity { + match self.is_right_associative { + false => Associativity::Left, + true => Associativity::Right, + } + } + + /// Whether the RHS is an expression; if true, the operator may introduce a body block. + pub fn rhs_is_expression(&self) -> bool { + !self.rhs_is_non_expression + } +} + +/// Operator-like tokens have operator properties, including normal operators and syntactic +/// operators. +trait HasOperatorProperties { + /// Return the properties of this operator. + fn operator_properties(&self) -> OperatorProperties; +} + +/// If a token is operator-like, it has associated properties. +pub trait TokenOperatorProperties { + /// Return a value if this token is operator-like. 
+ fn operator_properties(&self) -> Option; +} + +impl<'s, Variant: HasOperatorProperties> HasOperatorProperties for Token<'s, Variant> { + fn operator_properties(&self) -> OperatorProperties { + self.variant.operator_properties() + } +} + +impl<'s> TokenOperatorProperties for Token<'s> { + fn operator_properties(&self) -> Option { + Some(match self.variant { + Variant::Operator(_) => analyze_non_syntactic_operator(self.code.repr.0), + Variant::AssignmentOperator(op) => op.operator_properties(), + Variant::TypeAnnotationOperator(op) => op.operator_properties(), + Variant::ArrowOperator(op) => op.operator_properties(), + Variant::AnnotationOperator(op) => op.operator_properties(), + Variant::AutoscopeOperator(op) => op.operator_properties(), + Variant::NegationOperator(op) => op.operator_properties(), + Variant::LambdaOperator(op) => op.operator_properties(), + Variant::DotOperator(op) => op.operator_properties(), + Variant::SuspensionOperator(op) => op.operator_properties(), + Variant::CommaOperator(op) => op.operator_properties(), + _ => return None, + }) + } +} + +impl HasOperatorProperties for variant::AssignmentOperator { + fn operator_properties(&self) -> OperatorProperties { + OperatorProperties { + binary_infix_precedence: Some(Precedence { value: 1 }), + lhs_section_termination: Some(SectionTermination::Unwrap), + is_right_associative: true, + is_compile_time: true, + ..default() + } + } +} + +impl HasOperatorProperties for variant::TypeAnnotationOperator { + fn operator_properties(&self) -> OperatorProperties { + OperatorProperties { + binary_infix_precedence: Some(Precedence { value: 2 }), + lhs_section_termination: Some(SectionTermination::Reify), + is_right_associative: true, + is_compile_time: true, + rhs_is_non_expression: true, + ..default() + } + } +} + +impl HasOperatorProperties for variant::ArrowOperator { + fn operator_properties(&self) -> OperatorProperties { + OperatorProperties { + binary_infix_precedence: Some(Precedence { value: 2 }), + 
lhs_section_termination: Some(SectionTermination::Unwrap), + is_right_associative: true, + is_compile_time: true, + ..default() + } + } +} + +impl HasOperatorProperties for variant::AnnotationOperator { + fn operator_properties(&self) -> OperatorProperties { + OperatorProperties { + unary_prefix_precedence: Some(Precedence::max()), + is_right_associative: true, + is_compile_time: true, + rhs_is_non_expression: true, + ..default() + } + } +} + +impl HasOperatorProperties for variant::AutoscopeOperator { + fn operator_properties(&self) -> OperatorProperties { + OperatorProperties { + unary_prefix_precedence: Some(Precedence::min_valid()), + is_compile_time: true, + rhs_is_non_expression: true, + ..default() + } + } +} + +impl HasOperatorProperties for variant::NegationOperator { + fn operator_properties(&self) -> OperatorProperties { + OperatorProperties { + is_value_operation: true, + unary_prefix_precedence: Some(Precedence::unary_minus()), + ..default() + } + } +} + +impl HasOperatorProperties for variant::LambdaOperator { + fn operator_properties(&self) -> OperatorProperties { + OperatorProperties { + unary_prefix_precedence: Some(Precedence::min_valid()), + is_compile_time: true, + ..default() + } + } +} + +impl HasOperatorProperties for variant::DotOperator { + fn operator_properties(&self) -> OperatorProperties { + OperatorProperties { binary_infix_precedence: Some(Precedence { value: 80 }), ..default() } + } +} + +impl HasOperatorProperties for variant::SuspensionOperator { + fn operator_properties(&self) -> OperatorProperties { + OperatorProperties { + unary_prefix_precedence: Some(Precedence::max()), + is_compile_time: true, + rhs_is_non_expression: true, + ..default() + } + } +} + +impl HasOperatorProperties for variant::CommaOperator { + fn operator_properties(&self) -> OperatorProperties { + OperatorProperties { + binary_infix_precedence: Some(Precedence { value: 1 }), + is_compile_time: true, + rhs_is_non_expression: true, + ..default() + } + } +} + 
+/// Value that can be compared to determine which operator will bind more tightly within an +/// expression. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub struct Precedence { + /// A numeric value determining precedence order. + pub(super) value: u8, +} + +impl Precedence { + /// Return a precedence that is lower than the precedence of any operator. + pub fn min() -> Self { + Precedence { value: 0 } + } + + /// Return the precedence for any operator. + pub fn min_valid() -> Self { + Precedence { value: 1 } + } + + /// Return a precedence that is not lower than any other precedence. + pub fn max() -> Self { + Precedence { value: 100 } + } + + /// Return the precedence of application. + pub fn application() -> Self { + Precedence { value: 80 } + } + + /// Return the precedence of @annotations. + pub fn annotation() -> Self { + Precedence { value: 79 } + } + + /// Return the precedence of unary minus. + pub fn unary_minus() -> Self { + Precedence { value: 79 } + } + + /// Return the precedence of unary minus when applied to a numeric literal. + pub fn unary_minus_numeric_literal() -> Self { + Precedence { value: 80 } + } + + /// Return the value as a number. + pub fn into_u8(self) -> u8 { + self.value + } +} + +/// Associativity (left or right). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Associativity { + /// Left-associative. + Left, + /// Right-associative. 
+ Right, +} diff --git a/lib/rust/parser/src/syntax/tree.rs b/lib/rust/parser/src/syntax/tree.rs index 2c1741c560..a9764ad01c 100644 --- a/lib/rust/parser/src/syntax/tree.rs +++ b/lib/rust/parser/src/syntax/tree.rs @@ -5,11 +5,14 @@ use crate::source::*; use crate::syntax::*; use crate::span_builder; +use crate::syntax::token::TokenOperatorProperties; +use crate::syntax::treebuilding::Spacing; #[cfg(feature = "debug")] use enso_parser_syntax_tree_visitor::Visitor; + // ============== // === Export === // ============== @@ -32,13 +35,13 @@ pub struct Tree<'s> { #[deref] #[deref_mut] #[reflect(subtype)] - pub variant: Box>, + pub variant: Variant<'s>, } /// Constructor. #[allow(non_snake_case)] pub fn Tree<'s>(span: Span<'s>, variant: impl Into>) -> Tree<'s> { - let variant = Box::new(variant.into()); + let variant = variant.into(); Tree { variant, span, warnings: default() } } @@ -51,7 +54,7 @@ impl<'s> AsRef> for Tree<'s> { impl<'s> Default for Tree<'s> { fn default() -> Self { Self { - variant: Box::new(Variant::Ident(Ident { token: Default::default() })), + variant: Variant::Ident(Box::new(Ident { token: Default::default() })), span: Span::empty_without_offset(), warnings: default(), } @@ -63,10 +66,9 @@ impl<'s> Default for Tree<'s> { #[macro_export] macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)* /// [`Tree`] variants definition. See its docs to learn more. - #[tagged_enum] + #[tagged_enum(boxed)] #[cfg_attr(feature = "debug", derive(Visitor))] #[derive(Clone, Eq, PartialEq, Serialize, Reflect, Deserialize)] - #[allow(clippy::large_enum_variant)] // Inefficient. Will be fixed in #182878443. #[tagged_enum(apply_attributes_to = "variants")] #[reflect(inline)] pub enum Variant<'s> { @@ -144,7 +146,7 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! 
{ $($args) pub func: Tree<'s>, pub open: Option>, pub name: token::Ident<'s>, - pub equals: token::Operator<'s>, + pub equals: token::AssignmentOperator<'s>, pub arg: Tree<'s>, pub close: Option>, }, @@ -159,12 +161,12 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args) /// Application of a unary operator, like `-a` or `~handler`. It is a syntax error for `rhs` /// to be `None`. UnaryOprApp { - pub opr: token::Operator<'s>, + pub opr: token::UnaryOperator<'s>, pub rhs: Option>, }, /// Application of the autoscope operator to an identifier, e.g. `..True`. AutoscopedIdentifier { - pub opr: token::Operator<'s>, + pub opr: token::AutoscopeOperator<'s>, pub ident: token::Ident<'s>, }, /// Defines the point where operator sections should be expanded to lambdas. Let's consider @@ -196,7 +198,7 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args) /// - Type constructors definitions. /// - Bindings, defining either methods or type methods. TypeDef { - pub keyword: token::Ident<'s>, + pub keyword: token::TypeKeyword<'s>, pub name: token::Ident<'s>, pub params: Vec>, pub body: Vec>, @@ -206,7 +208,7 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args) /// The pattern which should be unified with the expression. pub pattern: Tree<'s>, /// The `=` token. - pub equals: token::Operator<'s>, + pub equals: token::AssignmentOperator<'s>, /// The expression initializing the value(s) in the pattern. pub expr: Tree<'s>, }, @@ -219,7 +221,7 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args) /// An optional specification of return type, like `-> Integer`. pub returns: Option>, /// The `=` token. - pub equals: token::Operator<'s>, + pub equals: token::AssignmentOperator<'s>, /// The body, which will typically be an inline expression or a `BodyBlock` expression. /// It is an error for this to be empty. pub body: Option>, @@ -227,7 +229,7 @@ macro_rules! 
with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args) /// A foreign function definition. ForeignFunction { /// The `foreign` keyword. - pub foreign: token::Ident<'s>, + pub foreign: token::ForeignKeyword<'s>, /// The function's language. pub language: token::Ident<'s>, /// The name to which the function should be bound. @@ -235,7 +237,7 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args) /// The argument patterns. pub args: Vec>, /// The `=` token. - pub equals: token::Operator<'s>, + pub equals: token::AssignmentOperator<'s>, /// The body, which is source code for the specified language. pub body: Tree<'s>, }, @@ -244,7 +246,7 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args) pub polyglot: Option>, pub from: Option>, pub import: MultiSegmentAppSegment<'s>, - pub all: Option>, + pub all: Option>, #[reflect(rename = "as")] pub as_: Option>, pub hiding: Option>, @@ -253,10 +255,8 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args) Export { pub from: Option>, pub export: MultiSegmentAppSegment<'s>, - pub all: Option>, #[reflect(rename = "as")] pub as_: Option>, - pub hiding: Option>, }, /// An expression grouped by matched parentheses. Group { @@ -269,7 +269,7 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args) /// (Qualified) name of the item whose type is being declared. pub variable: Tree<'s>, /// The `:` token. - pub operator: token::Operator<'s>, + pub operator: token::TypeAnnotationOperator<'s>, /// The variable's type. #[reflect(rename = "type")] pub type_: Tree<'s>, @@ -279,21 +279,21 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args) /// The expression whose type is being annotated. pub expression: Tree<'s>, /// The `:` token. - pub operator: token::Operator<'s>, + pub operator: token::TypeAnnotationOperator<'s>, /// The expression's type. 
#[reflect(rename = "type")] pub type_: Tree<'s>, }, /// A `case _ of` pattern-matching expression. CaseOf { - pub case: token::Ident<'s>, + pub case: token::CaseKeyword<'s>, pub expression: Option>, - pub of: token::Ident<'s>, + pub of: token::OfKeyword<'s>, pub cases: Vec>, }, /// A lambda expression. Lambda { - pub operator: token::Operator<'s>, + pub operator: token::LambdaOperator<'s>, pub arrow: Option>, }, /// An array literal. @@ -316,7 +316,7 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args) /// Table.select_columns : Vector Text | Column_Selector -> Boolean -> Problem_Behavior -> Table /// ``` Annotated { - pub token: token::Operator<'s>, + pub token: token::AnnotationOperator<'s>, pub annotation: token::Ident<'s>, pub argument: Option>, pub newlines: Vec>, @@ -324,7 +324,7 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args) }, /// An expression preceded by a special built-in annotation, e.g. `@Tail_Call foo 4`. AnnotatedBuiltin { - pub token: token::Operator<'s>, + pub token: token::AnnotationOperator<'s>, pub annotation: token::Ident<'s>, pub newlines: Vec>, pub expression: Option>, @@ -510,7 +510,7 @@ impl<'s> span::Builder<'s> for DocComment<'s> { #[allow(missing_docs)] pub struct FractionalDigits<'s> { /// The dot operator. - pub dot: token::Operator<'s>, + pub dot: token::DotOperator<'s>, /// The decimal digits after the dot. pub digits: token::Digits<'s>, } @@ -533,7 +533,7 @@ pub struct ArgumentDefinition<'s> { /// Opening parenthesis (inner). pub open2: Option>, /// An optional execution-suspension unary operator (~). - pub suspension: Option>, + pub suspension: Option>, /// The pattern being bound to an argument. pub pattern: Tree<'s>, /// An optional type ascribed to an argument. @@ -565,7 +565,7 @@ impl<'s> span::Builder<'s> for ArgumentDefinition<'s> { #[derive(Clone, Debug, Eq, PartialEq, Serialize, Reflect, Deserialize)] pub struct ArgumentDefault<'s> { /// The `=` token. 
- pub equals: token::Operator<'s>, + pub equals: token::AssignmentOperator<'s>, /// The default value. pub expression: Tree<'s>, } @@ -581,7 +581,7 @@ impl<'s> span::Builder<'s> for ArgumentDefault<'s> { #[derive(Clone, Debug, Eq, PartialEq, Serialize, Reflect, Deserialize)] pub struct ArgumentType<'s> { /// The `:` token. - pub operator: token::Operator<'s>, + pub operator: token::TypeAnnotationOperator<'s>, /// The type. #[reflect(rename = "type")] pub type_: Tree<'s>, @@ -598,7 +598,7 @@ impl<'s> span::Builder<'s> for ArgumentType<'s> { #[derive(Clone, Debug, Eq, PartialEq, Serialize, Reflect, Deserialize)] pub struct ReturnSpecification<'s> { /// The `->` operator. - pub arrow: token::Operator<'s>, + pub arrow: token::ArrowOperator<'s>, /// The function's return type. #[reflect(rename = "type")] pub r#type: Tree<'s>, @@ -650,7 +650,7 @@ pub struct Case<'s> { /// The pattern being matched. It is an error for this to be absent. pub pattern: Option>, /// Token. - pub arrow: Option>, + pub arrow: Option>, /// The expression associated with the pattern. It is an error for this to be empty. pub expression: Option>, } @@ -686,7 +686,7 @@ pub type OperatorOrError<'s> = Result, MultipleOperatorError #[derive(Clone, Debug, Eq, PartialEq, Serialize, Reflect, Deserialize)] #[allow(missing_docs)] pub struct MultipleOperatorError<'s> { - pub operators: NonEmptyVec>, + pub operators: Box>>, } impl<'s> span::Builder<'s> for MultipleOperatorError<'s> { @@ -799,6 +799,72 @@ enum WarningId { pub const WARNINGS: [&str; WarningId::NUM_WARNINGS as usize] = ["Spacing is inconsistent with operator precedence"]; +#[allow(missing_copy_implementations)] // Future errors may have attached information. +#[derive(Debug)] +#[allow(missing_docs)] // See associated messages defined below. 
+pub enum SyntaxError { + ArgDefUnexpectedOpInParenClause, + ArgDefSpuriousParens, + ArgDefExpectedPattern, + ExpectedExpression, + ExpectedPattern, + ExpectedQualifiedName, + ExpectedType, + ForeignFnExpectedLanguage, + ForeignFnExpectedName, + ForeignFnExpectedStringBody, + StmtInvalidAssignmentOrMethod, + StmtLhsInvalidOperatorSpacing, + StmtUnexpectedPrivateUsage, + TypeBodyUnexpectedPrivateUsage, + TypeDefExpectedTypeName, + ExprUnexpectedAssignment, + ExprUnclosedParen, + UnexpectedExpressionInTypeBody, + ImportsNoAllInExport, + ImportsNoHidingInExport, + ImportsExpectedNameInExport, + AnnotationOpMustBeAppliedToIdent, + PatternUnexpectedExpression, + PatternUnexpectedDot, +} + +impl From for Cow<'static, str> { + fn from(error: SyntaxError) -> Self { + use SyntaxError::*; + (match error { + AnnotationOpMustBeAppliedToIdent => "The annotation operator must be applied to an identifier", + ArgDefUnexpectedOpInParenClause => "Unexpected operator in parenthesized argument definition clause", + ArgDefSpuriousParens => "Invalid parentheses in argument definition", + ArgDefExpectedPattern => "Expected identifier or wildcard in argument binding", + ExpectedExpression => "Expected expression", + ExpectedPattern => "Expected pattern", + ExpectedQualifiedName => "Expected qualified name.", + ExpectedType => "Expected type", + ForeignFnExpectedLanguage => "Expected language name in foreign function definition", + ForeignFnExpectedName => "Expected function name in foreign function definition", + ForeignFnExpectedStringBody => "The body of a foreign function must be a text literal", + StmtInvalidAssignmentOrMethod => "Invalid assignment or method definition", + StmtLhsInvalidOperatorSpacing => + "Each operator on the left side of an assignment operator must be applied to two operands, with the same spacing on each side", + StmtUnexpectedPrivateUsage => + "In a body block, the `private` keyword can only be applied to a function definition", + TypeBodyUnexpectedPrivateUsage 
=> + "In a type definition, the `private` keyword can only be applied to a constructor or function definition", + TypeDefExpectedTypeName => "Expected type identifier in type declaration", + ExprUnexpectedAssignment => "Unexpected use of assignment operator in expression", + ExprUnclosedParen => "Unclosed parenthesis in expression", + UnexpectedExpressionInTypeBody => "Expression unexpected in type definition", + ImportsExpectedNameInExport => "Expected name following `export` keyword", + ImportsNoAllInExport => "`all` not allowed in `export` statement", + ImportsNoHidingInExport => "`hiding` not allowed in `export` statement", + PatternUnexpectedExpression => "Expression invalid in a pattern", + PatternUnexpectedDot => "In a pattern, the dot operator can only be used in a qualified name", + }) + .into() + } +} + // ==================================== @@ -810,25 +876,7 @@ pub const WARNINGS: [&str; WarningId::NUM_WARNINGS as usize] = /// For most input types, this simply constructs an `App`; however, for some operand types /// application has special semantics. pub fn apply<'s>(mut func: Tree<'s>, mut arg: Tree<'s>) -> Tree<'s> { - match (&mut *func.variant, &mut *arg.variant) { - (Variant::Annotated(func_ @ Annotated { argument: None, .. 
}), _) => { - func.span.code_length += arg.span.length_including_whitespace(); - func_.argument = maybe_apply(mem::take(&mut func_.argument), arg).into(); - func - } - (Variant::AnnotatedBuiltin(func_), _) => { - func.span.code_length += arg.span.length_including_whitespace(); - func_.expression = maybe_apply(mem::take(&mut func_.expression), arg).into(); - func - } - ( - Variant::OprApp(OprApp { lhs: Some(_), opr: Ok(_), rhs: rhs @ None }), - Variant::ArgumentBlockApplication(ArgumentBlockApplication { lhs: None, arguments }), - ) => { - func.span.code_length += arg.span.length_including_whitespace(); - *rhs = block::body_from_lines(mem::take(arguments)).into(); - func - } + match (&mut func.variant, &mut arg.variant) { (_, Variant::ArgumentBlockApplication(block)) if block.lhs.is_none() => { let code = func.span.code_length + arg.span.left_offset.code.length() + arg.span.code_length; @@ -839,7 +887,7 @@ pub fn apply<'s>(mut func: Tree<'s>, mut arg: Tree<'s>) -> Tree<'s> { first.newline.left_offset += arg_left_offset; } block.lhs = Some(func); - arg + return arg; } (_, Variant::OperatorBlockApplication(block)) if block.lhs.is_none() => { let code = @@ -851,30 +899,16 @@ pub fn apply<'s>(mut func: Tree<'s>, mut arg: Tree<'s>) -> Tree<'s> { first.newline.left_offset += arg_left_offset; } block.lhs = Some(func); - arg + return arg; } - (_, Variant::OprApp(OprApp { lhs: Some(lhs), opr: Ok(opr), rhs: Some(rhs) })) - if opr.properties.is_assignment() - && let Variant::Ident(lhs) = &*lhs.variant => - { - let mut lhs = lhs.token.clone(); - lhs.left_offset += arg.span.left_offset; - Tree::named_app(func, None, lhs, opr.clone(), rhs.clone(), None) - } - (_, Variant::Group(Group { open: Some(open), body: Some(body), close: Some(close) })) - if let box Variant::OprApp(OprApp { lhs: Some(lhs), opr: Ok(opr), rhs: Some(rhs) }) = - &body.variant - && opr.properties.is_assignment() - && let Variant::Ident(lhs) = &*lhs.variant => - { - let mut open = open.clone(); - 
open.left_offset += arg.span.left_offset; - let open = Some(open); - let close = Some(close.clone()); - Tree::named_app(func, open, lhs.token.clone(), opr.clone(), rhs.clone(), close) - } - _ => Tree::app(func, arg), + _ => {} } + let error = match Spacing::of_tree(&arg) { + Spacing::Spaced => None, + Spacing::Unspaced if matches!(arg.variant, Variant::SuspendedDefaultArguments(_)) => None, + Spacing::Unspaced => Some("Space required between terms."), + }; + maybe_with_error(Tree::app(func, arg), error) } fn maybe_apply<'s>(f: Option>, x: Tree<'s>) -> Tree<'s> { @@ -890,142 +924,59 @@ fn maybe_apply<'s>(f: Option>, x: Tree<'s>) -> Tree<'s> { /// `ArgumentBlock`) is reinterpreted as a `BodyBlock` when it appears in the RHS of an operator /// expression. pub fn apply_operator<'s>( - mut lhs: Option>, - opr: Vec>, - mut rhs: Option>, + lhs: Option>, + opr: Vec>, + rhs: Option>, ) -> Tree<'s> { let opr = match opr.len() { - 0 => return apply(lhs.unwrap(), rhs.unwrap()), + 0 => unreachable!(), 1 => Ok(opr.into_iter().next().unwrap()), - _ => Err(MultipleOperatorError { operators: NonEmptyVec::try_from(opr).unwrap() }), - }; - if let Ok(opr_) = &opr - && opr_.properties.is_token_joiner() - && let Some(lhs_) = lhs.as_mut() - && let Some(rhs_) = rhs.as_mut() - { - return match (&mut *lhs_.variant, &mut *rhs_.variant) { - ( - Variant::Number(func_ @ Number { base: _, integer: None, fractional_digits: None }), - Variant::Number(Number { base: None, integer, fractional_digits }), - ) => { - func_.integer = mem::take(integer); - func_.fractional_digits = mem::take(fractional_digits); - lhs_.span.code_length += rhs_.span.code_length; - lhs.take().unwrap() - } - _ => { - debug_assert!(false, "Unexpected use of token-joiner operator!"); - apply(lhs.take().unwrap(), rhs.take().unwrap()) - } - }; - } - if let Ok(opr_) = &opr - && opr_.properties.is_special() - { - let tree = Tree::opr_app(lhs, opr, rhs); - return tree.with_error("Invalid use of special operator."); - } - if let 
Ok(opr_) = &opr - && opr_.properties.is_type_annotation() - { - return match (lhs, rhs) { - (Some(lhs), Some(rhs)) => Tree::type_annotated(lhs, opr.unwrap(), rhs), - (lhs, rhs) => { - let invalid = Tree::opr_app(lhs, opr, rhs); - invalid.with_error("`:` operator must be applied to two operands.") - } - }; - } - if let Ok(opr_) = &opr - && !opr_.properties.can_form_section() - && lhs.is_none() - && rhs.is_none() - { - let error = format!("Operator `{opr:?}` must be applied to two operands."); - let invalid = Tree::opr_app(lhs, opr, rhs); - return invalid.with_error(error); - } - if let Ok(opr) = &opr - && opr.properties.is_decimal() - && let Some(lhs) = lhs.as_mut() - && let box Variant::Number(lhs_) = &mut lhs.variant - && lhs_.fractional_digits.is_none() - && let Some(rhs) = rhs.as_mut() - && let box Variant::Number(Number { - base: None, - integer: Some(digits), - fractional_digits: None, - }) = &mut rhs.variant - { - let dot = opr.clone(); - let digits = digits.clone(); - lhs.span.code_length += dot.code.length() + rhs.span.code_length; - lhs_.fractional_digits = Some(FractionalDigits { dot, digits }); - return lhs.clone(); - } - if let Some(rhs_) = rhs.as_mut() { - if let Variant::ArgumentBlockApplication(block) = &mut *rhs_.variant { - if block.lhs.is_none() { - if let Some(first) = block.arguments.first_mut() { - first.newline.left_offset += rhs_.span.left_offset.take_as_prefix(); - } - let ArgumentBlockApplication { lhs: _, arguments } = block; - let arguments = mem::take(arguments); - *rhs_ = block::body_from_lines(arguments); - } - } - } - Tree::opr_app(lhs, opr, rhs) -} - -/// Apply a unary operator to an operand. -/// -/// For most inputs this will simply construct a `UnaryOprApp`; however, some operators are special. -pub fn apply_unary_operator<'s>(opr: token::Operator<'s>, rhs: Option>) -> Tree<'s> { - if opr.properties.is_annotation() - && let Some(Tree { variant: box Variant::Ident(Ident { token }), .. 
}) = rhs - { - return match token.is_type { - true => Tree::annotated_builtin(opr, token, vec![], None), - false => Tree::annotated(opr, token, None, vec![], None), - }; - } - if opr.properties.is_autoscope() - && let Some(rhs) = rhs - { - return if let box Variant::Ident(Ident { mut token }) = rhs.variant { - let applied_to_type = token.variant.is_type; - token.left_offset = rhs.span.left_offset; - let autoscope_application = Tree::autoscoped_identifier(opr, token); - return if applied_to_type { - autoscope_application - } else { - autoscope_application.with_error( - "The auto-scope operator may only be applied to a capitalized identifier.", + _ => Err(MultipleOperatorError { + operators: Box::new( + NonEmptyVec::try_from( + opr.into_iter() + .map(|opr| opr.with_variant(token::variant::Operator())) + .collect::>(), ) + .unwrap(), + ), + }), + }; + match opr { + Ok(opr) => { + let error = match (&opr.variant, lhs.as_ref().map(|tree| &tree.variant), &rhs) { + (_, Some(Variant::AutoscopedIdentifier(_)), _) if !opr.is_spaced() => + Some("Space required between term and operator."), + (_, _, None) | (_, None, _) if opr.is_syntactic_binary_operator() => + Some("Operator must be applied to two operands."), + ( + token::Variant::Operator(_) + | token::Variant::DotOperator(_) + | token::Variant::ArrowOperator(_) + | token::Variant::TypeAnnotationOperator(_) + // Old lambda syntax: (a = b) -> a + | token::Variant::AssignmentOperator(_), + _, + _, + ) => None, + _ => Some("Invalid use of syntactic operator in expression"), }; - } else { - Tree::unary_opr_app(opr, Some(rhs)) - .with_error("The auto-scope operator (..) 
may only be applied to an identifier.") - }; + let tree = match (opr.variant, lhs, rhs) { + (token::Variant::TypeAnnotationOperator(annotation), Some(lhs), Some(rhs)) => + Tree::type_annotated(lhs, opr.with_variant(annotation), rhs), + (_, lhs, rhs) => + Tree::opr_app(lhs, Ok(opr.with_variant(token::variant::Operator())), rhs), + }; + maybe_with_error(tree, error) + } + _ => Tree::opr_app(lhs, opr.map(|opr| opr.with_variant(token::variant::Operator())), rhs), } - if !opr.properties.can_form_section() && rhs.is_none() { - let error = format!("Operator `{opr:?}` must be applied to an operand."); - let invalid = Tree::unary_opr_app(opr, rhs); - return invalid.with_error(error); - } - Tree::unary_opr_app(opr, rhs) } /// Create an AST node for a token. pub fn to_ast(token: Token) -> Tree { match token.variant { token::Variant::Ident(ident) => token.with_variant(ident).into(), - token::Variant::Digits(number) => - Tree::number(None, Some(token.with_variant(number)), None), - token::Variant::NumberBase(base) => - Tree::number(Some(token.with_variant(base)), None, None), token::Variant::Wildcard(wildcard) => Tree::wildcard(token.with_variant(wildcard), default()), token::Variant::SuspendedDefaultArguments(t) => Tree::suspended_default_arguments(token.with_variant(t)), token::Variant::OpenSymbol(s) => @@ -1039,7 +990,24 @@ pub fn to_ast(token: Token) -> Tree { | token::Variant::BlockEnd(_) // This should be unreachable: `Precedence::resolve` doesn't calls `to_ast` for operators. | token::Variant::Operator(_) + | token::Variant::AssignmentOperator(_) + | token::Variant::TypeAnnotationOperator(_) + | token::Variant::ArrowOperator(_) + | token::Variant::AutoscopeOperator(_) + | token::Variant::UnaryOperator(_) + | token::Variant::NegationOperator(_) + | token::Variant::LambdaOperator(_) + | token::Variant::DotOperator(_) + | token::Variant::SuspensionOperator(_) + | token::Variant::AnnotationOperator(_) + | token::Variant::CommaOperator(_) + // Keywords are handled by macros. 
| token::Variant::Private(_) + | token::Variant::TypeKeyword(_) + | token::Variant::ForeignKeyword(_) + | token::Variant::AllKeyword(_) + | token::Variant::CaseKeyword(_) + | token::Variant::OfKeyword(_) // Handled during compound-token assembly. | token::Variant::TextStart(_) | token::Variant::TextSection(_) @@ -1047,6 +1015,8 @@ pub fn to_ast(token: Token) -> Tree { | token::Variant::TextEnd(_) | token::Variant::TextInitialNewline(_) | token::Variant::TextNewline(_) + | token::Variant::Digits(_) + | token::Variant::NumberBase(_) // Map an error case in the lexer to an error in the AST. | token::Variant::Invalid(_) => { let message = format!("Unexpected token: {token:?}"); @@ -1184,6 +1154,7 @@ spanless_leaf_impls!(Cow<'static, str>); // === ItemVisitable special cases === + #[cfg(feature = "debug")] impl<'s, 'a> ItemVisitable<'s, 'a> for Tree<'s> { fn visit_item>(&'a self, visitor: &mut V) { @@ -1202,6 +1173,13 @@ where &'a Token<'s, T>: Into> } } +#[cfg(feature = "debug")] +impl<'s, 'a, T: ItemVisitable<'s, 'a>> ItemVisitable<'s, 'a> for Box { + fn visit_item>(&'a self, visitor: &mut V) { + Box::as_ref(self).visit_item(visitor) + } +} + // ========================== @@ -1295,3 +1273,14 @@ impl<'s> Tree<'s> { self.variant.visit_item(&mut ItemFnVisitor { f }); } } + + +// === Helper === + +/// Return the input, or an `Invalid` node with the given error. 
+pub fn maybe_with_error(tree: Tree, error: Option>>) -> Tree { + match error { + None => tree, + Some(error) => tree.with_error(error.into()), + } +} diff --git a/lib/rust/parser/src/syntax/tree/block.rs b/lib/rust/parser/src/syntax/tree/block.rs index 469af66607..20d543ad96 100644 --- a/lib/rust/parser/src/syntax/tree/block.rs +++ b/lib/rust/parser/src/syntax/tree/block.rs @@ -2,6 +2,8 @@ use crate::syntax::tree::*; +use crate::syntax::statement::BodyBlockParser; + // ============= @@ -46,19 +48,16 @@ impl<'s> span::Builder<'s> for Line<'s> { // === Body Block === // ================== -/// Build a body block from a sequence of lines; this includes: -/// - Reinterpret the input expressions in statement context (i.e. expressions at the top-level of -/// the block that involve the `=` operator will be reinterpreted as function/variable bindings). -/// - Combine sibling lines in case of multi-line statements, such as annotated statements and -/// documented statements. -pub fn body_from_lines<'s>(lines: impl IntoIterator>) -> Tree<'s> { - use crate::expression_to_statement; - let lines = lines.into_iter().map(|l| l.map_expression(expression_to_statement)); - let statements: Vec<_> = compound_lines(lines).collect(); - Tree::body_block(statements) +/// Parse the top-level of a module. +pub fn parse_module<'s>( + lines: impl IntoIterator>, + precedence: &mut operator::Precedence<'s>, +) -> Tree<'s> { + BodyBlockParser::default().parse_module(lines, precedence) } + // === Multi-line expression construction === /// Adapts a sequence of lines by combining sibling lines in case of multi-line statements, such as @@ -133,19 +132,19 @@ where I: Iterator> /// Representation used to build multi-line statements. 
#[derive(Debug)] enum Prefix<'s> { - Annotation { node: Annotated<'s>, span: Span<'s> }, - BuiltinAnnotation { node: AnnotatedBuiltin<'s>, span: Span<'s> }, - Documentation { node: Documented<'s>, span: Span<'s> }, + Annotation { node: Box>, span: Span<'s> }, + BuiltinAnnotation { node: Box>, span: Span<'s> }, + Documentation { node: Box>, span: Span<'s> }, } impl<'s> TryFrom> for Prefix<'s> { type Error = Tree<'s>; fn try_from(tree: Tree<'s>) -> Result { match tree.variant { - box Variant::Annotated(node) => Ok(Prefix::Annotation { node, span: tree.span }), - box Variant::AnnotatedBuiltin(node @ AnnotatedBuiltin { expression: None, .. }) => + Variant::Annotated(node) => Ok(Prefix::Annotation { node, span: tree.span }), + Variant::AnnotatedBuiltin(node @ box AnnotatedBuiltin { expression: None, .. }) => Ok(Prefix::BuiltinAnnotation { node, span: tree.span }), - box Variant::Documented(node) => Ok(Prefix::Documentation { node, span: tree.span }), + Variant::Documented(node) => Ok(Prefix::Documentation { node, span: tree.span }), _ => Err(tree), } } @@ -154,10 +153,10 @@ impl<'s> TryFrom> for Prefix<'s> { impl<'s> Prefix<'s> { fn push_newline(&mut self, newline: token::Newline<'s>) { let (newlines, span) = match self { - Prefix::Annotation { node: Annotated { newlines, .. }, span } - | Prefix::BuiltinAnnotation { node: AnnotatedBuiltin { newlines, .. }, span } + Prefix::Annotation { node: box Annotated { newlines, .. }, span } + | Prefix::BuiltinAnnotation { node: box AnnotatedBuiltin { newlines, .. }, span } | Prefix::Documentation { - node: Documented { documentation: DocComment { newlines, .. }, .. }, + node: box Documented { documentation: DocComment { newlines, .. }, .. 
}, span, } => (newlines, span), }; @@ -181,14 +180,11 @@ impl<'s> From> for Tree<'s> { fn from(prefix: Prefix<'s>) -> Self { match prefix { Prefix::Annotation { node, span } => - Tree { variant: Box::new(Variant::Annotated(node)), span, warnings: default() }, - Prefix::BuiltinAnnotation { node, span } => Tree { - variant: Box::new(Variant::AnnotatedBuiltin(node)), - span, - warnings: default(), - }, + Tree { variant: Variant::Annotated(node), span, warnings: default() }, + Prefix::BuiltinAnnotation { node, span } => + Tree { variant: Variant::AnnotatedBuiltin(node), span, warnings: default() }, Prefix::Documentation { node, span } => - Tree { variant: Box::new(Variant::Documented(node)), span, warnings: default() }, + Tree { variant: Variant::Documented(node), span, warnings: default() }, } } } @@ -217,9 +213,10 @@ fn to_operator_block_expression<'s>( if let Some(b) = items.get(1) && b.left_visible_offset().width_in_spaces != 0 && let Some(Item::Token(a)) = items.first() - && let token::Variant::Operator(op) = &a.variant + && let Some(properties) = &a.operator_properties() + && properties.can_form_section() { - let operator = Ok(Token(a.left_offset.clone(), a.code.clone(), *op)); + let operator = Ok(Token(a.left_offset.clone(), a.code.clone(), token::variant::Operator())); let mut items = items.into_iter(); items.next(); let expression = precedence.resolve(items).unwrap(); @@ -266,12 +263,7 @@ impl<'s> span::Builder<'s> for OperatorLine<'s> { // === Block Builder === // ===================== -/// Builds an AST block type from a sequence of lines. -/// -/// Note that the block type is not fully determined at this stage: We apply context information -/// later (see `apply_operator`) to distinguish the two non-operator block types, `BodyBlock` and -/// `ArgumentBlockApplication`. Here we treat every non-operator block as an argument block, -/// because creating a body block involves re-interpreting the expressions in statement context. 
+/// Builds an argument block or operator block from a sequence of lines. /// /// The implementation is a state machine. The only top-level transitions are: /// - `Indeterminate` -> `Operator` @@ -279,61 +271,36 @@ impl<'s> span::Builder<'s> for OperatorLine<'s> { /// /// The `Operator` state has two substates, and one possible transition: /// - `body_lines is empty` -> `body_lines is not empty` -#[derive(Debug)] -pub enum Builder<'s> { +#[derive(Debug, Default)] +pub struct Builder<'s> { + state: State, + empty_lines: Vec>, + operator_lines: Vec>, + body_lines: Vec>, +} + +#[derive(Debug, Default)] +enum State { /// The builder is in an indeterminate state until a non-empty line has been encountered, which /// would distinguish an operator-block from a non-operator block. - Indeterminate { - /// The `Newline` token introducing the block, and `Newline` tokens for any empty lines - /// that have been encountered. - empty_lines: Vec>, - }, + // `empty_lines` contains the `Newline` token introducing the block, and `Newline` tokens for + // any empty lines that have been encountered. + #[default] + Indeterminate, /// Building an operator block. If any line doesn't fit the operator-block syntax, that line /// and all following will be placed in `body_lines`. - Operator { - /// Valid operator-block expressions. - operator_lines: Vec>, - /// Any lines violating the expected operator-block syntax. - body_lines: Vec>, - }, - /// Building a non-operator block (either a body block or an argument block). - NonOperator { - /// The block content. - body_lines: Vec>, - }, + // `operator_lines` contains valid operator-block expressions. + // `body_lines` contains any lines violating the expected operator-block syntax. + Operator, + /// Building an argument block. + // `body_lines` contains the block content. + Argument, } impl<'s> Builder<'s> { /// Create a new instance, in initial state. 
pub fn new() -> Self { - Self::Indeterminate { empty_lines: default() } - } - - /// Create a new instance, in a state appropriate for the given expression. - fn new_with_expression( - empty_lines: impl IntoIterator>, - newline: token::Newline<'s>, - items: Vec>, - precedence: &mut operator::Precedence<'s>, - ) -> Self { - let empty_lines = empty_lines.into_iter(); - let new_lines = 1; - match to_operator_block_expression(items, precedence) { - Ok(expression) => { - let expression = Some(expression); - let mut operator_lines = Vec::with_capacity(empty_lines.size_hint().0 + new_lines); - operator_lines.extend(empty_lines.map(OperatorLine::from)); - operator_lines.push(OperatorLine { newline, expression }); - Self::Operator { operator_lines, body_lines: default() } - } - Err(expression) => { - let expression = Some(expression); - let mut body_lines = Vec::with_capacity(empty_lines.size_hint().0 + new_lines); - body_lines.extend(empty_lines.map(Line::from)); - body_lines.push(Line { newline, expression }); - Self::NonOperator { body_lines } - } - } + Self::default() } /// Apply a new line to the state. @@ -343,48 +310,52 @@ impl<'s> Builder<'s> { items: Vec>, precedence: &mut operator::Precedence<'s>, ) { - match self { - Builder::Indeterminate { empty_lines } if items.is_empty() => empty_lines.push(newline), - Builder::Indeterminate { empty_lines } => - *self = Self::new_with_expression(empty_lines.drain(..), newline, items, precedence), - Builder::NonOperator { body_lines, .. } => - body_lines.push(Line { newline, expression: precedence.resolve(items) }), - Builder::Operator { body_lines, .. } if !body_lines.is_empty() => { - body_lines.push(Line { newline, expression: precedence.resolve(items) }); - } - Builder::Operator { operator_lines, body_lines, .. 
} if !items.is_empty() => - match to_operator_block_expression(items, precedence) { + match &mut self.state { + State::Indeterminate if items.is_empty() => self.empty_lines.push(newline), + State::Indeterminate => { + self.state = match to_operator_block_expression(items, precedence) { Ok(expression) => { - let expression = Some(expression); - operator_lines.push(OperatorLine { newline, expression }); + self.operator_lines + .push(OperatorLine { newline, expression: Some(expression) }); + State::Operator } Err(expression) => { - let expression = Some(expression); - body_lines.push(Line { newline, expression }) + self.body_lines.push(Line { newline, expression: Some(expression) }); + State::Argument } - }, - Builder::Operator { operator_lines, .. } => operator_lines.push(newline.into()), + }; + } + State::Argument => + self.body_lines.push(Line { newline, expression: precedence.resolve(items) }), + State::Operator if !self.body_lines.is_empty() => + self.body_lines.push(Line { newline, expression: precedence.resolve(items) }), + State::Operator if items.is_empty() => self.operator_lines.push(newline.into()), + State::Operator => match to_operator_block_expression(items, precedence) { + Ok(expression) => + self.operator_lines.push(OperatorLine { newline, expression: Some(expression) }), + Err(expression) => + self.body_lines.push(Line { newline, expression: Some(expression) }), + }, } } /// Produce an AST node from the state. 
- pub fn build(self) -> Tree<'s> { - match self { - Builder::Indeterminate { empty_lines } => { - let empty_lines = empty_lines.into_iter(); - let lines = empty_lines.map(Line::from).collect(); - Tree::argument_block_application(None, lines) + pub fn build(&mut self) -> Tree<'s> { + match self.state { + State::Operator => { + let mut operator_lines = + Vec::with_capacity(self.empty_lines.len() + self.operator_lines.len()); + operator_lines.extend(self.empty_lines.drain(..).map(OperatorLine::from)); + operator_lines.append(&mut self.operator_lines); + Tree::operator_block_application(None, operator_lines, self.body_lines.split_off(0)) + } + State::Argument | State::Indeterminate => { + let mut body_lines = + Vec::with_capacity(self.empty_lines.len() + self.body_lines.len()); + body_lines.extend(self.empty_lines.drain(..).map(Line::from)); + body_lines.append(&mut self.body_lines); + Tree::argument_block_application(None, body_lines) } - Builder::Operator { operator_lines, body_lines } => - Tree::operator_block_application(None, operator_lines, body_lines), - Builder::NonOperator { body_lines } => - Tree::argument_block_application(None, body_lines), } } } - -impl<'s> Default for Builder<'s> { - fn default() -> Self { - Self::new() - } -} diff --git a/lib/rust/parser/src/syntax/treebuilding.rs b/lib/rust/parser/src/syntax/treebuilding.rs index 9100d88136..ac5b8bfa51 100644 --- a/lib/rust/parser/src/syntax/treebuilding.rs +++ b/lib/rust/parser/src/syntax/treebuilding.rs @@ -1,3 +1,4 @@ +use crate::syntax::tree; use crate::syntax::Token; use crate::syntax::Tree; @@ -5,7 +6,7 @@ use crate::syntax::Tree; mod block; mod compound_token; -mod consumer; +mod numbers; mod whitespace; @@ -14,21 +15,21 @@ mod whitespace; // =============== pub use block::FlattenBlockTrees; -pub use compound_token::AssembleCompoundTokens; -pub use consumer::Finish; -pub use consumer::ItemConsumer; -pub use consumer::TreeConsumer; +pub use compound_token::CompoundTokens; +pub use 
numbers::ParseNumbers; pub use whitespace::PeekSpacing; pub use whitespace::Spacing; pub use whitespace::SpacingLookaheadTokenConsumer; +pub use whitespace::SpacingLookaheadTreeConsumer; // =================== // === TokenOrTree === // =================== +#[allow(missing_docs)] #[derive(Debug)] -enum TokenOrTree<'s> { +pub enum TokenOrTree<'s> { Token(Token<'s>), Tree(Tree<'s>), } @@ -44,3 +45,12 @@ impl<'s> From> for TokenOrTree<'s> { TokenOrTree::Tree(tree) } } + +impl<'s> From> for Tree<'s> { + fn from(t: TokenOrTree<'s>) -> Self { + match t { + TokenOrTree::Token(token) => tree::to_ast(token), + TokenOrTree::Tree(tree) => tree, + } + } +} diff --git a/lib/rust/parser/src/syntax/treebuilding/block.rs b/lib/rust/parser/src/syntax/treebuilding/block.rs index b6f3239918..8f0ea30974 100644 --- a/lib/rust/parser/src/syntax/treebuilding/block.rs +++ b/lib/rust/parser/src/syntax/treebuilding/block.rs @@ -1,9 +1,15 @@ -use crate::syntax; +use crate::prelude::*; + +use crate::syntax::consumer::Finish; +use crate::syntax::consumer::ItemConsumer; +use crate::syntax::consumer::TokenConsumer; +use crate::syntax::consumer::TreeConsumer; +use crate::syntax::item; use crate::syntax::operator; -use crate::syntax::treebuilding::consumer::Finish; -use crate::syntax::treebuilding::consumer::ItemConsumer; -use crate::syntax::treebuilding::consumer::TokenConsumer; -use crate::syntax::treebuilding::consumer::TreeConsumer; +use crate::syntax::statement::BodyBlockParser; +use crate::syntax::token::TokenOperatorProperties; +use crate::syntax::tree::block; +use crate::syntax::GroupHierarchyConsumer; use crate::syntax::Item; @@ -15,30 +21,95 @@ use crate::syntax::Item; /// Consumes `Item`s and passes their content to a token/tree consumer, using an /// [`operator::Precedence`] parser to flatten blocks. #[derive(Debug, Default)] -pub struct FlattenBlockTrees<'s, T> { - inner: T, +pub struct FlattenBlockTrees<'s, Inner> { /// Consumes child blocks. 
Stores no semantic state, but is reused for performance. - child: Option>>, + child: Option>>, + block_context: BlockContext, + block_builder: block::Builder<'s>, + block_parser: BodyBlockParser<'s>, + inner: Inner, } -impl<'s, T: TokenConsumer<'s> + TreeConsumer<'s>> ItemConsumer<'s> for FlattenBlockTrees<'s, T> { +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)] +enum BlockContext { + #[default] + Body, + ArgumentOrOperator, +} + +impl<'s, Inner> ItemConsumer<'s> for FlattenBlockTrees<'s, Inner> +where Inner: TokenConsumer<'s> + TreeConsumer<'s> + GroupHierarchyConsumer<'s> +{ fn push_item(&mut self, item: Item<'s>) { - match item { + self.block_context = match item { Item::Block(lines) => { let mut child = self.child.take().unwrap_or_default(); - self.inner.push_tree(syntax::item::build_block(lines, &mut child)); + self.inner.push_tree(match self.block_context { + BlockContext::Body => + self.block_parser.parse_body_block(lines.into_vec(), &mut child), + BlockContext::ArgumentOrOperator => { + for item::Line { newline, items } in lines.into_vec() { + self.block_builder.push(newline, items, &mut child); + } + self.block_builder.build() + } + }); self.child = Some(child); + BlockContext::ArgumentOrOperator } - Item::Token(token) => self.inner.push_token(token), - Item::Tree(tree) => self.inner.push_tree(tree), - } + Item::Token(token) => { + let properties = token.operator_properties(); + self.inner.push_token(token); + match properties { + Some(properties) if properties.rhs_is_expression() => BlockContext::Body, + _ => BlockContext::ArgumentOrOperator, + } + } + Item::Tree(tree) => { + self.inner.push_tree(tree); + BlockContext::ArgumentOrOperator + } + Item::Group(item::Group { open, body, mut close }) => { + self.inner.start_group(open); + let mut stack = vec![]; + let mut body = body.into_vec().into_iter(); + loop { + while let Some(item) = body.next() { + match item { + Item::Token(token) => self.inner.push_token(token), + Item::Tree(tree) => 
self.inner.push_tree(tree), + Item::Group(group) => { + self.inner.start_group(group.open); + let outer_body = + mem::replace(&mut body, group.body.into_vec().into_iter()); + let outer_close = mem::replace(&mut close, group.close); + stack.push((outer_body, outer_close)); + continue; + } + Item::Block(_) => unreachable!(), + } + } + if let Some(close) = close { + self.inner.end_group(close); + } + if let Some((outer_body, outer_close)) = stack.pop() { + body = outer_body; + close = outer_close; + } else { + break; + } + } + BlockContext::ArgumentOrOperator + } + }; } } -impl<'s, T: Finish> Finish for FlattenBlockTrees<'s, T> { - type Result = T::Result; +impl<'s, Inner: Finish> Finish for FlattenBlockTrees<'s, Inner> { + type Result = Inner::Result; fn finish(&mut self) -> Self::Result { + self.block_context = default(); self.inner.finish() } } diff --git a/lib/rust/parser/src/syntax/treebuilding/compound_token.rs b/lib/rust/parser/src/syntax/treebuilding/compound_token.rs index 1c3c2c1938..6d95c864ec 100644 --- a/lib/rust/parser/src/syntax/treebuilding/compound_token.rs +++ b/lib/rust/parser/src/syntax/treebuilding/compound_token.rs @@ -1,11 +1,14 @@ use enso_prelude::*; use crate::syntax; +use crate::syntax::consumer::Finish; +use crate::syntax::consumer::TokenConsumer; +use crate::syntax::consumer::TreeConsumer; +use crate::syntax::maybe_with_error; use crate::syntax::token; -use crate::syntax::treebuilding::consumer::Finish; -use crate::syntax::treebuilding::consumer::TokenConsumer; -use crate::syntax::treebuilding::consumer::TreeConsumer; +use crate::syntax::GroupHierarchyConsumer; use crate::syntax::Token; +use crate::syntax::Tree; @@ -15,88 +18,50 @@ use crate::syntax::Token; /// Recognizes lexical tokens that are indivisible, and assembles them into trees. 
#[derive(Default, Debug)] -pub struct AssembleCompoundTokens<'s, T> { +pub struct CompoundTokens<'s, Inner> { compounding: Option>, - inner: T, + inner: Inner, } -#[derive(Debug)] -enum CompoundToken<'s> { - TextLiteral(TextLiteralBuilder<'s>), -} - -impl<'s, T: TreeConsumer<'s> + TokenConsumer<'s>> TokenConsumer<'s> - for AssembleCompoundTokens<'s, T> -{ - fn push_token(&mut self, token: Token<'s>) { - match (&mut self.compounding, token.variant) { - (this @ None, token::Variant::TextStart(variant)) => { - let token = token.with_variant(variant); - *this = Some(CompoundToken::TextLiteral(TextLiteralBuilder { - open: token, - newline: default(), - elements: default(), - })); +impl<'s, Inner: TreeConsumer<'s> + TokenConsumer<'s>> CompoundTokens<'s, Inner> { + fn try_start(&mut self, token: Token<'s>) { + match CompoundToken::start(token) { + StartStep::Start(compounding) => self.compounding = Some(compounding), + StartStep::RejectButStart(compounding, token) => { + self.inner.push_token(token); + self.compounding = Some(compounding) } - ( - Some(CompoundToken::TextLiteral(TextLiteralBuilder { - newline: newline @ None, - .. - })), - token::Variant::TextInitialNewline(_), - ) => { - let token = token::newline(token.left_offset, token.code); - *newline = Some(token); - } - ( - Some(CompoundToken::TextLiteral(TextLiteralBuilder { elements, .. })), - token::Variant::TextSection(variant), - ) => { - let token = token.with_variant(variant); - let element = syntax::tree::TextElement::Section { text: token }; - elements.push(element); - } - ( - Some(CompoundToken::TextLiteral(TextLiteralBuilder { elements, .. })), - token::Variant::TextEscape(variant), - ) => { - let token = token.with_variant(variant); - let element = syntax::tree::TextElement::Escape { token }; - elements.push(element); - } - ( - Some(CompoundToken::TextLiteral(TextLiteralBuilder { elements, .. 
})), - token::Variant::TextNewline(_), - ) => { - let token = token::newline(token.left_offset, token.code); - let element = syntax::tree::TextElement::Newline { newline: token }; - elements.push(element); - } - (this @ Some(CompoundToken::TextLiteral(_)), token::Variant::TextEnd(variant)) => { - let builder = match mem::take(this) { - Some(CompoundToken::TextLiteral(builder)) => builder, - _ => unreachable!(), - }; - let close = token.with_variant(variant); - self.inner.push_tree(builder.finish(Some(close))); - } - (_, token::Variant::TextStart(_)) => unreachable!(), - (_, token::Variant::TextInitialNewline(_)) => unreachable!(), - (_, token::Variant::TextSection(_)) => unreachable!(), - (_, token::Variant::TextEscape(_)) => unreachable!(), - (_, token::Variant::TextNewline(_)) => unreachable!(), - (_, token::Variant::TextEnd(_)) => unreachable!(), - _ => self.inner.push_token(token), + StartStep::Reject(token) => self.inner.push_token(token), } } } -impl<'s, T: TreeConsumer<'s>> TreeConsumer<'s> for AssembleCompoundTokens<'s, T> { - fn push_tree(&mut self, mut tree: syntax::Tree<'s>) { +impl<'s, Inner: TreeConsumer<'s> + TokenConsumer<'s>> TokenConsumer<'s> + for CompoundTokens<'s, Inner> +{ + fn push_token(&mut self, token: Token<'s>) { + if let Some(compounding) = self.compounding.take() { + match compounding.step(token) { + Step::Complete(tree) => self.inner.push_tree(tree), + Step::Accept(compounding) => self.compounding = Some(compounding), + Step::Reject(tree, token) => { + self.inner.push_tree(tree); + self.try_start(token); + } + Step::Return(token) => self.inner.push_token(token), + } + } else { + self.try_start(token); + } + } +} + +impl<'s, Inner: TreeConsumer<'s>> TreeConsumer<'s> for CompoundTokens<'s, Inner> { + fn push_tree(&mut self, mut tree: Tree<'s>) { match (&mut self.compounding, &mut tree.variant) { ( Some(CompoundToken::TextLiteral(TextLiteralBuilder { elements, .. 
})), - box syntax::tree::Variant::TextLiteral(syntax::tree::TextLiteral { + syntax::tree::Variant::TextLiteral(box syntax::tree::TextLiteral { open: None, newline: None, elements: rhs_elements, @@ -118,16 +83,16 @@ impl<'s, T: TreeConsumer<'s>> TreeConsumer<'s> for AssembleCompoundTokens<'s, T> } } -impl<'s, T: TreeConsumer<'s>> AssembleCompoundTokens<'s, T> { +impl<'s, Inner: TreeConsumer<'s>> CompoundTokens<'s, Inner> { fn flush(&mut self) { - if let Some(CompoundToken::TextLiteral(builder)) = mem::take(&mut self.compounding) { - self.inner.push_tree(builder.finish(None)) + if let Some(tree) = self.compounding.take().and_then(|builder| builder.flush()) { + self.inner.push_tree(tree); } } } -impl<'s, T: TreeConsumer<'s> + Finish> Finish for AssembleCompoundTokens<'s, T> { - type Result = T::Result; +impl<'s, Inner: TreeConsumer<'s> + Finish> Finish for CompoundTokens<'s, Inner> { + type Result = Inner::Result; fn finish(&mut self) -> Self::Result { self.flush(); @@ -135,8 +100,112 @@ impl<'s, T: TreeConsumer<'s> + Finish> Finish for AssembleCompoundTokens<'s, T> } } +impl<'s, Inner> GroupHierarchyConsumer<'s> for CompoundTokens<'s, Inner> +where Inner: TreeConsumer<'s> + GroupHierarchyConsumer<'s> +{ + fn start_group(&mut self, open: token::OpenSymbol<'s>) { + self.flush(); + self.inner.start_group(open); + } -// === Text literal builder === + fn end_group(&mut self, close: token::CloseSymbol<'s>) { + self.flush(); + self.inner.end_group(close); + } +} + + +// ============================== +// === Compound token builder === +// ============================== + +trait CompoundTokenBuilder<'s>: Sized { + fn start(token: Token<'s>) -> StartStep>; + fn step(self, token: Token<'s>) -> Step, Tree<'s>>; + fn flush(self) -> Option>; +} + +enum StartStep { + Start(State), + RejectButStart(State, Input), + Reject(Input), +} + +enum Step { + Accept(State), + Reject(Output, Input), + Complete(Output), + Return(Input), +} + +impl Step { + fn map_state(self, f: impl 
FnOnce(State) -> State2) -> Step { + match self { + Step::Accept(state) => Step::Accept(f(state)), + Step::Reject(input, output) => Step::Reject(input, output), + Step::Complete(output) => Step::Complete(output), + Step::Return(input) => Step::Return(input), + } + } +} + +#[derive(Debug)] +enum CompoundToken<'s> { + TextLiteral(TextLiteralBuilder<'s>), + OperatorIdentifier(OperatorIdentifierBuilder), + Autoscope(AutoscopeBuilder<'s>), +} + +impl StartStep { + fn map_state(self, f: impl FnOnce(State) -> State1) -> StartStep { + match self { + StartStep::Start(state) => StartStep::Start(f(state)), + StartStep::RejectButStart(state, input) => StartStep::RejectButStart(f(state), input), + StartStep::Reject(input) => StartStep::Reject(input), + } + } + + fn or_else(self, f: impl FnOnce(Input) -> StartStep) -> StartStep { + match self { + StartStep::Start(state) => StartStep::Start(state), + StartStep::RejectButStart(state, input) => StartStep::RejectButStart(state, input), + StartStep::Reject(input) => f(input), + } + } +} + +impl<'s> CompoundTokenBuilder<'s> for CompoundToken<'s> { + fn start(token: Token<'s>) -> StartStep> { + use CompoundToken::*; + StartStep::Reject(token) + .or_else(|token| TextLiteralBuilder::start(token).map_state(TextLiteral)) + .or_else(|token| OperatorIdentifierBuilder::start(token).map_state(OperatorIdentifier)) + .or_else(|token| AutoscopeBuilder::start(token).map_state(Autoscope)) + } + + fn step(self, token: Token<'s>) -> Step, Tree<'s>> { + use CompoundToken::*; + match self { + TextLiteral(builder) => builder.step(token).map_state(TextLiteral), + OperatorIdentifier(builder) => builder.step(token).map_state(OperatorIdentifier), + Autoscope(builder) => builder.step(token).map_state(Autoscope), + } + } + + fn flush(self) -> Option> { + use CompoundToken::*; + match self { + TextLiteral(builder) => builder.flush(), + OperatorIdentifier(builder) => builder.flush(), + Autoscope(builder) => builder.flush(), + } + } +} + + +// 
===================== +// === Text literals === +// ===================== #[derive(Debug)] struct TextLiteralBuilder<'s> { @@ -145,17 +214,158 @@ struct TextLiteralBuilder<'s> { elements: Vec>, } +impl<'s> CompoundTokenBuilder<'s> for TextLiteralBuilder<'s> { + fn start(token: Token<'s>) -> StartStep> { + match token.variant { + token::Variant::TextStart(variant) => { + let token = token.with_variant(variant); + StartStep::Start(Self { open: token, newline: default(), elements: default() }) + } + _ => StartStep::Reject(token), + } + } + + fn step(mut self, token: Token<'s>) -> Step, Tree<'s>> { + match token.variant { + token::Variant::TextInitialNewline(_) => { + let token = token::newline(token.left_offset, token.code); + self.newline = Some(token); + Step::Accept(self) + } + token::Variant::TextSection(variant) => { + let token = token.with_variant(variant); + let element = syntax::tree::TextElement::Section { text: token }; + self.elements.push(element); + Step::Accept(self) + } + token::Variant::TextEscape(variant) => { + let token = token.with_variant(variant); + let element = syntax::tree::TextElement::Escape { token }; + self.elements.push(element); + Step::Accept(self) + } + token::Variant::TextNewline(_) => { + let token = token::newline(token.left_offset, token.code); + let element = syntax::tree::TextElement::Newline { newline: token }; + self.elements.push(element); + Step::Accept(self) + } + token::Variant::TextEnd(variant) => { + let close = token.with_variant(variant); + Step::Complete(self.finish(close)) + } + _ => unreachable!(), + } + } + + fn flush(self) -> Option> { + let Self { open, newline, elements } = self; + Some(Tree::text_literal(Some(open), newline, elements, None)) + } +} + impl<'s> TextLiteralBuilder<'s> { - fn finish(self, close: Option>) -> syntax::Tree<'s> { + fn finish(self, close: token::TextEnd<'s>) -> Tree<'s> { let Self { open, newline, elements } = self; if open.code.starts_with('#') { assert_eq!(newline, None); let doc = 
syntax::tree::DocComment { open, elements, newlines: default() }; - syntax::Tree::documented(doc, default()) + Tree::documented(doc, default()) } else { - let close = - close.and_then(|close| if close.code.is_empty() { None } else { Some(close) }); - syntax::Tree::text_literal(Some(open), newline, elements, close) + let close = if close.code.is_empty() { None } else { Some(close) }; + Tree::text_literal(Some(open), newline, elements, close) } } } + + +// ============================ +// === Operator-identifiers === +// ============================ + +#[derive(Debug)] +struct OperatorIdentifierBuilder; + +impl<'s> CompoundTokenBuilder<'s> for OperatorIdentifierBuilder { + fn start(token: Token<'s>) -> StartStep> { + match token.variant { + token::Variant::DotOperator(_) => StartStep::RejectButStart(Self, token), + _ => StartStep::Reject(token), + } + } + + fn step(self, token: Token<'s>) -> Step, Tree<'s>> { + match token.variant { + token::Variant::Operator(_) + | token::Variant::NegationOperator(_) + | token::Variant::UnaryOperator(_) + if token.left_offset.visible.width_in_spaces == 0 => + Step::Return(token.with_variant(token::Variant::operator_ident().into())), + _ => Step::Return(token), + } + } + + fn flush(self) -> Option> { + None + } +} + + +// ================= +// === Autoscope === +// ================= + +#[derive(Debug)] +struct AutoscopeBuilder<'s> { + operator: token::AutoscopeOperator<'s>, +} + +impl<'s> CompoundTokenBuilder<'s> for AutoscopeBuilder<'s> { + fn start(token: Token<'s>) -> StartStep> { + match token.variant { + token::Variant::AutoscopeOperator(variant) => { + let operator = token.with_variant(variant); + StartStep::Start(Self { operator }) + } + _ => StartStep::Reject(token), + } + } + + fn step(self, token: Token<'s>) -> Step, Tree<'s>> { + match token.variant { + token::Variant::Ident(ident) if !token.is_spaced() => { + let Self { operator } = self; + let token = token.with_variant(ident); + let error = 
(!token.variant.is_type).then_some( + "The auto-scope operator may only be applied to a capitalized identifier.", + ); + let autoscope_application = Tree::autoscoped_identifier(operator, token); + Step::Complete(maybe_with_error(autoscope_application, error)) + } + _ => Step::Reject(self.into_error(), token), + } + } + + fn flush(self) -> Option> { + Some(self.into_error()) + } +} + +impl<'s> AutoscopeBuilder<'s> { + fn into_error(self) -> Tree<'s> { + let Self { operator } = self; + token_to_error(operator, "The autoscope operator must be applied to an identifier.") + } +} + + +// =============== +// === Helpers === +// =============== + +fn token_to_error<'s>( + token: impl Into>, + error: impl Into>, +) -> Tree<'s> { + syntax::tree::to_ast(token.into()).with_error(error) +} diff --git a/lib/rust/parser/src/syntax/treebuilding/consumer.rs b/lib/rust/parser/src/syntax/treebuilding/consumer.rs deleted file mode 100644 index a9e0b8b84a..0000000000 --- a/lib/rust/parser/src/syntax/treebuilding/consumer.rs +++ /dev/null @@ -1,23 +0,0 @@ -use crate::syntax::Item; -use crate::syntax::Token; -use crate::syntax::Tree; - - - -pub trait ItemConsumer<'s> { - fn push_item(&mut self, tree: Item<'s>); -} - -pub trait TreeConsumer<'s> { - fn push_tree(&mut self, tree: Tree<'s>); -} - -pub trait TokenConsumer<'s> { - fn push_token(&mut self, token: Token<'s>); -} - -pub trait Finish { - type Result; - - fn finish(&mut self) -> Self::Result; -} diff --git a/lib/rust/parser/src/syntax/treebuilding/numbers.rs b/lib/rust/parser/src/syntax/treebuilding/numbers.rs new file mode 100644 index 0000000000..2debca7b19 --- /dev/null +++ b/lib/rust/parser/src/syntax/treebuilding/numbers.rs @@ -0,0 +1,178 @@ +use crate::prelude::*; + +use crate::syntax::token; +use crate::syntax::tree; +use crate::syntax::Finish; +use crate::syntax::GroupHierarchyConsumer; +use crate::syntax::Token; +use crate::syntax::TokenConsumer; +use crate::syntax::Tree; +use crate::syntax::TreeConsumer; + + + 
+#[derive(Debug, Default)] +pub struct ParseNumbers<'s, Inner> { + state: State<'s>, + inner: Inner, +} + +#[derive(Debug, Default)] +struct State<'s> { + prev_item_in_expression: bool, + negation: Option>, + number: Option>, +} + +#[derive(Debug)] +enum Number<'s> { + Based { base: token::NumberBase<'s> }, + Fractional { digits: token::Digits<'s>, dot: Option> }, +} + +impl<'s, Inner: TokenConsumer<'s> + TreeConsumer<'s>> TokenConsumer<'s> + for ParseNumbers<'s, Inner> +{ + fn push_token(&mut self, token: Token<'s>) { + match (token.variant, &mut self.state) { + (token::Variant::Digits(variant), State { number: Some(Number::Based { .. }), .. }) => { + let State { negation, number: Some(Number::Based { base }), .. } = + mem::take(&mut self.state) + else { + unreachable!() + }; + self.inner.push_tree(maybe_negated( + negation, + Tree::number(Some(base), Some(token.with_variant(variant)), None), + )); + } + ( + token::Variant::Digits(variant), + State { number: Some(Number::Fractional { digits: _, dot: Some(_) }), .. }, + ) if token.left_offset.visible.width_in_spaces == 0 => { + let State { + negation, + number: Some(Number::Fractional { digits, dot: Some(dot) }), + .. + } = mem::take(&mut self.state) + else { + unreachable!() + }; + self.inner.push_tree(maybe_negated( + negation, + Tree::number( + None, + Some(digits), + Some(tree::FractionalDigits { dot, digits: token.with_variant(variant) }), + ), + )); + } + ( + token::Variant::Operator(_) | token::Variant::NegationOperator(_), + State { prev_item_in_expression, negation, number }, + ) if (token.is_spaced() || !*prev_item_in_expression) && token.code.repr.0 == "-" => { + if negation.is_some() || number.is_some() { + flush(&mut self.inner, negation, number); + } + self.state.negation = Some(token); + } + (token::Variant::NumberBase(variant), State { negation, number, .. 
}) => { + if number.is_some() { + flush(&mut self.inner, negation, number); + } else if token.left_offset.visible.width_in_spaces != 0 + && let Some(minus) = negation.take() + { + self.inner.push_token(minus.with_variant(token::Variant::operator())); + } + *number = Some(Number::Based { base: token.with_variant(variant) }) + } + (token::Variant::Digits(variant), State { negation, number, .. }) => { + if number.is_some() { + flush(&mut self.inner, negation, number); + } else if token.left_offset.visible.width_in_spaces != 0 + && let Some(minus) = negation.take() + { + self.inner.push_token(minus.with_variant(token::Variant::operator())); + } + *number = + Some(Number::Fractional { digits: token.with_variant(variant), dot: None }); + } + ( + token::Variant::DotOperator(_), + State { number: Some(Number::Fractional { digits: _, dot: dot @ None }), .. }, + ) if token.left_offset.visible.width_in_spaces == 0 => + *dot = Some(token.with_variant(token::variant::DotOperator())), + _ => { + self.flush(); + self.inner.push_token(token) + } + } + self.state.prev_item_in_expression = true; + } +} + +impl<'s, Inner: TokenConsumer<'s> + TreeConsumer<'s>> TreeConsumer<'s> for ParseNumbers<'s, Inner> { + fn push_tree(&mut self, tree: Tree<'s>) { + self.flush(); + self.inner.push_tree(tree); + self.state.prev_item_in_expression = true; + } +} + +impl<'s, Inner: TokenConsumer<'s> + TreeConsumer<'s>> ParseNumbers<'s, Inner> { + fn flush(&mut self) { + let State { negation, number, prev_item_in_expression: _ } = &mut self.state; + flush(&mut self.inner, negation, number); + } +} + +fn flush<'s, Inner: TokenConsumer<'s> + TreeConsumer<'s>>( + inner: &mut Inner, + negation: &mut Option>, + number: &mut Option>, +) { + if let Some(number) = number.take() { + let (number, trailing_token) = match number { + Number::Based { base } => (Tree::number(Some(base), None, None), None), + Number::Fractional { digits, dot } => (Tree::number(None, Some(digits), None), dot), + }; + 
inner.push_tree(maybe_negated(negation.take(), number)); + if let Some(trailing_token) = trailing_token { + inner.push_token(trailing_token.into()); + } + } else if let Some(minus) = negation.take() { + inner.push_token(minus); + } +} + +fn maybe_negated<'s>(minus: Option>, tree: Tree<'s>) -> Tree<'s> { + match minus { + Some(minus) => + Tree::unary_opr_app(minus.with_variant(token::variant::UnaryOperator()), Some(tree)), + None => tree, + } +} + +impl<'s, Inner: TokenConsumer<'s> + TreeConsumer<'s> + Finish> Finish for ParseNumbers<'s, Inner> { + type Result = Inner::Result; + + fn finish(&mut self) -> Self::Result { + self.flush(); + self.state.prev_item_in_expression = false; + self.inner.finish() + } +} + +impl<'s, Inner> GroupHierarchyConsumer<'s> for ParseNumbers<'s, Inner> +where Inner: TokenConsumer<'s> + TreeConsumer<'s> + GroupHierarchyConsumer<'s> +{ + fn start_group(&mut self, open: token::OpenSymbol<'s>) { + self.flush(); + self.inner.start_group(open); + } + + fn end_group(&mut self, close: token::CloseSymbol<'s>) { + self.flush(); + self.inner.end_group(close); + } +} diff --git a/lib/rust/parser/src/syntax/treebuilding/whitespace.rs b/lib/rust/parser/src/syntax/treebuilding/whitespace.rs index b881df062a..f087bb54d4 100644 --- a/lib/rust/parser/src/syntax/treebuilding/whitespace.rs +++ b/lib/rust/parser/src/syntax/treebuilding/whitespace.rs @@ -1,9 +1,11 @@ +use crate::syntax::consumer::Finish; +use crate::syntax::consumer::TokenConsumer; +use crate::syntax::consumer::TreeConsumer; use crate::syntax::token; use crate::syntax::tree; -use crate::syntax::treebuilding::consumer::Finish; -use crate::syntax::treebuilding::consumer::TokenConsumer; -use crate::syntax::treebuilding::consumer::TreeConsumer; use crate::syntax::treebuilding::TokenOrTree; +use crate::syntax::GroupHierarchyConsumer; +use crate::syntax::Item; use crate::syntax::Token; use crate::syntax::Tree; @@ -35,25 +37,32 @@ impl Spacing { true => Spacing::Spaced, } } + + pub fn of_item(item: 
&Item) -> Self { + match item { + Item::Token(token) => Spacing::of_token(token), + Item::Tree(tree) => Spacing::of_tree(tree), + Item::Group(group) => Spacing::of_token(&group.open), + Item::Block(_) => Spacing::Spaced, + } + } } // Returns `true` for an item if that item should not follow any other item in a no-space group // (i.e. the item has "space" before it). fn token_starts_new_no_space_group<'a: 'b, 'b, T: Into>>(token: T) -> bool { let token = token.into(); - match &token.data { - token::Variant::Operator(opr) if opr.properties.is_sequence() => true, - _ => token.left_offset.visible.width_in_spaces != 0, - } + token.left_offset.visible.width_in_spaces != 0 + || matches!(token.data, token::Variant::CommaOperator(_)) } fn tree_starts_new_no_space_group(tree: &Tree) -> bool { tree.span.left_offset.visible.width_in_spaces != 0 || matches!( &tree.variant, - box tree::Variant::BodyBlock(_) - | box tree::Variant::OperatorBlockApplication(_) - | box tree::Variant::ArgumentBlockApplication(_) + tree::Variant::BodyBlock(_) + | tree::Variant::OperatorBlockApplication(_) + | tree::Variant::ArgumentBlockApplication(_) ) } @@ -72,13 +81,13 @@ pub trait SpacingLookaheadTokenConsumer<'s> { /// Maintains 1-token whitespace lookahead. 
#[derive(Debug, Default)] -pub struct PeekSpacing<'s, T> { +pub struct PeekSpacing<'s, Inner> { current: Option>, - inner: T, + inner: Inner, } -impl<'s, T: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s>> - PeekSpacing<'s, T> +impl<'s, Inner: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s>> + PeekSpacing<'s, Inner> { fn emit(&mut self, tt: Option>, rhs: Option) { match tt { @@ -87,22 +96,26 @@ impl<'s, T: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s> None => {} } } -} -impl<'s, T: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s> + Finish> Finish - for PeekSpacing<'s, T> -{ - type Result = T::Result; - - fn finish(&mut self) -> T::Result { + fn flush(&mut self) { let last = self.current.take(); self.emit(last, None); + } +} + +impl<'s, Inner: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s> + Finish> + Finish for PeekSpacing<'s, Inner> +{ + type Result = Inner::Result; + + fn finish(&mut self) -> Inner::Result { + self.flush(); self.inner.finish() } } -impl<'s, T: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s>> TokenConsumer<'s> - for PeekSpacing<'s, T> +impl<'s, Inner: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s>> + TokenConsumer<'s> for PeekSpacing<'s, Inner> { fn push_token(&mut self, token: Token<'s>) { let rhs = Spacing::of_token(&token); @@ -111,8 +124,8 @@ impl<'s, T: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s> } } -impl<'s, T: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s>> TreeConsumer<'s> - for PeekSpacing<'s, T> +impl<'s, Inner: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s>> + TreeConsumer<'s> for PeekSpacing<'s, Inner> { fn push_tree(&mut self, tree: Tree<'s>) { let rhs = Spacing::of_tree(&tree); @@ -121,8 +134,19 @@ impl<'s, T: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s> } } -impl<'s, T: TreeConsumer<'s>> 
SpacingLookaheadTreeConsumer<'s> for T { - fn push_tree(&mut self, tree: Tree<'s>, _: Option) { - self.push_tree(tree); +impl<'s, Inner> GroupHierarchyConsumer<'s> for PeekSpacing<'s, Inner> +where Inner: GroupHierarchyConsumer<'s> + + SpacingLookaheadTreeConsumer<'s> + + SpacingLookaheadTokenConsumer<'s> +{ + fn start_group(&mut self, open: token::OpenSymbol<'s>) { + let prev = self.current.take(); + self.emit(prev, Spacing::of_token(&open).into()); + self.inner.start_group(open); + } + + fn end_group(&mut self, close: token::CloseSymbol<'s>) { + self.flush(); + self.inner.end_group(close); } } diff --git a/lib/rust/prelude/src/vec.rs b/lib/rust/prelude/src/vec.rs index 65caff2cfb..23af50ed3a 100644 --- a/lib/rust/prelude/src/vec.rs +++ b/lib/rust/prelude/src/vec.rs @@ -143,10 +143,13 @@ pub struct ColdVec { impl ColdVec { pub fn push(&mut self, element: T) { - if self.elements.is_none() { - self.elements = Some(Default::default()); + self.elements_mut().push(element); + } + + pub fn append(&mut self, other: &mut Self) { + if let Some(other_elements) = other.elements.as_mut() { + self.elements_mut().append(other_elements) } - self.elements.as_mut().unwrap().push(element); } pub fn iter(&self) -> std::slice::Iter { @@ -155,6 +158,13 @@ impl ColdVec { None => [].iter(), } } + + fn elements_mut(&mut self) -> &mut Vec { + if self.elements.is_none() { + self.elements = Some(Default::default()); + } + self.elements.as_mut().unwrap() + } } impl> PartialEq> for ColdVec { diff --git a/test/Base_Tests/src/Data/List_Spec.enso b/test/Base_Tests/src/Data/List_Spec.enso index 69bcbd2c1b..da9e501376 100644 --- a/test/Base_Tests/src/Data/List_Spec.enso +++ b/test/Base_Tests/src/Data/List_Spec.enso @@ -40,7 +40,7 @@ add_specs suite_builder = suite_builder.group "List" group_builder-> Test.expect_panic_with (l.any "invalid arg") Type_Error group_builder.specify "should allow checking if all elements satisfy a predicate with `.all`" <| - all_even = l.all(x -> x % 2 == 0) + all_even 
= l.all x-> x % 2 == 0 all_less_than_four = l.all (< 4) all_even . should_be_false all_less_than_four . should_be_true diff --git a/test/Base_Tests/src/Data/Numbers_Spec.enso b/test/Base_Tests/src/Data/Numbers_Spec.enso index 22fba35969..ad4b073753 100644 --- a/test/Base_Tests/src/Data/Numbers_Spec.enso +++ b/test/Base_Tests/src/Data/Numbers_Spec.enso @@ -107,60 +107,60 @@ add_specs suite_builder = (10.div 0).should_fail_with Arithmetic_Error group_builder.specify "should support integral binary literals" <| - lit = 2_01101101 + lit = 0b01101101 lit . should_equal 109 group_builder.specify "should support integral hexadecimal literals" <| - lit = 16_6D + lit = 0x6D lit . should_equal 109 group_builder.specify "should support bitwise and" <| - left = 2_01101101 - right = 2_11000100 - big_left = 16_17ffffffffffffffa - big_right = 16_17ffffffffffffffc - left.bit_and right . should_equal 2_01000100 - left.bit_and big_right . should_equal 2_01101100 - big_left.bit_and right . should_equal 2_11000000 - big_left.bit_and big_right . should_equal 16_17ffffffffffffff8 + left = 0b01101101 + right = 0b11000100 + big_left = 0x17ffffffffffffffa + big_right = 0x17ffffffffffffffc + left.bit_and right . should_equal 0b01000100 + left.bit_and big_right . should_equal 0b01101100 + big_left.bit_and right . should_equal 0b11000000 + big_left.bit_and big_right . should_equal 0x17ffffffffffffff8 group_builder.specify "should support bitwise or" <| - left = 2_01101101 - right = 2_11000100 - big_left = 16_17ffffffffffffffa - big_right = 16_17ffffffffffffffc - left.bit_or right . should_equal 2_11101101 - left.bit_or big_right . should_equal 16_17ffffffffffffffd - big_left.bit_or right . should_equal 16_17ffffffffffffffe - big_left.bit_or right . should_equal 16_17ffffffffffffffe + left = 0b01101101 + right = 0b11000100 + big_left = 0x17ffffffffffffffa + big_right = 0x17ffffffffffffffc + left.bit_or right . should_equal 0b11101101 + left.bit_or big_right . 
should_equal 0x17ffffffffffffffd + big_left.bit_or right . should_equal 0x17ffffffffffffffe + big_left.bit_or right . should_equal 0x17ffffffffffffffe group_builder.specify "should support bitwise exclusive or" <| - left = 2_01101101 - right = 2_11000100 - big_left = 16_17ffffffffffffffa - big_right = 16_17ffffffffffffffc - left.bit_xor right . should_equal 2_10101001 - left.bit_xor big_right . should_equal 16_17fffffffffffff91 - big_left.bit_xor right . should_equal 16_17fffffffffffff3e - big_left.bit_xor big_right . should_equal 2_00000110 + left = 0b01101101 + right = 0b11000100 + big_left = 0x17ffffffffffffffa + big_right = 0x17ffffffffffffffc + left.bit_xor right . should_equal 0b10101001 + left.bit_xor big_right . should_equal 0x17fffffffffffff91 + big_left.bit_xor right . should_equal 0x17fffffffffffff3e + big_left.bit_xor big_right . should_equal 0b00000110 group_builder.specify "should support bitwise negation" <| - bits = 2_01101101 - big_bits = 16_17ffffffffffffffa - bits.bit_not . should_equal -2_01101110 + bits = 0b01101101 + big_bits = 0x17ffffffffffffffa + bits.bit_not . should_equal -0b01101110 bits.bit_not.bit_not . should_equal bits - big_bits.bit_not . should_equal -16_17ffffffffffffffb + big_bits.bit_not . should_equal -0x17ffffffffffffffb big_bits.bit_not.bit_not . should_equal big_bits group_builder.specify "should support left bit shifts" <| - positive_bits = 2_01101101 - negative_bits = -2_01101101 + positive_bits = 0b01101101 + negative_bits = -0b01101101 positive_big_bits = almost_max_long_times_three negative_big_bits = -almost_max_long_times_three - positive_bits.bit_shift_l 2 . should_equal 2_0110110100 - positive_bits.bit_shift_l 64 . should_equal 16_6d0000000000000000 - positive_bits.bit_shift_l -2 . should_equal 2_011011 + positive_bits.bit_shift_l 2 . should_equal 0b0110110100 + positive_bits.bit_shift_l 64 . should_equal 0x6d0000000000000000 + positive_bits.bit_shift_l -2 . should_equal 0b011011 positive_bits.bit_shift_l -64 . 
should_equal 0 (positive_bits.bit_shift_l positive_big_bits).should_fail_with Arithmetic_Error positive_bits.bit_shift_l negative_big_bits . should_equal 0 @@ -187,8 +187,8 @@ add_specs suite_builder = negative_big_bits.bit_shift_l negative_big_bits . should_equal -1 group_builder.specify "should support right bit shifts, preserving sign" <| - positive_bits = 2_01101101 - negative_bits = -2_01101101 + positive_bits = 0b01101101 + negative_bits = -0b01101101 positive_big_bits = almost_max_long_times_three negative_big_bits = -almost_max_long_times_three diff --git a/test/Base_Tests/src/Data/Text/Regex_Spec.enso b/test/Base_Tests/src/Data/Text/Regex_Spec.enso index e7ce093c35..1fb0048178 100644 --- a/test/Base_Tests/src/Data/Text/Regex_Spec.enso +++ b/test/Base_Tests/src/Data/Text/Regex_Spec.enso @@ -525,7 +525,7 @@ add_specs suite_builder = suite_builder.group "caching" group_builder-> group_builder.specify "Replacer cache drops old values" <| - pattern = Regex.compile('([a-c])') + pattern = Regex.compile '([a-c])' # Add enough values to flush out the first values. 0.up_to get_lru_size+1 . 
map i-> diff --git a/test/Base_Tests/src/Data/Time/Date_Spec.enso b/test/Base_Tests/src/Data/Time/Date_Spec.enso index 4b2d655fcd..e0d851c932 100644 --- a/test/Base_Tests/src/Data/Time/Date_Spec.enso +++ b/test/Base_Tests/src/Data/Time/Date_Spec.enso @@ -552,7 +552,7 @@ js_date year month=1 day=1 = js_array_date year month=1 day=1 = arr = Panic.catch Any (js_array_dateCreate year month day) (err -> Error.throw (Time_Error.Error err.payload)) - js_set_zone arr.at(0) + js_set_zone (arr.at 0) java_parse date_text pattern=Nothing = Panic.catch Any handler=(err -> Error.throw (Time_Error.Error err.payload.getMessage)) <| diff --git a/test/Base_Tests/src/Data/Time/Date_Time_Spec.enso b/test/Base_Tests/src/Data/Time/Date_Time_Spec.enso index a1bbfc4263..eb6e52592f 100644 --- a/test/Base_Tests/src/Data/Time/Date_Time_Spec.enso +++ b/test/Base_Tests/src/Data/Time/Date_Time_Spec.enso @@ -812,7 +812,7 @@ js_parse text format=Date_Time_Formatter.default_enso_zoned_date_time = js_array_datetime year month=1 day=1 hour=0 minute=0 second=0 nanosecond=0 zone=Time_Zone.system = arr = Panic.catch Any (js_array_datetimeCreate year month day hour minute second nanosecond) (err -> Error.throw (Time_Error.Error err.payload)) - js_set_zone arr.at(0) zone + js_set_zone (arr.at 0) zone foreign js js_array_datetimeCreate year month day hour minute second nanosecond = """ if (month > 12 || month < 1) { @@ -852,7 +852,7 @@ java_parse date_text_raw pattern=Nothing = utc_replaced = date_text_raw.replace "[UTC]" "Z" date_text = if utc_replaced.ends_with "ZZ" then date_text_raw else utc_replaced if pattern == Nothing then Panic.catch Any (maybe_parse_java_zoned date_text) (cause -> parse_java_local cause.payload date_text pattern) else - formatter = DateTimeFormatter.ofPattern(pattern) + formatter = DateTimeFormatter.ofPattern pattern Panic.catch Any (maybe_parse_java_zoned date_text formatter) (cause -> parse_java_local cause.payload date_text pattern) main filter=Nothing = diff --git 
a/test/Base_Tests/src/Semantic/Warnings_Spec.enso b/test/Base_Tests/src/Semantic/Warnings_Spec.enso index 3aeb6c53e9..e857eb9292 100644 --- a/test/Base_Tests/src/Semantic/Warnings_Spec.enso +++ b/test/Base_Tests/src/Semantic/Warnings_Spec.enso @@ -439,13 +439,13 @@ add_specs suite_builder = suite_builder.group "Dataflow Warnings" group_builder- Warning.get_all result_4 . map (x-> x.value.to_text) . should_equal ["Baz!", "Baz!", "Baz!"] group_builder.specify "should only report the first 100 unique warnings" <| - vec = (0.up_to 500).map(e -> Warning.attach "Foo!" e) - vec_plus_1 = vec.map(e -> e+1) + vec = (0.up_to 500).map e-> Warning.attach "Foo!" e + vec_plus_1 = vec.map e-> e+1 Warning.get_all vec_plus_1 . length . should_equal 100 Warning.limit_reached vec . should_equal True warn = Warning.attach "Boo!" 42 - vec_2 = (0.up_to 500).map(e -> if (e < 30) then Warning.attach "Foo!" e else (warn + e)) + vec_2 = (0.up_to 500).map e-> if e<30 then Warning.attach "Foo!" e else warn+e Warning.get_all vec_2 . length . should_equal 31 Warning.limit_reached vec_2 . 
should_equal False diff --git a/test/Benchmarks/src/Vector/Sort.enso b/test/Benchmarks/src/Vector/Sort.enso index 520170418f..adf4ea0884 100644 --- a/test/Benchmarks/src/Vector/Sort.enso +++ b/test/Benchmarks/src/Vector/Sort.enso @@ -55,7 +55,7 @@ type Int_Comparator compare a:Int b:Int = Ordering.compare a.v b.v hash i:Int = 3721 + Ordering.hash i.v -Comparable.from(that:Int) = Comparable.new that Int_Comparator +Comparable.from that:Int = Comparable.new that Int_Comparator # The Benchmarks ============================================================== diff --git a/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso index cf70477ed6..66a39aaff4 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso @@ -198,13 +198,13 @@ add_specs suite_builder detailed setup = expression_test "#2020-12-23 12:34:56Z[UTC]#" (Date_Time.new 2020 12 23 12 34 56 zone=Time_Zone.utc) expression_test "#2020-12-23 12:34:56+02:30[UTC]#" (Date_Time.new 2020 12 23 10 04 56 zone=Time_Zone.utc) expression_test "#2020-12-23 12:34:56.157+01[UTC]#" (Date_Time.new 2020 12 23 11 34 56 157 zone=Time_Zone.utc) - expression_test "#2020-12-23T12:34[Europe/Warsaw]#" (Date_Time.new 2020 12 23 12 34 zone=Time_Zone.parse("Europe/Warsaw")) + expression_test "#2020-12-23T12:34[Europe/Warsaw]#" (Date_Time.new 2020 12 23 12 34 zone=(Time_Zone.parse "Europe/Warsaw")) group_builder.specify "should correctly handle timezones" pending=pending_datetime <| ## We cannot just test equality as the Database may change the timezone, so all we can do is check that the values are accepted and can be compared with other values in the database. 
- t = table_builder [["X", [Date_Time.new 2020 12 23 12 34 56 zone=Time_Zone.utc, Date_Time.new 2010 1 2 12 34 56 zone=Time_Zone.parse("Europe/Warsaw")]]] + t = table_builder [["X", [Date_Time.new 2020 12 23 12 34 56 zone=Time_Zone.utc, Date_Time.new 2010 1 2 12 34 56 zone=(Time_Zone.parse "Europe/Warsaw")]]] c1 = t.evaluate_expression "#2020-12-23 12:34:56Z[UTC]# == [X]" c1.to_vector . should_equal [True, False] c2 = t.evaluate_expression "#2010-01-02 12:34:56[Europe/Warsaw]# == [X]"