Improve parser contextualization (#10734)

This commit is contained in:
Kaz Wesley 2024-08-05 11:46:58 -04:00 committed by GitHub
parent c179701a00
commit aafdef1aeb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
72 changed files with 4805 additions and 2744 deletions

View File

@ -18,11 +18,13 @@
- [Renaming launcher executable to ensoup][10535]
- [Space-precedence does not apply to value-level operators][10597]
- [Must specify `--repl` to enable debug server][10709]
- [Improved parser error reporting and performance][10734]
[10468]: https://github.com/enso-org/enso/pull/10468
[10535]: https://github.com/enso-org/enso/pull/10535
[10597]: https://github.com/enso-org/enso/pull/10597
[10709]: https://github.com/enso-org/enso/pull/10709
[10734]: https://github.com/enso-org/enso/pull/10734
#### Enso IDE

27
Cargo.lock generated
View File

@ -27,6 +27,18 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "afl"
version = "0.15.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c21e10b6947189c5ff61343b5354e9ad1c1722bd47b69cd0a6b49e5fa7f7ecf6"
dependencies = [
"home",
"libc",
"rustc_version",
"xdg",
]
[[package]]
name = "ahash"
version = "0.7.8"
@ -1619,6 +1631,7 @@ dependencies = [
name = "enso-parser-debug"
version = "0.1.0"
dependencies = [
"clap 4.5.4",
"enso-metamodel",
"enso-metamodel-lexpr",
"enso-parser",
@ -1628,6 +1641,14 @@ dependencies = [
"serde_json",
]
[[package]]
name = "enso-parser-fuzz"
version = "0.1.0"
dependencies = [
"afl",
"enso-parser",
]
[[package]]
name = "enso-parser-generate-java"
version = "0.1.0"
@ -5480,6 +5501,12 @@ dependencies = [
"rustix",
]
[[package]]
name = "xdg"
version = "2.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "213b7324336b53d2414b2db8537e56544d981803139155afa84f76eeebb7a546"
[[package]]
name = "xmlparser"
version = "0.13.6"

View File

@ -23,6 +23,7 @@ members = [
"lib/rust/parser/generate-java",
"lib/rust/parser/schema",
"lib/rust/parser/debug",
"lib/rust/parser/debug/fuzz",
"tools/language-server/logstat",
"tools/language-server/wstest",
]
@ -47,6 +48,11 @@ incremental = true
debug = false
debug-assertions = false
[profile.fuzz]
inherits = "release"
debug-assertions = true
overflow-checks = true
[profile.bench]
opt-level = 3
lto = true

View File

@ -40,14 +40,14 @@ pub fn is_ident_or_operator(code: &str) -> u32 {
#[wasm_bindgen]
pub fn is_numeric_literal(code: &str) -> bool {
let parsed = PARSER.with(|parser| parser.run(code));
let enso_parser::syntax::tree::Variant::BodyBlock(body) = *parsed.variant else { return false };
let enso_parser::syntax::tree::Variant::BodyBlock(body) = parsed.variant else { return false };
let [stmt] = &body.statements[..] else { return false };
stmt.expression.as_ref().map_or(false, |expr| match &*expr.variant {
stmt.expression.as_ref().map_or(false, |expr| match &expr.variant {
enso_parser::syntax::tree::Variant::Number(_) => true,
enso_parser::syntax::tree::Variant::UnaryOprApp(app) =>
app.opr.code == "-"
&& app.rhs.as_ref().map_or(false, |rhs| {
matches!(*rhs.variant, enso_parser::syntax::tree::Variant::Number(_))
matches!(rhs.variant, enso_parser::syntax::tree::Variant::Number(_))
}),
_ => false,
})

View File

@ -209,11 +209,6 @@ const baseCases: ApplySuggestionCase[] = [
suggestion: makeStaticMethod('Standard.Base.Data.Vector.new'),
expected: 'Data.Vector.new ',
},
{
code: 'Dat . V .',
suggestion: makeStaticMethod('Standard.Base.Data.Vector.new'),
expected: 'Data . Vector . new ',
},
{
code: '(type_method some_arg).Vector.',
suggestion: makeStaticMethod('Standard.Base.Data.Vector.new'),
@ -240,6 +235,15 @@ const baseCases: ApplySuggestionCase[] = [
expected: 'a -> a.get ',
},
]
const simpleCases: ApplySuggestionCase[] = [
...baseCases,
// This case would cause a syntax error if a spaced-operator suffix were added.
{
code: 'Dat . V .',
suggestion: makeStaticMethod('Standard.Base.Data.Vector.new'),
expected: 'Data . Vector . new ',
},
]
function makeComplexCase(prefix: string, suffix: string): ApplySuggestionCase[] {
return Array.from(baseCases, (aCase) => {

View File

@ -33,11 +33,10 @@ const validIdentifiers = [
'+',
'<=>',
'*',
'.',
'!=',
]
const invalidIdentifiers = ['', '1', '1Abc', '1_', 'abA!', '$a', 'a$']
// These are not valid identifiers but currently pass the qualified name regex: ['_', '.*']
// These are not valid identifiers but currently pass the qualified name regex: ['_', '.*', '.']
test.each(validIdentifiers)("'%s' is a valid identifier", (name) =>
expect(unwrap(tryIdentifierOrOperatorIdentifier(name))).toStrictEqual(

View File

@ -1373,7 +1373,7 @@ exports[`Parsing 'foo bar=baz' 1`] = `
"leftOffsetCodeStartUtf8": 7,
"lengthInCodeBuffer": 1,
"startInCodeBuffer": 7,
"type": "Operator",
"type": "AssignmentOperator",
"whitespaceLengthInCodeBuffer": 0,
"whitespaceStartInCodeBuffer": 7,
},

View File

@ -122,14 +122,9 @@ test.each([
{ type: Tree.Type.Ident, repr: 'foo' },
],
],
['(', [{ type: Tree.Type.Invalid, repr: '(' }]],
[
'(foo',
[
{ type: Tree.Type.Invalid, repr: '(' },
{ type: Tree.Type.Ident, repr: 'foo' },
],
],
// These are Invalid nodes, so the child is a subtree containing the whole expression.
['(', [{ type: Tree.Type.Group, repr: '(' }]],
['(foo', [{ type: Tree.Type.Group, repr: '(foo' }]],
])("Reading children of '%s'", (code, expected) => {
const ast = parseEnsoLine(code)
const children = Array.from(childrenAstNodes(ast))

View File

@ -17,7 +17,6 @@ export type HasAstRange = SourceRange | RawAst.Tree | RawAst.Token
*/
export function parseEnsoLine(code: string): RawAst.Tree {
const block = parseEnso(code)
assert(block.type === RawAst.Tree.Type.BodyBlock)
const soleExpression = tryGetSoleValue(block.statements)?.expression
assertDefined(soleExpression)
return soleExpression

View File

@ -639,10 +639,7 @@ lazy val rustParserTargetDirectory =
SettingKey[File]("target directory for the Rust parser")
(`syntax-rust-definition` / rustParserTargetDirectory) := {
// setting "debug" for release, because it isn't yet safely integrated into
// the parser definition
val versionName = if (BuildInfo.isReleaseMode) "debug" else "debug"
target.value / "rust" / versionName
target.value / "rust" / "parser-jni"
}
val generateRustParserLib =
@ -668,10 +665,13 @@ val generateRustParserLib =
target.foreach { t =>
Cargo.rustUp(t, log)
}
val baseArguments = Seq(
val profile = if (BuildInfo.isReleaseMode) "release" else "fuzz"
val arguments = Seq(
"build",
"-p",
"enso-parser-jni",
"--profile",
profile,
"-Z",
"unstable-options"
) ++ target.map(t => Seq("--target", t)).getOrElse(Seq()) ++
@ -679,20 +679,18 @@ val generateRustParserLib =
"--out-dir",
(`syntax-rust-definition` / rustParserTargetDirectory).value.toString
)
val adjustedArguments = baseArguments ++
(if (BuildInfo.isReleaseMode)
Seq("--release")
else Seq())
val envVars = target
.map(_ => Seq(("RUSTFLAGS", "-C target-feature=-crt-static")))
.getOrElse(Seq())
Cargo.run(adjustedArguments, log, envVars)
Cargo.run(arguments, log, envVars)
}
FileTreeView.default.list(Seq(libGlob)).map(_._1.toFile)
}
`syntax-rust-definition` / generateRustParserLib / fileInputs +=
(`syntax-rust-definition` / baseDirectory).value.toGlob / "jni" / "src" / "*.rs"
(`syntax-rust-definition` / baseDirectory).value.toGlob / "jni" / "src" / ** / "*.rs"
`syntax-rust-definition` / generateRustParserLib / fileInputs +=
(`syntax-rust-definition` / baseDirectory).value.toGlob / "src" / ** / "*.rs"
val generateParserJavaSources = TaskKey[Seq[File]](
"generateParserJavaSources",

View File

@ -247,7 +247,7 @@ impl Default for BuildConfigurationFlags {
build_project_manager_package: false,
build_launcher_bundle: false,
build_project_manager_bundle: false,
generate_java_from_rust: true,
generate_java_from_rust: false,
test_java_generated_from_rust: false,
verify_packages: false,
}

View File

@ -1,6 +1,6 @@
use crate::engine::StandardLibraryTestsSelection;
use crate::prelude::*;
use crate::engine::StandardLibraryTestsSelection;
use crate::paths::Paths;
use crate::paths::ENSO_ENABLE_ASSERTIONS;
use crate::paths::ENSO_META_TEST_ARGS;

View File

@ -155,11 +155,8 @@ impl IsTarget for Backend {
target_os == TARGET_OS,
"Enso Project Manager cannot be built on '{target_os}' for target '{TARGET_OS}'.",
);
let config = BuildConfigurationFlags {
build_project_manager_bundle: true,
generate_java_from_rust: true,
..default()
};
let config =
BuildConfigurationFlags { build_project_manager_bundle: true, ..default() };
let context = inner.prepare_context(context, config)?;
let artifacts = context.build().await?;
let project_manager =

View File

@ -43,11 +43,7 @@ impl IsTarget for Runtime {
context: Context,
job: WithDestination<Self::BuildInput>,
) -> BoxFuture<'static, Result<Self::Artifact>> {
let config = BuildConfigurationFlags {
build_engine_package: true,
generate_java_from_rust: true,
..default()
};
let config = BuildConfigurationFlags { build_engine_package: true, ..default() };
let this = *self;
let WithDestination { inner, destination } = job;
let triple = TargetTriple::new(inner.versions);

View File

@ -1,9 +1,11 @@
use super::*;
use crate::paths::generated::RepoRoot;
use ide_ci::programs::cargo;
use ide_ci::programs::Cargo;
use crate::paths::generated::RepoRoot;
const LINTER_CRATE_NAME: &str = "enso-parser-debug";
const LINTER_BIN_NAME: &str = "check_syntax";

View File

@ -12,7 +12,6 @@ use ide_ci::programs::Javac;
const GENERATOR_CRATE_NAME: &str = "enso-parser-generate-java";
const PARSER_JNI_CRATE_NAME: &str = "enso-parser-jni";
const GENERATOR_BIN_NAME: &str = GENERATOR_CRATE_NAME;
const TEST_GENERATOR_BIN_NAME: &str = "java-tests";
const GENERATED_CODE_NAMESPACE: [&str; 3] = ["org", "enso", "syntax2"];
@ -47,17 +46,8 @@ pub async fn generate_java(repo_root: &RepoRoot) -> Result {
generate_java_to(repo_root, &output_path).await
}
fn cargo_build_parser_jni(repo_root: &Path) -> Result<Command> {
let mut ret = Cargo.cmd()?;
ret.current_dir(repo_root)
.apply(&cargo::Command::Build)
.apply(&cargo::Options::Package(PARSER_JNI_CRATE_NAME.into()));
Ok(ret)
}
#[context("Running self-tests for the generated Java sources failed.")]
pub async fn run_self_tests(repo_root: &RepoRoot) -> Result {
cargo_build_parser_jni(repo_root)?.run_ok().await?;
let base = &repo_root.target.generated_java;
let lib = &repo_root.lib.rust.parser.generate_java.java;
let package = repo_root.target.generated_java.join_iter(GENERATED_CODE_NAMESPACE);

View File

@ -1119,9 +1119,9 @@ type Integer
of bits in the operands.
> Example
Computing the bitwise conjunction of 2_01101101 and 2_11110000.
Computing the bitwise conjunction of 0b01101101 and 0b11110000.
2_01101101.bit_and 2_11110000
0b01101101.bit_and 0b11110000
bit_and self that:Integer -> Integer = integer_bit_and self that
## GROUP Bitwise
@ -1131,9 +1131,9 @@ type Integer
The bitwise complement negates the value of each bit in the operand.
> Example
Bitwise negation of 2_0110.
Bitwise negation of 0b0110.
2_0110.bit_not
0b0110.bit_not
bit_not self -> Integer = integer_bit_not self
## GROUP Bitwise
@ -1148,9 +1148,9 @@ type Integer
bits in the operands.
> Example
Computing the bitwise disjunction of 2_01101101 and 2_11110000.
Computing the bitwise disjunction of 0b01101101 and 0b11110000.
2_01101101.bit_or 2_11110000
0b01101101.bit_or 0b11110000
bit_or self that:Integer -> Integer = integer_bit_or self that
## GROUP Bitwise
@ -1164,9 +1164,9 @@ type Integer
corresponding bits in the operands.
> Example
Computing the bitwise exclusive or of 2_01101101 and 2_11110000.
Computing the bitwise exclusive or of 0b01101101 and 0b11110000.
2_01101101.bit_xor 2_11110000
0b01101101.bit_xor 0b11110000
bit_xor self that:Integer -> Integer = integer_bit_xor self that
## GROUP Bitwise

View File

@ -44,17 +44,6 @@ public class ErrorCompilerTest extends CompilerTest {
ir, Syntax.UnexpectedExpression$.MODULE$, "Unexpected expression", 14, 16);
}
@Test
public void unaryMinus() throws Exception {
var ir = parse("""
from Standard.Base import all
main = Date.new day=-
""");
assertSingleSyntaxError(ir, Syntax.UnrecognizedToken$.MODULE$, "Unrecognized token", 51, 52);
}
@Test
public void dotUnderscore2() throws Exception {
var ir = parse("""
@ -404,21 +393,21 @@ public class ErrorCompilerTest extends CompilerTest {
public void malformedExport9() throws Exception {
var ir = parse("from export all");
assertSingleSyntaxError(
ir, invalidExport("`all` not allowed in `export` statement"), null, 0, 15);
ir, Syntax.UnexpectedExpression$.MODULE$, "Unexpected expression", 0, 15);
}
@Test
public void malformedExport10() throws Exception {
var ir = parse("from Foo export all hiding");
assertSingleSyntaxError(
ir, invalidExport("`hiding` not allowed in `export` statement"), null, 0, 26);
ir, Syntax.UnexpectedExpression$.MODULE$, "Unexpected expression", 0, 26);
}
@Test
public void malformedExport11() throws Exception {
var ir = parse("from Foo export all hiding X.Y");
assertSingleSyntaxError(
ir, invalidExport("`hiding` not allowed in `export` statement"), null, 0, 30);
ir, Syntax.UnexpectedExpression$.MODULE$, "Unexpected expression", 0, 30);
}
@Test
@ -596,8 +585,8 @@ public class ErrorCompilerTest extends CompilerTest {
var ir = parse("""
from project.Module export all
""");
var expectedReason = new Syntax.InvalidExport("`all` not allowed in `export` statement");
assertSingleSyntaxError(ir, expectedReason, null, 0, 30);
assertSingleSyntaxError(
ir, Syntax.UnexpectedExpression$.MODULE$, "Unexpected expression", 0, 30);
}
@Test
@ -605,8 +594,8 @@ public class ErrorCompilerTest extends CompilerTest {
var ir = parse("""
from project.Module export all hiding Foo
""");
var expectedReason = new Syntax.InvalidExport("`hiding` not allowed in `export` statement");
assertSingleSyntaxError(ir, expectedReason, null, 0, 41);
assertSingleSyntaxError(
ir, Syntax.UnexpectedExpression$.MODULE$, "Unexpected expression", 0, 41);
}
private void assertSingleSyntaxError(

View File

@ -93,7 +93,7 @@ class DataflowErrorsTest extends InterpreterTest {
|
|main =
| myErr = Error.throw (My_Error.Mk_My_Error 20)
| IO.println(myErr.catch_primitive .recover)
| IO.println (myErr.catch_primitive .recover)
|""".stripMargin
eval(code)
consumeOut shouldEqual List("(Mk_My_Recovered 20)")

View File

@ -97,17 +97,18 @@ final class TreeToIr {
case Tree.Import x -> null;
case Tree.Invalid x -> null;
case Tree.TypeSignature sig -> {
Expression methodReference;
try {
methodReference = translateMethodReference(sig.getVariable(), true);
} catch (SyntaxException ex) {
methodReference = translateExpression(sig.getVariable());
}
Expression methodReference;
try {
methodReference = translateMethodReference(sig.getVariable(), true);
} catch (SyntaxException ex) {
methodReference = ex.toError();
}
var signature = translateType(sig.getType());
var ascription = new Type.Ascription(methodReference, signature, Option.empty(),
getIdentifiedLocation(sig), meta(), diag());
yield ascription;
}
case Tree.TypeAnnotated anno -> translateTypeAnnotated(anno);
default -> translateExpression(exprTree);
};
if (expr != null) {
@ -1361,12 +1362,24 @@ final class TreeToIr {
case Tree.Group group -> translateType(group.getBody());
case Tree.UnaryOprApp un -> translateType(un.getRhs());
case Tree.Wildcard wild -> new Name.Blank(getIdentifiedLocation(wild), meta(), diag());
case Tree.TypeAnnotated anno -> translateTypeAnnotated(anno);
case Tree.TypeAnnotated anno -> translateTypeAnnotatedToOperator(anno);
default -> translateSyntaxError(tree, new Syntax.UnsupportedSyntax("translateType"));
};
}
/**
* Translate a type-annotated expression.
*
* <p>The annotation's type is translated with {@code translateType} and the annotated expression
* with {@code translateExpression}, in that order; the two results are then combined into a
* {@code Type.Ascription} located at the whole annotation node.
*
* @param anno the type-annotated tree node to translate
* @return a {@code Type.Ascription} pairing the translated expression with its translated type
*/
Expression translateTypeAnnotated(Tree.TypeAnnotated anno) {
var type = translateType(anno.getType());
var expr = translateExpression(anno.getExpression());
return new Type.Ascription(expr, type, Option.empty(), getIdentifiedLocation(anno), meta(), diag());
}
/**
* Translate a type-annotated expression in a context where the IR is a generic binary operator.
*/
Expression translateTypeAnnotatedToOperator(Tree.TypeAnnotated anno) {
var type = translateTypeCallArgument(anno.getType());
var expr = translateCallArgument(anno.getExpression());
var opName = new Name.Literal(anno.getOperator().codeRepr(), true, Option.empty(),
@ -1835,12 +1848,6 @@ final class TreeToIr {
@SuppressWarnings("unchecked")
Export translateExport(Tree.Export exp) {
try {
if (exp.getHiding() != null) {
return translateSyntaxError(exp, invalidExportReason("`hiding` not allowed in `export` statement"));
}
if (exp.getAll() != null) {
return translateSyntaxError(exp, invalidExportReason("`all` not allowed in `export` statement"));
}
Option<Name.Literal> rename;
if (exp.getAs() == null) {
rename = Option.empty();

View File

@ -17,6 +17,7 @@ enso-reflect = { path = "../../reflect" }
lexpr = "0.2.6"
serde = { workspace = true }
serde_json = { workspace = true }
clap = { workspace = true }
[lints]
workspace = true

View File

@ -0,0 +1,17 @@
[package]
name = "enso-parser-fuzz"
version = "0.1.0"
authors = ["Enso Team <enso-dev@enso.org>"]
edition = "2021"
description = "Binary for fuzzing Enso parser with AFL"
readme = "README.md"
homepage = "https://github.com/enso-org/enso"
repository = "https://github.com/enso-org/enso"
license-file = "../../../LICENSE"
[dependencies]
afl = "0.15"
enso-parser = { path = "../../", features = ["debug"] }
[lints]
workspace = true

View File

@ -0,0 +1,19 @@
//! Build:
//! `cargo afl build --profile=fuzz -p enso-parser-fuzz`
//!
//! Run:
//! `cargo afl fuzz -i inputs/ -o outputs/ target/rust/fuzz/enso-parser-fuzz`
use afl::fuzz;
/// Entry point of the fuzzing harness.
///
/// Every input produced by the fuzzer that is valid UTF-8 is parsed, and the code reproduced from
/// the resulting AST must be identical to the input; any mismatch (or panic inside the parser) is
/// reported to the fuzzer as a failure.
fn main() {
    fuzz!(|code: &[u8]| {
        // The parser operates on `str`, so inputs that are not valid UTF-8 are ignored.
        let Ok(source) = std::str::from_utf8(code) else {
            return;
        };
        let ast = enso_parser::Parser::new().run(source);
        assert_eq!(ast.code(), source);
    });
}

View File

@ -1,5 +1,6 @@
//! Parses Enso sources, measuring time spent in the parser.
// === Features ===
#![feature(test)]
// === Non-Standard Linter Configuration ===
#![allow(clippy::option_map_unit_fn)]
@ -79,6 +80,7 @@ fn bench_std_lib(b: &mut test::Bencher) {
}
})
.unwrap();
sources.sort_unstable();
let parser = enso_parser::Parser::new();
b.bytes = sources.iter().map(|s| s.len() as u64).sum();
b.iter(|| {

View File

@ -3,6 +3,8 @@
//! Source files may be specified as command line arguments; if none are provided, source code will
//! be read from standard input.
// === Features ===
#![feature(box_patterns)]
// === Non-Standard Linter Configuration ===
#![allow(clippy::option_map_unit_fn)]
#![allow(clippy::precedence)]
@ -14,24 +16,41 @@
use enso_parser::prelude::*;
use clap::Parser;
use std::path::Path;
use std::path::PathBuf;
struct WithSourcePath<T> {
path: String,
path: PathBuf,
value: T,
}
// Command-line arguments of the syntax checker.
//
// NOTE(review): plain `//` comments are used for the notes added here because clap's derive
// presumably turns `///` doc comments into user-visible help text -- confirm before converting.
#[derive(Parser)]
struct Cli {
/// Files to check. If none specified, code will be read from standard input.
// When this list is empty, `main` reads the whole of stdin and checks it as `<stdin>`.
files: Vec<PathBuf>,
/// Only check if the parser fails to parse the input.
// Forwarded to `check_file`: when set, syntax errors and warnings are not collected and only
// the "AST does not match source code" internal error can be reported.
#[arg(short, long)]
smoke_test: bool,
}
fn main() -> Result<(), Box<dyn std::error::Error>> {
let args = std::env::args().skip(1);
let cli = Cli::parse();
let mut to_read = vec![];
let mut to_parse = vec![];
if args.len() == 0 {
if cli.files.is_empty() {
use std::io::Read;
let mut data = String::new();
std::io::stdin().read_to_string(&mut data).unwrap();
to_parse.push(WithSourcePath { path: "<stdin>".into(), value: data });
} else {
to_read.extend(args);
to_read.extend(cli.files);
};
let cores = std::thread::available_parallelism()
.unwrap_or(std::num::NonZeroUsize::new(1).unwrap())
@ -79,7 +98,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
None => break,
}
};
let results = check_file(source, &mut parser);
let results = check_file(source, &mut parser, cli.smoke_test);
to_print.lock().unwrap().push(results);
}
}));
@ -109,21 +128,33 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
/// Parses the contents of one source and returns the messages to report for it.
///
/// If `enso_parser::metadata::parse` splits the input, only the code part it returns is parsed.
/// Unless `smoke_test` is set, the messages gathered by `collect_messages` are included. In
/// either mode, an internal error is reported when the code reproduced from the AST differs from
/// the code that was parsed.
fn check_file(
    file: WithSourcePath<String>,
    parser: &mut enso_parser::Parser,
    smoke_test: bool,
) -> WithSourcePath<Vec<String>> {
    let WithSourcePath { path, value: source } = file;
    let code = match enso_parser::metadata::parse(source.as_str()) {
        Some((_meta, body)) => body,
        None => source.as_str(),
    };
    let ast = parser.run(code);
    let mut messages = match smoke_test {
        true => vec![],
        false => collect_messages(&ast, &path),
    };
    // The AST must reproduce its input exactly; anything else is a parser bug.
    let roundtrips = ast.code() == code;
    if !roundtrips {
        messages.push(format!(
            "Internal error: AST does not match source code. File: {}",
            path.display()
        ));
    }
    WithSourcePath { path, value: messages }
}
fn collect_messages(ast: &enso_parser::syntax::Tree, path: impl AsRef<Path>) -> Vec<String> {
let errors = RefCell::new(vec![]);
let warnings = RefCell::new(vec![]);
ast.visit_trees(|tree| {
match &*tree.variant {
match &tree.variant {
enso_parser::syntax::tree::Variant::Invalid(err) => {
let error = format!("{}: {}", err.error.message, tree.code());
errors.borrow_mut().push((error, tree.span.clone()));
}
enso_parser::syntax::tree::Variant::OprApp(enso_parser::syntax::tree::OprApp {
enso_parser::syntax::tree::Variant::OprApp(box enso_parser::syntax::tree::OprApp {
opr: Err(e),
..
}) => {
@ -156,15 +187,19 @@ fn check_file(
warnings.borrow_mut().sort_unstable_by_key(|(_, span)| sort_key(span));
let mut messages = vec![];
for (message, span) in &*errors.borrow() {
messages.push(format!("E {}: {}", fmt_location(&file.path, span), &message));
messages.push(format!("E {}: {}", fmt_location(path.as_ref().display(), span), &message));
}
for (warning, span) in &*warnings.borrow() {
messages.push(format!("W {}: {}", fmt_location(&file.path, span), warning.message()));
messages.push(format!(
"W {}: {}",
fmt_location(path.as_ref().display(), span),
warning.message()
));
}
WithSourcePath { path: file.path, value: messages }
messages
}
fn fmt_location(path: &str, span: &enso_parser::source::Span) -> String {
fn fmt_location(path: impl Display, span: &enso_parser::source::Span) -> String {
let start = span.left_offset.code.position_after().start;
let end = start + span.code_length;
format!("{path} {}:{}-{}:{}", start.line + 1, start.col16, end.line + 1, end.col16)

View File

@ -36,8 +36,19 @@ where T: serde::Serialize + Reflect {
let code: Box<str> = Box::from(code);
let mut to_s_expr = ToSExpr::new(&graph);
to_s_expr.mapper(ast_ty, strip_hidden_fields);
let stringish_tokens =
vec![Digits::reflect(), NumberBase::reflect(), Operator::reflect(), TextSection::reflect()];
let stringish_tokens = vec![
Digits::reflect(),
NumberBase::reflect(),
TextSection::reflect(),
Operator::reflect(),
TypeAnnotationOperator::reflect(),
ArrowOperator::reflect(),
AutoscopeOperator::reflect(),
UnaryOperator::reflect(),
LambdaOperator::reflect(),
DotOperator::reflect(),
SuspensionOperator::reflect(),
];
let stringish_tokens = stringish_tokens.into_iter().map(|t| rust_to_meta[&t.id]);
let skip_tokens = vec![
SuspendedDefaultArguments::reflect(),
@ -48,16 +59,25 @@ where T: serde::Serialize + Reflect {
TextStart::reflect(),
Wildcard::reflect(),
Private::reflect(),
TypeKeyword::reflect(),
ForeignKeyword::reflect(),
CaseKeyword::reflect(),
OfKeyword::reflect(),
AnnotationOperator::reflect(),
AssignmentOperator::reflect(),
];
skip_tokens.into_iter().for_each(|token| to_s_expr.skip(rust_to_meta[&token.id]));
let ident_token = rust_to_meta[&Ident::reflect().id];
let identish_tokens = vec![Ident::reflect(), AllKeyword::reflect()];
let identish_tokens = identish_tokens.into_iter().map(|t| rust_to_meta[&t.id]);
let text_escape_token = rust_to_meta[&TextEscape::reflect().id];
let token_to_str = move |token: Value| {
let range = token_code_range(&token, base);
code[range].to_owned().into_boxed_str()
};
let token_to_str_ = token_to_str.clone();
to_s_expr.mapper(ident_token, move |token| Value::symbol(token_to_str_(token)));
for token in identish_tokens {
let token_to_str_ = token_to_str.clone();
to_s_expr.mapper(token, move |token| Value::symbol(token_to_str_(token)));
}
for token in stringish_tokens {
let token_to_str_ = token_to_str.clone();
to_s_expr.mapper(token, move |token| Value::string(token_to_str_(token)));
@ -66,22 +86,6 @@ where T: serde::Serialize + Reflect {
Value::Cons(cons) => cons.into_pair().0,
_ => panic!(),
};
let simplify_case = |list| {
let list = strip_hidden_fields(list);
let (_, list) = match list {
Value::Cons(cons) => cons.into_pair(),
_ => panic!(),
};
let (expression, list) = match list {
Value::Cons(cons) => cons.into_pair(),
_ => panic!(),
};
let (_, list) = match list {
Value::Cons(cons) => cons.into_pair(),
_ => panic!(),
};
Value::cons(expression, list)
};
let simplify_escape = |mut list| {
let mut last = None;
while let Value::Cons(cons) = list {
@ -98,11 +102,9 @@ where T: serde::Serialize + Reflect {
};
let line = rust_to_meta[&tree::block::Line::reflect().id];
let operator_line = rust_to_meta[&tree::block::OperatorLine::reflect().id];
let case = rust_to_meta[&tree::CaseOf::reflect().id];
let invalid = rust_to_meta[&tree::Invalid::reflect().id];
to_s_expr.mapper(line, into_car);
to_s_expr.mapper(operator_line, into_car);
to_s_expr.mapper(case, simplify_case);
to_s_expr.mapper(invalid, strip_invalid);
to_s_expr.mapper(text_escape_token, simplify_escape);
tuplify(to_s_expr.value(ast_ty, &value))
@ -199,26 +201,28 @@ pub fn validate_spans(
tree: &enso_parser::syntax::tree::Tree,
expected_span: std::ops::Range<u32>,
locations: &mut LocationCheck,
) {
) -> Result<(), String> {
let mut sum_span = None;
fn concat<T: PartialEq + std::fmt::Debug + Copy>(
a: &Option<std::ops::Range<T>>,
b: &std::ops::Range<T>,
) -> std::ops::Range<T> {
match a {
) -> Result<std::ops::Range<T>, String> {
Ok(match a {
Some(a) => {
assert_eq!(a.end, b.start);
if a.end != b.start {
return Err(format!("{:?} != {:?}", &a.end, b.start));
}
a.start..b.end
}
None => b.clone(),
}
})
}
sum_span = Some(concat(&sum_span, &tree.span.left_offset.code.range()));
sum_span = Some(concat(&sum_span, &tree.span.left_offset.code.range())?);
tree.visit_items(|item| match item {
enso_parser::syntax::item::Ref::Token(token) => {
if !(token.left_offset.is_empty() && token.code.is_empty()) {
sum_span = Some(concat(&sum_span, &token.left_offset.code.range()));
sum_span = Some(concat(&sum_span, &token.code.range()));
sum_span = Some(concat(&sum_span, &token.left_offset.code.range()).unwrap());
sum_span = Some(concat(&sum_span, &token.code.range()).unwrap());
}
let left_offset = token.left_offset.code.range();
let code = token.code.range();
@ -226,10 +230,10 @@ pub fn validate_spans(
}
enso_parser::syntax::item::Ref::Tree(tree) => {
let children_span =
concat(&Some(tree.span.left_offset.code.range()), &tree.span.range());
concat(&Some(tree.span.left_offset.code.range()), &tree.span.range()).unwrap();
let children_span_ = children_span.start.utf16..children_span.end.utf16;
validate_spans(tree, children_span_, locations);
sum_span = Some(concat(&sum_span, &children_span));
validate_spans(tree, children_span_, locations).unwrap();
sum_span = Some(concat(&sum_span, &children_span).unwrap());
let left_offset = tree.span.left_offset.code.range();
let code = tree.span.range();
locations.extend(&[left_offset.start, left_offset.end, code.start, code.end]);
@ -242,4 +246,5 @@ pub fn validate_spans(
let sum_span = sum_span.start.utf16..sum_span.end.utf16;
assert_eq!(sum_span, expected_span);
}
Ok(())
}

View File

@ -33,7 +33,9 @@ fn check_file(path: &str, mut code: &str) {
let ast = enso_parser::Parser::new().run(code);
let expected_span = 0..(code.encode_utf16().count() as u32);
let mut locations = enso_parser::source::code::debug::LocationCheck::new();
enso_parser_debug::validate_spans(&ast, expected_span, &mut locations);
enso_parser_debug::validate_spans(&ast, expected_span, &mut locations)
.map_err(|e| format!("{e} in {path}"))
.unwrap();
for (parsed, original) in ast.code().lines().zip(code.lines()) {
assert_eq!(parsed, original, "Bug: dropped tokens, while parsing: {path}");
}

File diff suppressed because it is too large Load Diff

View File

@ -51,7 +51,7 @@ fn extract_docs(_filename: &str, mut code: &str) -> Vec<String> {
}
let ast = enso_parser::Parser::new().run(code);
let docs = RefCell::new(vec![]);
ast.visit_trees(|tree| match &*tree.variant {
ast.visit_trees(|tree| match &tree.variant {
enso_parser::syntax::tree::Variant::Documented(doc) => {
docs.borrow_mut().push(doc.documentation.clone());
}

View File

@ -44,10 +44,11 @@ public final class Parser implements AutoCloseable {
}
System.load(path.getAbsolutePath());
} catch (NullPointerException | IllegalArgumentException | LinkageError e) {
if (searchFromDirToTop(e, root, "target", "rust", "debug", name)) {
if (searchFromDirToTop(e, root, "target", "rust", "parser-jni", name)) {
return;
}
if (searchFromDirToTop(e, new File(".").getAbsoluteFile(), "target", "rust", "debug", name)) {
if (searchFromDirToTop(
e, new File(".").getAbsoluteFile(), "target", "rust", "parser-jni", name)) {
return;
}
throw new IllegalStateException("Cannot load parser from " + root, e);

View File

@ -11,6 +11,7 @@ use crate::syntax::*;
use crate::source::code::Length;
use crate::source::code::Location;
use crate::syntax::token::Codepoint;
use crate::syntax::token::OperatorProperties;
use std::str;
@ -82,15 +83,15 @@ pattern_impl_for_char_slice!(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
/// syntax errors.
#[derive(Debug, Deref, DerefMut)]
#[allow(missing_docs)]
pub struct Lexer<'s> {
pub struct Lexer<'s, Inner> {
#[deref]
#[deref_mut]
state: LexerState,
input: &'s str,
iterator: str::CharIndices<'s>,
output: Vec<Token<'s>>,
/// Memory for storing tokens, reused as an optimization.
token_storage: VecAllocation<Token<'s>>,
token_storage: VecAllocation<token::Newline<'s>>,
inner: Inner,
}
/// Internal state of the [`Lexer`].
@ -103,7 +104,6 @@ pub struct LexerState {
last_spaces_visible_offset: VisibleOffset,
current_block_indent: VisibleOffset,
block_indent_stack: Vec<VisibleOffset>,
internal_error: Option<String>,
stack: Vec<State>,
}
@ -127,15 +127,13 @@ struct Mark<'s> {
offset: Offset<'s>,
}
impl<'s> Lexer<'s> {
impl<'s, Inner: TokenConsumer<'s>> Lexer<'s, Inner> {
/// Constructor.
pub fn new(input: &'s str) -> Self {
pub fn new(input: &'s str, inner: Inner) -> Self {
let iterator = input.char_indices();
let capacity = input.len() / AVERAGE_TOKEN_LEN;
let output = Vec::with_capacity(capacity);
let state = default();
let token_storage = default();
Self { input, iterator, output, state, token_storage }.init()
Self { input, iterator, state, token_storage, inner }.init()
}
fn init(mut self) -> Self {
@ -230,18 +228,6 @@ impl<'s> Lexer<'s> {
Token(offset, Code::empty(start), elem)
}
/// Push the [`token`] to the result stream.
#[inline(always)]
fn submit_token(&mut self, token: Token<'s>) {
self.output.push(token);
}
/// Push the [`tokens`] to the result stream.
#[inline(always)]
fn submit_tokens<T: IntoIterator<Item = Token<'s>>>(&mut self, tokens: T) {
self.output.extend(tokens);
}
/// Start a new block.
#[inline(always)]
fn start_block(&mut self, new_indent: VisibleOffset) {
@ -267,7 +253,7 @@ impl<'s> Lexer<'s> {
// === Basic Parsers ===
// =====================
impl<'s> Lexer<'s> {
impl<'s, Inner: TokenConsumer<'s>> Lexer<'s, Inner> {
/// Consume the next character, unconditionally.
#[inline(always)]
fn take_next(&mut self) -> bool {
@ -361,7 +347,7 @@ fn is_space_char(t: char) -> bool {
space_char_visible_size(t).is_some()
}
impl<'s> Lexer<'s> {
impl<'s, Inner: TokenConsumer<'s>> Lexer<'s, Inner> {
/// Consume a visible space character and return its visible offset.
#[inline(always)]
fn space(&mut self) -> Option<VisibleOffset> {
@ -442,7 +428,7 @@ fn decode_hexadecimal_digit(c: char) -> Option<u8> {
})
}
impl<'s> Lexer<'s> {
impl<'s, Inner: TokenConsumer<'s>> Lexer<'s, Inner> {
#[inline(always)]
fn take_rest_of_line(&mut self) {
self.take_while(|t| !is_newline_char(t))
@ -598,7 +584,7 @@ impl token::Variant {
}
}
impl<'s> Lexer<'s> {
impl<'s, Inner: TokenConsumer<'s>> Lexer<'s, Inner> {
/// Parse an identifier.
fn ident(&mut self) {
if let Some(token) = self.token(|this| {
@ -606,12 +592,14 @@ impl<'s> Lexer<'s> {
this.take_while_1(is_ident_char);
}
}) {
if token.left_offset.is_empty() {
self.unspaced_term();
if token.code == "private" {
let token = token.with_variant(token::Variant::private());
self.inner.push_token(token);
return;
}
let tp = token::Variant::new_ident_or_wildcard_unchecked(&token.code);
let token = token.with_variant(tp);
self.submit_token(token);
self.inner.push_token(token);
}
}
@ -635,7 +623,7 @@ impl<'s> Lexer<'s> {
// === Operator ===
// ================
impl<'s> Lexer<'s> {
impl<'s, Inner: TokenConsumer<'s>> Lexer<'s, Inner> {
/// Parse an operator.
fn operator(&mut self) {
let token = self.token(|this| {
@ -656,171 +644,73 @@ impl<'s> Lexer<'s> {
"+-" => {
let (left, right) = token.split_at(Length::of("+"));
let lhs = analyze_operator(&left.code);
self.submit_token(left.with_variant(token::Variant::operator(lhs)));
self.inner.push_token(left.with_variant(lhs));
// The `-` in this case is not identical to a free `-`: It is only allowed a
// unary interpretation.
let rhs = token::OperatorProperties::new()
.with_unary_prefix_mode(token::Precedence::unary_minus());
self.submit_token(right.with_variant(token::Variant::operator(rhs)));
self.inner.push_token(right.with_variant(token::Variant::negation_operator()));
}
// Composed of operator characters, but not an operator node.
"..." => {
let token = token.with_variant(token::Variant::suspended_default_arguments());
self.submit_token(token);
}
// Decimal vs. method-application must be distinguished before parsing because they
// have different precedences; this is a special case here because the distinction
// requires lookahead.
"." if self.last_spaces_visible_offset.width_in_spaces == 0
&& let Some(char) = self.current_char
&& char.is_ascii_digit() =>
{
let opr = token::OperatorProperties::new()
.with_binary_infix_precedence(81)
.as_decimal();
let token = token.with_variant(token::Variant::operator(opr));
self.submit_token(token);
}
// Operator-identifiers.
_ if self.prev_token_is_dot_operator() => {
let properties = analyze_operator(&token.code);
if properties.is_compile_time_operation() {
self.submit_token(token.with_variant(token::Variant::operator(properties)));
} else {
self.submit_token(
token.with_variant(token::Variant::operator_ident().into()),
);
}
}
// The unary-negation operator binds tighter to numeric literals than other
// expressions.
"-" if self.last_spaces_visible_offset.width_in_spaces == 0
&& let Some(char) = self.current_char
&& char.is_ascii_digit() =>
{
let opr = token::OperatorProperties::new()
.with_unary_prefix_mode(token::Precedence::unary_minus_numeric_literal())
.with_binary_infix_precedence(15);
let token = token.with_variant(token::Variant::operator(opr));
self.submit_token(token);
self.inner.push_token(token);
}
// Normally-structured operator.
_ => {
let tp = token::Variant::operator(analyze_operator(&token.code));
let tp = analyze_operator(&token.code);
let token = token.with_variant(tp);
self.submit_token(token);
self.inner.push_token(token);
}
}
}
}
/// Report whether the most recently emitted token is an operator whose properties mark it as
/// the dot operator. Returns `false` when no token has been emitted yet or when the last
/// token is not an operator.
fn prev_token_is_dot_operator(&self) -> bool {
    matches!(
        self.output.last(),
        Some(Token { variant: token::Variant::Operator(previous), .. })
            if previous.properties.is_dot()
    )
}
fn unspaced_term(&mut self) {
if let Some(Token {
variant:
variant @ token::Variant::Ident(token::variant::Ident {
is_operator_lexically: true,
..
}),
..
}) = self.output.last_mut()
{
*variant = token::Variant::invalid();
}
}
}
// === Precedence ===
fn analyze_operator(token: &str) -> token::OperatorProperties {
let mut operator = token::OperatorProperties::new();
/// Classify the source text of an operator token into a token variant.
///
/// Each spelling listed below is mapped to its own dedicated variant constructor; every other
/// spelling is mapped to the generic `token::Variant::operator()`.
fn analyze_operator(token: &str) -> token::Variant {
match token {
// The Rust literal "\\" is the single character `\`.
"\\" => token::Variant::lambda_operator(),
"~" => token::Variant::suspension_operator(),
".." => token::Variant::autoscope_operator(),
"@" => token::Variant::annotation_operator(),
"=" => token::Variant::assignment_operator(),
":" => token::Variant::type_annotation_operator(),
"->" => token::Variant::arrow_operator(),
"," => token::Variant::comma_operator(),
"." => token::Variant::dot_operator(),
// Anything not listed above gets the generic operator variant.
_ => token::Variant::operator(),
}
}
/// Analyze an operator that has been determined not to be any syntactically-special operator.
///
/// Returns the [`OperatorProperties`] for the operator spelled `token`. The spellings listed in
/// the `match` have fixed properties; any other spelling is delegated to
/// `analyze_user_operator`.
pub fn analyze_non_syntactic_operator(token: &str) -> OperatorProperties {
match token {
// `-` is the only operator here that is given a unary-prefix mode in addition to its
// binary-infix precedence.
"-" => OperatorProperties::value()
.with_unary_prefix_mode(token::Precedence::unary_minus())
.with_binary_infix_precedence(15),
"!" => OperatorProperties::value().with_binary_infix_precedence(3),
// The Rust literal "\\\\" is the two-character operator `\\`.
"||" | "\\\\" | "&&" => OperatorProperties::value().with_binary_infix_precedence(4),
// The following three groups are built from `OperatorProperties::functional()` rather than
// `OperatorProperties::value()`.
">>" | "<<" => OperatorProperties::functional().with_binary_infix_precedence(5),
"|>" | "|>>" => OperatorProperties::functional().with_binary_infix_precedence(6),
// Same precedence as `|>` / `|>>`, but marked right-associative.
"<|" | "<<|" =>
OperatorProperties::functional().with_binary_infix_precedence(6).as_right_associative(),
"<=" | ">=" => OperatorProperties::value().with_binary_infix_precedence(14),
"==" | "!=" => OperatorProperties::value().with_binary_infix_precedence(5),
// No fixed entry: derive the properties from the operator's spelling.
_ => analyze_user_operator(token),
}
}
fn analyze_user_operator(token: &str) -> OperatorProperties {
let mut operator = OperatorProperties::new();
let has_right_arrow = token.ends_with("->");
let has_left_arrow = token.starts_with("<-");
if has_right_arrow && !has_left_arrow {
operator = operator.as_right_associative();
}
if token.ends_with('=') && !token.bytes().all(|c| c == b'=') {
match token {
// Inclusive comparison operators are not modifiers.
">=" | "<=" => (),
// Any other operator ending with "=" is a modifier.
_ => operator = operator.as_modifier(),
}
}
match token {
// Operators that can be unary.
"\\" =>
return operator
.with_unary_prefix_mode(token::Precedence::min_valid())
.as_compile_time_operation(),
"~" =>
return operator
.with_unary_prefix_mode(token::Precedence::max())
.as_compile_time_operation()
.as_suspension(),
".." =>
return operator
.with_unary_prefix_mode(token::Precedence::min_valid())
.as_compile_time_operation()
.as_autoscope(),
"@" =>
return operator
.with_unary_prefix_mode(token::Precedence::max())
.as_compile_time_operation()
.as_annotation(),
"-" =>
return operator
.as_value_operation()
.with_unary_prefix_mode(token::Precedence::unary_minus())
.with_binary_infix_precedence(15),
// "There are a few operators with the lowest precedence possible."
// - These 3 "consume everything to the right".
"=" =>
return operator
.with_binary_infix_precedence(1)
.as_right_associative()
.with_lhs_section_termination(operator::SectionTermination::Unwrap)
.as_assignment(),
":" =>
return operator
.with_binary_infix_precedence(2)
.as_right_associative()
.with_lhs_section_termination(operator::SectionTermination::Reify)
.as_compile_time_operation()
.as_type_annotation(),
"->" =>
return operator
.with_binary_infix_precedence(2)
.as_right_associative()
.with_lhs_section_termination(operator::SectionTermination::Unwrap)
.as_compile_time_operation()
.as_arrow(),
"!" => return operator.with_binary_infix_precedence(3).as_value_operation(),
"||" | "\\\\" | "&&" =>
return operator.with_binary_infix_precedence(4).as_value_operation(),
">>" | "<<" => return operator.with_binary_infix_precedence(5),
"|>" | "|>>" => return operator.with_binary_infix_precedence(6),
"<|" | "<<|" => return operator.with_binary_infix_precedence(6).as_right_associative(),
// Other special operators.
"<=" | ">=" => return operator.with_binary_infix_precedence(14).as_value_operation(),
"==" | "!=" => return operator.with_binary_infix_precedence(5).as_value_operation(),
"," =>
return operator
.with_binary_infix_precedence(1)
.as_compile_time_operation()
.as_special()
.as_sequence(),
"." => return operator.with_binary_infix_precedence(80).as_dot(),
_ => (),
// Note that inclusive comparison operators (matched above) are not modifiers.
operator = operator.as_modifier();
}
// "The precedence of all other operators is determined by the operator's Precedence Character:"
let mut precedence_char = None;
@ -845,7 +735,7 @@ fn analyze_operator(token: &str) -> token::OperatorProperties {
'^' => 17,
_ => 18,
};
let operator = operator.with_binary_infix_precedence(binary);
operator = operator.with_binary_infix_precedence(binary);
if !has_right_arrow && !has_left_arrow {
operator.as_value_operation()
} else {
@ -859,14 +749,22 @@ fn analyze_operator(token: &str) -> token::OperatorProperties {
// === Symbols ===
// ===============
impl<'s> Lexer<'s> {
impl<'s, Inner: TokenConsumer<'s> + GroupHierarchyConsumer<'s>> Lexer<'s, Inner> {
/// Parse a symbol.
fn symbol(&mut self) {
if let Some(token) = self.token(|this| this.take_1(&['(', '{', '['])) {
self.submit_token(token.with_variant(token::Variant::open_symbol()));
if let Some(token) = self.token(|this| this.take_1('(')) {
self.inner.start_group(token.with_variant(token::variant::OpenSymbol()));
return;
}
if let Some(token) = self.token(|this| this.take_1(&[')', '}', ']'])) {
self.submit_token(token.with_variant(token::Variant::close_symbol()));
if let Some(token) = self.token(|this| this.take_1(')')) {
self.inner.end_group(token.with_variant(token::variant::CloseSymbol()));
return;
}
if let Some(token) = self.token(|this| this.take_1(&['{', '['])) {
self.inner.push_token(token.with_variant(token::Variant::open_symbol()));
}
if let Some(token) = self.token(|this| this.take_1(&['}', ']'])) {
self.inner.push_token(token.with_variant(token::Variant::close_symbol()));
}
}
}
@ -877,30 +775,22 @@ impl<'s> Lexer<'s> {
// === Number ===
// ==============
impl<'s> Lexer<'s> {
impl<'s, Inner: TokenConsumer<'s>> Lexer<'s, Inner> {
/// Parse a number.
fn number(&mut self) {
let mut base = None;
let token = self.token(|this| {
let mut old_hex_chars_matched = 0;
let mut old_bin_chars_matched = 0;
let mut new_based_chars_matched = 0;
let mut base_chars_matched = 0;
match this.current_char {
Some('0') => new_based_chars_matched = 1,
Some('1') => old_hex_chars_matched = 1,
Some('2') => old_bin_chars_matched = 1,
Some('0') => base_chars_matched = 1,
Some(d) if is_decimal_digit(d) => (),
_ => return,
}
this.next_input_char();
let mut prev_was_underscore = false;
match this.current_char {
Some('_') if old_bin_chars_matched == 1 => base = Some(token::Base::Binary),
Some('_') => prev_was_underscore = true,
Some('b') if new_based_chars_matched == 1 => base = Some(token::Base::Binary),
Some('o') if new_based_chars_matched == 1 => base = Some(token::Base::Octal),
Some('x') if new_based_chars_matched == 1 => base = Some(token::Base::Hexadecimal),
Some('6') if old_hex_chars_matched == 1 => old_hex_chars_matched = 2,
Some('b') if base_chars_matched == 1 => base = Some(token::Base::Binary),
Some('o') if base_chars_matched == 1 => base = Some(token::Base::Octal),
Some('x') if base_chars_matched == 1 => base = Some(token::Base::Hexadecimal),
Some(d) if is_decimal_digit(d) => (),
_ => return,
}
@ -908,19 +798,7 @@ impl<'s> Lexer<'s> {
if base.is_some() {
return;
}
let mut was_underscore = false;
match this.current_char {
Some('_') if old_hex_chars_matched == 2 => {
base = Some(token::Base::Hexadecimal);
this.next_input_char();
return;
}
Some('_') if !prev_was_underscore => was_underscore = true,
Some(d) if is_decimal_digit(d) => (),
_ => return,
}
prev_was_underscore = was_underscore;
this.next_input_char();
let mut prev_was_underscore = false;
loop {
let mut was_underscore = false;
match this.current_char {
@ -933,33 +811,18 @@ impl<'s> Lexer<'s> {
}
});
if let Some(token) = token {
if token.left_offset.is_empty() {
self.unspaced_term();
}
if let Some(base) = base {
self.submit_token(token.with_variant(token::Variant::number_base()));
let after_base = self.current_offset;
self.inner.push_token(token.with_variant(token::Variant::number_base()));
if let Some(digits) = match base {
token::Base::Binary => self.token(|this| this.take_while(is_binary_digit)),
token::Base::Octal => self.token(|this| this.take_while(is_octal_digit)),
token::Base::Hexadecimal =>
self.token(|this| this.take_while(is_hexadecimal_digit)),
} {
// The base and the digits are separate tokens so that they can have separate
// spans. A pseudo-token binds them together tightly so that the parser can
// assemble them into one number node.
let joiner = token::OperatorProperties::new()
.with_binary_infix_precedence(u32::MAX)
.as_token_joiner();
self.submit_token(Token(
Code::empty(after_base),
Code::empty(after_base),
token::Variant::operator(joiner),
));
self.submit_token(digits.with_variant(token::Variant::digits(Some(base))));
self.inner.push_token(digits.with_variant(token::Variant::digits(Some(base))));
}
} else {
self.submit_token(token.with_variant(token::Variant::digits(None)));
self.inner.push_token(token.with_variant(token::Variant::digits(None)));
}
}
}
@ -971,7 +834,9 @@ impl<'s> Lexer<'s> {
// === Text ===
// ============
impl<'s> Lexer<'s> {
impl<'s, Inner> Lexer<'s, Inner>
where Inner: TokenConsumer<'s> + BlockHierarchyConsumer + NewlineConsumer<'s>
{
/// Read a text literal.
fn text(&mut self) {
let (quote_char, text_type) = match self.current_char {
@ -982,15 +847,12 @@ impl<'s> Lexer<'s> {
self.end_splice(state);
} else {
let token = self.token(|this| this.take_next()).unwrap();
self.submit_token(token.with_variant(token::Variant::invalid()));
self.inner.push_token(token.with_variant(token::Variant::invalid()));
}
return;
}
_ => return,
};
if self.last_spaces_visible_offset == VisibleOffset(0) {
self.unspaced_term();
}
let indent = self.current_block_indent;
let open_quote_start = self.mark();
self.take_next();
@ -1021,17 +883,17 @@ impl<'s> Lexer<'s> {
close_quote_start.clone(),
token::Variant::text_start(),
);
self.submit_token(token);
self.inner.push_token(token);
let token =
self.make_token(close_quote_start, close_quote_end, token::Variant::text_end());
self.submit_token(token);
self.inner.push_token(token);
}
} else {
// One quote followed by non-quote character: Inline quote.
let open_quote_end = self.mark_without_whitespace();
let token =
self.make_token(open_quote_start, open_quote_end, token::Variant::text_start());
self.submit_token(token);
self.inner.push_token(token);
self.inline_quote(quote_char, text_type);
}
self.spaces_after_lexeme();
@ -1045,12 +907,12 @@ impl<'s> Lexer<'s> {
) {
let open_quote_end = self.mark_without_whitespace();
let token = self.make_token(open_quote_start, open_quote_end, token::Variant::text_start());
self.submit_token(token);
self.inner.push_token(token);
let mut initial_indent = None;
if text_type.expects_initial_newline()
&& let Some(newline) = self.line_break()
{
self.submit_token(newline.with_variant(token::Variant::text_initial_newline()));
self.inner.push_token(newline.with_variant(token::Variant::text_initial_newline()));
if self.last_spaces_visible_offset > block_indent {
initial_indent = self.last_spaces_visible_offset.into();
}
@ -1072,7 +934,7 @@ impl<'s> Lexer<'s> {
let splice_quote_end = self.mark_without_whitespace();
let token =
self.make_token(splice_quote_start, splice_quote_end, token::Variant::close_symbol());
self.submit_token(token);
self.inner.push_token(token);
match state {
State::InlineText => self.inline_quote('\'', TextType::Interpolated),
State::MultilineText { .. } => {
@ -1120,14 +982,13 @@ impl<'s> Lexer<'s> {
// If `token.code.is_empty()`, we ignore the `token.left_offset` here even if
// it is non-empty, because it will be attached to the newline token.
if !token.code.is_empty() {
self.submit_token(token);
self.inner.push_token(token);
} else {
before_newline = text_start;
}
self.advance_line_pos();
let newline_end = self.mark_without_whitespace();
let token =
self.make_token(before_newline, newline_end, token::Variant::newline());
let token = self.make_newline(before_newline, newline_end);
newlines.push(token);
if let Some(initial) = *initial_indent {
let trim = std::cmp::max(initial, *block_indent + MIN_TEXT_TRIM);
@ -1155,19 +1016,19 @@ impl<'s> Lexer<'s> {
let offset = Offset(VisibleOffset(0), location.clone());
Token(offset, location, token::Variant::text_end())
};
self.submit_token(text_end);
self.end_blocks(indent, newlines.first().as_ref().unwrap());
self.submit_tokens(newlines);
self.inner.push_token(text_end);
self.end_blocks(indent);
newlines.into_iter().for_each(|newline| self.inner.push_newline(newline));
if self.current_offset == text_start.location {
self.last_spaces_visible_offset = text_start.offset.visible;
self.last_spaces_offset = text_start.offset.code.range().start;
}
return TextEndedAt::End;
}
let newlines = newlines
newlines
.into_iter()
.map(|token| token.with_variant(token::Variant::text_newline()));
self.submit_tokens(newlines);
.map(|token| token.with_variant(token::Variant::text_newline()))
.for_each(|newline| self.inner.push_token(newline));
continue;
}
}
@ -1183,7 +1044,7 @@ impl<'s> Lexer<'s> {
if token.code.is_empty() {
backslash_start = text_start.clone();
} else {
self.submit_token(token);
self.inner.push_token(token);
}
self.last_spaces_offset = self.current_offset;
text_start = self.text_escape(backslash_start, char);
@ -1202,7 +1063,7 @@ impl<'s> Lexer<'s> {
if token.code.is_empty() {
splice_quote_start = text_start;
} else {
self.submit_token(token);
self.inner.push_token(token);
}
self.take_next();
let splice_quote_end = self.mark_without_whitespace();
@ -1211,7 +1072,7 @@ impl<'s> Lexer<'s> {
splice_quote_end,
token::Variant::open_symbol(),
);
self.submit_token(token);
self.inner.push_token(token);
self.stack.push(state);
self.last_spaces_offset = self.current_offset;
return TextEndedAt::Splice;
@ -1221,7 +1082,7 @@ impl<'s> Lexer<'s> {
let text_end = self.mark_without_whitespace();
let token = self.make_token(text_start, text_end.clone(), token::Variant::text_section());
if !(token.code.is_empty() && token.left_offset.code.is_empty()) {
self.submit_token(token);
self.inner.push_token(token);
}
let end_token = if self.current_char == closing_char {
self.take_next();
@ -1233,7 +1094,7 @@ impl<'s> Lexer<'s> {
Code::empty(self.current_offset),
))
};
self.submit_token(end_token);
self.inner.push_token(end_token);
TextEndedAt::End
}
@ -1271,7 +1132,7 @@ impl<'s> Lexer<'s> {
sequence_end.clone(),
token::Variant::text_escape(value.map(Codepoint::from_u32).unwrap_or_default()),
);
self.submit_token(token);
self.inner.push_token(token);
sequence_end
} else {
let value = match char {
@ -1297,7 +1158,7 @@ impl<'s> Lexer<'s> {
escape_end.clone(),
token::Variant::text_escape(value.map(Codepoint::from_char).unwrap_or_default()),
);
self.submit_token(token);
self.inner.push_token(token);
escape_end
}
}
@ -1329,6 +1190,18 @@ impl<'s> Lexer<'s> {
let end8 = usize_from(end.utf8);
Token(offset, Code::from_str_at_location(&self.input[start8..end8], start), variant)
}
/// Build a newline token covering the input between two marks.
///
/// The token's code is the slice of `self.input` from the UTF-8 position of `from` up to the
/// UTF-8 position of `to`, located at `from`'s location; its left offset is the offset
/// recorded in `from`.
fn make_newline(&self, from: Mark<'s>, to: Mark<'s>) -> token::Newline<'s> {
    let Mark { location: begin, offset: left_offset } = from;
    let byte_range = usize_from(begin.utf8)..usize_from(to.location.utf8);
    let code = Code::from_str_at_location(&self.input[byte_range], begin);
    Token(left_offset, code, token::variant::Newline())
}
}
#[derive(PartialEq, Eq)]
@ -1360,12 +1233,14 @@ impl TextType {
// === Comments ===
// ================
impl<'s> Lexer<'s> {
impl<'s, Inner> Lexer<'s, Inner>
where Inner: TokenConsumer<'s> + BlockHierarchyConsumer + NewlineConsumer<'s>
{
#[inline(always)]
fn submit_line_as(&mut self, kind: token::Variant) {
let token = self.token(|this| this.take_rest_of_line());
if let Some(token) = token {
self.submit_token(token.with_variant(kind));
self.inner.push_token(token.with_variant(kind));
}
}
@ -1380,7 +1255,7 @@ impl<'s> Lexer<'s> {
} else {
self.take_rest_of_line();
let end_line = self.mark();
let token = self.make_token(start, end_line, token::Variant::newline());
let token = self.make_newline(start, end_line);
self.newlines_starting_with(token.into());
}
}
@ -1393,8 +1268,13 @@ impl<'s> Lexer<'s> {
// === Block ===
// =============
impl<'s> Lexer<'s> {
impl<'s, Inner> Lexer<'s, Inner>
where Inner: TokenConsumer<'s> + BlockHierarchyConsumer + NewlineConsumer<'s>
{
fn line_break(&mut self) -> Option<Token<'s, ()>> {
if let Some(state) = self.stack.pop() {
self.end_splice(state);
}
let token = self.token(|this| {
let matched = if this.take_1('\n') {
true
@ -1420,30 +1300,25 @@ impl<'s> Lexer<'s> {
self.newlines_starting_with(None);
}
fn newlines_starting_with(&mut self, first: Option<Token<'s>>) {
fn newlines_starting_with(&mut self, first: Option<token::Newline<'s>>) {
let mut newlines = self.token_storage.take();
newlines.extend(first);
while let Some(token) = self.line_break() {
newlines.push(token.with_variant(token::Variant::newline()));
newlines.push(token.with_variant(token::variant::Newline()));
}
if let Some(first) = newlines.first() {
if !newlines.is_empty() {
let block_indent = self.last_spaces_visible_offset;
if block_indent > self.current_block_indent {
let block_start = {
let location = first.left_offset.code.position_before();
let offset = Offset(VisibleOffset(0), location.clone());
Token(offset, location, token::Variant::block_start())
};
self.submit_token(block_start);
self.inner.start_block();
self.start_block(block_indent);
}
self.end_blocks(block_indent, newlines.first().as_ref().unwrap());
newlines.drain(..).for_each(|token| self.submit_token(token));
self.end_blocks(block_indent);
newlines.drain(..).for_each(|token| self.inner.push_newline(token));
}
self.token_storage.set_from(newlines);
}
fn end_blocks(&mut self, block_indent: VisibleOffset, newline: &Token<'s>) {
fn end_blocks(&mut self, block_indent: VisibleOffset) {
while block_indent < self.current_block_indent {
let Some(previous_indent) = self.block_indent_stack.last().copied() else {
// If the file starts at indent > 0, we treat that as the root indent level
@ -1458,12 +1333,7 @@ impl<'s> Lexer<'s> {
break;
}
self.end_block();
let block_end = {
let location = newline.left_offset.code.position_before();
let offset = Offset(VisibleOffset(0), location.clone());
Token(offset, location, token::Variant::block_end())
};
self.submit_token(block_end);
self.inner.end_block();
}
}
}
@ -1474,39 +1344,56 @@ impl<'s> Lexer<'s> {
// === Glue ===
// ============
/// All defined parsers, in the order in which they should be tried. The order is determined by
/// two factors:
/// 1. The most common parsers should come first, to minimize the number of comparisons made for
/// each new character.
/// 2. Some parsers could consume input that should be qualified as something else. Such parsers
/// must run after the parsers that claim that input, so that tokens are recognized correctly.
const PARSERS: &[for<'r> fn(&'r mut Lexer<'_>)] = &[
|t| t.number(),
|t| t.ident(),
|t| t.operator(),
|t| t.newlines(),
|t| t.symbol(),
|t| t.comment(),
|t| t.text(),
];
impl<'s, Inner> Lexer<'s, Inner>
where Inner: TokenConsumer<'s>
        + Debug
        + BlockHierarchyConsumer
        + GroupHierarchyConsumer<'s>
        + NewlineConsumer<'s>
{
    /// Try each token parser in a fixed order, stopping at the first one that makes progress.
    ///
    /// Returns `true` if some parser progressed, and `false` if none did. The order is chosen
    /// for two reasons:
    /// 1. The most common parsers come first, to minimize the comparisons made for each new
    ///    character.
    /// 2. Some parsers could consume input that should be qualified as something else, so the
    ///    parsers that must claim that input are tried before them.
    fn parse_token(&mut self) -> bool {
        if self.run_and_check_if_progressed(|this| this.number()) {
            return true;
        }
        if self.run_and_check_if_progressed(|this| this.ident()) {
            return true;
        }
        if self.run_and_check_if_progressed(|this| this.operator()) {
            return true;
        }
        if self.run_and_check_if_progressed(|this| this.newlines()) {
            return true;
        }
        if self.run_and_check_if_progressed(|this| this.symbol()) {
            return true;
        }
        if self.run_and_check_if_progressed(|this| this.comment()) {
            return true;
        }
        self.run_and_check_if_progressed(|this| this.text())
    }
}
impl<'s, Inner> Finish for Lexer<'s, Inner>
where Inner: TokenConsumer<'s>
+ Finish
+ Debug
+ BlockHierarchyConsumer
+ GroupHierarchyConsumer<'s>
+ NewlineConsumer<'s>
{
type Result = ParseResult<Inner::Result>;
impl<'s> Lexer<'s> {
/// Run the lexer. Return non-hierarchical list of tokens (the token groups will be represented
/// as start and end tokens).
pub fn run(mut self) -> ParseResult<Vec<Token<'s>>> {
fn finish(&mut self) -> Self::Result {
// If the first line is indented, open a block for it.
self.spaces_after_lexeme();
let first_block_indent = self.last_spaces_visible_offset;
if first_block_indent.width_in_spaces != 0 {
let start = Location::default();
self.submit_token(token::block_start(Code::empty(start), Code::empty(start)).into());
self.inner.start_block();
self.start_block(first_block_indent);
self.submit_token(token::newline(Code::empty(start), Code::empty(start)).into());
self.inner.push_newline(token::newline(Code::empty(start), Code::empty(start)));
}
// Main parsing loop.
while PARSERS.iter().any(|f| self.run_and_check_if_progressed(f)) {}
while self.parse_token() {}
// If any blocks were still open at EOF, close them.
while self.end_block().is_some() {
let block_end = self.marker_token(token::Variant::block_end());
self.submit_token(block_end);
self.inner.end_block();
}
// If the last line ended in whitespace, ensure it is represented; we'll attach it to a
// phantom newline token.
@ -1517,17 +1404,17 @@ impl<'s> Lexer<'s> {
let visible_offset = self.last_spaces_visible_offset;
let offset =
Offset(visible_offset, Code::from_str_at_location(offset_code, left_offset_start));
let eof = token::variant::Variant::Newline(token::variant::Newline());
self.submit_token(Token(offset, Code::empty(self.current_offset), eof));
}
// Sanity check.
let mut internal_error = self.internal_error.take();
if self.current_char.is_some() {
let message = format!("Lexer did not consume all input. State: {self:?}");
internal_error.get_or_insert(message);
let eof = token::variant::Newline();
self.inner.push_newline(Token(offset, Code::empty(self.current_offset), eof));
}
let internal_error = if self.current_char.is_some() {
format!("Lexer did not consume all input. State: {self:?}").into()
} else {
None
};
debug_assert!(internal_error.is_none());
let value = self.output;
let value = self.inner.finish();
ParseResult { value, internal_error }
}
}
@ -1535,11 +1422,10 @@ impl<'s> Lexer<'s> {
/// Run the lexer. Return non-hierarchical list of tokens (the token groups will be represented
/// as start and end tokens).
pub fn run(input: &'_ str) -> ParseResult<Vec<Token<'_>>> {
Lexer::new(input).run()
Lexer::new(input, vec![]).finish()
}
// =============
// === Tests ===
// =============
@ -1583,7 +1469,7 @@ pub mod test {
/// Constructor.
pub fn operator_<'s>(left_offset: &'s str, code: &'s str) -> Token<'s> {
let variant = Variant::operator(analyze_operator(code));
let variant = analyze_operator(code);
let left_offset = test_code(left_offset);
let code = test_code(code);
Token(left_offset, code, variant)
@ -1617,6 +1503,13 @@ pub mod debug {
}
let mut locations = code::debug::LocationCheck::new();
for token in &tokens {
if matches!(
token.variant,
// Not a token; only constructed as a debug representation.
token::Variant::BlockStart(_) | token::Variant::BlockEnd(_)
) {
continue;
}
let left_offset = token.left_offset.code.range();
let code = token.code.range();
sum_span = Some(concat(&sum_span, &left_offset));
@ -2062,8 +1955,7 @@ mod benches {
let str = &str[..str.len() - 1];
b.iter(move || {
let lexer = Lexer::new(str);
assert_eq!(lexer.run().unwrap().len(), reps);
assert_eq!(run(str).unwrap().len(), reps);
});
}
}

View File

@ -93,6 +93,12 @@
use crate::prelude::*;
use crate::lexer::Lexer;
use crate::source::Code;
use crate::syntax::token;
use crate::syntax::tree::SyntaxError;
use crate::syntax::Finish;
// ==============
// === Export ===
@ -163,11 +169,9 @@ impl Parser {
/// Main entry point.
pub fn run<'s>(&self, code: &'s str) -> syntax::Tree<'s> {
let tokens = lexer::run(code);
let mut resolver = macros::resolver::Resolver::new_statement();
let result = tokens.map(|tokens| resolver.run(&self.macros, tokens));
let value = result.value;
if let Some(error) = result.internal_error {
let resolver = macros::resolver::Resolver::new(&self.macros);
let ParseResult { value, internal_error } = Lexer::new(code, resolver).finish();
if let Some(error) = internal_error {
return value.with_error(format!("Internal error: {error}"));
}
value
@ -183,271 +187,89 @@ impl Default for Parser {
// == Parsing helpers ==
/// Reinterpret an expression in a statement context (i.e. as a top level member of a block).
///
/// In statement context, an expression that has an assignment operator at its top level is
/// interpreted as a variable assignment or method definition.
///
/// Summary of the rewrites performed on the root of `tree`:
/// - Annotation and documentation wrappers: the wrapped expression is converted recursively.
/// - An argument-block application with no left-hand side: returned with an error attached.
/// - A type-annotated expression: replaced by a `TypeSignature` node.
/// - An operator application whose operator is an assignment: replaced by an `Assignment` or a
///   `Function` node, or returned with an error if neither interpretation applies.
/// - Anything else: returned unchanged.
fn expression_to_statement(mut tree: syntax::Tree<'_>) -> syntax::Tree<'_> {
use syntax::tree::*;
match &mut *tree.variant {
// Wrapper nodes: convert the inner expression (if present) in place.
Variant::Annotated(annotated) => {
annotated.expression = annotated.expression.take().map(expression_to_statement);
}
Variant::AnnotatedBuiltin(annotated) => {
annotated.expression = annotated.expression.take().map(expression_to_statement);
}
Variant::Documented(documented) => {
documented.expression = documented.expression.take().map(expression_to_statement);
}
Variant::ArgumentBlockApplication(ArgumentBlockApplication { lhs: None, .. }) => {
return tree.with_error("Expected expression before indented block.");
}
// `expression : type` at statement level becomes a type signature; the fields are moved
// out of the old node rather than cloned.
Variant::TypeAnnotated(typed) => {
tree.variant = Box::new(Variant::TypeSignature(TypeSignature {
variable: mem::take(&mut typed.expression),
operator: mem::take(&mut typed.operator),
type_: mem::take(&mut typed.type_),
}));
}
Variant::OprApp(OprApp { lhs: Some(lhs), opr: Ok(opr), rhs })
if opr.properties.is_assignment() =>
{
// If the left-hand side is itself an arrow application (`lhs -> type`), split it into the
// real left-hand side and a return specification.
let (lhs, return_spec) = match &mut *lhs.variant {
Variant::OprApp(OprApp { lhs: Some(lhs), opr: Ok(opr), rhs: Some(rhs) })
if opr.properties.is_arrow() =>
(
lhs,
Some(ReturnSpecification {
arrow: mem::take(opr),
r#type: mem::take(rhs),
}),
),
_ => (lhs, None),
};
// Split a clone of the left-hand side into its leftmost leaf and its arguments.
let (leftmost, args) = collect_arguments(lhs.clone());
// The assignment interpretations are only considered when no return specification was
// found above.
if return_spec.is_none() {
if let Some(rhs) = rhs {
if let Variant::Ident(ident) = &*leftmost.variant
&& ident.token.variant.is_type
{
// If the LHS is a type, this is a (destructuring) assignment.
let lhs = expression_to_pattern(mem::take(lhs));
tree.variant = Box::new(Variant::Assignment(Assignment {
pattern: lhs,
equals: mem::take(opr),
expr: mem::take(rhs),
}));
return tree;
}
if !is_invalid_pattern(&leftmost) && args.is_empty() && !is_body_block(rhs) {
// If the LHS has no arguments, and there is a RHS, and the RHS is not a
// body block, this is a variable assignment.
tree.variant = Box::new(Variant::Assignment(Assignment {
pattern: leftmost,
equals: mem::take(opr),
expr: mem::take(rhs),
}));
return tree;
}
}
}
if is_qualified_name(&leftmost) {
// If this is not a variable assignment, and the leftmost leaf of the `App` tree is
// a qualified name, this is a function definition.
tree.variant = Box::new(Variant::Function(Function {
name: leftmost,
args,
returns: return_spec,
equals: mem::take(opr),
body: mem::take(rhs),
}));
return tree;
}
// Neither an assignment nor a function definition.
return tree.with_error("Invalid use of assignment operator `=`.");
}
_ => (),
}
tree
}
/// If this function returns `true`, the input is not valid where a pattern is expected.
///
/// Only applications and type-annotated expressions are inspected; every other kind of tree
/// yields `false`.
fn is_invalid_pattern(tree: &syntax::Tree) -> bool {
use syntax::tree::*;
match &*tree.variant {
// Application whose function is a plain identifier: invalid if that identifier is not a
// type, or if the argument is invalid. This arm must precede the general `App` arm.
Variant::App(App { func: Tree { variant: box Variant::Ident(ident), .. }, arg }) =>
!ident.token.is_type || is_invalid_pattern(arg),
// Any other application: invalid if either side is invalid.
Variant::App(App { func, arg }) => is_invalid_pattern(func) || is_invalid_pattern(arg),
// A type annotation is judged by the annotated expression alone.
Variant::TypeAnnotated(TypeAnnotated { expression, .. }) => is_invalid_pattern(expression),
_ => false,
}
}
fn is_qualified_name(tree: &syntax::Tree) -> bool {
use syntax::tree::*;
match &*tree.variant {
match &tree.variant {
Variant::Ident(_) => true,
Variant::OprApp(OprApp { lhs: Some(lhs), opr: Ok(opr), rhs: Some(rhs) })
if matches!(&*rhs.variant, Variant::Ident(_)) && opr.properties.is_dot() =>
Variant::OprApp(box OprApp { lhs: Some(lhs), opr: Ok(opr), rhs: Some(rhs) })
if matches!(rhs.variant, Variant::Ident(_)) && opr.code.repr.0 == "." =>
is_qualified_name(lhs),
_ => false,
}
}
/// Pass `tree` through unchanged when `is_qualified_name` accepts it; otherwise return it
/// with `SyntaxError::ExpectedQualifiedName` attached.
fn expect_qualified_name(tree: syntax::Tree) -> syntax::Tree {
    if !is_qualified_name(&tree) {
        return tree.with_error(SyntaxError::ExpectedQualifiedName);
    }
    tree
}
/// Build an identifier tree from an identifier token whose first two arguments are both
/// `location` (one a clone, one the original).
///
/// NOTE(review): presumably `location` is an empty `Code` and the result is a placeholder for a
/// missing expression — confirm against callers.
fn empty_tree(location: Code) -> syntax::Tree {
// NOTE(review): the positional `false, 0, false, false, false` arguments are identifier flags
// defined by `token::ident`; check its signature for their meaning.
syntax::Tree::ident(token::ident(location.clone(), location, false, 0, false, false, false))
}
/// Reinterprets an expression tree as a pattern.
///
/// A wildcard has its `de_bruijn_index` cleared and is returned as-is. Groups, applications and
/// type annotations are converted recursively. A `TemplateFunction` wrapper is replaced by its
/// converted inner `ast`, with the wrapper's left offset added to the result. Forms that are not
/// accepted in a pattern (such as the autoscope operator) are marked with an error.
fn expression_to_pattern(mut input: syntax::Tree<'_>) -> syntax::Tree<'_> {
use syntax::tree::*;
if let Variant::Wildcard(wildcard) = &mut *input.variant {
if let Variant::Wildcard(wildcard) = &mut input.variant {
wildcard.de_bruijn_index = None;
return input;
}
let mut out = match input.variant {
box Variant::TemplateFunction(TemplateFunction { ast, .. }) => expression_to_pattern(ast),
box Variant::Group(Group { open, body: Some(body), close }) =>
Tree::group(open, Some(expression_to_pattern(body)), close),
box Variant::App(App { func, arg }) =>
Tree::app(expression_to_pattern(func), expression_to_pattern(arg)),
box Variant::TypeAnnotated(TypeAnnotated { expression, operator, type_ }) =>
Tree::type_annotated(expression_to_pattern(expression), operator, type_),
box Variant::AutoscopedIdentifier(_) =>
return input.with_error("The autoscope operator (..) cannot be used in a pattern."),
_ => return input,
let mut error = None;
match input.variant {
// === Special-case errors ===
Variant::App(box App { func: Tree { variant: Variant::Ident(ref ident), .. }, .. })
if !ident.token.is_type =>
error = Some(SyntaxError::PatternUnexpectedExpression),
// === Recursions ===
Variant::Group(box Group { body: Some(ref mut body), .. }) =>
transform_tree(body, expression_to_pattern),
Variant::App(box App { ref mut func, ref mut arg }) => {
transform_tree(func, expression_to_pattern);
transform_tree(arg, expression_to_pattern);
}
Variant::TypeAnnotated(box TypeAnnotated { ref mut expression, .. }) =>
transform_tree(expression, expression_to_pattern),
Variant::OprApp(box OprApp { opr: Ok(ref opr), .. }) if opr.code == "." =>
if !is_qualified_name(&input) {
error = Some(SyntaxError::PatternUnexpectedDot);
},
// === Transformations ===
Variant::TemplateFunction(box TemplateFunction { ast, .. }) => {
let mut out = expression_to_pattern(ast);
out.span.left_offset += input.span.left_offset;
return out;
}
// === Unconditional and fallthrough errors ===
Variant::AutoscopedIdentifier(_) => error = Some(SyntaxError::PatternUnexpectedExpression),
Variant::OprApp(_) => error = Some(SyntaxError::PatternUnexpectedExpression),
// === Unhandled ===
_ => {}
};
out.span.left_offset += input.span.left_offset;
out
maybe_with_error(input, error)
}
fn collect_arguments(tree: syntax::Tree) -> (syntax::Tree, Vec<syntax::tree::ArgumentDefinition>) {
let mut args = vec![];
let tree = unroll_arguments(tree, &mut args);
args.reverse();
(tree, args)
thread_local! {
static DEFAULT_TREE: RefCell<Option<syntax::Tree<'static>>> = default();
}
/// Interprets every element of an application chain, including its head, as an argument
/// definition, and returns the definitions in source order.
fn collect_arguments_inclusive(tree: syntax::Tree) -> Vec<syntax::tree::ArgumentDefinition> {
    // Arguments are unrolled from the outermost application inwards, i.e. last argument first.
    let mut definitions = Vec::new();
    let head = unroll_arguments(tree, &mut definitions);
    definitions.push(parse_argument_definition(head));
    definitions.reverse();
    definitions
}
/// Strips argument applications off `tree` one at a time, appending each parsed argument to
/// `args` (outermost first), and returns what is left once no application remains.
fn unroll_arguments<'s>(
    mut tree: syntax::Tree<'s>,
    args: &mut Vec<syntax::tree::ArgumentDefinition<'s>>,
) -> syntax::Tree<'s> {
    loop {
        match parse_argument_application(&mut tree) {
            Some(argument) => args.push(argument),
            None => break tree,
        }
    }
}
/// Try to parse the expression as an application of a function to an `ArgumentDefinition`. If it
/// matches, replace the expression with its LHS, and return the `ArgumentDefinition` node.
///
/// Both positional (`App`) and named (`NamedApp`) applications are recognized; for any other
/// expression, `None` is returned and the expression is left untouched.
///
/// The sub-trees are moved out of the application node (`mem::take`) instead of being cloned:
/// the node is overwritten by its own LHS immediately afterwards, so the placeholders left behind
/// are never observed. This avoids deep-copying the whole remaining application chain on every
/// call when the function is used in a loop to unroll an argument list.
pub fn parse_argument_application<'s>(
    expression: &'_ mut syntax::Tree<'s>,
) -> Option<syntax::tree::ArgumentDefinition<'s>> {
    use syntax::tree::*;
    match &mut expression.variant {
        box Variant::App(App { func, arg }) => {
            let arg = parse_argument_definition(mem::take(arg));
            // The application's leading offset now belongs to the function it wrapped.
            func.span.left_offset += expression.span.left_offset.take_as_prefix();
            *expression = mem::take(func);
            Some(arg)
        }
        box Variant::NamedApp(NamedApp { func, open, name, equals, arg, close }) => {
            let open = mem::take(open);
            let close = mem::take(close);
            let equals = equals.clone();
            // The argument name becomes the pattern; the applied value becomes its default.
            let pattern = Tree::ident(name.clone());
            let open2 = default();
            let suspension = default();
            let close2 = default();
            let type_ = default();
            let default_value = Some(ArgumentDefault { equals, expression: mem::take(arg) });
            func.span.left_offset += expression.span.left_offset.take_as_prefix();
            *expression = mem::take(func);
            Some(ArgumentDefinition {
                open,
                open2,
                pattern,
                suspension,
                default: default_value,
                close2,
                type_,
                close,
            })
        }
        _ => None,
    }
}
/// Interpret the expression as an element of an argument definition sequence.
///
/// Wrappers are peeled off the expression from the outside in, each filling one field of the
/// resulting `ArgumentDefinition`: an outer group (`open`/`close`), a default value
/// (`pattern = expression`), an inner group (`open2`/`close2`), a type annotation, and a
/// suspension operator. Whatever remains is converted with `expression_to_pattern`. Whenever a
/// wrapper is removed, its left offset is added to the first token or tree that remains.
pub fn parse_argument_definition(mut pattern: syntax::Tree) -> syntax::tree::ArgumentDefinition {
use syntax::tree::*;
// Outer group around the whole definition (only groups that have a body are unwrapped).
let mut open1 = default();
let mut close1 = default();
if let box Variant::Group(Group { mut open, body: Some(mut body), close }) = pattern.variant {
*(if let Some(open) = open.as_mut() {
&mut open.left_offset
} else {
&mut body.span.left_offset
}) += pattern.span.left_offset;
open1 = open;
close1 = close;
pattern = body;
}
// Default value: an assignment-operator application splits into pattern (LHS) and default (RHS).
let mut default_ = default();
if let Variant::OprApp(OprApp { lhs: Some(lhs), opr: Ok(opr), rhs: Some(rhs) }) =
&*pattern.variant
&& opr.properties.is_assignment()
{
let left_offset = pattern.span.left_offset;
default_ = Some(ArgumentDefault { equals: opr.clone(), expression: rhs.clone() });
pattern = lhs.clone();
pattern.span.left_offset += left_offset;
}
// Inner group around the pattern that remains after the default has been removed.
let mut open2 = default();
let mut close2 = default();
if let box Variant::Group(Group { mut open, body: Some(mut body), close }) = pattern.variant {
*(if let Some(open) = open.as_mut() {
&mut open.left_offset
} else {
&mut body.span.left_offset
}) += pattern.span.left_offset;
open2 = open;
close2 = close;
pattern = body;
}
// Type annotation: keep the annotated expression as the pattern, record operator and type.
let mut type__ = default();
if let box Variant::TypeAnnotated(TypeAnnotated { mut expression, operator, type_ }) =
pattern.variant
{
expression.span.left_offset += pattern.span.left_offset;
type__ = Some(ArgumentType { operator, type_ });
pattern = expression;
}
let mut suspension = default();
// A `TemplateFunction` wrapper is discarded, keeping only its inner `ast`.
if let box Variant::TemplateFunction(TemplateFunction { mut ast, .. }) = pattern.variant {
ast.span.left_offset += pattern.span.left_offset;
pattern = ast;
}
// Suspension: a unary suspension operator is recorded and its operand becomes the pattern.
if let Variant::UnaryOprApp(UnaryOprApp { opr, rhs: Some(rhs) }) = &*pattern.variant
&& opr.properties.is_suspension()
{
let mut opr = opr.clone();
opr.left_offset += pattern.span.left_offset;
suspension = Some(opr);
pattern = rhs.clone();
}
let pattern = expression_to_pattern(pattern);
let open = open1;
let close = close1;
let type_ = type__;
ArgumentDefinition { open, open2, pattern, suspension, default: default_, close2, type_, close }
}
/// Return whether the expression is a body block.
fn is_body_block(expression: &syntax::tree::Tree<'_>) -> bool {
matches!(&*expression.variant, syntax::tree::Variant::BodyBlock { .. })
/// Applies the by-value transformation `f` to a tree held behind a mutable reference.
///
/// A placeholder tree is swapped into `tree` while `f` runs on the original, and the result is
/// then stored back. The placeholder is taken from the thread-local `DEFAULT_TREE` slot when one
/// is available (otherwise a fresh default tree is created), and is put back into the slot
/// afterwards so that later calls can reuse it.
fn transform_tree(tree: &mut syntax::Tree, f: impl FnOnce(syntax::Tree) -> syntax::Tree) {
// Take the cached placeholder, leaving the slot empty; fall back to a new default tree.
let default: syntax::Tree<'static> =
DEFAULT_TREE.with(|default| default.borrow_mut().take()).unwrap_or_default();
let original = mem::replace(tree, default);
let transformed = f(original);
// Store the result and get the placeholder back out of `tree`.
let default_returned = mem::replace(tree, transformed);
// SAFETY: This lifetime cast is sound because this is the same value as `default` above; its
// lifetime was narrowed by the type system when it was stored in the `tree` reference.
#[allow(unsafe_code)]
let default_returned =
unsafe { mem::transmute::<syntax::Tree<'_>, syntax::Tree<'static>>(default_returned) };
DEFAULT_TREE.with(|default| *default.borrow_mut() = Some(default_returned));
}

View File

@ -3,8 +3,14 @@
use crate::macros::pattern::*;
use crate::macros::*;
use crate::empty_tree;
use crate::expect_qualified_name;
use crate::source::Code;
use crate::syntax::operator;
use crate::syntax::token;
use crate::syntax::tree::SyntaxError;
use crate::syntax::Item;
use crate::syntax::Token;
@ -22,7 +28,6 @@ fn expression() -> resolver::SegmentMap<'static> {
let mut macro_map = resolver::SegmentMap::default();
macro_map.register(if_then());
macro_map.register(if_then_else());
macro_map.register(group());
macro_map.register(lambda());
macro_map.register(case());
macro_map.register(array());
@ -38,9 +43,6 @@ fn statement() -> resolver::SegmentMap<'static> {
let mut macro_map = resolver::SegmentMap::default();
register_import_macros(&mut macro_map);
register_export_macros(&mut macro_map);
macro_map.register(type_def());
macro_map.register(private());
macro_map.register(foreign());
macro_map
}
@ -93,7 +95,7 @@ fn import_body<'s>(
body = Some(
precedence
.resolve(tokens)
.map(expect_qualified)
.map(expect_qualified_name)
.unwrap_or_else(|| expected_nonempty(header.code.position_after())),
);
&mut from
@ -101,7 +103,7 @@ fn import_body<'s>(
"import" => {
let expect = match from {
Some(_) => expect_ident,
None => expect_qualified,
None => expect_qualified_name,
};
body = sequence_tree(precedence, tokens, expect);
incomplete_import = body.is_none();
@ -109,7 +111,7 @@ fn import_body<'s>(
}
"all" => {
debug_assert!(tokens.is_empty());
all = Some(into_ident(header));
all = Some(header.with_variant(token::variant::AllKeyword()));
incomplete_import = false;
continue;
}
@ -180,7 +182,7 @@ fn export_body<'s>(
body = Some(
precedence
.resolve(tokens)
.map(expect_qualified)
.map(expect_qualified_name)
.unwrap_or_else(|| expected_nonempty(header.code.position_after())),
);
&mut from
@ -188,7 +190,7 @@ fn export_body<'s>(
"export" => {
let expect = match from {
Some(_) => expect_ident,
None => expect_qualified,
None => expect_qualified_name,
};
body = sequence_tree(precedence, tokens, expect);
incomplete_export = body.is_none();
@ -196,9 +198,9 @@ fn export_body<'s>(
}
"all" => {
debug_assert!(tokens.is_empty());
all = Some(into_ident(header));
body = None;
incomplete_export = false;
continue;
&mut all
}
"as" => {
body = Some(
@ -220,11 +222,23 @@ fn export_body<'s>(
};
*field = Some(syntax::tree::MultiSegmentAppSegment { header, body });
}
let export = syntax::Tree::export(from, export.unwrap(), all, as_, hiding);
if incomplete_export {
return export.with_error("Expected name or `all` keyword following `export` keyword.");
}
export
let export = export.unwrap();
let error = if all.is_some() {
SyntaxError::ImportsNoAllInExport
} else if hiding.is_some() {
SyntaxError::ImportsNoHidingInExport
} else if incomplete_export {
SyntaxError::ImportsExpectedNameInExport
} else {
return syntax::Tree::export(from, export, as_);
};
let mut segments = vec![];
segments.extend(from);
segments.push(export);
segments.extend(all);
segments.extend(as_);
segments.extend(hiding);
return syntax::Tree::multi_segment_app(segments.try_into().unwrap()).with_error(error);
}
/// If-then-else macro definition.
@ -242,200 +256,7 @@ fn if_body<'s>(
segments: NonEmptyVec<MatchedSegment<'s>>,
precedence: &mut operator::Precedence<'s>,
) -> syntax::Tree<'s> {
use syntax::tree::*;
let segments = segments.mapped(|s| {
let header = s.header;
let body = s.result.tokens();
let body = match precedence.resolve(body) {
Some(Tree {
variant:
box Variant::ArgumentBlockApplication(ArgumentBlockApplication {
lhs: None,
arguments,
}),
span,
..
}) => {
let mut block = block::body_from_lines(arguments);
block.span.left_offset += span.left_offset;
Some(block)
}
e => e,
};
MultiSegmentAppSegment { header, body }
});
Tree::multi_segment_app(segments)
}
/// Group macro definition.
///
/// Declares a `(` segment with the `everything()` pattern followed by a `)` segment with the
/// `nothing()` pattern; matches are turned into a tree by `group_body`.
pub fn group<'s>() -> Definition<'s> {
crate::macro_definition! {("(", everything(), ")", nothing()) group_body}
}
/// Builds a `Group` tree from the two matched segments of the group macro: the header of the
/// first segment becomes the open symbol, its tokens are resolved into the (optional) body, and
/// the header of the last segment becomes the close symbol.
fn group_body<'s>(
    segments: NonEmptyVec<MatchedSegment<'s>>,
    precedence: &mut operator::Precedence<'s>,
) -> syntax::Tree<'s> {
    let (closing, mut remaining) = segments.pop();
    let opening = remaining.pop().unwrap();
    let inner = precedence.resolve(opening.result.tokens());
    let open = into_open_symbol(opening.header);
    let close = into_close_symbol(closing.header);
    syntax::Tree::group(Some(open), inner, Some(close))
}
/// Type definitions.
///
/// Declares a single `type` segment with the `everything()` pattern; matches are turned into a
/// tree by `type_def_body`.
fn type_def<'s>() -> Definition<'s> {
crate::macro_definition! {("type", everything()) type_def_body}
}
/// Builds a type-definition tree from the tokens matched after the `type` keyword.
///
/// The first token must be an identifier (the type name); otherwise the keyword is returned as an
/// identifier tree carrying an error. The remaining tokens before the trailing block, if any,
/// are parsed as the type's parameters, and each line of the block is parsed and converted with
/// `to_body_statement`.
fn type_def_body<'s>(
matched_segments: NonEmptyVec<MatchedSegment<'s>>,
precedence: &mut operator::Precedence<'s>,
) -> syntax::Tree<'s> {
use syntax::tree::*;
let segment = matched_segments.pop().0;
let header = into_ident(segment.header);
let mut tokens = segment.result.tokens();
// If the last item is a block, split it off: its lines form the body of the type.
let mut block = vec![];
if let Some(syntax::Item::Block(lines)) = tokens.last_mut() {
block = mem::take(lines);
tokens.pop();
}
let mut tokens = tokens.into_iter();
let name = match tokens.next() {
Some(syntax::Item::Token(syntax::Token {
left_offset,
code,
variant: syntax::token::Variant::Ident(ident),
})) => syntax::Token(left_offset, code, ident),
_ => return Tree::ident(header).with_error("Expected identifier after `type` keyword."),
};
let params = precedence
.resolve_non_section(tokens)
.map(crate::collect_arguments_inclusive)
.unwrap_or_default();
// A body line starting with an operator token (other than an annotation) has that token
// replaced by an identifier flagged `is_operator_lexically`.
for line in &mut block {
if let Some(syntax::Item::Token(syntax::Token { variant, .. })) = line.items.first_mut()
&& let syntax::token::Variant::Operator(operator) = variant
&& !operator.properties.is_annotation()
{
let opr_ident =
syntax::token::variant::Ident { is_operator_lexically: true, ..default() };
*variant = syntax::token::Variant::Ident(opr_ident);
}
}
let parse_line = |syntax::item::Line { newline, items }| block::Line {
newline,
expression: precedence.resolve(items),
};
let body = block::compound_lines(block.into_iter().map(parse_line))
.map(|line| line.map_expression(to_body_statement))
.collect();
Tree::type_def(header, name, params, body)
}
fn to_body_statement(mut line_expression: syntax::Tree<'_>) -> syntax::Tree<'_> {
use syntax::tree::*;
// Unwrap `Private` tree from any `Invalid` added in expression context; it will be revalidated
// in the new context.
if let Tree {
variant:
box Variant::Invalid(Invalid {
ast: mut inner @ Tree { variant: box Variant::Private(_), .. },
..
}),
span,
..
} = line_expression
{
inner.span = span;
return to_body_statement(inner);
}
// Recurse into body of `Private` keyword; validate usage of the keyword in type-body context.
if let Tree { variant: box Variant::Private(ref mut private), .. } = &mut line_expression {
let body_statement = private.body.take().map(to_body_statement);
let error = match body_statement.as_ref().map(|tree| &*tree.variant) {
Some(Variant::ConstructorDefinition(_)) => None,
Some(Variant::Function(_)) => None,
None => Some("Expected declaration after `private` keyword in type definition."),
_ => Some("The `private` keyword inside a type definition may only be applied to a constructor definition or a method."),
};
private.body = body_statement;
return match error {
Some(error) => line_expression.with_error(error),
None => line_expression,
};
}
if let Tree { variant: box Variant::Documented(Documented { expression, .. }), .. } =
&mut line_expression
{
*expression = expression.take().map(to_body_statement);
return line_expression;
}
if let Tree { variant: box Variant::Annotated(Annotated { expression, .. }), .. } =
&mut line_expression
{
*expression = expression.take().map(to_body_statement);
return line_expression;
}
let mut last_argument_default = default();
let mut left_offset = line_expression.span.left_offset.position_before();
let lhs = match &line_expression {
Tree {
variant: box Variant::OprApp(OprApp { lhs: Some(lhs), opr: Ok(opr), rhs: Some(rhs) }),
span,
..
} if opr.properties.is_assignment() => {
left_offset = span.left_offset.clone();
last_argument_default = Some((opr.clone(), rhs.clone()));
lhs
}
Tree {
variant:
box Variant::ArgumentBlockApplication(ArgumentBlockApplication {
lhs: Some(Tree { variant: box Variant::Ident(ident), span: span_, .. }),
arguments,
}),
span,
..
} => {
let mut constructor = ident.token.clone();
constructor.left_offset += &span.left_offset;
constructor.left_offset += &span_.left_offset;
let block = arguments
.iter()
.cloned()
.map(|block::Line { newline, expression }| ArgumentDefinitionLine {
newline,
argument: expression.map(crate::parse_argument_definition),
})
.collect();
let arguments = default();
return Tree::constructor_definition(constructor, arguments, block);
}
_ => &line_expression,
};
let (constructor, mut arguments) = crate::collect_arguments(lhs.clone());
if let Tree { variant: box Variant::Ident(Ident { token }), span, .. } = constructor
&& token.is_type
{
let mut constructor = token;
constructor.left_offset += left_offset;
constructor.left_offset += span.left_offset;
if let Some((equals, expression)) = last_argument_default
&& let Some(ArgumentDefinition { open: None, default, close: None, .. }) =
arguments.last_mut()
&& default.is_none()
{
*default = Some(ArgumentDefault { equals, expression });
}
let block = default();
return Tree::constructor_definition(constructor, arguments, block);
}
crate::expression_to_statement(line_expression)
capture_expressions(segments, precedence)
}
/// Lambda expression.
@ -452,9 +273,8 @@ fn lambda_body<'s>(
) -> syntax::Tree<'s> {
let (segment, _) = segments.pop();
let operator = segment.header;
let syntax::token::Token { left_offset, code, .. } = operator;
let properties = syntax::token::OperatorProperties::default();
let operator = syntax::token::operator(left_offset, code, properties);
let Token { left_offset, code, .. } = operator;
let operator = token::lambda_operator(left_offset, code);
let arrow = segment.result.tokens();
let arrow = precedence.resolve(arrow);
syntax::Tree::lambda(operator, arrow)
@ -472,25 +292,25 @@ fn case_body<'s>(
use syntax::tree::*;
let (of, mut rest) = segments.pop();
let case = rest.pop().unwrap();
let case_ = into_ident(case.header);
let case_ = case.header.with_variant(token::variant::CaseKeyword());
let expression = case.result.tokens();
let expression = precedence.resolve(expression);
let of_ = into_ident(of.header);
let of_ = of.header.with_variant(token::variant::OfKeyword());
let mut case_builder = CaseBuilder::default();
let mut initial_case = vec![];
let mut block = default();
for item in of.result.tokens() {
match item {
syntax::Item::Block(lines) => block = lines,
Item::Block(lines) => block = lines,
_ => initial_case.push(item),
}
}
if !initial_case.is_empty() {
let location = of_.code.position_after();
let newline = syntax::token::newline(location.clone(), location);
let newline = token::newline(location.clone(), location);
case_builder.push(syntax::item::Line { newline, items: initial_case });
}
block.into_iter().for_each(|line| case_builder.push(line));
block.into_vec().into_iter().for_each(|line| case_builder.push(line));
let (case_lines, any_invalid) = case_builder.finish();
let tree = Tree::case_of(case_, expression, of_, case_lines);
if any_invalid {
@ -504,10 +324,10 @@ struct CaseBuilder<'s> {
// Case components
documentation: Option<syntax::tree::DocComment<'s>>,
pattern: Option<syntax::Tree<'s>>,
arrow: Option<syntax::token::Operator<'s>>,
arrow: Option<token::ArrowOperator<'s>>,
// Within-case state
spaces: bool,
tokens: Vec<syntax::Item<'s>>,
tokens: Vec<Item<'s>>,
resolver: operator::Precedence<'s>,
// Output
case_lines: Vec<syntax::tree::CaseLine<'s>>,
@ -519,22 +339,16 @@ impl<'s> CaseBuilder<'s> {
let syntax::item::Line { newline, items } = line;
self.case_lines.push(syntax::tree::CaseLine { newline: newline.into(), ..default() });
for token in items {
if self.arrow.is_none()
&& let syntax::Item::Token(syntax::Token {
left_offset,
code,
variant: syntax::token::Variant::Operator(op),
}) = &token
&& op.properties.is_arrow()
&& !left_offset.is_empty()
{
self.resolver.extend(self.tokens.drain(..));
self.arrow =
Some(syntax::token::operator(left_offset.clone(), code.clone(), op.properties));
self.pattern = self.resolver.finish().map(crate::expression_to_pattern);
continue;
}
if let syntax::Item::Token(syntax::Token { left_offset, .. }) = &token {
if let Item::Token(token @ Token { left_offset, variant, .. }) = &token {
if self.arrow.is_none()
&& let token::Variant::ArrowOperator(arrow_op) = variant
&& !left_offset.is_empty()
{
self.resolver.extend(self.tokens.drain(..));
self.arrow = Some(token.clone().with_variant(*arrow_op));
self.pattern = self.resolver.finish().map(crate::expression_to_pattern);
continue;
}
self.spaces = self.spaces || (!left_offset.is_empty() && !self.tokens.is_empty());
}
self.tokens.push(token);
@ -545,18 +359,11 @@ impl<'s> CaseBuilder<'s> {
fn finish_line(&mut self) {
if self.arrow.is_none() && !self.spaces {
for (i, token) in self.tokens.iter().enumerate() {
if let syntax::Item::Token(syntax::Token {
left_offset,
code,
variant: syntax::token::Variant::Operator(op),
}) = &token
&& op.properties.is_arrow()
if let Item::Token(
token @ Token { variant: token::Variant::ArrowOperator(arrow_op), .. },
) = token
{
self.arrow = Some(syntax::token::operator(
left_offset.clone(),
code.clone(),
op.properties,
));
self.arrow = Some(token.clone().with_variant(*arrow_op));
let including_arrow = self.tokens.drain(..=i);
self.resolver.extend(including_arrow.take(i));
self.pattern = self.resolver.finish().map(crate::expression_to_pattern);
@ -572,7 +379,7 @@ impl<'s> CaseBuilder<'s> {
Some(syntax::Tree {
span,
variant:
box syntax::tree::Variant::Documented(syntax::tree::Documented {
syntax::tree::Variant::Documented(box syntax::tree::Documented {
mut documentation,
expression: None,
}),
@ -586,18 +393,6 @@ impl<'s> CaseBuilder<'s> {
case.documentation = documentation.into();
return;
}
Some(syntax::Tree {
span,
variant:
box syntax::tree::Variant::ArgumentBlockApplication(
syntax::tree::ArgumentBlockApplication { lhs: None, arguments },
),
..
}) => {
let mut block = syntax::tree::block::body_from_lines(arguments);
block.span.left_offset += span.left_offset;
Some(block)
}
e => e,
};
if pattern.is_none() && arrow.is_none() && expression.is_none() {
@ -647,10 +442,10 @@ fn tuple_body<'s>(
}
struct GroupedSequence<'s> {
left: syntax::token::OpenSymbol<'s>,
left: token::OpenSymbol<'s>,
first: Option<syntax::Tree<'s>>,
rest: Vec<syntax::tree::OperatorDelimitedTree<'s>>,
right: syntax::token::CloseSymbol<'s>,
right: token::CloseSymbol<'s>,
}
fn grouped_sequence<'s>(
@ -667,23 +462,19 @@ fn grouped_sequence<'s>(
fn sequence<'s>(
precedence: &mut operator::Precedence<'s>,
tokens: impl IntoIterator<Item = syntax::Item<'s>>,
tokens: impl IntoIterator<Item = Item<'s>>,
) -> (Option<syntax::Tree<'s>>, Vec<syntax::tree::OperatorDelimitedTree<'s>>) {
use syntax::tree::*;
let mut first = None;
let mut rest: Vec<OperatorDelimitedTree<'s>> = default();
for token in tokens {
match token {
syntax::Item::Token(syntax::Token {
left_offset,
code,
variant: syntax::token::Variant::Operator(op),
}) if op.properties.is_sequence() => {
Item::Token(Token { left_offset, code, variant: token::Variant::CommaOperator(_) }) => {
*(match rest.last_mut() {
Some(rest) => &mut rest.body,
None => &mut first,
}) = precedence.finish();
let operator = syntax::Token(left_offset, code, op);
let operator = Token(left_offset, code, token::variant::Operator());
rest.push(OperatorDelimitedTree { operator, body: default() });
}
_ => {
@ -700,7 +491,7 @@ fn sequence<'s>(
fn sequence_tree<'s>(
precedence: &mut operator::Precedence<'s>,
tokens: impl IntoIterator<Item = syntax::Item<'s>>,
tokens: impl IntoIterator<Item = Item<'s>>,
mut f: impl FnMut(syntax::Tree<'s>) -> syntax::Tree<'s>,
) -> Option<syntax::Tree<'s>> {
use syntax::tree::*;
@ -735,14 +526,6 @@ fn splice_body<'s>(
syntax::Tree::text_literal(default(), default(), vec![splice], default())
}
/// `foreign` macro definition: a single `foreign` segment with the `everything()` pattern;
/// matches are turned into a tree by `foreign_body`.
fn foreign<'s>() -> Definition<'s> {
crate::macro_definition! {("foreign", everything()) foreign_body}
}
/// `private` macro definition: a single `private` segment with the `everything()` pattern;
/// matches are turned into a tree by `private_keyword`.
fn private<'s>() -> Definition<'s> {
crate::macro_definition! {("private", everything()) private_keyword}
}
/// `SKIP` macro definition: a single `SKIP` segment with the `everything()` pattern; matches are
/// turned into a tree by `capture_expressions`.
fn skip<'s>() -> Definition<'s> {
crate::macro_definition! {("SKIP", everything()) capture_expressions}
}
@ -751,35 +534,6 @@ fn freeze<'s>() -> Definition<'s> {
crate::macro_definition! {("FREEZE", everything()) capture_expressions}
}
/// Body of the `private` macro.
///
/// `private` may stand alone, which is valid as the first statement of a module and marks the
/// whole module as private, or it may prefix a definition. Only constructor definitions and
/// functions are accepted after the keyword; any other statement is still wrapped in a `Private`
/// node, but the node is marked with an error.
fn private_keyword<'s>(
    segments: NonEmptyVec<MatchedSegment<'s>>,
    precedence: &mut operator::Precedence<'s>,
) -> syntax::Tree<'s> {
    use syntax::tree::*;
    let (segment, _) = segments.pop();
    let keyword = into_private(segment.header);
    let Some(body) = precedence.resolve(segment.result.tokens()) else {
        // Bare keyword without a body: the module-level privacy declaration.
        return Tree::private(keyword, None);
    };
    let statement = crate::expression_to_statement(body);
    let is_supported = matches!(
        statement.variant,
        box Variant::ConstructorDefinition(_) | box Variant::Function(_)
    );
    let private = Tree::private(keyword, Some(statement));
    if is_supported {
        private
    } else {
        private.with_error("The 'private' keyword cannot be applied to this expression")
    }
}
/// Macro body builder that just parses the tokens of each segment as expressions, and places them
/// in a [`MultiSegmentApp`].
fn capture_expressions<'s>(
@ -795,131 +549,57 @@ fn capture_expressions<'s>(
}))
}
/// Body of the `foreign` macro.
///
/// Tries to build a foreign-function tree with `try_foreign_body`. If that fails, the result is
/// the keyword applied to whatever the tokens resolve to (or the keyword alone when they resolve
/// to nothing), marked with the error message that `try_foreign_body` returned.
fn foreign_body<'s>(
segments: NonEmptyVec<MatchedSegment<'s>>,
precedence: &mut operator::Precedence<'s>,
) -> syntax::Tree<'s> {
let segment = segments.pop().0;
let keyword = into_ident(segment.header);
let tokens = segment.result.tokens().into_iter();
// Keyword and tokens are cloned for the attempt so the originals remain for the fallback.
match try_foreign_body(keyword.clone(), tokens.clone(), precedence) {
Ok(foreign) => foreign,
Err(error) => (match precedence.resolve(tokens) {
Some(rhs) => syntax::Tree::app(keyword.into(), rhs),
None => keyword.into(),
})
.with_error(error),
}
}
/// Parses the tokens following the `foreign` keyword as a foreign function definition.
///
/// The first token must be an identifier naming the language. The remaining tokens must resolve
/// to an assignment-operator application whose right-hand side is a text literal, and whose
/// left-hand side is an identifier (the function name) applied to zero or more arguments.
///
/// Returns the foreign-function tree, or a static message describing the first expectation that
/// was not met.
fn try_foreign_body<'s>(
keyword: syntax::token::Ident<'s>,
tokens: impl IntoIterator<Item = syntax::Item<'s>>,
precedence: &mut operator::Precedence<'s>,
) -> Result<syntax::Tree<'s>, &'static str> {
let mut tokens = tokens.into_iter();
let language = tokens
.next()
.and_then(try_into_token)
.and_then(try_token_into_ident)
.ok_or("Expected an identifier specifying foreign method's language.")?;
let expected_name = "Expected an identifier specifying foreign function's name.";
let function = precedence.resolve(tokens).ok_or(expected_name)?;
let expected_function = "Expected a function definition after foreign declaration.";
let box syntax::tree::Variant::OprApp(syntax::tree::OprApp {
lhs: Some(lhs),
opr: Ok(equals),
rhs: Some(body),
}) = function.variant
else {
return Err(expected_function);
};
if !equals.properties.is_assignment() {
return Err(expected_function);
};
if !matches!(body.variant, box syntax::tree::Variant::TextLiteral(_)) {
return Err("Expected a text literal as body of `foreign` declaration.");
}
// Split the left-hand side into the function name and its argument definitions.
let (name, args) = crate::collect_arguments(lhs);
let mut name = try_tree_into_ident(name).ok_or(expected_name)?;
// The offset of the whole definition is carried over to the name token.
name.left_offset += function.span.left_offset;
Ok(syntax::Tree::foreign_function(keyword, language, name, args, equals, body))
}
// === Token conversions ===
fn try_into_token(item: syntax::Item) -> Option<syntax::Token> {
fn try_into_token(item: Item) -> Option<Token> {
match item {
syntax::Item::Token(token) => Some(token),
Item::Token(token) => Some(token),
_ => None,
}
}
fn try_token_into_ident(token: syntax::Token) -> Option<syntax::token::Ident> {
fn try_token_into_ident(token: Token) -> Option<token::Ident> {
match token.variant {
syntax::token::Variant::Ident(ident) => {
let syntax::token::Token { left_offset, code, .. } = token;
Some(syntax::Token(left_offset, code, ident))
token::Variant::Ident(ident) => {
let Token { left_offset, code, .. } = token;
Some(Token(left_offset, code, ident))
}
_ => None,
}
}
fn try_tree_into_ident(tree: syntax::Tree) -> Option<syntax::token::Ident> {
fn try_tree_into_ident(tree: syntax::Tree) -> Option<token::Ident> {
match tree.variant {
box syntax::tree::Variant::Ident(syntax::tree::Ident { token }) => Some(token),
syntax::tree::Variant::Ident(box syntax::tree::Ident { token }) => Some(token),
_ => None,
}
}
fn into_open_symbol(token: syntax::token::Token) -> syntax::token::OpenSymbol {
let syntax::token::Token { left_offset, code, .. } = token;
syntax::token::open_symbol(left_offset, code)
fn into_open_symbol(token: Token) -> token::OpenSymbol {
let Token { left_offset, code, .. } = token;
token::open_symbol(left_offset, code)
}
fn into_close_symbol(token: syntax::token::Token) -> syntax::token::CloseSymbol {
let syntax::token::Token { left_offset, code, .. } = token;
syntax::token::close_symbol(left_offset, code)
fn into_close_symbol(token: Token) -> token::CloseSymbol {
let Token { left_offset, code, .. } = token;
token::close_symbol(left_offset, code)
}
fn into_ident(token: syntax::token::Token) -> syntax::token::Ident {
let syntax::token::Token { left_offset, code, .. } = token;
syntax::token::ident(left_offset, code, false, 0, false, false, false)
}
fn into_private(token: syntax::token::Token) -> syntax::token::Private {
let syntax::token::Token { left_offset, code, .. } = token;
syntax::token::private(left_offset, code)
fn into_ident<T>(token: Token<T>) -> token::Ident {
token.with_variant(token::variant::Ident(false, 0, false, false, false))
}
// === Validators ===
/// Validator: returns the tree unchanged if it is an identifier; any other tree is wrapped in an
/// "Expected identifier." error.
fn expect_ident(tree: syntax::Tree) -> syntax::Tree {
if matches!(&*tree.variant, syntax::tree::Variant::Ident(_)) {
if matches!(tree.variant, syntax::tree::Variant::Ident(_)) {
tree
} else {
tree.with_error("Expected identifier.")
}
}
/// Validator: passes a qualified name through unchanged; any other tree is wrapped in an
/// "Expected qualified name." error.
fn expect_qualified(tree: syntax::Tree) -> syntax::Tree {
    match crate::is_qualified_name(&tree) {
        true => tree,
        false => tree.with_error("Expected qualified name."),
    }
}
/// Creates an empty placeholder identifier tree at `location`, marked with an "Expected tokens."
/// error. Used where a segment that requires content was left empty.
fn expected_nonempty(location: Code) -> syntax::Tree {
let empty = syntax::Tree::ident(syntax::token::ident(
location.clone(),
location,
false,
0,
false,
false,
false,
));
empty.with_error("Expected tokens.")
empty_tree(location).with_error("Expected tokens.")
}

View File

@ -185,7 +185,7 @@ pub enum Match<'s> {
Identifier(syntax::Item<'s>),
Expected(String, Box<Match<'s>>),
Named(String, Box<Match<'s>>),
Block(Vec<syntax::item::Line<'s>>),
Block(Box<[syntax::item::Line<'s>]>),
NotBlock(syntax::Item<'s>),
}

View File

@ -30,6 +30,12 @@ use crate::source::Code;
use crate::syntax;
use crate::syntax::token;
use crate::syntax::token::Token;
use crate::syntax::BlockHierarchyConsumer;
use crate::syntax::Finish;
use crate::syntax::GroupHierarchyConsumer;
use crate::syntax::Item;
use crate::syntax::NewlineConsumer;
use crate::syntax::TokenConsumer;
use enso_data_structures::im_list::List;
use std::collections::HashMap;
@ -122,16 +128,17 @@ impl<'a> SegmentMap<'a> {
/// Macro resolver capable of resolving nested macro usages. See the docs of the main parser module
/// to learn more about the macro resolution steps.
#[derive(Debug)]
pub struct Resolver<'s> {
struct ResolverState<'s> {
blocks: Vec<Block>,
/// The lines of all currently-open blocks. This is partitioned by `blocks`.
lines: Vec<syntax::item::Line<'s>>,
groups: Vec<OpenGroup<'s>>,
/// All currently-open macros. These are partitioned into scopes by `blocks`.
macros: Vec<PartiallyMatchedMacro<'s>>,
/// Segments of all currently-open macros. These are partitioned by `macros`.
segments: Vec<MatchedSegment<'s>>,
/// Items of all segments of all currently-open macros. These are partitioned by `segments`.
items: Vec<syntax::Item<'s>>,
items: Vec<Item<'s>>,
context: Context,
precedence: syntax::operator::Precedence<'s>,
}
@ -139,46 +146,101 @@ pub struct Resolver<'s> {
// === Public API ===
impl<'s> Resolver<'s> {
impl<'s> ResolverState<'s> {
/// Create a new resolver, in statement context.
pub fn new_statement() -> Self {
fn new_statement() -> Self {
Self {
context: Context::Statement,
precedence: syntax::operator::Precedence::new(),
blocks: default(),
lines: default(),
lines: vec![initial_line()],
groups: default(),
macros: default(),
segments: default(),
items: default(),
}
}
}
/// Run the resolver. Returns the resolved AST.
pub fn run(
&mut self,
root_macro_map: &MacroMap,
tokens: impl IntoIterator<Item = Token<'s>>,
) -> syntax::Tree<'s> {
let start = crate::source::code::Location::default();
self.lines.push(syntax::item::Line {
newline: token::newline(Code::empty(start), Code::empty(start)),
items: default(),
});
tokens.into_iter().for_each(|t| self.push(root_macro_map, t));
/// Creates the line that a resolver starts out with: a newline token built from empty code at
/// the default location, and no items.
fn initial_line<'s>() -> syntax::item::Line<'s> {
    let newline = token::newline(Code::empty(default()), Code::empty(default()));
    let items = default();
    syntax::item::Line { newline, items }
}
impl<'s> Finish for ResolverState<'s> {
type Result = syntax::Tree<'s>;

/// Completes resolution and returns the resulting tree.
///
/// The current line is finished, all accumulated lines are drained and turned into the output
/// tree, and the state is then reset (statement context, a single initial line) so that it
/// can be used for another input.
fn finish(&mut self) -> Self::Result {
self.finish_current_line();
let lines = self.lines.drain(..).map(|syntax::item::Line { newline, items }| {
syntax::tree::block::Line { newline, expression: self.precedence.resolve(items) }
});
let tree = syntax::tree::block::body_from_lines(lines);
let tree = syntax::tree::block::parse_module(self.lines.drain(..), &mut self.precedence);
// At this point every piece of intermediate state is expected to have been consumed.
debug_assert!(self.blocks.is_empty());
debug_assert!(self.lines.is_empty());
debug_assert!(self.groups.is_empty());
debug_assert!(self.macros.is_empty());
debug_assert!(self.segments.is_empty());
debug_assert!(self.items.is_empty());
// Restore the same starting state that `new_statement` sets up.
self.context = Context::Statement;
self.lines.push(initial_line());
tree
}
}
/// Resolves macros.
///
/// Pairs the mutable resolution state with the root macro map that every pushed token is matched
/// against, and exposes that state through the consumer traits.
#[derive(Debug)]
pub struct Resolver<'s, 'macros> {
    /// Resolution state: open blocks, groups, macros, and their items.
    resolver: ResolverState<'s>,
    /// Macro definitions handed to the state together with every pushed token.
    root_macro_map: &'macros MacroMap,
}

impl<'s, 'macros> Resolver<'s, 'macros> {
    /// Creates a macro resolver to use with the given macro map.
    pub fn new(root_macro_map: &'macros MacroMap) -> Self {
        let resolver = ResolverState::new_statement();
        Self { resolver, root_macro_map }
    }
}

impl<'s, 'macros> TokenConsumer<'s> for Resolver<'s, 'macros> {
    /// Feeds a token to the resolution state, matching it against the root macro map.
    fn push_token(&mut self, token: Token<'s>) {
        let macro_map = self.root_macro_map;
        self.resolver.push(macro_map, token);
    }
}

impl<'s, 'macros> NewlineConsumer<'s> for Resolver<'s, 'macros> {
    /// Forwards a newline to the resolution state.
    fn push_newline(&mut self, newline: token::Newline<'s>) {
        let state = &mut self.resolver;
        state.push_newline(newline);
    }
}

impl<'s, 'macros> BlockHierarchyConsumer for Resolver<'s, 'macros> {
    /// Forwards the start of a block to the resolution state.
    fn start_block(&mut self) {
        let state = &mut self.resolver;
        state.start_block();
    }

    /// Forwards the end of a block to the resolution state.
    fn end_block(&mut self) {
        let state = &mut self.resolver;
        state.end_block();
    }
}

impl<'s, 'macros> GroupHierarchyConsumer<'s> for Resolver<'s, 'macros> {
    /// Forwards the opening symbol of a group to the resolution state.
    fn start_group(&mut self, open: token::OpenSymbol<'s>) {
        let state = &mut self.resolver;
        state.start_group(open);
    }

    /// Forwards the closing symbol of a group to the resolution state (`close_group`).
    fn end_group(&mut self, close: token::CloseSymbol<'s>) {
        let state = &mut self.resolver;
        state.close_group(close);
    }
}

impl<'s, 'macros> Finish for Resolver<'s, 'macros> {
    type Result = syntax::Tree<'s>;

    /// Finishes the resolution state and returns the tree it produces.
    fn finish(&mut self) -> Self::Result {
        let state = &mut self.resolver;
        state.finish()
    }
}
// === Implementation ===
@ -186,8 +248,8 @@ impl<'s> Resolver<'s> {
#[derive(Clone, Debug)]
enum Step<'s> {
StartSegment(Token<'s>),
NormalToken(syntax::Item<'s>),
MacroStackPop(syntax::Item<'s>),
NormalToken(Item<'s>),
MacroStackPop(Item<'s>),
}
/// Information about macro resolution state that is stored while processing a deeper indentation
@ -204,16 +266,33 @@ struct Block {
items: usize,
}
impl<'s> Resolver<'s> {
/// A parenthesized group that has been opened and not yet closed, together with the positions in
/// the resolver's stacks at the moment it was opened.
#[derive(Debug)]
struct OpenGroup<'s> {
/// The symbol that opened the group.
open: token::OpenSymbol<'s>,
/// Index in `ResolverState::macros` after the last element in the enclosing scope.
macros_start: usize,
/// Index in `ResolverState::items` after the last element in the enclosing scope.
items: usize,
}
impl<'s> ResolverState<'s> {
/// Returns the index of the first element in `self.macro_stack` that is active in the current
/// scope. Any macros before that index are active in some block that contains the current
/// block, so they will not match tokens within this block.
fn macro_scope_start(&self) -> usize {
self.blocks.last().map(|scope| scope.macros_start).unwrap_or_default()
self.groups
.last()
.map(|scope| scope.macros_start)
.or_else(|| self.blocks.last().map(|scope| scope.macros_start))
.unwrap_or_default()
}
fn items_start(&self) -> usize {
self.blocks.last().map(|scope| scope.items).unwrap_or_default()
self.groups
.last()
.map(|scope| scope.items)
.or_else(|| self.blocks.last().map(|scope| scope.items))
.unwrap_or_default()
}
/// Pop the macro stack if the current token is reserved. For example, when matching the
@ -225,61 +304,86 @@ impl<'s> Resolver<'s> {
reserved.and_option_from(|| self.macros.pop())
}
/// Opens a nested block scope, recording where its macros, lines, and items begin. Processing
/// continues in statement context.
fn start_block(&mut self) {
    // Every group that is still open is ended here, without a closing symbol.
    while let Some(unclosed) = self.groups.pop() {
        self.end_group(unclosed, None);
    }
    let scope = Block {
        macros_start: self.macros.len(),
        outputs_start: self.lines.len(),
        items: self.items.len(),
    };
    self.blocks.push(scope);
    self.context = Context::Statement;
}
/// Closes the innermost block: finishes the current line, then moves every line produced since
/// the block was opened into a single `Item::Block`. Does nothing more if no block is open.
fn end_block(&mut self) {
    self.finish_current_line();
    let Some(Block { macros_start, outputs_start, items }) = self.blocks.pop() else {
        return;
    };
    // After finishing the line, nothing opened inside the block may remain on the stacks.
    debug_assert_eq!(macros_start, self.macros.len());
    debug_assert_eq!(items, self.items.len());
    let block = self.lines.drain(outputs_start..).collect();
    self.items.push(Item::Block(block));
}
/// Opens a parenthesized group, recording where its macros and items begin. Processing continues
/// in expression context.
fn start_group(&mut self, open: token::OpenSymbol<'s>) {
    let group = OpenGroup { open, macros_start: self.macros.len(), items: self.items.len() };
    self.groups.push(group);
    self.context = Context::Expression;
}
/// Handles a closing symbol: ends the innermost open group if there is one; otherwise the symbol
/// is kept as an ordinary token item.
fn close_group(&mut self, close: token::CloseSymbol<'s>) {
    if let Some(group) = self.groups.pop() {
        self.end_group(group, Some(close));
    } else {
        self.items.push(Item::Token(close.into()));
    }
}
/// Ends `group`, which must already have been removed from `self.groups`. Macros opened inside
/// the group are resolved innermost-first, then all items collected since the group was opened
/// are wrapped in a single group item. `close` is `None` when the group is ended without a
/// closing symbol.
fn end_group(&mut self, group: OpenGroup<'s>, close: Option<token::CloseSymbol<'s>>) {
    let OpenGroup { open, macros_start, items: body_start } = group;
    while self.macros.len() > macros_start {
        let pending = self.macros.pop().unwrap();
        self.resolve(pending);
    }
    let body = self.items.drain(body_start..).collect();
    let group = syntax::item::Group { open, body, close };
    self.items.push(group.into());
}
/// Finishes the current line and starts a new, empty one introduced by `newline`. Processing
/// continues in statement context.
fn push_newline(&mut self, newline: token::Newline<'s>) {
    self.finish_current_line();
    let line = syntax::item::Line { newline, items: default() };
    self.lines.push(line);
    self.context = Context::Statement;
}
/// Append a token to the state.
fn push(&mut self, root_macro_map: &MacroMap, token: Token<'s>) {
match token.variant {
token::Variant::Newline(newline) => {
if !self.lines.is_empty() {
self.finish_current_line();
fn push(&mut self, root_macro_map: &MacroMap, mut token: Token<'s>) {
debug_assert!(!matches!(token.variant, token::Variant::Newline(_)));
loop {
token = match self.process_token(root_macro_map, token, self.context) {
Step::MacroStackPop(Item::Token(t)) => t,
Step::MacroStackPop(item) => {
self.items.push(item);
break;
}
let newline = token.with_variant(newline);
self.lines.push(syntax::item::Line { newline, items: default() });
self.context = Context::Statement;
}
token::Variant::BlockStart(_) => {
let macros_start = self.macros.len();
let outputs_start = self.lines.len();
let items = self.items.len();
let scope = Block { macros_start, outputs_start, items };
self.blocks.push(scope);
self.context = Context::Statement;
}
token::Variant::BlockEnd(_) => {
self.finish_current_line();
if let Some(Block { macros_start, outputs_start, items }) = self.blocks.pop() {
debug_assert_eq!(macros_start, self.macros.len());
debug_assert_eq!(items, self.items.len());
let block = self.lines.drain(outputs_start..).collect();
self.items.push(syntax::Item::Block(block));
Step::StartSegment(header) => {
let items_start = self.items.len();
self.segments.push(MatchedSegment { header, items_start });
self.context = Context::Expression;
break;
}
}
_ => {
let mut token = token;
loop {
token = match self.process_token(root_macro_map, token, self.context) {
Step::MacroStackPop(syntax::Item::Token(t)) => t,
Step::MacroStackPop(item) => {
self.items.push(item);
break;
}
Step::StartSegment(header) => {
let items_start = self.items.len();
self.segments.push(MatchedSegment { header, items_start });
self.context = Context::Expression;
break;
}
Step::NormalToken(item) => {
self.items.push(item);
self.context = Context::Expression;
break;
}
}
Step::NormalToken(item) => {
self.items.push(item);
self.context = Context::Expression;
break;
}
}
}
}
fn finish_current_line(&mut self) {
while let Some(group) = self.groups.pop() {
self.end_group(group, None);
}
let macros_start = self.macro_scope_start();
let items_start = self.items_start();
while self.macros.len() > macros_start {
@ -365,8 +469,7 @@ impl<'s> Resolver<'s> {
});
let out = if all_tokens_consumed {
let unwrap_match = |(header, match_result)| {
let match_result: Result<pattern::MatchResult, VecDeque<syntax::item::Item>> =
match_result;
let match_result: Result<pattern::MatchResult, VecDeque<Item>> = match_result;
pattern::MatchedSegment::new(header, match_result.unwrap().matched)
};
let parser = &mut self.precedence;

View File

@ -8,14 +8,19 @@
pub mod item;
pub mod operator;
pub mod statement;
pub mod token;
pub mod tree;
mod consumer;
mod treebuilding;
pub use consumer::*;
pub use item::Item;
pub use token::Token;
pub use tree::maybe_with_error;
pub use tree::Tree;
pub use tree::WARNINGS;
pub use treebuilding::TokenOrTree;

View File

@ -0,0 +1,234 @@
use crate::prelude::*;
use crate::syntax::token;
use crate::syntax::treebuilding::Spacing;
use crate::syntax::treebuilding::SpacingLookaheadTokenConsumer;
use crate::syntax::treebuilding::SpacingLookaheadTreeConsumer;
use crate::syntax::Item;
use crate::syntax::Token;
use crate::syntax::Tree;
/// Item consumer: a pipeline stage that accepts [`Item`]s one at a time.
pub trait ItemConsumer<'s> {
    /// Push an item.
    ///
    /// The parameter is named `item` (it was previously misnamed `tree`); parameter names in a
    /// trait declaration are not part of the method's signature, so implementors and callers are
    /// unaffected.
    fn push_item(&mut self, item: Item<'s>);
}
/// Tree consumer: a pipeline stage that accepts [`Tree`]s one at a time.
pub trait TreeConsumer<'s> {
/// Push a tree.
fn push_tree(&mut self, tree: Tree<'s>);
}
/// Token consumer: a pipeline stage that accepts [`Token`]s one at a time.
pub trait TokenConsumer<'s> {
/// Push a token.
fn push_token(&mut self, token: Token<'s>);
}
/// Newline consumer: a pipeline stage that is notified of newline tokens separately from other
/// input.
pub trait NewlineConsumer<'s> {
/// Push a newline.
fn push_newline(&mut self, newline: token::Newline<'s>);
}
/// Block hierarchy consumer: a pipeline stage that is notified when a block starts and when it
/// ends. The notifications carry no tokens.
pub trait BlockHierarchyConsumer {
/// Start a block.
fn start_block(&mut self);
/// End the block.
fn end_block(&mut self);
}
/// Parenthesized-group hierarchy consumer: a pipeline stage that receives the opening and closing
/// symbols of a group as dedicated events.
pub trait GroupHierarchyConsumer<'s> {
/// Start a parenthesized group; `open` is its opening symbol.
fn start_group(&mut self, open: token::OpenSymbol<'s>);
/// End the parenthesized group; `close` is its closing symbol.
fn end_group(&mut self, close: token::CloseSymbol<'s>);
}
/// Trait for a token consumer to enter a scope that will be handled independently.
///
/// Unlike the block and group notifications, ending a scope yields a value.
pub trait ScopeHierarchyConsumer {
/// The result of the scope ending.
type Result;
/// Start a scope.
fn start_scope(&mut self);
/// End the scope, returning its result.
fn end_scope(&mut self) -> Self::Result;
}
/// An operation that can be finished.
pub trait Finish {
/// The output produced when the operation is finished.
type Result;
/// Indicates end of input, and returns the output. Takes `&mut self`, so the value itself is
/// not consumed by finishing.
fn finish(&mut self) -> Self::Result;
}
/// Trait for a type that wraps another type, and exposes it.
///
/// Adapters use this to reach the wrapped value directly, bypassing the wrapper's own handling.
pub trait HasInner {
/// The inner type.
type Inner;
/// Access the inner type mutably.
fn inner_mut(&mut self) -> &mut Self::Inner;
}
/// Process all retained state.
///
/// Adapters call this before handing input directly to a wrapped consumer.
pub trait Flush {
/// Process all retained state.
fn flush(&mut self);
}
// ================
// === Adapters ===
// ================
/// Adapts a parser that consumes only tokens so that it fits into a more complex pipeline stage.
///
/// Tokens are handed to the wrapped parser. Other kinds of input (trees, group boundaries) are
/// passed to the parser's inner consumer after the parser has been flushed.
#[derive(Debug, Default)]
pub struct TokenOnlyParser<Parser> {
/// The wrapped token-only parser.
parser: Parser,
}
impl<'s, Inner, Parser> SpacingLookaheadTokenConsumer<'s> for TokenOnlyParser<Parser>
where
    Parser: HasInner<Inner = Inner> + SpacingLookaheadTokenConsumer<'s>,
    Inner: SpacingLookaheadTokenConsumer<'s>,
{
    /// Tokens are what the wrapped parser understands, so they are handed to it unchanged.
    fn push_token(&mut self, token: Token<'s>, following_spacing: Option<Spacing>) {
        let Self { parser } = self;
        parser.push_token(token, following_spacing);
    }
}
impl<'s, Parser, Inner> SpacingLookaheadTreeConsumer<'s> for TokenOnlyParser<Parser>
where
    Parser: HasInner<Inner = Inner> + Flush,
    Inner: SpacingLookaheadTreeConsumer<'s>,
{
    /// Trees bypass the wrapped parser: it is flushed first, then the tree goes straight to the
    /// parser's inner consumer.
    fn push_tree(&mut self, tree: Tree<'s>, following_spacing: Option<Spacing>) {
        let wrapped = &mut self.parser;
        wrapped.flush();
        wrapped.inner_mut().push_tree(tree, following_spacing);
    }
}
impl<'s, Parser, Inner> GroupHierarchyConsumer<'s> for TokenOnlyParser<Parser>
where
    Parser: HasInner<Inner = Inner> + Flush,
    Inner: GroupHierarchyConsumer<'s>,
{
    /// Flushes the wrapped parser, then reports the group start to its inner consumer.
    fn start_group(&mut self, open: token::OpenSymbol<'s>) {
        let wrapped = &mut self.parser;
        wrapped.flush();
        wrapped.inner_mut().start_group(open);
    }

    /// Flushes the wrapped parser, then reports the group end to its inner consumer.
    fn end_group(&mut self, close: token::CloseSymbol<'s>) {
        let wrapped = &mut self.parser;
        wrapped.flush();
        wrapped.inner_mut().end_group(close);
    }
}
// The `Finish` bound is stated once, and the type parameter is named `Parser` to match the
// struct definition (it is the wrapped parser, not the parser's inner consumer).
impl<Parser: Finish> Finish for TokenOnlyParser<Parser> {
    type Result = Parser::Result;

    /// Finishes the wrapped parser and returns its result.
    fn finish(&mut self) -> Self::Result {
        self.parser.finish()
    }
}
// =================
// === Debugging ===
// =================
/// Debugging tool. Can be inserted into parsing pipeline at different stages to debug components.
#[derive(Debug, Default)]
pub struct Inspect<Inner>(pub(crate) Inner);
impl<Inner> Inspect<Inner> {
pub(crate) fn observe(&self, event: &impl Debug) {
eprintln!("-> {:?}", event);
}
pub(crate) fn observe_received(&self, event: &impl Debug) {
eprintln!("<- {:?}", event);
}
pub(crate) fn observe_token<'a: 'b, 'b, T: Into<token::Ref<'a, 'b>>>(&self, token: T) {
eprintln!("-> Token({})", token.into().code.repr.0);
}
}
impl<T: Debug, Inner: ScopeHierarchyConsumer<Result = T>> ScopeHierarchyConsumer
    for Inspect<Inner>
{
    type Result = Inner::Result;

    /// Logs the scope start, then forwards it.
    fn start_scope(&mut self) {
        self.observe(&"StartScope");
        let Self(inner) = self;
        inner.start_scope();
    }

    /// Logs the scope end, forwards it, and logs the result before returning it.
    fn end_scope(&mut self) -> Self::Result {
        self.observe(&"EndScope");
        let outcome = self.0.end_scope();
        self.observe_received(&outcome);
        outcome
    }
}
impl<'s, Inner: GroupHierarchyConsumer<'s>> GroupHierarchyConsumer<'s> for Inspect<Inner> {
    /// Logs the opening symbol, then forwards the group start.
    fn start_group(&mut self, open: token::OpenSymbol<'s>) {
        self.observe_token(&open);
        let Self(inner) = self;
        inner.start_group(open);
    }

    /// Logs the closing symbol, then forwards the group end.
    fn end_group(&mut self, close: token::CloseSymbol<'s>) {
        self.observe_token(&close);
        let Self(inner) = self;
        inner.end_group(close);
    }
}
impl<'s, Inner: SpacingLookaheadTokenConsumer<'s>> SpacingLookaheadTokenConsumer<'s>
    for Inspect<Inner>
{
    /// Logs the token, then forwards it with its spacing information unchanged.
    fn push_token(&mut self, token: Token<'s>, following_spacing: Option<Spacing>) {
        self.observe_token(&token);
        let Self(inner) = self;
        inner.push_token(token, following_spacing);
    }
}
impl<'s, Inner: SpacingLookaheadTreeConsumer<'s>> SpacingLookaheadTreeConsumer<'s>
    for Inspect<Inner>
{
    /// Logs the tree, then forwards it with its spacing information unchanged.
    fn push_tree(&mut self, tree: Tree<'s>, following_spacing: Option<Spacing>) {
        self.observe(&tree);
        let Self(inner) = self;
        inner.push_tree(tree, following_spacing);
    }
}
impl<'s, Inner: ItemConsumer<'s>> ItemConsumer<'s> for Inspect<Inner> {
    /// Logs the item, then forwards it. Token items are logged by their source text; every other
    /// item is logged with its `Debug` representation.
    fn push_item(&mut self, item: Item<'s>) {
        if let Item::Token(token) = &item {
            self.observe_token(token);
        } else {
            self.observe(&item);
        }
        self.0.push_item(item);
    }
}
impl<T: Debug, Inner: Finish<Result = T>> Finish for Inspect<Inner> {
    type Result = Inner::Result;

    /// Logs the end of input, finishes the wrapped stage, and logs its output before returning it.
    fn finish(&mut self) -> Self::Result {
        self.observe(&"Finish");
        let outcome = self.0.finish();
        self.observe_received(&outcome);
        outcome
    }
}

View File

@ -18,8 +18,20 @@ use crate::syntax::*;
#[allow(missing_docs)]
pub enum Item<'s> {
Token(Token<'s>),
Block(Vec<Line<'s>>),
Block(Box<[Line<'s>]>),
Tree(Tree<'s>),
Group(Group<'s>),
}
/// A parenthesized subtree: an opening symbol, the items enclosed by it, and the closing symbol
/// if one is present.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Group<'s> {
/// The opening parenthesis.
pub open: token::OpenSymbol<'s>,
/// The parenthesized subtree.
pub body: Box<[Item<'s>]>,
/// The closing parenthesis; `None` when the group has no closing symbol.
pub close: Option<token::CloseSymbol<'s>>,
}
/// A line.
@ -46,6 +58,7 @@ impl<'s> Item<'s> {
match self {
Self::Token(t) => t.span().left_offset.visible,
Self::Tree(t) => t.span.left_offset.visible,
Self::Group(t) => t.open.left_offset.visible,
Self::Block(_) => default(),
}
}
@ -73,18 +86,16 @@ impl<'s> From<Tree<'s>> for Item<'s> {
}
}
/// Given a sequence of [`Line`]s belonging to one block, create an AST block node, of a type
/// determined by the syntax of the lines in the block.
pub fn build_block<'s>(
lines: impl IntoIterator<Item = Line<'s>>,
parser: &mut operator::Precedence<'s>,
) -> Tree<'s> {
let mut block_builder = tree::block::Builder::new();
for Line { newline, items } in lines {
block_builder.push(newline, items, parser);
impl<'s> From<Group<'s>> for Item<'s> {
fn from(group: Group<'s>) -> Self {
Item::Group(group)
}
}
// Identity conversion, so that an `Item` can be passed where `AsRef<Item>` is accepted.
impl<'s> AsRef<Item<'s>> for Item<'s> {
fn as_ref(&self) -> &Self {
self
}
block_builder.build()
}

View File

@ -2,15 +2,16 @@
mod annotations;
mod application;
mod apply;
mod arity;
mod operand;
mod group;
mod named_app;
mod precedence_resolver;
mod reducer;
mod section;
mod types;
// ===============
// === Exports ===
// ===============

View File

@ -0,0 +1,134 @@
use crate::lexer::test::Precedence;
use crate::prelude::*;
use crate::syntax::operator::reducer::ApplyToOperand;
use crate::syntax::operator::section::MaybeSection;
use crate::syntax::operator::types::Arity;
use crate::syntax::operator::types::ModifiedPrecedence;
use crate::syntax::operator::types::Operator;
use crate::syntax::operator::types::OperatorConsumer;
use crate::syntax::token::AnnotationOperator;
use crate::syntax::token::Associativity;
use crate::syntax::token::Ident;
use crate::syntax::token::Variant;
use crate::syntax::tree;
use crate::syntax::tree::SyntaxError;
use crate::syntax::treebuilding::Spacing;
use crate::syntax::treebuilding::SpacingLookaheadTokenConsumer;
use crate::syntax::treebuilding::SpacingLookaheadTreeConsumer;
use crate::syntax::Finish;
use crate::syntax::Flush;
use crate::syntax::HasInner;
use crate::syntax::Token;
use crate::syntax::TokenOnlyParser;
use crate::syntax::Tree;
// ===================
// === Annotations ===
// ===================
/// Annotation-parsing pipeline stage: an [`AnnotationParser`] wrapped in a [`TokenOnlyParser`].
pub type ParseAnnotations<'s, Inner> = TokenOnlyParser<AnnotationParser<'s, Inner>>;
/// Recognizes an annotation operator immediately followed by an identifier, and passes all other
/// tokens through to `inner`.
#[derive(Debug, Default)]
pub struct AnnotationParser<'s, Inner> {
/// An annotation operator that has been received but not yet paired with an identifier.
operator: Option<AnnotationOperator<'s>>,
/// The consumer that receives this parser's output.
inner: Inner,
}
/// An annotation operator together with the identifier it was applied to.
#[derive(Debug)]
pub struct Annotation<'s> {
/// The annotation operator token.
operator: AnnotationOperator<'s>,
/// The identifier that follows the operator.
ident: Ident<'s>,
}
impl<'s> Annotation<'s> {
    /// Builds the annotation's syntax tree, with `operand` (if any) as its argument. An
    /// identifier flagged `is_type` produces a builtin annotation; any other identifier produces
    /// an ordinary annotation.
    fn apply(self, operand: Option<MaybeSection<Tree<'s>>>) -> Tree<'s> {
        let Self { operator, ident } = self;
        let argument = operand.map(Tree::from);
        if !ident.is_type {
            return Tree::annotated(operator, ident, argument, default(), default());
        }
        Tree::annotated_builtin(operator, ident, default(), argument)
    }
}
impl<'s> ApplyToOperand<'s> for Annotation<'s> {
fn apply_to_operand(self, operand: Option<MaybeSection<Tree<'s>>>) -> MaybeSection<Tree<'s>> {
self.apply(operand).into()
}
}
impl<'s> Annotation<'s> {
/// Returns the spacing of the annotation, as determined from its operator token.
pub fn spacing(&self) -> Spacing {
Spacing::of_token(&self.operator)
}
}
impl<'s, Inner> SpacingLookaheadTokenConsumer<'s> for AnnotationParser<'s, Inner>
where Inner:
        SpacingLookaheadTokenConsumer<'s> + SpacingLookaheadTreeConsumer<'s> + OperatorConsumer<'s>
{
    /// Processes one token.
    ///
    /// - An annotation operator is held back when none is pending and whatever follows it is
    ///   unspaced.
    /// - An identifier arriving while an operator is pending completes the annotation. With
    ///   known following spacing the annotation is emitted as a prefix operator; otherwise it is
    ///   emitted as a finished tree with no operand.
    ///   NOTE(review): `following_spacing == None` presumably means nothing follows — confirm.
    /// - Anything else flushes a pending operator (as an error) and is passed through unchanged.
    fn push_token(&mut self, token: Token<'s>, following_spacing: Option<Spacing>) {
        match token.variant {
            Variant::AnnotationOperator(variant)
                if self.operator.is_none() && following_spacing == Some(Spacing::Unspaced) =>
            {
                self.operator = Some(token.with_variant(variant));
            }
            Variant::Ident(variant) if self.operator.is_some() => {
                let operator = self.operator.take().unwrap();
                let ident = token.with_variant(variant);
                let annotation = Annotation { operator, ident };
                match following_spacing {
                    Some(spacing) => {
                        let right_precedence =
                            ModifiedPrecedence::new(spacing, Precedence::annotation(), false);
                        self.inner.push_operator(Operator {
                            left_precedence: None,
                            right_precedence,
                            associativity: Associativity::Left,
                            arity: Arity::Annotation(annotation),
                        });
                    }
                    None => self.inner.push_tree(annotation.apply(None), None),
                }
            }
            _ => {
                self.flush();
                self.inner.push_token(token, following_spacing);
            }
        }
    }
}
impl<'s, Inner> Flush for AnnotationParser<'s, Inner>
where Inner: SpacingLookaheadTreeConsumer<'s>
{
    /// Emits a pending annotation operator that was never followed by an identifier. It is
    /// passed on as a tree carrying the `AnnotationOpMustBeAppliedToIdent` syntax error. Does
    /// nothing when no operator is pending.
    fn flush(&mut self) {
        let Some(operator) = self.operator.take() else {
            return;
        };
        let dangling = tree::to_ast(operator.into())
            .with_error(SyntaxError::AnnotationOpMustBeAppliedToIdent);
        self.inner.push_tree(dangling, Some(Spacing::Unspaced));
    }
}
impl<'s, Inner> Finish for AnnotationParser<'s, Inner>
where Inner: Finish + SpacingLookaheadTreeConsumer<'s>
{
    type Result = Inner::Result;

    /// Flushes any pending annotation operator, then finishes the inner consumer and returns its
    /// result.
    fn finish(&mut self) -> Self::Result {
        self.flush();
        let Self { inner, .. } = self;
        inner.finish()
    }
}
impl<'s, Inner> HasInner for AnnotationParser<'s, Inner> {
type Inner = Inner;
/// Gives direct access to the consumer that receives this parser's output. A pending
/// operator is not flushed by this call.
fn inner_mut(&mut self) -> &mut Self::Inner {
&mut self.inner
}
}

View File

@ -1,10 +1,12 @@
use crate::syntax::operator::types::*;
use enso_prelude::*;
use crate::syntax::operator::operand::Operand;
use crate::syntax::operator::named_app::NamedApp;
use crate::syntax::token;
use crate::syntax::treebuilding::Finish;
use crate::syntax::treebuilding::Spacing;
use crate::syntax::Finish;
use crate::syntax::GroupHierarchyConsumer;
use crate::syntax::ScopeHierarchyConsumer;
use crate::syntax::Tree;
@ -17,17 +19,61 @@ use crate::syntax::Tree;
#[derive(Default, Debug)]
pub struct InsertApps<Inner> {
/// Whether an application operator must be inserted before the next operand or group. Set
/// after an operand or a closed group; cleared when a group or scope starts.
prev_applicable: bool,
/// Saved `prev_applicable` values of the enclosing scopes: pushed by `start_scope`, restored
/// by `end_scope`.
stack: Vec<bool>,
/// The consumer that receives operands and the inserted application operators.
inner: Inner,
}
impl<'s, Inner: OperatorConsumer<'s> + OperandConsumer<'s>> OperandConsumer<'s>
for InsertApps<Inner>
impl<'s, Inner> NamedOperandConsumer<'s> for InsertApps<Inner>
where Inner: OperatorConsumer<'s> + OperandConsumer<'s>
{
fn push_operand(&mut self, operand: Operand<Tree<'s>>) {
if mem::replace(&mut self.prev_applicable, true) {
self.inner.push_operator(application(Spacing::of_tree(&operand.value)));
fn push_maybe_named_operand(&mut self, operand: OperandMaybeNamed<'s>) {
match operand {
OperandMaybeNamed::Unnamed(operand) => {
if mem::replace(&mut self.prev_applicable, true) {
self.inner.push_operator(application(Spacing::of_tree(&operand.value)));
}
self.inner.push_operand(operand)
}
OperandMaybeNamed::Named { parens, name, equals, expression } => {
if mem::replace(&mut self.prev_applicable, true) {
let spacing = if let Some((open, _)) = &parens {
Spacing::of_token(open)
} else {
Spacing::of_token(&name)
};
let precedence =
ModifiedPrecedence::new(spacing, token::Precedence::application(), false);
let right_precedence = ModifiedPrecedence::new(
// Named applications always have unspaced right-precedence; if it reads
// from left to right as a named application, a following operator can't
// cause the interpretation to change.
Spacing::Unspaced,
token::Precedence::application(),
false,
);
let operator = Operator {
left_precedence: Some(precedence),
right_precedence,
associativity: token::Associativity::Left,
arity: Arity::NamedApp(
NamedApp { parens, name, equals, expression }.into(),
),
};
self.inner.push_operator(operator);
} else {
let mut tree = Tree::opr_app(
Tree::ident(name).into(),
Ok(equals.with_variant(token::variant::Operator())),
expression.into(),
);
if let Some((open, close)) = parens {
tree = Tree::group(Some(open), tree.into(), close);
}
// After removing support for old lambdas, we can make this an error.
self.inner.push_operand(tree.into())
}
}
}
self.inner.push_operand(operand)
}
}
@ -59,19 +105,45 @@ impl<Inner: Finish> Finish for InsertApps<Inner> {
}
fn application<'s>(spacing: Spacing) -> Operator<'s> {
let precedence = ModifiedPrecedence {
spacing,
precedence: token::Precedence::application(),
is_value_operation: false,
};
let precedence = ModifiedPrecedence::new(spacing, token::Precedence::application(), false);
Operator {
left_precedence: Some(precedence),
right_precedence: precedence,
associativity: token::Associativity::Left,
arity: Arity::Binary {
tokens: default(),
missing: None,
reify_rhs_section: true,
},
arity: Arity::App,
}
}
impl<'s, Inner> GroupHierarchyConsumer<'s> for InsertApps<Inner>
where Inner: OperatorConsumer<'s> + GroupHierarchyConsumer<'s>
{
    /// Starts a group. If the preceding item was applicable, an application operator (spaced
    /// according to the opening symbol) is inserted first, so the group becomes an argument.
    /// Inside the group nothing is applicable yet.
    fn start_group(&mut self, open: token::OpenSymbol<'s>) {
        let was_applicable = mem::replace(&mut self.prev_applicable, false);
        if was_applicable {
            let app = application(Spacing::of_token(&open));
            self.inner.push_operator(app);
        }
        self.inner.start_group(open);
    }

    /// Ends a group. The closed group can itself be applied to whatever follows.
    fn end_group(&mut self, close: token::CloseSymbol<'s>) {
        self.prev_applicable = true;
        self.inner.end_group(close);
    }
}
impl<'s, Inner> ScopeHierarchyConsumer for InsertApps<Inner>
where Inner: OperandConsumer<'s> + OperatorConsumer<'s> + ScopeHierarchyConsumer
{
    type Result = Inner::Result;

    /// Enters a scope: the current applicability flag is saved and cleared, so the scope starts
    /// with nothing to apply.
    fn start_scope(&mut self) {
        self.stack.push(mem::replace(&mut self.prev_applicable, false));
        self.inner.start_scope();
    }

    /// Leaves a scope: the applicability flag saved by the matching `start_scope` is restored.
    /// Panics if there is no matching `start_scope`.
    fn end_scope(&mut self) -> Self::Result {
        self.prev_applicable = self.stack.pop().unwrap();
        self.inner.end_scope()
    }
}

View File

@ -2,8 +2,11 @@ use crate::prelude::*;
use crate::syntax::operator::types::*;
use crate::syntax;
use crate::syntax::operator::operand::Operand;
use crate::syntax::maybe_with_error;
use crate::syntax::operator::section::MaybeSection;
use crate::syntax::token;
use crate::syntax::token::TokenOperatorProperties;
use crate::syntax::Token;
use crate::syntax::Tree;
@ -17,15 +20,15 @@ use crate::syntax::Tree;
#[derive(Debug)]
pub struct ApplyOperator<'s> {
tokens: Vec<token::Operator<'s>>,
lhs: Option<Operand<Tree<'s>>>,
rhs: Option<Operand<Tree<'s>>>,
tokens: Vec<Token<'s>>,
lhs: Option<MaybeSection<Tree<'s>>>,
rhs: Option<MaybeSection<Tree<'s>>>,
reify_rhs_section: bool,
warnings: Option<Warnings>,
}
impl<'s> ApplyOperator<'s> {
pub fn tokens(tokens: Vec<token::Operator<'s>>) -> Self {
pub fn tokens(tokens: Vec<Token<'s>>) -> Self {
Self {
tokens,
lhs: default(),
@ -35,15 +38,15 @@ impl<'s> ApplyOperator<'s> {
}
}
pub fn token(token: token::Operator<'s>) -> Self {
pub fn token(token: Token<'s>) -> Self {
Self::tokens(vec![token])
}
pub fn with_lhs(self, lhs: Option<Operand<Tree<'s>>>) -> Self {
pub fn with_lhs(self, lhs: Option<MaybeSection<Tree<'s>>>) -> Self {
Self { lhs, ..self }
}
pub fn with_rhs(self, rhs: Option<Operand<Tree<'s>>>, reify_rhs_section: bool) -> Self {
pub fn with_rhs(self, rhs: Option<MaybeSection<Tree<'s>>>, reify_rhs_section: bool) -> Self {
Self { rhs, reify_rhs_section, ..self }
}
@ -51,10 +54,11 @@ impl<'s> ApplyOperator<'s> {
Self { warnings: Some(warnings), ..self }
}
pub fn finish(self) -> Operand<Tree<'s>> {
pub fn finish(self) -> MaybeSection<Tree<'s>> {
let Self { tokens, lhs, rhs: rhs_, reify_rhs_section, warnings } = self;
let mut operand = if let Some(lhs_termination) =
tokens.first().and_then(|token| token.properties.lhs_section_termination())
let mut operand = if let Some(lhs_termination) = tokens
.first()
.and_then(|token| token.operator_properties().unwrap().lhs_section_termination())
{
let lhs = match lhs_termination {
SectionTermination::Reify => lhs.map(Tree::from),
@ -62,10 +66,10 @@ impl<'s> ApplyOperator<'s> {
};
let rhs = rhs_.map(Tree::from);
let ast = syntax::tree::apply_operator(lhs, tokens, rhs);
Operand::from(ast)
MaybeSection::from(ast)
} else if tokens.len() < 2
&& let Some(opr) = tokens.first()
&& opr.properties.can_form_section()
&& !opr.is_syntactic_binary_operator()
{
let mut rhs = None;
let mut elided = 0;
@ -81,18 +85,18 @@ impl<'s> ApplyOperator<'s> {
}
elided += lhs.is_none() as u32 + rhs.is_none() as u32;
let mut operand =
Operand::from(lhs).map(|lhs| syntax::tree::apply_operator(lhs, tokens, rhs));
MaybeSection::from(lhs).map(|lhs| syntax::tree::apply_operator(lhs, tokens, rhs));
operand.elided += elided;
operand.wildcards += wildcards;
operand
} else {
let rhs = rhs_.map(Tree::from);
let mut elided = 0;
if tokens.len() != 1 || tokens[0].properties.can_form_section() {
if tokens.len() != 1 || !tokens[0].is_syntactic_binary_operator() {
elided += lhs.is_none() as u32 + rhs.is_none() as u32;
}
let mut operand =
Operand::from(lhs).map(|lhs| syntax::tree::apply_operator(lhs, tokens, rhs));
MaybeSection::from(lhs).map(|lhs| syntax::tree::apply_operator(lhs, tokens, rhs));
operand.elided += elided;
operand
};
@ -108,18 +112,18 @@ impl<'s> ApplyOperator<'s> {
#[derive(Debug)]
pub struct ApplyUnaryOperator<'s> {
token: token::Operator<'s>,
rhs: Option<Operand<Tree<'s>>>,
token: token::UnaryOperator<'s>,
rhs: Option<MaybeSection<Tree<'s>>>,
error: Option<Cow<'static, str>>,
warnings: Option<Warnings>,
}
impl<'s> ApplyUnaryOperator<'s> {
pub fn token(token: token::Operator<'s>) -> Self {
pub fn token(token: token::UnaryOperator<'s>) -> Self {
Self { token, rhs: default(), error: default(), warnings: default() }
}
pub fn with_rhs(self, rhs: Option<Operand<Tree<'s>>>) -> Self {
pub fn with_rhs(self, rhs: Option<MaybeSection<Tree<'s>>>) -> Self {
Self { rhs, ..self }
}
@ -131,17 +135,19 @@ impl<'s> ApplyUnaryOperator<'s> {
Self { warnings: Some(warnings), ..self }
}
pub fn finish(self) -> Operand<Tree<'s>> {
pub fn finish(self) -> MaybeSection<Tree<'s>> {
let Self { token, rhs, error, warnings } = self;
Operand::new(rhs).map(|rhs| {
let mut tree = syntax::tree::apply_unary_operator(token, rhs);
MaybeSection::new(rhs).map(|rhs| {
let mut tree = match rhs {
Some(rhs) => Tree::unary_opr_app(token, Some(rhs)),
None =>
Tree::opr_app(None, Ok(token.with_variant(token::variant::Operator())), None)
.with_error("Operator must be applied to an operand."),
};
if let Some(warnings) = warnings {
warnings.apply(&mut tree);
}
match error {
None => tree,
Some(error) => tree.with_error(error),
}
maybe_with_error(tree, error)
})
}
}

View File

@ -2,15 +2,16 @@ use crate::syntax::operator::apply::*;
use crate::syntax::operator::types::*;
use enso_prelude::*;
use crate::syntax::operator::operand::Operand;
use crate::syntax::token;
use crate::syntax::token::OperatorProperties;
use crate::syntax::token::TokenOperatorProperties;
use crate::syntax::tree;
use crate::syntax::treebuilding::Finish;
use crate::syntax::treebuilding::Spacing;
use crate::syntax::treebuilding::SpacingLookaheadTokenConsumer;
use crate::syntax::treebuilding::TreeConsumer;
use crate::syntax::Finish;
use crate::syntax::GroupHierarchyConsumer;
use crate::syntax::ScopeHierarchyConsumer;
use crate::syntax::Token;
use crate::syntax::Tree;
@ -23,65 +24,64 @@ use crate::syntax::Tree;
pub struct ClassifyArity<'s, Inner> {
/// Next item that will be emitted. If it is an operator, it may still be extended with
/// additional operators to become a multiple-operator error.
lhs_item: Option<OperatorOrOperand<'s>>,
lhs_item: Option<MaybeOperator<'s>>,
inner: Inner,
}
impl<'s, Inner: OperandConsumer<'s> + OperatorConsumer<'s>> SpacingLookaheadTokenConsumer<'s>
for ClassifyArity<'s, Inner>
impl<'s, Inner> SpacingLookaheadTokenConsumer<'s> for ClassifyArity<'s, Inner>
where Inner: NamedOperandConsumer<'s> + OperatorConsumer<'s>
{
fn push_token(&mut self, tt: Token<'s>, rhs: Option<Spacing>) {
match tt {
Token { variant: token::Variant::Operator(opr), left_offset, code } =>
self.operator(Token(left_offset, code, opr), rhs),
token => self.push_tree(tree::to_ast(token)),
fn push_token(&mut self, token: Token<'s>, rhs: Option<Spacing>) {
let properties = token.operator_properties();
match properties {
Some(properties) => self.operator(token, properties, rhs),
None => self.push_operand(tree::to_ast(token).into()),
}
}
}
impl<'s, Inner: OperandConsumer<'s> + OperatorConsumer<'s>> TreeConsumer<'s>
for ClassifyArity<'s, Inner>
impl<'s, Inner> NamedOperandConsumer<'s> for ClassifyArity<'s, Inner>
where Inner: NamedOperandConsumer<'s> + OperatorConsumer<'s>
{
fn push_tree(&mut self, tree: Tree<'s>) {
self.emit(Operand::from(tree))
fn push_maybe_named_operand(&mut self, operand: OperandMaybeNamed<'s>) {
self.emit(MaybeOperator::Operand);
self.inner.push_maybe_named_operand(operand);
}
}
impl<'s, Inner: OperandConsumer<'s> + OperatorConsumer<'s> + Finish> Finish
for ClassifyArity<'s, Inner>
impl<'s, Inner> Finish for ClassifyArity<'s, Inner>
where Inner: NamedOperandConsumer<'s> + OperatorConsumer<'s> + Finish
{
type Result = Inner::Result;
fn finish(&mut self) -> Self::Result {
self.step(None);
self.flush();
self.inner.finish()
}
}
impl<'s, Inner: OperandConsumer<'s> + OperatorConsumer<'s>> ClassifyArity<'s, Inner> {
fn emit<T: Into<OperatorOrOperand<'s>>>(&mut self, item: T) {
impl<'s, Inner> ClassifyArity<'s, Inner>
where Inner: NamedOperandConsumer<'s> + OperatorConsumer<'s>
{
fn emit<T: Into<MaybeOperator<'s>>>(&mut self, item: T) {
self.step(Some(item.into()));
}
fn step(&mut self, item: Option<OperatorOrOperand<'s>>) {
match mem::replace(&mut self.lhs_item, item) {
Some(OperatorOrOperand::Operand(item)) => self.inner.push_operand(item),
Some(OperatorOrOperand::Operator(item)) => self.inner.push_operator(item),
None => (),
fn flush(&mut self) {
self.step(None);
}
fn step(&mut self, item: Option<MaybeOperator<'s>>) {
if let Some(MaybeOperator::Operator(item)) = mem::replace(&mut self.lhs_item, item) {
self.inner.push_operator(item)
}
}
fn operator(&mut self, token: token::Operator<'s>, rhs: Option<Spacing>) {
let properties = &token.variant.properties;
let lhs = match self.lhs_item {
Some(
OperatorOrOperand::Operand(_)
| OperatorOrOperand::Operator(Operator {
arity: Arity::Binary { missing: Some(BinaryOperand::Right), .. },
..
}),
) => Some(Spacing::of_token(&token)),
_ => None,
fn operator(&mut self, token: Token<'s>, properties: OperatorProperties, rhs: Option<Spacing>) {
let lhs = if self.lhs_item.as_ref().is_some_and(|item| !item.expects_rhs()) {
Some(Spacing::of_token(&token))
} else {
None
};
// Asymmetric whitespace creates operator sections.
// Exception: If an operator cannot form sections, and its LHS is unspaced, a spaced RHS is
@ -99,7 +99,7 @@ impl<'s, Inner: OperandConsumer<'s> + OperatorConsumer<'s>> ClassifyArity<'s, In
(_, Some(unary), None, Some(Spacing::Unspaced)) =>
self.unary_operator_applied(unary, assoc, token),
(Some(binary), _, _, _) => self.binary_operator(binary, assoc, token, lhs, rhs),
(_, Some(_), _, _) => self.unary_operator_section(token, rhs),
(_, Some(_), _, _) => self.unary_operator_section(token),
(None, None, _, _) => unreachable!(),
}
}
@ -108,87 +108,74 @@ impl<'s, Inner: OperandConsumer<'s> + OperatorConsumer<'s>> ClassifyArity<'s, In
&mut self,
precedence: token::Precedence,
associativity: token::Associativity,
token: token::Operator<'s>,
token: Token<'s>,
) {
let error = match self.lhs_item {
Some(OperatorOrOperand::Operand(_))
if token.left_offset.visible.width_in_spaces == 0 =>
Some("Space required between term and unary-operator expression.".into()),
_ => None,
};
let is_value_operation = token.properties.is_value_operation();
let is_value_operation = token.operator_properties().unwrap().is_value_operation();
self.emit(Operator {
left_precedence: None,
right_precedence: ModifiedPrecedence {
spacing: Spacing::Unspaced,
right_precedence: ModifiedPrecedence::new(
Spacing::Unspaced,
precedence,
is_value_operation,
},
),
associativity,
arity: Arity::Unary { token, error },
arity: Arity::Unary(token.with_variant(token::variant::UnaryOperator())),
});
}
fn unary_operator_section(&mut self, token: token::Operator<'s>, rhs: Option<Spacing>) {
match &mut self.lhs_item {
Some(OperatorOrOperand::Operator(Operator {
arity: Arity::Binary { tokens, .. },
..
})) if !(tokens.first().unwrap().left_offset.visible.width_in_spaces == 0
&& token.left_offset.visible.width_in_spaces == 0) =>
self.multiple_operator_error(token, rhs),
_ => self.emit(ApplyUnaryOperator::token(token).finish()),
}
fn unary_operator_section(&mut self, token: Token<'s>) {
self.emit(MaybeOperator::Operand);
self.inner.push_maybe_named_operand(OperandMaybeNamed::Unnamed(
ApplyUnaryOperator::token(token.with_variant(token::variant::UnaryOperator())).finish(),
));
}
fn binary_operator(
&mut self,
precedence: token::Precedence,
associativity: token::Associativity,
token: token::Operator<'s>,
token: Token<'s>,
lhs: Option<Spacing>,
rhs: Option<Spacing>,
) {
if let Some(OperatorOrOperand::Operator(Operator {
arity: Arity::Binary { missing: None | Some(BinaryOperand::Left), .. },
..
})) = &self.lhs_item
&& !matches!(rhs, Some(Spacing::Unspaced))
if self.lhs_item.as_ref().is_some_and(|item| item.expects_rhs())
&& rhs != Some(Spacing::Unspaced)
{
self.multiple_operator_error(token, rhs);
return;
}
let missing = match (lhs, rhs) {
(None, None) => {
self.emit(ApplyOperator::token(token).finish());
self.inner.push_maybe_named_operand(OperandMaybeNamed::Unnamed(
ApplyOperator::token(token).finish(),
));
self.emit(MaybeOperator::Operand);
return;
}
(Some(_), None) => Some(BinaryOperand::Right),
(None, Some(_)) => Some(BinaryOperand::Left),
(Some(_), Some(_)) => None,
};
let reify_rhs_section = token.properties.can_form_section()
let properties = token.operator_properties().unwrap();
let reify_rhs_section = properties.can_form_section()
&& (lhs == Some(Spacing::Spaced) || rhs == Some(Spacing::Spaced));
let is_value_operation = missing.is_none() && token.properties.is_value_operation();
let is_value_operation = missing.is_none() && properties.is_value_operation();
self.emit(Operator {
left_precedence: lhs.map(|spacing| ModifiedPrecedence {
spacing,
left_precedence: lhs
.map(|spacing| ModifiedPrecedence::new(spacing, precedence, is_value_operation)),
right_precedence: ModifiedPrecedence::new(
rhs.or(lhs).unwrap(),
precedence,
is_value_operation,
}),
right_precedence: ModifiedPrecedence {
spacing: rhs.or(lhs).unwrap(),
precedence,
is_value_operation,
},
),
associativity,
arity: Arity::Binary { tokens: vec![token], missing, reify_rhs_section },
});
}
fn multiple_operator_error(&mut self, token: token::Operator<'s>, rhs: Option<Spacing>) {
fn multiple_operator_error(&mut self, token: Token<'s>, rhs: Option<Spacing>) {
match &mut self.lhs_item {
Some(OperatorOrOperand::Operator(Operator {
Some(MaybeOperator::Operator(Operator {
arity: Arity::Binary { tokens, missing, .. },
..
})) => {
@ -196,10 +183,13 @@ impl<'s, Inner: OperandConsumer<'s> + OperatorConsumer<'s>> ClassifyArity<'s, In
if rhs.is_none() {
match missing {
None => *missing = Some(BinaryOperand::Right),
Some(BinaryOperand::Left) =>
self.lhs_item = Some(OperatorOrOperand::Operand(
Some(BinaryOperand::Left) => {
let operand = OperandMaybeNamed::Unnamed(
ApplyOperator::tokens(mem::take(tokens)).finish(),
)),
);
self.inner.push_maybe_named_operand(operand);
self.lhs_item = Some(MaybeOperator::Operand);
}
Some(BinaryOperand::Right) => unreachable!(),
}
}
@ -208,3 +198,65 @@ impl<'s, Inner: OperandConsumer<'s> + OperatorConsumer<'s>> ClassifyArity<'s, In
}
}
}
/// Scope boundaries are forwarded to the inner consumer. Before each boundary the held item is
/// flushed, so that an operator still waiting in `lhs_item` reaches the inner consumer first.
impl<'s, Inner> ScopeHierarchyConsumer for ClassifyArity<'s, Inner>
where
    Inner: NamedOperandConsumer<'s> + OperatorConsumer<'s> + ScopeHierarchyConsumer,
{
    type Result = <Inner as ScopeHierarchyConsumer>::Result;

    /// Flush the held item, then open a scope in the inner consumer.
    fn start_scope(&mut self) {
        self.flush();
        self.inner.start_scope();
    }

    /// Flush the held item, then close the inner consumer's scope and return its result.
    fn end_scope(&mut self) -> Self::Result {
        self.flush();
        self.inner.end_scope()
    }
}
/// Group delimiters are forwarded to the inner consumer, keeping the held-item state consistent
/// with what was seen.
impl<'s, Inner> GroupHierarchyConsumer<'s> for ClassifyArity<'s, Inner>
where
    Inner: NamedOperandConsumer<'s> + OperatorConsumer<'s> + GroupHierarchyConsumer<'s>,
{
    /// Flush the held item before the group is opened in the inner consumer.
    fn start_group(&mut self, open: token::OpenSymbol<'s>) {
        self.flush();
        self.inner.start_group(open)
    }

    /// Record an operand as the most recent item (which also forwards any held operator), and
    /// only then close the group in the inner consumer.
    fn end_group(&mut self, close: token::CloseSymbol<'s>) {
        let closed_group = MaybeOperator::Operand;
        self.emit(closed_group);
        self.inner.end_group(close)
    }
}
impl<'s, Inner> OperatorConsumer<'s> for ClassifyArity<'s, Inner>
where Inner: NamedOperandConsumer<'s> + OperatorConsumer<'s>
{
fn push_operator(&mut self, operator: Operator<'s>) {
self.emit(operator);
}
}
// === Operator or Operand ===
/// The kind of item most recently seen by `ClassifyArity`, used as its `lhs_item` state.
#[derive(Debug)]
enum MaybeOperator<'s> {
/// An operand was seen. No payload is kept here.
Operand,
/// An operator was seen; it is stored together with its data.
Operator(Operator<'s>),
}
impl<'s> From<Operator<'s>> for MaybeOperator<'s> {
fn from(operator: Operator<'s>) -> Self {
MaybeOperator::Operator(operator)
}
}
impl<'s> MaybeOperator<'s> {
    /// Returns whether this item is an operator whose arity expects a right-hand operand.
    /// An operand never does.
    fn expects_rhs(&self) -> bool {
        if let MaybeOperator::Operator(operator) = self {
            operator.arity.expects_rhs()
        } else {
            false
        }
    }
}

View File

@ -0,0 +1,87 @@
use crate::syntax::operator::types::*;
use enso_prelude::*;
use crate::syntax::operator::section::MaybeSection;
use crate::syntax::token::CloseSymbol;
use crate::syntax::token::OpenSymbol;
use crate::syntax::tree::SyntaxError;
use crate::syntax::Finish;
use crate::syntax::GroupHierarchyConsumer;
use crate::syntax::ScopeHierarchyConsumer;
use crate::syntax::Tree;
// =====================
// === Group Builder ===
// =====================
/// Constructs parenthesized groups.
///
/// Each opened group is mapped to a scope of the inner consumer; when the group ends, the scope's
/// result is wrapped in a group tree and pushed to the inner consumer as an operand.
#[derive(Default, Debug)]
pub struct BuildGroups<'s, Inner> {
/// Opening symbols of the groups that have been started but not yet ended, innermost last.
open: Vec<OpenSymbol<'s>>,
/// The next stage, which receives operands, operators, and scope boundaries.
inner: Inner,
}
/// Operands pass through unchanged.
impl<'s, Inner> OperandConsumer<'s> for BuildGroups<'s, Inner>
where
    Inner: OperandConsumer<'s>,
{
    fn push_operand(&mut self, operand: MaybeSection<Tree<'s>>) {
        self.inner.push_operand(operand);
    }
}
/// Operators pass through unchanged.
impl<'s, Inner> OperatorConsumer<'s> for BuildGroups<'s, Inner>
where
    Inner: OperatorConsumer<'s>,
{
    fn push_operator(&mut self, operator: Operator<'s>) {
        self.inner.push_operator(operator);
    }
}
impl<'s, ScopeResult, Inner> Finish for BuildGroups<'s, Inner>
where
    ScopeResult: Into<Option<Tree<'s>>>,
    Inner: Finish + ScopeHierarchyConsumer<Result = ScopeResult> + OperandConsumer<'s>,
{
    type Result = <Inner as Finish>::Result;

    /// Finishes the inner consumer, first closing every group that is still open.
    ///
    /// Unclosed groups are closed innermost-first. Each becomes a group tree without a closing
    /// symbol, marked with [`SyntaxError::ExprUnclosedParen`].
    fn finish(&mut self) -> Self::Result {
        while let Some(open) = self.open.pop() {
            let body = self.inner.end_scope().into();
            let unclosed =
                Tree::group(Some(open), body, None).with_error(SyntaxError::ExprUnclosedParen);
            self.inner.push_operand(unclosed.into());
        }
        self.inner.finish()
    }
}
impl<'s, ScopeResult, Inner> GroupHierarchyConsumer<'s> for BuildGroups<'s, Inner>
where
    ScopeResult: Into<Option<Tree<'s>>>,
    Inner: ScopeHierarchyConsumer<Result = ScopeResult> + OperandConsumer<'s>,
{
    /// Remembers the opening symbol and starts a scope in the inner consumer for the group body.
    fn start_group(&mut self, open: OpenSymbol<'s>) {
        self.open.push(open);
        self.inner.start_scope();
    }

    /// Ends the innermost group: closes its scope and pushes the resulting group tree as an
    /// operand.
    ///
    /// # Panics
    /// Panics if no group is open.
    fn end_group(&mut self, close: CloseSymbol<'s>) {
        let open = self.open.pop().unwrap();
        let body = self.inner.end_scope().into();
        let group = Tree::group(Some(open), body, Some(close));
        self.inner.push_operand(group.into());
    }
}
/// Scope boundaries that do not come from groups pass straight through to the inner consumer.
impl<'s, Inner> ScopeHierarchyConsumer for BuildGroups<'s, Inner>
where
    Inner: ScopeHierarchyConsumer,
{
    type Result = <Inner as ScopeHierarchyConsumer>::Result;

    fn start_scope(&mut self) {
        self.inner.start_scope();
    }

    fn end_scope(&mut self) -> Self::Result {
        self.inner.end_scope()
    }
}

View File

@ -0,0 +1,287 @@
use crate::prelude::*;
use crate::syntax::operator::reducer::ApplyToOperand;
use crate::syntax::operator::section::MaybeSection;
use crate::syntax::operator::types::NamedOperandConsumer;
use crate::syntax::operator::types::OperandMaybeNamed;
use crate::syntax::operator::types::Operator;
use crate::syntax::operator::types::OperatorConsumer;
use crate::syntax::token;
use crate::syntax::treebuilding::Spacing;
use crate::syntax::treebuilding::SpacingLookaheadTokenConsumer;
use crate::syntax::treebuilding::SpacingLookaheadTreeConsumer;
use crate::syntax::Finish;
use crate::syntax::GroupHierarchyConsumer;
use crate::syntax::ScopeHierarchyConsumer;
use crate::syntax::Token;
use crate::syntax::Tree;
// ========================
// === Named-App Parser ===
// ========================
/// Parses named-application syntax.
///
/// Recognizes `name=` (optionally preceded by an opening parenthesis) in the token stream, and
/// passes the expression that follows to the inner consumer as a named operand.
#[derive(Default, Debug)]
pub struct ParseAppNames<'s, Inner> {
/// The next stage of the pipeline.
inner: Inner,
/// Tokens that have been held back because they may be the start of a named argument.
partial: Option<Partial<'s>>,
/// Named arguments whose `name=` has been read and whose expression is still being collected,
/// innermost last.
stack: Vec<AppName<'s>>,
}
/// The parts of a named argument; applied to a function operand it produces a named-application
/// tree.
#[derive(Debug)]
pub struct NamedApp<'s> {
/// The opening parenthesis enclosing the argument, if any, with its closing parenthesis if one
/// was found.
pub parens: Option<(token::OpenSymbol<'s>, Option<token::CloseSymbol<'s>>)>,
/// The argument name.
pub name: token::Ident<'s>,
/// The `=` token between the name and the expression.
pub equals: token::AssignmentOperator<'s>,
/// The argument value.
pub expression: Tree<'s>,
}
impl<'s> ApplyToOperand<'s> for NamedApp<'s> {
    /// Builds a named-application tree that applies `operand` (the function) to this argument.
    /// The section information carried by the function operand is preserved.
    ///
    /// # Panics
    /// Panics if `operand` is `None`.
    fn apply_to_operand(self, operand: Option<MaybeSection<Tree<'s>>>) -> MaybeSection<Tree<'s>> {
        let Self { parens, name, equals, expression } = self;
        let callee = operand.unwrap();
        // Split the optional paren pair into separately-optional open and close symbols.
        let (open, close) = parens.map_or((None, None), |(open, close)| (Some(open), close));
        callee.map(|callee| Tree::named_app(callee, open, name, equals, expression, close))
    }
}
/// Tokens held back by [`ParseAppNames`] while it is undecided whether they begin a named
/// argument.
#[derive(Debug)]
enum Partial<'s> {
/// An opening parenthesis has been seen; an argument name may follow.
ExpectingName { open: token::OpenSymbol<'s> },
/// A possible argument name has been seen (optionally after an opening parenthesis); it is a
/// named argument only if `=` follows.
ExpectingEquals { open: Option<token::OpenSymbol<'s>>, name: token::Ident<'s> },
}
/// A named argument whose `name=` prefix has been read, and whose expression is in progress.
#[derive(Debug, Default)]
struct AppName<'s> {
/// The opening parenthesis before the name, if the argument is parenthesized.
open: Option<token::OpenSymbol<'s>>,
/// The argument name.
name: token::Ident<'s>,
/// The `=` token following the name.
equals: token::AssignmentOperator<'s>,
/// Once set, spacing no longer ends the expression of an unparenthesized argument. It is set
/// when a syntactic binary operator is found at a point where the expression would otherwise
/// have ended.
spaceproof: bool,
/// Number of groups opened within the expression that have not been closed yet.
inner_parens: u32,
}
impl<'s> AppName<'s> {
    /// Combines the stored `name=` prefix with the completed `expression` into a named operand.
    ///
    /// If this argument was opened with a parenthesis, `close` is taken (left as `None`) and
    /// becomes part of the result; otherwise `close` is left untouched for the caller.
    ///
    /// # Panics
    /// Panics if `expression` is `None`.
    fn finish(
        self,
        expression: Option<Tree<'s>>,
        close: &mut Option<token::CloseSymbol<'s>>,
    ) -> OperandMaybeNamed<'s> {
        let Self { open, name, equals, .. } = self;
        // An `AppName` is expected to be constructed only when lookahead indicates there is a
        // token after the `=`, so that its expression is never empty.
        // NOTE(review): a parenthesized name is accepted whatever follows the `=`; confirm that
        // an empty expression cannot reach this point.
        let expression = expression.unwrap();
        let parens = match open {
            Some(open) => Some((open, close.take())),
            None => None,
        };
        OperandMaybeNamed::Named { parens, name, equals, expression }
    }
}
impl<'s, Inner> ParseAppNames<'s, Inner>
where
    Inner: NamedOperandConsumer<'s>
        + ScopeHierarchyConsumer<Result = Option<Tree<'s>>>
        + GroupHierarchyConsumer<'s>
        + SpacingLookaheadTokenConsumer<'s>,
{
    /// Ends the expression of the innermost named argument if spacing terminates it here.
    ///
    /// This applies only to an argument that is not parenthesized, has no group open inside it,
    /// and is not `spaceproof`, and only when `following_spacing` is not `Unspaced`. In that
    /// case a syntactic binary operator makes the argument `spaceproof` instead of ending it.
    fn maybe_end_unspaced_expression(
        &mut self,
        following_spacing: Option<Spacing>,
        is_syntactic_binary_operator: bool,
    ) {
        let current = match self.stack.last_mut() {
            Some(current) => current,
            None => return,
        };
        let ends_at_space =
            !current.spaceproof && current.inner_parens == 0 && current.open.is_none();
        if !ends_at_space || following_spacing == Some(Spacing::Unspaced) {
            return;
        }
        if is_syntactic_binary_operator {
            current.spaceproof = true;
        } else {
            self.flush_complete(None);
        }
    }

    /// Passes an opening parenthesis to the inner consumer as an ordinary group, counting it
    /// against the innermost named argument, if there is one.
    fn flush_paren(&mut self, open: token::OpenSymbol<'s>) {
        self.inner.start_group(open);
        if let Some(enclosing) = self.stack.last_mut() {
            enclosing.inner_parens += 1;
        }
    }

    /// Passes a held-back parenthesis (if any) and identifier to the inner consumer as ordinary
    /// input.
    fn flush_paren_and_ident(
        &mut self,
        open: Option<token::OpenSymbol<'s>>,
        name: token::Ident<'s>,
        following_spacing: Option<Spacing>,
    ) {
        if let Some(paren) = open {
            self.flush_paren(paren);
        }
        let ident = name.into();
        self.inner.push_token(ident, following_spacing);
    }

    /// Passes any held-back tokens to the inner consumer as ordinary input. `following` is
    /// called only if an identifier is flushed, to obtain the spacing of the item after it.
    fn flush_partial(&mut self, following: impl FnOnce() -> Option<Spacing>) {
        if let Some(partial) = self.partial.take() {
            match partial {
                Partial::ExpectingName { open } => self.flush_paren(open),
                Partial::ExpectingEquals { open, name } => {
                    let following_spacing = following();
                    self.flush_paren_and_ident(open, name, following_spacing)
                }
            }
        }
    }

    /// Completes the innermost named argument: closes its scope and pushes it to the inner
    /// consumer as a named operand. If `close` is not consumed by the argument, it is passed on
    /// as the end of an ordinary group.
    ///
    /// # Panics
    /// Panics if there is no named argument in progress.
    fn flush_complete(&mut self, mut close: Option<token::CloseSymbol<'s>>) {
        let expression = self.inner.end_scope();
        let app_name = self.stack.pop().unwrap();
        let named_operand = app_name.finish(expression, &mut close);
        self.inner.push_maybe_named_operand(named_operand);
        if let Some(unused_close) = close {
            self.inner.end_group(unused_close);
        }
    }
}
impl<'s, Inner> SpacingLookaheadTokenConsumer<'s> for ParseAppNames<'s, Inner>
where Inner: SpacingLookaheadTokenConsumer<'s>
+ NamedOperandConsumer<'s>
+ ScopeHierarchyConsumer<Result = Option<Tree<'s>>>
+ GroupHierarchyConsumer<'s>
{
/// Processes one token, given the spacing of the item that follows it.
///
/// The token either extends a potential `name=` prefix (it is then held in `self.partial`),
/// completes one (a new named argument is started), or is passed to the inner consumer.
fn push_token(&mut self, token: Token<'s>, following_spacing: Option<Spacing>) {
// Each iteration yields the new value of `self.partial`. The loop only repeats after a
// held-back identifier has been flushed; the token is then processed again with no partial.
self.partial = loop {
// The spacing before this token may end the expression of an unparenthesized argument.
self.maybe_end_unspaced_expression(Some(Spacing::of_token(&token)), false);
break match (token.variant, (self.partial.take(), following_spacing)) {
// A spaced non-type identifier directly followed by another item: a potential name.
(token::Variant::Ident(variant), (None, Some(Spacing::Unspaced)))
if !variant.is_type && token.is_spaced() =>
{
let name = token.with_variant(variant);
Some(Partial::ExpectingEquals { open: None, name })
}
// A non-type identifier after a held-back open paren: a potential parenthesized name.
(token::Variant::Ident(variant), (Some(Partial::ExpectingName { open }), _))
if !variant.is_type =>
{
let name = token.with_variant(variant);
Some(Partial::ExpectingEquals { open: Some(open), name })
}
// `=` after a potential name. It is accepted if the next item is unspaced, or, for a
// parenthesized name, regardless of what follows.
(
token::Variant::AssignmentOperator(variant),
(Some(Partial::ExpectingEquals { open, name }), Some(Spacing::Unspaced))
| (Some(Partial::ExpectingEquals { open: open @ Some(_), name }), _),
) => {
let equals = token.with_variant(variant);
self.stack.push(AppName {
open,
name,
equals,
inner_parens: 0,
spaceproof: false,
});
// The argument's expression is collected in its own scope of the inner consumer.
self.inner.start_scope();
None
}
// Nothing held back: an ordinary token.
(_, (None, _)) => {
// Queried before `token` is moved into the inner consumer.
let is_syntactic_binary_operator = token.is_syntactic_binary_operator();
self.inner.push_token(token, following_spacing);
self.maybe_end_unspaced_expression(
following_spacing,
is_syntactic_binary_operator,
);
None
}
// The held-back paren was not followed by a name: it is an ordinary group.
(_, (Some(Partial::ExpectingName { open }), _)) => {
self.flush_paren(open);
self.inner.push_token(token, following_spacing);
None
}
// The held-back identifier was not followed by an accepted `=`: pass it on as an
// ordinary token, then process the current token again.
(_, (Some(Partial::ExpectingEquals { open, name }), _)) => {
self.flush_paren_and_ident(open, name, Spacing::of_token(&token).into());
continue;
}
};
}
}
}
impl<'s, Inner> SpacingLookaheadTreeConsumer<'s> for ParseAppNames<'s, Inner>
where
    Inner: SpacingLookaheadTokenConsumer<'s>
        + NamedOperandConsumer<'s>
        + ScopeHierarchyConsumer<Result = Option<Tree<'s>>>
        + GroupHierarchyConsumer<'s>,
{
    /// Passes a tree to the inner consumer as an unnamed operand.
    ///
    /// A tree cannot be part of a `name=` prefix, so held-back tokens are flushed first. The
    /// spacing before and after the tree may each end the innermost unparenthesized argument.
    fn push_tree(&mut self, tree: Tree<'s>, following_spacing: Option<Spacing>) {
        self.flush_partial(|| Some(Spacing::of_tree(&tree)));
        let leading_spacing = Spacing::of_tree(&tree);
        self.maybe_end_unspaced_expression(Some(leading_spacing), false);
        let operand = OperandMaybeNamed::Unnamed(tree.into());
        self.inner.push_maybe_named_operand(operand);
        self.maybe_end_unspaced_expression(following_spacing, false);
    }
}
impl<'s, Inner> GroupHierarchyConsumer<'s> for ParseAppNames<'s, Inner>
where
    Inner: GroupHierarchyConsumer<'s>
        + SpacingLookaheadTokenConsumer<'s>
        + NamedOperandConsumer<'s>
        + ScopeHierarchyConsumer<Result = Option<Tree<'s>>>,
{
    /// Handles an opening parenthesis. A spaced parenthesis may begin a parenthesized named
    /// argument, so it is held back; an unspaced one is passed on as an ordinary group.
    fn start_group(&mut self, open: token::OpenSymbol<'s>) {
        self.flush_partial(|| Some(Spacing::of_token(&open)));
        if open.is_spaced() {
            self.partial = Some(Partial::ExpectingName { open });
        } else {
            self.flush_paren(open);
            self.partial = None;
        }
    }

    /// Handles a closing parenthesis. It closes a group inside the innermost named argument if
    /// one is open there. Otherwise it completes that argument, or, when no named argument is
    /// in progress, is passed on unchanged.
    fn end_group(&mut self, close: token::CloseSymbol<'s>) {
        self.flush_partial(|| Some(Spacing::of_token(&close)));
        let current = match self.stack.last_mut() {
            Some(current) => current,
            None => {
                self.inner.end_group(close);
                return;
            }
        };
        if current.inner_parens == 0 {
            self.flush_complete(Some(close));
        } else {
            self.inner.end_group(close);
            current.inner_parens -= 1;
        }
    }
}
// `Inner: Finish` was previously declared both in the generic parameter list and in the `where`
// clause; the bound is now stated once.
impl<'s, Inner> Finish for ParseAppNames<'s, Inner>
where
    Inner: Finish
        + SpacingLookaheadTokenConsumer<'s>
        + NamedOperandConsumer<'s>
        + ScopeHierarchyConsumer<Result = Option<Tree<'s>>>
        + GroupHierarchyConsumer<'s>,
{
    type Result = <Inner as Finish>::Result;

    /// Finishes the inner consumer after the end of input has resolved all pending state.
    ///
    /// Held-back tokens are flushed as ordinary input (nothing follows them, so the following
    /// spacing is `None`), and every named argument still in progress is completed,
    /// innermost first.
    fn finish(&mut self) -> Self::Result {
        self.flush_partial(|| None);
        while !self.stack.is_empty() {
            self.flush_complete(None);
        }
        self.inner.finish()
    }
}
impl<'s, Inner> OperatorConsumer<'s> for ParseAppNames<'s, Inner>
where
    Inner: OperatorConsumer<'s>
        + NamedOperandConsumer<'s>
        + ScopeHierarchyConsumer<Result = Option<Tree<'s>>>
        + GroupHierarchyConsumer<'s>
        + SpacingLookaheadTokenConsumer<'s>,
{
    /// Passes an already-classified operator to the inner consumer.
    ///
    /// Held-back tokens are flushed first, and the operator's own spacing may end the
    /// expression of the innermost unparenthesized named argument.
    fn push_operator(&mut self, operator: Operator<'s>) {
        self.flush_partial(|| None);
        let leading_spacing = operator.spacing();
        self.maybe_end_unspaced_expression(Some(leading_spacing), false);
        self.inner.push_operator(operator);
    }
}

View File

@ -1,12 +1,18 @@
use crate::prelude::*;
use crate::syntax;
use crate::syntax::operator::annotations::ParseAnnotations;
use crate::syntax::operator::application::InsertApps;
use crate::syntax::operator::arity::ClassifyArity;
use crate::syntax::operator::group::BuildGroups;
use crate::syntax::operator::named_app::ParseAppNames;
use crate::syntax::operator::reducer::Reduce;
use crate::syntax::treebuilding;
use crate::syntax::treebuilding::Finish;
use crate::syntax::treebuilding::ItemConsumer;
use crate::syntax::treebuilding::CompoundTokens;
use crate::syntax::treebuilding::FlattenBlockTrees;
use crate::syntax::treebuilding::ParseNumbers;
use crate::syntax::treebuilding::PeekSpacing;
use crate::syntax::Finish;
use crate::syntax::ItemConsumer;
use crate::syntax::Tree;
@ -15,23 +21,33 @@ use crate::syntax::Tree;
// === Precedence ===
// ==================
/// Nests a comma-separated list of generic types into a single type, each type taking the
/// composition of the types after it as the parameter written `_`.
///
/// For example, `compose_types![A<'s, _>, B<_>, C<'s>]` expands to `A<'s, B<C<'s>>>`.
macro_rules! compose_types {
// Last element of the list: a type with only a lifetime parameter.
($ty:ident<'s>) => {
$ty<'s>
};
// A type with a lifetime parameter and one type parameter, filled with the rest of the list.
($ty:ident<'s, _>, $($tail:tt)*) => {
$ty<'s, compose_types!($($tail)*)>
};
// A type with only a type parameter, filled with the rest of the list.
($ty:ident<_>, $($tail:tt)*) => {
$ty<compose_types!($($tail)*)>
};
}
/// Operator precedence resolver.
#[derive(Debug, Default)]
pub struct Precedence<'s> {
#[rustfmt::skip]
resolver:
// Items -> Tokens/Trees
treebuilding::FlattenBlockTrees<'s,
// Tokens/Trees -> Tokens/Trees (proper tokens only)
treebuilding::AssembleCompoundTokens<'s,
// Tokens/Trees -> Tokens/Trees + Spacing-lookahead
treebuilding::PeekSpacing<'s,
// Tokens/Trees + Spacing-lookahead -> Operators/Operands
ClassifyArity<'s,
// Operators/Operands -> Operators/Operands (balanced)
InsertApps<
// Operators/Operands -> Tree
Reduce<'s>>>>>>,
resolver: compose_types![
FlattenBlockTrees<'s, _>, // Items -> Tokens/Trees/Groups
CompoundTokens<'s, _>,
ParseNumbers<'s, _>,
PeekSpacing<'s, _>, // Tokens/Trees/Groups -> Tokens/Trees/Groups + Spacing-lookahead
ParseAnnotations<'s, _>, // Tokens/Trees/Groups + S -> T/T/Operators/Groups + S
ParseAppNames<'s, _>,
ClassifyArity<'s, _>, // Tokens/Trees/Groups + Spacing-lookahead -> Oper*s/Groups
InsertApps<_>, // Operators/Operands/Groups -> Oper*s/Groups/Applications
BuildGroups<'s, _>, // Operators/Operands/Groups/Applications -> Oper*s/Applications
Reduce<'s> // Operators/Operands/Applications -> Tree
],
}
impl<'s> Precedence<'s> {

View File

@ -1,14 +1,14 @@
use crate::prelude::*;
use crate::syntax::operator::apply::*;
use crate::syntax::operator::types::*;
use crate::syntax::operator::operand::Operand;
use crate::syntax::operator::section::MaybeSection;
use crate::syntax::token;
use crate::syntax::treebuilding::Finish;
use crate::syntax::treebuilding::Spacing;
use crate::syntax::tree::apply;
use crate::syntax::Finish;
use crate::syntax::ScopeHierarchyConsumer;
use crate::syntax::Tree;
use enso_prelude::VecOps;
// ===============
@ -25,12 +25,13 @@ use enso_prelude::VecOps;
/// [^2](https://en.wikipedia.org/wiki/Shunting_yard_algorithm)
#[derive(Default, Debug)]
pub struct Reduce<'s> {
output: Vec<Operand<Tree<'s>>>,
output: Vec<MaybeSection<Tree<'s>>>,
operator_stack: Vec<StackOperator<'s>>,
scope_stack: Vec<(u32, u32)>,
}
impl<'s> OperandConsumer<'s> for Reduce<'s> {
fn push_operand(&mut self, operand: Operand<Tree<'s>>) {
fn push_operand(&mut self, operand: MaybeSection<Tree<'s>>) {
self.output.push(operand)
}
}
@ -53,14 +54,10 @@ impl<'s> OperatorConsumer<'s> for Reduce<'s> {
}
impl<'s> Finish for Reduce<'s> {
type Result = Option<Operand<Tree<'s>>>;
type Result = Option<MaybeSection<Tree<'s>>>;
fn finish(&mut self) -> Self::Result {
self.reduce(ModifiedPrecedence {
spacing: Spacing::Spaced,
precedence: token::Precedence::min(),
is_value_operation: false,
});
self.reduce(ModifiedPrecedence::min());
let out = self.output.pop();
debug_assert!(self.operator_stack.is_empty());
debug_assert_eq!(
@ -72,14 +69,37 @@ impl<'s> Finish for Reduce<'s> {
}
}
impl<'s> ScopeHierarchyConsumer for Reduce<'s> {
    type Result = Option<Tree<'s>>;

    /// Opens a scope, recording the current heights of the operator and operand stacks as the
    /// scope's base.
    fn start_scope(&mut self) {
        let operator_base = self.operator_stack.len() as u32;
        let operand_base = self.output.len() as u32;
        self.scope_stack.push((operator_base, operand_base));
    }

    /// Closes the current scope. If any operand was pushed since the scope was opened, the
    /// scope's contents are reduced and the resulting tree is returned; otherwise the result is
    /// `None`.
    fn end_scope(&mut self) -> Self::Result {
        let (_, operand_base) = self.scope_start();
        let mut result = None;
        if self.output.len() > operand_base {
            self.reduce(ModifiedPrecedence::min());
            result = self.output.pop().map(Tree::from);
        }
        self.scope_stack.pop();
        result
    }
}
impl<'s> Reduce<'s> {
/// Given a starting value, replace it with the result of successively applying to it all
/// operators in the `operator_stack` that have precedence greater than or equal to the
/// specified value, consuming LHS values from the `output` stack as needed.
fn reduce(&mut self, right_op_precedence: ModifiedPrecedence) -> Warnings {
let mut rhs = self.output.pop();
let mut operand = self.output.pop();
let mut right_op_warnings = Warnings::default();
while let Some(opr) = self.operator_stack.pop_if_mut(|opr| {
let scope_start = self.scope_start().0;
while self.operator_stack.len() > scope_start {
let opr = self.operator_stack.last_mut().unwrap();
let ModifiedPrecedenceComparisonResult { is_greater, inconsistent_spacing } = opr
.right_precedence
.compare(&right_op_precedence, opr.associativity == token::Associativity::Left);
@ -87,46 +107,82 @@ impl<'s> Reduce<'s> {
if is_greater { &mut right_op_warnings } else { &mut opr.warnings }
.set_inconsistent_spacing();
}
is_greater
}) {
if !is_greater {
break;
}
let opr = self.operator_stack.pop().unwrap();
let StackOperator { right_precedence: _, associativity: _, arity, warnings } = opr;
match arity {
Arity::Unary { token, error } => {
let rhs_ = rhs.take();
debug_assert_ne!(rhs_, None);
rhs = ApplyUnaryOperator::token(token)
.with_rhs(rhs_)
.with_error(error)
.with_warnings(warnings)
.finish()
.into();
}
Arity::Binary { tokens, missing, reify_rhs_section } => {
let operand = rhs.take();
debug_assert_ne!(operand, None);
let (lhs, rhs_) = match missing {
Some(BinaryOperand::Left) => (None, operand),
Some(BinaryOperand::Right) => (operand, None),
None => {
let lhs = self.output.pop();
debug_assert_ne!(lhs, None);
(lhs, operand)
}
};
rhs = ApplyOperator::tokens(tokens)
.with_lhs(lhs)
.with_rhs(rhs_, reify_rhs_section)
.with_warnings(warnings)
.finish()
.into();
}
};
operand = reduce_step(arity, operand.take(), &mut self.output).into();
if let Some(operand) = operand.as_mut() {
warnings.apply(&mut operand.value);
}
}
if let Some(rhs) = rhs {
if let Some(rhs) = operand {
self.output.push(rhs);
}
right_op_warnings
}
fn scope_start(&self) -> (usize, usize) {
let (operators, operands) = self.scope_stack.last().copied().unwrap_or_default();
(operators as usize, operands as usize)
}
}
/// An operation that is applied to an operand, and that may take further operands from a stack.
pub trait ApplyToOperands<'s> {
/// Applies `self` to `operand`, popping any other operands it needs from
/// `additional_operands`, and returns the resulting operand.
fn apply_to_operands(
self,
operand: Option<MaybeSection<Tree<'s>>>,
additional_operands: &mut Vec<MaybeSection<Tree<'s>>>,
) -> MaybeSection<Tree<'s>>;
}
/// An operation that is applied to a single operand.
pub trait ApplyToOperand<'s> {
/// Applies `self` to `operand` and returns the resulting operand.
fn apply_to_operand(self, operand: Option<MaybeSection<Tree<'s>>>) -> MaybeSection<Tree<'s>>;
}
/// Every single-operand operation is also a multi-operand operation that leaves the additional
/// operands untouched.
impl<'s, T> ApplyToOperands<'s> for T
where
    T: ApplyToOperand<'s>,
{
    fn apply_to_operands(
        self,
        operand: Option<MaybeSection<Tree<'s>>>,
        _additional_operands: &mut Vec<MaybeSection<Tree<'s>>>,
    ) -> MaybeSection<Tree<'s>> {
        self.apply_to_operand(operand)
    }
}
/// Applies one operator, described by `arity`, to its operand(s) and returns the result.
///
/// `operand` is the most recently produced operand. Operators that need a second operand (a
/// binary operator with both sides present, or an application) pop it from
/// `additional_operands`.
///
/// # Panics
/// Panics for `Arity::App` if either operand is absent, and for `Arity::NamedApp` if `operand`
/// is `None`. Other absent operands are only checked by debug assertions.
fn reduce_step<'s>(
    arity: Arity<'s>,
    operand: Option<MaybeSection<Tree<'s>>>,
    additional_operands: &mut Vec<MaybeSection<Tree<'s>>>,
) -> MaybeSection<Tree<'s>> {
    match arity {
        Arity::Unary(token) => {
            debug_assert_ne!(operand, None);
            ApplyUnaryOperator::token(token).with_rhs(operand).finish()
        }
        Arity::Binary { tokens, missing, reify_rhs_section } => {
            debug_assert_ne!(operand, None);
            // The single operand at hand goes to whichever side is not missing; when neither
            // side is missing, it is the right side and the left side comes from the stack.
            let (lhs, rhs) = match missing {
                None => {
                    let lhs = additional_operands.pop();
                    debug_assert_ne!(lhs, None);
                    (lhs, operand)
                }
                Some(BinaryOperand::Left) => (None, operand),
                Some(BinaryOperand::Right) => (operand, None),
            };
            let application = ApplyOperator::tokens(tokens).with_lhs(lhs);
            application.with_rhs(rhs, reify_rhs_section).finish()
        }
        Arity::App => {
            let func = additional_operands.pop().unwrap();
            func.map(|func| apply(func, operand.unwrap().into()))
        }
        Arity::NamedApp(app) => app.apply_to_operand(operand),
        Arity::Annotation(annotation) => annotation.apply_to_operand(operand),
    }
}

View File

@ -1,17 +1,17 @@
use enso_prelude::*;
use crate::syntax::tree;
use crate::syntax::Tree;
use enso_prelude::default;
// ===============
// === Operand ===
// ===============
// ====================
// === MaybeSection ===
// ====================
/// Wraps a value, tracking the number of wildcards or elided operands within it.
#[derive(Default, Debug, PartialEq, Eq)]
pub struct Operand<T> {
pub struct MaybeSection<T> {
pub value: T,
/// Number of elided operands in the subtree, potentially forming an *operator section*.
pub elided: u32,
@ -20,23 +20,23 @@ pub struct Operand<T> {
}
/// Transpose. Note that an absent input will not be treated as an elided value; for that
/// conversion, use [`Operand::new`].
impl<T> From<Option<Operand<T>>> for Operand<Option<T>> {
fn from(operand: Option<Operand<T>>) -> Self {
/// conversion, use [`MaybeSection::new`].
impl<T> From<Option<MaybeSection<T>>> for MaybeSection<Option<T>> {
fn from(operand: Option<MaybeSection<T>>) -> Self {
match operand {
Some(Operand { value, elided, wildcards }) =>
Some(MaybeSection { value, elided, wildcards }) =>
Self { value: Some(value), elided, wildcards },
None => default(),
}
}
}
/// Unit. Creates an Operand from a node.
impl<'s> From<Tree<'s>> for Operand<Tree<'s>> {
/// Unit. Creates a MaybeSection from a node.
impl<'s> From<Tree<'s>> for MaybeSection<Tree<'s>> {
fn from(mut value: Tree<'s>) -> Self {
let elided = 0;
let wildcards = if let Tree {
variant: box tree::Variant::Wildcard(tree::Wildcard { de_bruijn_index, .. }),
variant: tree::Variant::Wildcard(box tree::Wildcard { de_bruijn_index, .. }),
..
} = &mut value
{
@ -51,9 +51,9 @@ impl<'s> From<Tree<'s>> for Operand<Tree<'s>> {
}
/// Counit. Bakes any information about elided operands into the tree.
impl<'s> From<Operand<Tree<'s>>> for Tree<'s> {
fn from(operand: Operand<Tree<'s>>) -> Self {
let Operand { mut value, elided, wildcards } = operand;
impl<'s> From<MaybeSection<Tree<'s>>> for Tree<'s> {
fn from(operand: MaybeSection<Tree<'s>>) -> Self {
let MaybeSection { mut value, elided, wildcards } = operand;
if elided != 0 {
value = Tree::opr_section_boundary(elided, value);
}
@ -64,24 +64,24 @@ impl<'s> From<Operand<Tree<'s>>> for Tree<'s> {
}
}
impl<T> Operand<Option<T>> {
impl<T> MaybeSection<Option<T>> {
/// Lift an option value to a potentially-elided operand.
pub fn new(value: Option<Operand<T>>) -> Self {
pub fn new(value: Option<MaybeSection<T>>) -> Self {
match value {
None => Self { value: None, elided: 1, wildcards: default() },
Some(value) => {
let Operand { value, elided, wildcards } = value;
let MaybeSection { value, elided, wildcards } = value;
Self { value: Some(value), elided, wildcards }
}
}
}
}
impl<T> Operand<T> {
impl<T> MaybeSection<T> {
/// Operate on the contained value without altering the elided-operand information.
pub fn map<U>(self, f: impl FnOnce(T) -> U) -> Operand<U> {
pub fn map<U>(self, f: impl FnOnce(T) -> U) -> MaybeSection<U> {
let Self { value, elided, wildcards } = self;
let value = f(value);
Operand { value, elided, wildcards }
MaybeSection { value, elided, wildcards }
}
}

View File

@ -1,12 +1,15 @@
use crate::syntax::operator::operand::Operand;
use crate::syntax::operator::section::MaybeSection;
use crate::syntax::token;
use crate::syntax::tree;
use crate::syntax::treebuilding::Spacing;
use crate::syntax::Inspect;
use crate::syntax::Token;
use crate::syntax::Tree;
use crate::syntax::TreeConsumer;
use std::borrow::Cow;
use std::cmp::Ordering;
use crate::syntax::operator::annotations::Annotation;
use crate::syntax::operator::named_app::NamedApp;
use std::fmt::Debug;
// ================
@ -22,26 +25,40 @@ pub struct Operator<'s> {
pub arity: Arity<'s>,
}
impl<'s> Operator<'s> {
    /// Returns the spacing of this operator: that of its (first) token for unary and binary
    /// operators, the annotation's own spacing for annotations, and `Spaced` for applications.
    pub(crate) fn spacing(&self) -> Spacing {
        match &self.arity {
            Arity::App | Arity::NamedApp(_) => Spacing::Spaced,
            Arity::Unary(token) => Spacing::of_token(token),
            Arity::Binary { tokens, .. } => {
                let first = tokens.first().unwrap();
                Spacing::of_token(first)
            }
            Arity::Annotation(annotation) => annotation.spacing(),
        }
    }
}
// === Arity ===
/// Classifies the role of an operator.
#[derive(Debug)]
pub enum Arity<'s> {
Unary {
token: token::Operator<'s>,
error: Option<Cow<'static, str>>,
},
Unary(token::UnaryOperator<'s>),
Binary {
tokens: Vec<token::Operator<'s>>,
tokens: Vec<Token<'s>>,
missing: Option<BinaryOperand>,
reify_rhs_section: bool,
},
App,
NamedApp(Box<NamedApp<'s>>),
Annotation(Annotation<'s>),
}
impl<'s> Arity<'s> {
fn unary(token: token::Operator<'s>) -> Self {
Self::Unary { token, error: None }
pub fn expects_rhs(&self) -> bool {
matches!(
self,
Arity::Unary(_) | Arity::Binary { missing: None | Some(BinaryOperand::Left), .. }
)
}
}
@ -59,9 +76,8 @@ pub enum BinaryOperand {
#[derive(Debug, Copy, Clone)]
pub struct ModifiedPrecedence {
pub spacing: Spacing,
pub precedence: token::Precedence,
pub is_value_operation: bool,
value: u8,
mask: u8,
}
pub struct ModifiedPrecedenceComparisonResult {
@ -70,30 +86,28 @@ pub struct ModifiedPrecedenceComparisonResult {
}
impl ModifiedPrecedence {
pub fn new(spacing: Spacing, precedence: token::Precedence, is_value_operation: bool) -> Self {
let unspaced_bit = match spacing {
Spacing::Spaced => 0,
Spacing::Unspaced => 0x80,
};
let value = precedence.into_u8() | unspaced_bit;
let mask = if is_value_operation { 0x7f } else { 0xff };
Self { value, mask }
}
pub fn compare(&self, other: &Self, include_eq: bool) -> ModifiedPrecedenceComparisonResult {
let spacing_ordering = match (self.spacing, other.spacing) {
(Spacing::Spaced, Spacing::Unspaced) => Some(Ordering::Less),
(Spacing::Unspaced, Spacing::Spaced) => Some(Ordering::Greater),
_ => None,
};
let use_spacing = !(self.is_value_operation && other.is_value_operation);
let natural_ordering = self.precedence.cmp(&other.precedence);
let natural_is_greater = natural_ordering == Ordering::Greater
|| (include_eq && natural_ordering == Ordering::Equal);
let (is_greater, inconsistent_spacing) = match spacing_ordering {
Some(spacing_ordering) => {
let spacing_is_greater = spacing_ordering == Ordering::Greater
|| (include_eq && spacing_ordering == Ordering::Equal);
if use_spacing {
(spacing_is_greater, false)
} else {
(natural_is_greater, natural_is_greater != spacing_is_greater)
}
}
None => (natural_is_greater, false),
};
let adjusted_self = self.value + include_eq as u8;
let mask = self.mask | other.mask;
let is_greater = adjusted_self & mask > other.value & mask;
let is_greater_including_space = adjusted_self > other.value;
let inconsistent_spacing = is_greater != is_greater_including_space;
ModifiedPrecedenceComparisonResult { is_greater, inconsistent_spacing }
}
pub fn min() -> Self {
Self { value: 0, mask: 0xff }
}
}
@ -129,37 +143,71 @@ impl Warnings {
// ======================================
pub trait OperandConsumer<'s> {
fn push_operand(&mut self, operand: Operand<Tree<'s>>);
fn push_operand(&mut self, operand: MaybeSection<Tree<'s>>);
}
pub trait OperatorConsumer<'s> {
fn push_operator(&mut self, operator: Operator<'s>);
}
// ===========================
// === Operator or Operand ===
// ===========================
#[derive(Debug)]
pub enum OperatorOrOperand<'s> {
Operand(Operand<Tree<'s>>),
Operator(Operator<'s>),
/// A consumer of operands that may be named arguments.
pub trait NamedOperandConsumer<'s> {
/// Accepts the next operand, which is either an unnamed operand or a named argument.
fn push_maybe_named_operand(&mut self, operand: OperandMaybeNamed<'s>);
}
impl<'s> From<Operand<Tree<'s>>> for OperatorOrOperand<'s> {
fn from(operand: Operand<Tree<'s>>) -> Self {
OperatorOrOperand::Operand(operand)
// === Debugging ===
/// Debugging pass-through: each operand is first shown to `observe` and then forwarded, unchanged,
/// to the wrapped consumer.
impl<'s, Inner: NamedOperandConsumer<'s>> NamedOperandConsumer<'s> for Inspect<Inner> {
    fn push_maybe_named_operand(&mut self, operand: OperandMaybeNamed<'s>) {
        // Observe by reference so that ownership can still be passed on to the inner consumer.
        self.observe(&operand);
        self.0.push_maybe_named_operand(operand);
    }
}
impl<'s> From<Operator<'s>> for OperatorOrOperand<'s> {
fn from(operator: Operator<'s>) -> Self {
OperatorOrOperand::Operator(operator)
/// Debugging pass-through: each operator is first shown to `observe` and then forwarded, unchanged,
/// to the wrapped consumer.
impl<'s, Inner: OperatorConsumer<'s>> OperatorConsumer<'s> for Inspect<Inner> {
    fn push_operator(&mut self, operator: Operator<'s>) {
        // Observe by reference so that ownership can still be passed on to the inner consumer.
        self.observe(&operator);
        self.0.push_operator(operator);
    }
}
// === Conversions ===
/// Every consumer of possibly-named operands is also a consumer of plain operands: a plain operand
/// is forwarded as [`OperandMaybeNamed::Unnamed`].
impl<'s, T> OperandConsumer<'s> for T
where T: NamedOperandConsumer<'s>
{
    fn push_operand(&mut self, operand: MaybeSection<Tree<'s>>) {
        self.push_maybe_named_operand(OperandMaybeNamed::Unnamed(operand));
    }
}
/// Every operand consumer is also a tree consumer: the tree is converted into an operand with
/// `into()` and pushed as such.
impl<'s, T> TreeConsumer<'s> for T
where T: OperandConsumer<'s>
{
    fn push_tree(&mut self, tree: Tree<'s>) {
        self.push_operand(tree.into());
    }
}
// ======================
// === Named Operands ===
// ======================
/// An operand that is either a plain expression or a named argument (`name = expression`).
#[derive(Debug, PartialEq, Eq)]
#[allow(clippy::large_enum_variant)] // Clippy reports the `Unnamed` variant as "at least 0 bytes".
pub enum OperandMaybeNamed<'s> {
    /// An operand without a name.
    Unnamed(MaybeSection<Tree<'s>>),
    /// An operand with a name.
    Named {
        /// The enclosing parentheses, if any; the closing symbol is optional even when the opening
        /// symbol is present.
        parens: Option<(token::OpenSymbol<'s>, Option<token::CloseSymbol<'s>>)>,
        /// The name of the operand.
        name: token::Ident<'s>,
        /// The assignment operator between the name and the expression.
        equals: token::AssignmentOperator<'s>,
        /// The expression on the right-hand side of `equals`.
        expression: Tree<'s>,
    },
}
// ==========================
// === SectionTermination ===
// ==========================

View File

@ -0,0 +1,383 @@
//! Parses statements in module, body blocks, and type blocks.
mod function_def;
mod type_def;
use crate::empty_tree;
use crate::expression_to_pattern;
use crate::is_qualified_name;
use crate::prelude::*;
use crate::syntax::item;
use crate::syntax::maybe_with_error;
use crate::syntax::operator::Precedence;
use crate::syntax::statement::function_def::parse_function_decl;
use crate::syntax::statement::function_def::try_parse_foreign_function;
use crate::syntax::statement::type_def::try_parse_type_def;
use crate::syntax::token;
use crate::syntax::tree;
use crate::syntax::tree::block;
use crate::syntax::tree::ArgumentDefinition;
use crate::syntax::tree::SyntaxError;
use crate::syntax::treebuilding::Spacing;
use crate::syntax::Item;
use crate::syntax::Token;
use crate::syntax::Tree;
/// Parses normal statements.
///
/// Holds a `StatementParser`, whose argument buffer is reused for every statement parsed through
/// this value.
#[derive(Debug, Default)]
pub struct BodyBlockParser<'s> {
    statement_parser: StatementParser<'s>,
}
impl<'s> BodyBlockParser<'s> {
    /// Parse the statements in a block.
    ///
    /// Every input line is parsed as a body-block statement; the resulting lines are passed
    /// through `block::compound_lines` and collected into a body-block tree.
    pub fn parse_body_block(
        &mut self,
        lines: impl IntoIterator<Item = item::Line<'s>>,
        precedence: &mut Precedence<'s>,
    ) -> Tree<'s> {
        let statement_parser = &mut self.statement_parser;
        let parsed_lines = lines.into_iter().map(|line| {
            let item::Line { newline, mut items } = line;
            let expression = statement_parser.parse_body_block_statement(&mut items, 0, precedence);
            block::Line { newline, expression }
        });
        let body = block::compound_lines(parsed_lines).collect();
        Tree::body_block(body)
    }

    /// Parse the declarations and statements at the top level of a module.
    ///
    /// Every input line is parsed as a module statement; the resulting lines are passed through
    /// `block::compound_lines` and collected into a body-block tree.
    pub fn parse_module(
        &mut self,
        lines: impl IntoIterator<Item = item::Line<'s>>,
        precedence: &mut Precedence<'s>,
    ) -> Tree<'s> {
        let statement_parser = &mut self.statement_parser;
        let parsed_lines = lines.into_iter().map(|line| {
            let item::Line { newline, mut items } = line;
            let expression = statement_parser.parse_module_statement(&mut items, 0, precedence);
            block::Line { newline, expression }
        });
        let body = block::compound_lines(parsed_lines).collect();
        Tree::body_block(body)
    }
}
/// Parses individual statements, handling leading `private` keywords.
#[derive(Debug, Default)]
struct StatementParser<'s> {
    /// Scratch buffer handed to the argument-parsing functions, kept here so that it can be reused
    /// from one statement to the next.
    args_buffer: Vec<ArgumentDefinition<'s>>,
}
impl<'s> StatementParser<'s> {
    /// Parses the statement found in `items[start..]` of a body block, consuming those items.
    ///
    /// Leading `private` keywords are wrapped around the parsed statement. A `private` keyword is
    /// accepted without error only when it is applied to a function definition (or to a statement
    /// that is already invalid); in particular, a `private` keyword with no statement after it is
    /// reported as `StmtUnexpectedPrivateUsage`.
    fn parse_body_block_statement(
        &mut self,
        items: &mut Vec<Item<'s>>,
        start: usize,
        precedence: &mut Precedence<'s>,
    ) -> Option<Tree<'s>> {
        self.parse_statement_with_private_keywords(items, start, precedence, false)
    }

    /// Parses the statement found in `items[start..]` at the top level of a module, consuming
    /// those items.
    ///
    /// Behaves like [`Self::parse_body_block_statement`], except that a `private` keyword with no
    /// statement after it is accepted without error.
    fn parse_module_statement(
        &mut self,
        items: &mut Vec<Item<'s>>,
        start: usize,
        precedence: &mut Precedence<'s>,
    ) -> Option<Tree<'s>> {
        self.parse_statement_with_private_keywords(items, start, precedence, true)
    }

    /// Shared implementation of the statement parsers: parses the statement that follows any
    /// leading `private` keywords, then wraps it in one `private` node per keyword.
    ///
    /// `allow_private_without_statement` selects whether a `private` keyword that is not followed
    /// by any statement is valid.
    fn parse_statement_with_private_keywords(
        &mut self,
        items: &mut Vec<Item<'s>>,
        start: usize,
        precedence: &mut Precedence<'s>,
        allow_private_without_statement: bool,
    ) -> Option<Tree<'s>> {
        // Count the keywords from `start`, not from the beginning of `items`: the statement is
        // parsed from `start + private_keywords`, and the loop below pops exactly the items in
        // `items[start..start + private_keywords]`, expecting each to be a `private` token.
        let private_keywords = scan_private_keywords(&items[start..]);
        let mut statement = parse_body_block_statement(
            items,
            start + private_keywords,
            precedence,
            &mut self.args_buffer,
        );
        // The statement parser consumed everything after the keywords, so the keywords are now
        // the last items; pop them innermost-first.
        for _ in 0..private_keywords {
            let Item::Token(keyword) = items.pop().unwrap() else { unreachable!() };
            let token::Variant::Private(variant) = keyword.variant else { unreachable!() };
            let keyword = keyword.with_variant(variant);
            let error = match statement.as_ref().map(|tree| &tree.variant) {
                Some(tree::Variant::Invalid(_) | tree::Variant::Function(_)) => None,
                None if allow_private_without_statement => None,
                _ => SyntaxError::StmtUnexpectedPrivateUsage.into(),
            };
            let private_stmt = Tree::private(keyword, statement.take());
            statement = maybe_with_error(private_stmt, error).into();
        }
        statement
    }
}
/// Counts the `private` keyword tokens at the very beginning of `items`.
///
/// Counting stops at the first item that is not a `private` token.
fn scan_private_keywords<'s>(items: impl IntoIterator<Item = impl AsRef<Item<'s>>>) -> usize {
    let mut keyword_count = 0;
    for item in items {
        match item.as_ref() {
            Item::Token(Token { variant: token::Variant::Private(_), .. }) => keyword_count += 1,
            _ => break,
        }
    }
    keyword_count
}
/// Parses a single statement from `items[start..]`, consuming those items.
///
/// The statement is classified, in this order, as:
/// 1. a type definition, if `try_parse_type_def` recognizes one;
/// 2. an assignment-like statement (assignment or function definition), if the top-level operator
///    is an assignment operator;
/// 3. a type signature or a type-annotated expression, if the top-level operator is a type
///    annotation operator;
/// 4. an ordinary expression otherwise.
///
/// The result is `None` only in the last case, when `Precedence::resolve` yields no tree.
///
/// `args_buffer` is scratch space for the argument-definition parsers.
fn parse_body_block_statement<'s>(
    items: &mut Vec<Item<'s>>,
    start: usize,
    precedence: &mut Precedence<'s>,
    args_buffer: &mut Vec<ArgumentDefinition<'s>>,
) -> Option<Tree<'s>> {
    use token::Variant;
    if let Some(type_def) = try_parse_type_def(items, start, precedence, args_buffer) {
        return Some(type_def);
    }
    // The operator search works on the sub-slice, so its index must be shifted back by `start`.
    let top_level_operator = match find_top_level_operator(&items[start..]) {
        Ok(top_level_operator) => top_level_operator.map(|(i, t)| (i + start, t)),
        // Invalid operator spacing: parse the whole statement as an expression and attach the
        // error to it.
        Err(e) =>
            return precedence
                .resolve_non_section(items.drain(start..))
                .unwrap()
                .with_error(e)
                .into(),
    };
    let statement = match top_level_operator {
        Some((i, Token { variant: Variant::AssignmentOperator(_), .. })) =>
            parse_assignment_like_statement(items, start, i, precedence, args_buffer).into(),
        Some((i, Token { variant: Variant::TypeAnnotationOperator(_), .. })) => {
            // Resolve the right-hand side first: draining it leaves the operator as the last
            // item, so it can be popped, and what then remains from `start` is the left-hand side.
            let type_ = precedence.resolve_non_section(items.drain(i + 1..));
            let Some(Item::Token(operator)) = items.pop() else { unreachable!() };
            let Variant::TypeAnnotationOperator(variant) = operator.variant else { unreachable!() };
            let operator = operator.with_variant(variant);
            let lhs = precedence.resolve_non_section(items.drain(start..));
            // A missing type is represented by an empty, erroneous tree placed after the operator.
            let type_ = type_.unwrap_or_else(|| {
                empty_tree(operator.code.position_after()).with_error(SyntaxError::ExpectedType)
            });
            // A qualified name on the left makes this a type signature; anything else is an
            // annotated expression.
            if lhs.as_ref().is_some_and(is_qualified_name) {
                Tree::type_signature(lhs.unwrap(), operator, type_).into()
            } else {
                // A missing expression is represented by an empty, erroneous tree placed before
                // the operator.
                let lhs = lhs.unwrap_or_else(|| {
                    empty_tree(operator.left_offset.code.position_before())
                        .with_error(SyntaxError::ExpectedExpression)
                });
                Tree::type_annotated(lhs, operator, type_).into()
            }
        }
        // `find_top_level_operator` only returns assignment and type annotation operators.
        Some(_) => unreachable!(),
        None => precedence.resolve(items.drain(start..)),
    };
    // Every branch must have consumed exactly the items from `start` onward.
    debug_assert_eq!(items.len(), start);
    statement
}
/// Parses a statement whose top-level operator is the assignment operator at index `operator` of
/// `items`, consuming `items[start..]`.
///
/// Depending on the shape of both sides, the result is a foreign function definition, a function
/// definition, a variable assignment, or an expression tagged with
/// `StmtInvalidAssignmentOrMethod`.
fn parse_assignment_like_statement<'s>(
    items: &mut Vec<Item<'s>>,
    start: usize,
    operator: usize,
    precedence: &mut Precedence<'s>,
    args_buffer: &mut Vec<ArgumentDefinition<'s>>,
) -> Tree<'s> {
    // Nothing precedes the operator: there is no left-hand side at all.
    if operator == start {
        return precedence
            .resolve_non_section(items.drain(start..))
            .unwrap()
            .with_error(SyntaxError::StmtInvalidAssignmentOrMethod);
    }
    // Resolve the right-hand side first; draining it leaves the operator as the last item.
    let mut expression = precedence.resolve(items.drain(operator + 1..));
    let Some(Item::Token(operator)) = items.pop() else { unreachable!() };
    let token::Variant::AssignmentOperator(variant) = operator.variant else { unreachable!() };
    let operator = operator.with_variant(variant);
    // `Some(n)` if the left-hand side begins with a name occupying `n` items.
    let qn_len = scan_qn(&items[start..]);
    // The operator and expression are passed in `Option`s so that the foreign-function parser can
    // take ownership of them when (and only when) it recognizes the `foreign` keyword.
    let mut operator = Some(operator);
    if let Some(function) = try_parse_foreign_function(
        items,
        start,
        &mut operator,
        &mut expression,
        precedence,
        args_buffer,
    ) {
        return function;
    }
    let operator = operator.unwrap();
    match (expression, qn_len) {
        // A name with a block body is always a function definition.
        (Some(e), Some(qn_len)) if matches!(e.variant, tree::Variant::BodyBlock(_)) => {
            let (qn, args, return_) =
                parse_function_decl(items, start, qn_len, precedence, args_buffer);
            Tree::function(qn, args, return_, operator, Some(e))
        }
        // The left-hand side does not begin with a name: treat it as a pattern to assign to.
        (Some(expression), None) =>
            parse_assignment(items.drain(start..), operator, expression, precedence),
        // The left-hand side is a single one-item name with a non-block body: a variable
        // assignment.
        (Some(expression), Some(1)) if items.len() == start + 1 =>
            parse_assignment(items.drain(start..), operator, expression, precedence),
        // Any other left-hand side beginning with a name is a function definition, possibly
        // without a body.
        (e, Some(qn_len)) => {
            let (qn, args, return_) =
                parse_function_decl(items, start, qn_len, precedence, args_buffer);
            Tree::function(qn, args, return_, operator, e)
        }
        // Neither a name on the left nor an expression on the right.
        (None, None) => Tree::opr_app(
            precedence.resolve_non_section(items.drain(start..)),
            Ok(operator.with_variant(token::variant::Operator())),
            None,
        )
        .with_error(SyntaxError::StmtInvalidAssignmentOrMethod),
    }
}
/// Builds a variable assignment: `items` is resolved as an expression and converted to the
/// pattern being assigned to, and `expression` is the assigned value.
///
/// Panics if resolving `items` yields no tree.
fn parse_assignment<'s>(
    items: impl IntoIterator<Item = Item<'s>>,
    operator: token::AssignmentOperator<'s>,
    expression: Tree<'s>,
    precedence: &mut Precedence<'s>,
) -> Tree<'s> {
    let lhs = precedence.resolve_non_section(items).unwrap();
    let lhs_pattern = expression_to_pattern(lhs);
    Tree::assignment(lhs_pattern, operator, expression)
}
/// Parses the pattern of an argument definition from `items[arg_start..]`, consuming those items.
///
/// Returns the leading suspension operator, if the first item is one, together with the pattern
/// tree (`None` if the resolver produces nothing — presumably when no items follow).
///
/// Only a lone identifier or wildcard token is accepted without error; any other input is still
/// turned into a tree, but that tree is tagged with `ArgDefExpectedPattern`.
fn parse_pattern<'s>(
    items: &mut Vec<Item<'s>>,
    arg_start: usize,
    precedence: &mut Precedence<'s>,
) -> (Option<token::SuspensionOperator<'s>>, Option<Tree<'s>>) {
    let have_suspension = matches!(
        items.get(arg_start),
        Some(Item::Token(Token { variant: token::Variant::SuspensionOperator(_), .. }))
    );
    // The pattern begins after the suspension operator, when there is one.
    let pattern_start = arg_start + have_suspension as usize;
    let pattern = if items.len() - pattern_start == 1 {
        // Exactly one item: handle the valid cases directly, without the expression resolver.
        Some(match items.pop().unwrap() {
            Item::Token(token) => match token.variant {
                token::Variant::Ident(variant) => Tree::ident(token.with_variant(variant)),
                token::Variant::Wildcard(variant) =>
                    Tree::wildcard(token.with_variant(variant), None),
                _ => tree::to_ast(token).with_error(SyntaxError::ArgDefExpectedPattern),
            },
            item => precedence
                .resolve_non_section(Some(item))
                .map(|tree| tree.with_error(SyntaxError::ArgDefExpectedPattern))
                .unwrap(),
        })
    } else {
        // Zero or several items: whatever the resolver produces is not a valid pattern.
        precedence
            .resolve_non_section(items.drain(pattern_start..))
            .map(|tree| tree.with_error(SyntaxError::ArgDefExpectedPattern))
    };
    // With the pattern consumed, the suspension operator (if any) is the last item.
    let suspension = have_suspension.then(|| {
        let Item::Token(token) = items.pop().unwrap() else { unreachable!() };
        let token::Variant::SuspensionOperator(variant) = token.variant else { unreachable!() };
        token.with_variant(variant)
    });
    (suspension, pattern)
}
/// Finds the assignment or type annotation operator, if any, at which `items` is to be split.
///
/// On success, returns the index of the operator within `items` together with its token; the
/// token's variant is always `AssignmentOperator` or `TypeAnnotationOperator`.
///
/// Spacing determines which operators are considered:
/// - Until the first spaced item (other than the item at index 0) has been passed, every token is
///   examined; afterwards only tokens that are themselves spaced are examined.
/// - A spaced assignment operator is returned immediately.
/// - Unspaced operators, and type annotation operators, are only recorded as a candidate; a later
///   spaced ordinary, dot, arrow, or comma operator discards an unspaced candidate.
///
/// # Errors
///
/// Returns `StmtLhsInvalidOperatorSpacing` if an examined assignment or type annotation operator
/// is spaced on the left but the item following it is unspaced.
fn find_top_level_operator<'a, 's>(
    items: &'a [Item<'s>],
) -> Result<Option<(usize, &'a Token<'s>)>, SyntaxError> {
    use token::Variant;
    // Best operator found so far: (index, token, whether the token is spaced).
    let mut candidate: Option<(usize, &'a Token<'s>, bool)> = None;
    // Whether a spaced item at an index other than 0 was seen *before* the current item.
    let mut after_first_space = false;
    for (i, item) in items.iter().enumerate() {
        // Computed up front but only applied at the end of the iteration, so that the first
        // spaced item is itself still examined under the "before first space" rules.
        let next_is_after_space =
            i != 0 && (after_first_space || matches!(Spacing::of_item(item), Spacing::Spaced));
        if let Item::Token(token) = item {
            let is_spaced = token.is_spaced();
            if !after_first_space || is_spaced {
                match &token.variant {
                    Variant::AssignmentOperator(_) => {
                        // Spaced on the left only, e.g. `x =y`.
                        if is_spaced
                            && items
                                .get(i + 1)
                                .is_some_and(|item| Spacing::of_item(item) == Spacing::Unspaced)
                        {
                            return Err(SyntaxError::StmtLhsInvalidOperatorSpacing);
                        }
                        if is_spaced {
                            return Ok(Some((i, token)));
                        }
                        // From here on the operator is unspaced (the spaced case returned above),
                        // so the `is_spaced && ...` alternative below can never hold. The operator
                        // becomes the candidate if there is none yet, or if the current candidate
                        // is not an assignment operator.
                        if candidate.is_none()
                            || (is_spaced && !candidate.unwrap().2)
                            || !matches!(
                                candidate.unwrap().1.variant,
                                Variant::AssignmentOperator(_)
                            )
                        {
                            candidate = Some((i, token, is_spaced));
                        }
                    }
                    Variant::TypeAnnotationOperator(_) => {
                        // Spaced on the left only, e.g. `x :y`.
                        if is_spaced
                            && items
                                .get(i + 1)
                                .is_some_and(|item| Spacing::of_item(item) == Spacing::Unspaced)
                        {
                            return Err(SyntaxError::StmtLhsInvalidOperatorSpacing);
                        }
                        // Becomes the candidate if there is none yet, or if it is spaced while the
                        // current candidate is not.
                        if candidate.is_none() || (is_spaced && !candidate.unwrap().2) {
                            candidate = Some((i, token, is_spaced));
                        }
                    }
                    // A spaced operator of another kind discards an unspaced candidate.
                    Variant::Operator(_)
                    | Variant::DotOperator(_)
                    | Variant::ArrowOperator(_)
                    | Variant::CommaOperator(_) =>
                        if is_spaced && candidate.is_some_and(|(_, _, is_spaced)| !is_spaced) {
                            candidate = None;
                        },
                    _ => {}
                }
            }
        }
        after_first_space = next_is_after_space;
    }
    Ok(candidate.map(|(i, t, _)| (i, t)))
}
/// Returns the index of the first spaced item in `items`, not counting the item at index 0, or
/// `None` if there is no such item.
fn next_spaced(items: &[Item]) -> Option<usize> {
    items
        .iter()
        .enumerate()
        .skip(1)
        .find_map(|(index, item)| matches!(Spacing::of_item(item), Spacing::Spaced).then_some(index))
}
/// Returns length of the QN.
///
/// The name recognized at the start of `items` is a (possibly empty) sequence of
/// `<type identifier> <dot>` pairs followed by a final segment: a non-type identifier, or an
/// operator, negation operator, or unary operator token. The result is the number of items the
/// name occupies.
///
/// Returns `None` if `items` does not begin with such a name. In particular, scanning stops at
/// any spaced token other than the first item, at any group or tree item, and at the end of
/// `items` — so a name made up only of type identifiers yields `None`.
fn scan_qn<'s>(items: impl IntoIterator<Item = impl AsRef<Item<'s>>>) -> Option<usize> {
    // Which kind of token must come next for the name to continue.
    enum State {
        ExpectingDot,
        ExpectingIdent,
    }
    use token::Variant::*;
    use Item::*;
    use State::*;
    let mut state = ExpectingIdent;
    for (i, item) in items.into_iter().enumerate() {
        match item.as_ref() {
            // All parts of the name must be joined without spaces.
            Token(token) if i != 0 && token.is_spaced() => break,
            Token(token) => match (state, &token.variant) {
                (ExpectingDot, DotOperator(_)) => state = ExpectingIdent,
                // A type identifier may be followed by further segments.
                (ExpectingIdent, Ident(ident)) if ident.is_type => state = ExpectingDot,
                // The final segment; type identifiers were handled by the previous arm.
                (
                    ExpectingIdent,
                    Ident(_) | Operator(_) | NegationOperator(_) | UnaryOperator(_),
                ) => return Some(i + 1),
                _ => break,
            },
            Group(_) | Tree(_) => break,
            Block(_) => unreachable!(),
        }
    }
    None
}

View File

@ -0,0 +1,376 @@
use crate::prelude::*;
use crate::empty_tree;
use crate::syntax::item;
use crate::syntax::maybe_with_error;
use crate::syntax::operator::Precedence;
use crate::syntax::statement::find_top_level_operator;
use crate::syntax::statement::parse_pattern;
use crate::syntax::token;
use crate::syntax::tree;
use crate::syntax::tree::ArgumentDefault;
use crate::syntax::tree::ArgumentDefinition;
use crate::syntax::tree::ArgumentDefinitionLine;
use crate::syntax::tree::ArgumentType;
use crate::syntax::tree::ReturnSpecification;
use crate::syntax::tree::SyntaxError;
use crate::syntax::Item;
use crate::syntax::Token;
use crate::syntax::Tree;
/// Parses the left-hand side of a function definition from `items[start..]`, consuming those
/// items.
///
/// `qn_len` is the number of items, counted from `start`, that make up the function's name. The
/// items after the name are argument definitions, optionally followed by an arrow operator and a
/// return type.
///
/// Returns the name, the argument definitions in source order, and the return specification (if
/// an arrow operator is present).
pub fn parse_function_decl<'s>(
    items: &mut Vec<Item<'s>>,
    start: usize,
    qn_len: usize,
    precedence: &mut Precedence<'s>,
    args_buffer: &mut Vec<ArgumentDefinition<'s>>,
) -> (Tree<'s>, Vec<ArgumentDefinition<'s>>, Option<ReturnSpecification<'s>>) {
    let mut arg_starts = vec![];
    let mut arrow = None;
    for (i, item) in items.iter().enumerate().skip(start + qn_len) {
        // The first arrow operator ends the argument list.
        if let Item::Token(Token { variant: token::Variant::ArrowOperator(_), .. }) = item {
            arrow = Some(i);
            break;
        }
        // An argument begins right after the name, and at every spaced item.
        if i == start + qn_len || matches!(Spacing::of_item(item), Spacing::Spaced) {
            arg_starts.push(i);
        }
    }
    // Everything is parsed back to front, because each parser consumes the tail of `items`:
    // first the return specification, then the arguments from last to first, then the name.
    let return_ = arrow.map(|arrow| parse_return_spec(items, arrow, precedence));
    args_buffer.extend(
        arg_starts.drain(..).rev().map(|arg_start| parse_arg_def(items, arg_start, precedence)),
    );
    // The buffer holds the arguments last-to-first; reversing restores source order.
    let args = args_buffer.drain(..).rev().collect();
    let qn = precedence.resolve_non_section(items.drain(start..)).unwrap();
    (qn, args, return_)
}
/// Parses a constructor definition from `items[start..]`, consuming those items.
///
/// The item at `start` is the constructor's name; it is followed by inline argument definitions
/// and, optionally, by a final block item in which each non-empty line is one more argument
/// definition.
///
/// Panics if `items` is empty.
pub fn parse_constructor_definition<'s>(
    items: &mut Vec<Item<'s>>,
    start: usize,
    precedence: &mut Precedence<'s>,
    args_buffer: &mut Vec<ArgumentDefinition<'s>>,
) -> Tree<'s> {
    let mut block_args = vec![];
    if matches!(items.last().unwrap(), Item::Block(_)) {
        let Item::Block(block) = items.pop().unwrap() else { unreachable!() };
        // Empty lines are kept, with no argument, so that their newlines are preserved.
        block_args.extend(block.into_vec().into_iter().map(|item::Line { newline, mut items }| {
            let argument = (!items.is_empty()).then(|| parse_arg_def(&mut items, 0, precedence));
            ArgumentDefinitionLine { newline, argument }
        }))
    }
    let (name, inline_args) = parse_constructor_decl(items, start, precedence, args_buffer);
    Tree::constructor_definition(name, inline_args, block_args)
}
/// Parses a constructor's name and inline argument definitions from `items[start..]`, consuming
/// those items.
///
/// The item at `start` must be an identifier token; this is not checked gracefully (any other
/// item reaches `unreachable!()`).
fn parse_constructor_decl<'s>(
    items: &mut Vec<Item<'s>>,
    start: usize,
    precedence: &mut Precedence<'s>,
    args_buffer: &mut Vec<ArgumentDefinition<'s>>,
) -> (token::Ident<'s>, Vec<ArgumentDefinition<'s>>) {
    // The arguments occupy everything after the name; once they are consumed, the name is the
    // last item.
    let args = parse_type_args(items, start + 1, precedence, args_buffer);
    let Item::Token(name) = items.pop().unwrap() else { unreachable!() };
    let Token { variant: token::Variant::Ident(variant), .. } = name else { unreachable!() };
    let name = name.with_variant(variant);
    debug_assert_eq!(items.len(), start);
    (name, args)
}
/// Parses the argument definitions found in `items[start..]`, consuming those items.
///
/// Returns the arguments in source order, or an empty vector if there are no items from `start`
/// onward.
pub fn parse_type_args<'s>(
    items: &mut Vec<Item<'s>>,
    start: usize,
    precedence: &mut Precedence<'s>,
    args_buffer: &mut Vec<ArgumentDefinition<'s>>,
) -> Vec<ArgumentDefinition<'s>> {
    if start == items.len() {
        return default();
    }
    // The first argument begins at `start`; each later argument begins at a spaced item.
    let mut arg_starts = vec![start];
    let mut expecting_rhs = false;
    for (i, item) in items.iter().enumerate().skip(start + 1) {
        // The item right after an assignment operator belongs to the same argument (it is the
        // default value), even if it is spaced.
        if expecting_rhs {
            expecting_rhs = false;
            continue;
        }
        // An assignment operator never begins a new argument, even if it is spaced.
        if let Item::Token(Token { variant: token::Variant::AssignmentOperator(_), .. }) = item {
            expecting_rhs = true;
            continue;
        }
        if matches!(Spacing::of_item(item), Spacing::Spaced) {
            arg_starts.push(i);
        }
    }
    // Arguments are parsed from last to first, because each one consumes the tail of `items`.
    args_buffer.extend(
        arg_starts.drain(..).rev().map(|arg_start| parse_arg_def(items, arg_start, precedence)),
    );
    debug_assert_eq!(items.len(), start);
    // The buffer holds the arguments last-to-first; reversing restores source order.
    args_buffer.drain(..).rev().collect()
}
/// Parses a foreign function definition (`foreign <language> <name> <args...> = <body>`) if
/// `items[start..]` begins with the `foreign` keyword.
///
/// `operator` is the assignment operator and `expression` the already-parsed right-hand side of
/// the statement; neither is present in `items` any more.
///
/// Returns `None`, leaving `operator`, `expression`, and `items` untouched, if the item at
/// `start` is not a token with the code `foreign`. Otherwise `operator` and `expression` are
/// taken, `items[start..]` is consumed, and the result is either a foreign function tree or, if
/// the language or the function name is missing, the whole statement parsed as an expression
/// and tagged with the corresponding error.
///
/// Panics if the `foreign` keyword is present but `operator` is `None`.
pub fn try_parse_foreign_function<'s>(
    items: &mut Vec<Item<'s>>,
    start: usize,
    operator: &mut Option<token::AssignmentOperator<'s>>,
    expression: &mut Option<Tree<'s>>,
    precedence: &mut Precedence<'s>,
    args_buffer: &mut Vec<ArgumentDefinition<'s>>,
) -> Option<Tree<'s>> {
    match items.get(start) {
        Some(Item::Token(token)) if token.code == "foreign" => {}
        _ => return None,
    }
    let operator = operator.take().unwrap();
    // The language must be a non-type identifier.
    match items.get(start + 1) {
        Some(Item::Token(Token { variant: token::Variant::Ident(ident), .. }))
            if !ident.is_type => {}
        _ => {
            // Put the operator and the right-hand side back, so that the whole statement can be
            // resolved as a single expression.
            items.push(Item::from(Token::from(operator)));
            items.extend(expression.take().map(Item::from));
            return precedence
                .resolve_non_section(items.drain(start..))
                .unwrap()
                .with_error(SyntaxError::ForeignFnExpectedLanguage)
                .into();
        }
    }
    // The function name must be a non-type identifier, too.
    match items.get(start + 2) {
        Some(Item::Token(Token { variant: token::Variant::Ident(ident), .. }))
            if !ident.is_type => {}
        _ => {
            // As above: restore the statement and resolve it as a single expression.
            items.push(Item::from(Token::from(operator)));
            items.extend(expression.take().map(Item::from));
            return precedence
                .resolve_non_section(items.drain(start..))
                .unwrap()
                .with_error(SyntaxError::ForeignFnExpectedName)
                .into();
        }
    }
    // The body must be a text literal; any other body is kept but tagged with an error, and a
    // missing body is represented by an empty, erroneous tree placed after the operator.
    let body = expression
        .take()
        .map(|body| {
            let error = match &body.variant {
                tree::Variant::TextLiteral(_) => None,
                _ => Some(SyntaxError::ForeignFnExpectedStringBody),
            };
            maybe_with_error(body, error)
        })
        .unwrap_or_else(|| {
            empty_tree(operator.code.position_after())
                .with_error(SyntaxError::ForeignFnExpectedStringBody)
        });
    // The arguments follow the keyword, the language, and the name: an argument begins at
    // `start + 3` and at every later spaced item.
    let mut arg_starts = vec![];
    for (i, item) in items.iter().enumerate().skip(start + 3) {
        if i == start + 3 || matches!(Spacing::of_item(item), Spacing::Spaced) {
            arg_starts.push(i);
        }
    }
    // Arguments are parsed from last to first, because each one consumes the tail of `items`;
    // draining the buffer in reverse restores source order.
    args_buffer.extend(
        arg_starts.drain(..).rev().map(|arg_start| parse_arg_def(items, arg_start, precedence)),
    );
    let args = args_buffer.drain(..).rev().collect();
    // What remains is, from the back: the name, the language, and the keyword.
    let Item::Token(name) = items.pop().unwrap() else { unreachable!() };
    let token::Variant::Ident(variant) = name.variant else { unreachable!() };
    let name = name.with_variant(variant);
    let Item::Token(language) = items.pop().unwrap() else { unreachable!() };
    let token::Variant::Ident(variant) = language.variant else { unreachable!() };
    let language = language.with_variant(variant);
    let Item::Token(keyword) = items.pop().unwrap() else { unreachable!() };
    let keyword = keyword.with_variant(token::variant::ForeignKeyword());
    Tree::foreign_function(keyword, language, name, args, operator, body).into()
}
/// Whether the type annotation of an argument definition was found inside a parenthesized group.
#[derive(Debug, PartialEq, Eq)]
enum IsParenthesized {
    /// The annotation operator was found in the body of a group, as in `(x : T)`.
    Parenthesized,
    /// The annotation operator was found directly among the argument's items, as in `x:T`.
    Unparenthesized,
}
use crate::syntax::treebuilding::Spacing;
use IsParenthesized::*;
/// Positions of the operators of an argument definition, as determined by `analyze_arg_def`.
struct ArgDefInfo {
    /// Index of the type annotation operator, if any. For `Unparenthesized` it is an index into
    /// the analyzed items; for `Parenthesized`, into the body of the leading group.
    type_: Option<(IsParenthesized, usize)>,
    /// Index, within the analyzed items, of the assignment operator that introduces the default
    /// value, if any.
    default: Option<usize>,
}
/// Parses a return specification (`-> Type`), consuming `items[arrow..]`.
///
/// `arrow` is the index of the arrow operator within `items`. If nothing follows the arrow, the
/// type is an empty tree, placed after the arrow, carrying the `ExpectedExpression` error.
fn parse_return_spec<'s>(
    items: &mut Vec<Item<'s>>,
    arrow: usize,
    precedence: &mut Precedence<'s>,
) -> ReturnSpecification<'s> {
    // Consume the type first, so that the arrow is left as the last item.
    let resolved_type = precedence.resolve_non_section(items.drain(arrow + 1..));
    let Item::Token(arrow_token) = items.pop().unwrap() else { unreachable!() };
    let token::Variant::ArrowOperator(arrow_variant) = arrow_token.variant else { unreachable!() };
    let arrow = arrow_token.with_variant(arrow_variant);
    let r#type = match resolved_type {
        Some(tree) => tree,
        None => empty_tree(arrow.code.position_after()).with_error(SyntaxError::ExpectedExpression),
    };
    ReturnSpecification { arrow, r#type }
}
/// Parses one argument definition from `items[start..]`, consuming those items.
///
/// The most general form handled is `((~pattern : Type) = default)`:
/// - an optional outer pair of parentheses around the whole definition (`open` / `close`);
/// - an optional inner pair of parentheses around the pattern and its type (`open2` / `close2`);
/// - an optional suspension operator before the pattern;
/// - the pattern itself;
/// - an optional type annotation;
/// - an optional default value.
///
/// Errors are reported inside the returned definition: if the operators cannot be analyzed, the
/// whole input becomes an erroneous `pattern`; and a missing pattern is represented by an empty
/// tree carrying `ArgDefExpectedPattern`.
fn parse_arg_def<'s>(
    items: &mut Vec<Item<'s>>,
    mut start: usize,
    precedence: &mut Precedence<'s>,
) -> ArgumentDefinition<'s> {
    let mut open1 = None;
    let mut close1 = None;
    let mut parenthesized_body = None;
    // If the argument is a single group, unwrap it: remember its parentheses, and parse the
    // contents of the group instead.
    if matches!(items[start..], [Item::Group(_)]) {
        let Some(Item::Group(item::Group { open, body, close })) = items.pop() else {
            unreachable!()
        };
        open1 = open.into();
        close1 = close;
        parenthesized_body = body.into_vec().into();
        debug_assert_eq!(items.len(), start);
        // The group's body is a vector of its own, so the argument now begins at index 0.
        start = 0;
    }
    let items = parenthesized_body.as_mut().unwrap_or(items);
    let ArgDefInfo { type_, default } = match analyze_arg_def(&items[start..]) {
        // The operators make no sense: report everything as one erroneous pattern.
        Err(e) => {
            let pattern =
                precedence.resolve_non_section(items.drain(start..)).unwrap().with_error(e);
            return ArgumentDefinition {
                open: open1,
                open2: None,
                suspension: None,
                pattern,
                type_: None,
                close2: None,
                default: None,
                close: close1,
            };
        }
        Ok(arg_def) => arg_def,
    };
    // The parts are parsed back to front, because each step consumes the tail of `items`: first
    // the default value, then the type, then the pattern.
    let default = default.map(|default| {
        let tree = precedence.resolve(items.drain(start + default + 1..));
        let Item::Token(equals) = items.pop().unwrap() else { unreachable!() };
        // A missing default value is represented by an empty, erroneous tree after the `=`.
        let expression = tree.unwrap_or_else(|| {
            empty_tree(equals.code.position_after()).with_error(SyntaxError::ExpectedExpression)
        });
        let Token { variant: token::Variant::AssignmentOperator(variant), .. } = equals else {
            unreachable!()
        };
        let equals = equals.with_variant(variant);
        ArgumentDefault { equals, expression }
    });
    let mut open2 = None;
    let mut close2 = None;
    let mut suspension_and_pattern = None;
    let type_ = type_.map(|(parenthesized, type_)| {
        let mut parenthesized_body = None;
        // If the annotation was found inside a group, and that group is all that is left of the
        // argument, unwrap it as the inner pair of parentheses.
        if parenthesized == Parenthesized
            && (start..items.len()).len() == 1
            && matches!(items.last(), Some(Item::Group(_)))
        {
            let Some(Item::Group(item::Group { open, body, close })) = items.pop() else {
                unreachable!()
            };
            open2 = open.into();
            close2 = close;
            parenthesized_body = body.into_vec().into();
            start = 0;
        }
        let items = parenthesized_body.as_mut().unwrap_or(items);
        let tree = precedence.resolve_non_section(items.drain(start + type_ + 1..));
        let Item::Token(operator) = items.pop().unwrap() else { unreachable!() };
        // A missing type is represented by an empty, erroneous tree after the operator.
        let type_ = tree.unwrap_or_else(|| {
            empty_tree(operator.code.position_after()).with_error(SyntaxError::ExpectedType)
        });
        let token::Variant::TypeAnnotationOperator(variant) = operator.variant else {
            unreachable!()
        };
        let operator = operator.with_variant(variant);
        // The pattern has to be parsed here, while `items` still refers to the (possibly
        // unwrapped) item list that contains it.
        suspension_and_pattern = Some(parse_pattern(items, start, precedence));
        ArgumentType { operator, type_ }
    });
    // Without a type annotation, the pattern has not been parsed yet.
    let (suspension, pattern) =
        suspension_and_pattern.unwrap_or_else(|| parse_pattern(items, start, precedence));
    // A missing pattern is represented by an empty, erroneous tree. Its position is taken from
    // the first token available among: the end of the suspension operator or of an opening
    // parenthesis, or else the start of the annotation operator, of a closing parenthesis, or of
    // the default's `=`.
    let pattern = pattern.unwrap_or_else(|| {
        empty_tree(
            suspension
                .as_ref()
                .map(|t| t.code.position_after())
                .or_else(|| open2.as_ref().map(|t| t.code.position_after()))
                .or_else(|| open1.as_ref().map(|t| t.code.position_after()))
                .or_else(|| type_.as_ref().map(|t| t.operator.left_offset.code.position_before()))
                // Why does this one need a type annotation???
                .or_else(|| {
                    close2
                        .as_ref()
                        .map(|t: &token::CloseSymbol| t.left_offset.code.position_before())
                })
                .or_else(|| default.as_ref().map(|t| t.equals.left_offset.code.position_before()))
                .or_else(|| close1.as_ref().map(|t| t.left_offset.code.position_before()))
                .unwrap(),
        )
        .with_error(SyntaxError::ArgDefExpectedPattern)
    });
    ArgumentDefinition {
        open: open1,
        open2,
        suspension,
        pattern,
        type_,
        close2,
        default,
        close: close1,
    }
}
/// Locates the type annotation operator and the default-value assignment operator of an argument
/// definition, without consuming anything.
///
/// `outer` holds the items of the argument definition. The search proceeds as follows:
/// 1. If the top-level operator of `outer` is a type annotation, that is the type.
/// 2. If it is an assignment, that is the default; the items before it are then searched for a
///    type annotation in the same way.
/// 3. If no type annotation was found and the first item is a group, the body of that group is
///    searched for the type annotation instead.
///
/// `outer` may be empty (for example when the caller has unwrapped a group whose body is empty);
/// in that case neither a type nor a default is reported.
///
/// # Errors
///
/// - `StmtLhsInvalidOperatorSpacing`, as reported by `find_top_level_operator`.
/// - `ArgDefSpuriousParens` if the first item is a group whose body has no top-level operator.
/// - `ArgDefUnexpectedOpInParenClause` if an operator is found where it is not allowed, e.g. an
///   assignment inside the group, or an assignment before the default's assignment operator.
fn analyze_arg_def(outer: &[Item]) -> Result<ArgDefInfo, SyntaxError> {
    let mut default = None;
    let mut type_ = None;
    match find_top_level_operator(outer)? {
        None => {}
        Some((
            annotation_op_pos,
            Token { variant: token::Variant::TypeAnnotationOperator(_), .. },
        )) => {
            type_ = (Unparenthesized, annotation_op_pos).into();
        }
        Some((assignment_op_pos, Token { variant: token::Variant::AssignmentOperator(_), .. })) => {
            default = assignment_op_pos.into();
            // Only the part before the `=` can contain the type annotation.
            match find_top_level_operator(&outer[..assignment_op_pos])? {
                None => {}
                Some((
                    annotation_op_pos,
                    Token { variant: token::Variant::TypeAnnotationOperator(_), .. },
                )) => {
                    type_ = (Unparenthesized, annotation_op_pos).into();
                }
                Some(_) => return Err(SyntaxError::ArgDefUnexpectedOpInParenClause),
            }
        }
        Some(_) => return Err(SyntaxError::ArgDefUnexpectedOpInParenClause),
    };
    if type_.is_none() {
        // Use `first()` rather than indexing: `outer` can be empty, and an empty argument must be
        // reported through the normal error path of the caller, not by panicking here.
        if let Some(Item::Group(item::Group { body: inner, .. })) = outer.first() {
            let inner_op = find_top_level_operator(inner)?;
            type_ = (Parenthesized, match inner_op {
                None => return Err(SyntaxError::ArgDefSpuriousParens),
                Some((
                    inner_op_pos,
                    Token { variant: token::Variant::TypeAnnotationOperator(_), .. },
                )) => inner_op_pos,
                Some(_) => return Err(SyntaxError::ArgDefUnexpectedOpInParenClause),
            })
            .into();
        }
    }
    Ok(ArgDefInfo { type_, default })
}

View File

@ -0,0 +1,133 @@
use crate::prelude::*;
use crate::syntax::item;
use crate::syntax::maybe_with_error;
use crate::syntax::operator::Precedence;
use crate::syntax::statement::function_def::parse_constructor_definition;
use crate::syntax::statement::function_def::parse_type_args;
use crate::syntax::statement::parse_body_block_statement;
use crate::syntax::statement::scan_private_keywords;
use crate::syntax::token;
use crate::syntax::tree;
use crate::syntax::tree::block;
use crate::syntax::tree::ArgumentDefinition;
use crate::syntax::tree::SyntaxError;
use crate::syntax::treebuilding::Spacing;
use crate::syntax::Item;
use crate::syntax::Token;
use crate::syntax::Tree;
/// Parses a type definition (`type <Name> <params...>`, optionally followed by a body block) if
/// `items[start..]` begins with the `type` keyword.
///
/// Returns `None`, leaving `items` untouched, if the item at `start` is not a token with the
/// code `type`. Otherwise `items[start..]` is consumed; if the keyword is not followed by a type
/// identifier, the whole statement is parsed as an expression and tagged with
/// `TypeDefExpectedTypeName`.
pub fn try_parse_type_def<'s>(
    items: &mut Vec<Item<'s>>,
    start: usize,
    precedence: &mut Precedence<'s>,
    args_buffer: &mut Vec<ArgumentDefinition<'s>>,
) -> Option<Tree<'s>> {
    match items.get(start) {
        Some(Item::Token(token)) if token.code == "type" => {}
        _ => return None,
    }
    match items.get(start + 1) {
        Some(Item::Token(Token { variant: token::Variant::Ident(ident), .. })) if ident.is_type => {
        }
        _ =>
            return precedence
                .resolve_non_section(items.drain(start..))
                .unwrap()
                .with_error(SyntaxError::TypeDefExpectedTypeName)
                .into(),
    }
    // The body, if present, is a block in the last position.
    let body = if let Some(Item::Block(lines)) = items.last_mut() {
        let block = mem::take(lines).into_vec();
        items.pop();
        let lines = block.into_iter().map(|item::Line { newline, mut items }| block::Line {
            newline,
            expression: {
                // A line beginning with an operator has that operator turned into an identifier
                // that is flagged as lexically being an operator — presumably so that operator
                // methods are parsed like named function definitions.
                if let Some(Item::Token(token)) = items.first_mut()
                    && matches!(token.variant, token::Variant::Operator(_))
                {
                    let opr_ident =
                        token::variant::Ident { is_operator_lexically: true, ..default() };
                    token.variant = token::Variant::Ident(opr_ident);
                }
                parse_type_body_statement(items, precedence, args_buffer)
            },
        });
        block::compound_lines(lines).collect()
    } else {
        default()
    };
    // The remaining items are parsed back to front: the parameters, the name, and the keyword.
    let params = parse_type_args(items, start + 2, precedence, args_buffer);
    let name = {
        let Item::Token(name) = items.pop().unwrap() else { unreachable!() };
        let token::Variant::Ident(variant) = name.variant else { unreachable!() };
        name.with_variant(variant)
    };
    let Item::Token(keyword) = items.pop().unwrap() else { unreachable!() };
    let keyword = keyword.with_variant(token::variant::TypeKeyword());
    debug_assert_eq!(items.len(), start);
    Tree::type_def(keyword, name, params, body).into()
}
/// Parses one line of the body of a type definition.
///
/// After any leading `private` keywords, the line is:
/// - a constructor definition, if it begins with a type identifier that is not directly followed
///   by an unspaced item;
/// - nothing (`None`), if no items remain;
/// - otherwise an ordinary statement, which is tagged with `UnexpectedExpressionInTypeBody`
///   unless it is one of the kinds of statement permitted in a type body.
///
/// Each `private` keyword is then wrapped around the result. It is accepted without error only
/// when applied to a constructor definition, a function, or an already-invalid statement;
/// otherwise the result is tagged with `TypeBodyUnexpectedPrivateUsage`.
fn parse_type_body_statement<'s>(
    mut items: Vec<Item<'s>>,
    precedence: &mut Precedence<'s>,
    args_buffer: &mut Vec<ArgumentDefinition<'s>>,
) -> Option<Tree<'s>> {
    let private_keywords = scan_private_keywords(&items);
    let mut statement = match items.get(private_keywords) {
        Some(Item::Token(Token { variant: token::Variant::Ident(ident), .. }))
            if ident.is_type
                && !items
                    .get(private_keywords + 1)
                    .is_some_and(|item| Spacing::of_item(item) == Spacing::Unspaced) =>
            Some(parse_constructor_definition(
                &mut items,
                private_keywords,
                precedence,
                args_buffer,
            )),
        None => None,
        _ => {
            // NOTE(review): the `unwrap` relies on the statement parser always producing a tree
            // when at least one item is present — confirm.
            let tree =
                parse_body_block_statement(&mut items, private_keywords, precedence, args_buffer)
                    .unwrap();
            let error = match &tree.variant {
                tree::Variant::Function(_)
                | tree::Variant::ForeignFunction(_)
                | tree::Variant::Assignment(_)
                | tree::Variant::Documented(_)
                | tree::Variant::Annotated(_)
                | tree::Variant::AnnotatedBuiltin(_) => None,
                tree::Variant::TypeSignature(_) => None,
                tree::Variant::TypeDef(_) => None,
                _ => Some(SyntaxError::UnexpectedExpressionInTypeBody),
            };
            maybe_with_error(tree, error).into()
        }
    };
    // The statement has been consumed, so the keywords are now the last items; pop them
    // innermost-first.
    for _ in 0..private_keywords {
        let Item::Token(keyword) = items.pop().unwrap() else { unreachable!() };
        let token::Variant::Private(variant) = keyword.variant else { unreachable!() };
        let keyword = keyword.with_variant(variant);
        let error = match statement.as_ref().map(|tree| &tree.variant) {
            Some(
                tree::Variant::Invalid(_)
                | tree::Variant::ConstructorDefinition(_)
                | tree::Variant::Function(_),
            ) => None,
            _ => SyntaxError::TypeBodyUnexpectedPrivateUsage.into(),
        };
        let private_stmt = Tree::private(keyword, statement.take());
        statement = maybe_with_error(private_stmt, error).into();
    }
    statement
}

View File

@ -93,9 +93,19 @@
//!
//! See the definitions and macros below to learn more.
mod collect;
mod operator;
use crate::prelude::*;
use crate::source::*;
pub use operator::Associativity;
pub use operator::OperatorProperties;
pub use operator::Precedence;
pub use operator::TokenOperatorProperties;
// =============
@ -159,6 +169,18 @@ impl<'s, T> Token<'s, T> {
let code_length = self.code.length();
span::Ref { left_offset: &self.left_offset, code_length }
}
/// Whether this token has space characters on the left, i.e. whether the visible width of its
/// left offset, measured in spaces, is non-zero.
pub fn is_spaced(&self) -> bool {
    self.left_offset.visible.width_in_spaces != 0
}
}
impl<'s> Token<'s, Variant> {
    /// Whether this token is a syntactically-special binary operator.
    ///
    /// Delegates to the free function `is_syntactic_binary_operator`, applied to this token's
    /// variant.
    pub fn is_syntactic_binary_operator(&self) -> bool {
        is_syntactic_binary_operator(&self.variant)
    }
}
impl<'s, V: Clone> Token<'s, V> {
@ -268,16 +290,31 @@ macro_rules! with_token_definition { ($f:ident ($($args:tt)*)) => { $f! { $($arg
#[reflect(skip)]
pub is_default: bool,
},
Operator {
#[serde(skip)]
#[reflect(skip)]
pub properties: OperatorProperties,
},
// === Binary operators ===
Operator,
AssignmentOperator,
TypeAnnotationOperator,
ArrowOperator,
DotOperator,
CommaOperator,
// === Unary operators ===
UnaryOperator,
AnnotationOperator,
AutoscopeOperator,
LambdaOperator,
SuspensionOperator,
NegationOperator,
Digits {
pub base: Option<Base>
},
NumberBase,
Private,
TypeKeyword,
ForeignKeyword,
AllKeyword,
CaseKeyword,
OfKeyword,
TextStart,
TextEnd,
TextSection,
@ -319,285 +356,6 @@ impl Default for Variant {
}
// === Operator properties ===
/// Properties of an operator that are identified when lexing.
///
/// With the derived `Default`, every optional property is `None` and every flag is `false`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default)]
pub struct OperatorProperties {
    // Precedence
    // `None` presumably means the operator cannot be used in that position — confirm.
    binary_infix_precedence: Option<Precedence>,
    unary_prefix_precedence: Option<Precedence>,
    is_value_operation: bool,
    // Operator section behavior
    lhs_section_termination: Option<crate::syntax::operator::SectionTermination>,
    // Special properties
    is_compile_time_operation: bool,
    is_right_associative: bool,
    is_modifier: bool,
    // Unique operators
    is_decimal: bool,
    is_type_annotation: bool,
    is_assignment: bool,
    is_arrow: bool,
    is_sequence: bool,
    is_suspension: bool,
    is_autoscope: bool,
    is_annotation: bool,
    is_dot: bool,
    is_special: bool,
    is_token_joiner: bool,
}
impl OperatorProperties {
    /// Create operator properties with every field at its default value.
    pub fn new() -> Self {
        Self::default()
    }

    // === Builders ===

    /// Return a copy of this operator that can be used as a binary infix operator with the given
    /// precedence. The precedence must be greater than [`Precedence::min`].
    pub fn with_binary_infix_precedence(mut self, value: u32) -> Self {
        let precedence = Precedence { value };
        debug_assert!(precedence > Precedence::min());
        self.binary_infix_precedence = Some(precedence);
        self
    }

    /// Return a copy of this operator that can be used as a unary prefix operator with the given
    /// precedence. The precedence must be greater than [`Precedence::min`].
    pub fn with_unary_prefix_mode(mut self, precedence: Precedence) -> Self {
        debug_assert!(precedence > Precedence::min());
        self.unary_prefix_precedence = Some(precedence);
        self
    }

    /// Return a copy of this operator, flagged as a compile time operation.
    pub fn as_compile_time_operation(mut self) -> Self {
        self.is_compile_time_operation = true;
        self
    }

    /// Return whether this operator is flagged as a compile time operation.
    pub fn is_compile_time_operation(&self) -> bool {
        self.is_compile_time_operation
    }

    /// Return a copy of this operator, flagged as a value-level (as opposed to functional)
    /// operation.
    pub fn as_value_operation(mut self) -> Self {
        self.is_value_operation = true;
        self
    }

    /// Return whether this operator is a value-level (as opposed to functional) operation.
    pub fn is_value_operation(&self) -> bool {
        self.is_value_operation
    }

    /// Return a copy of this operator, flagged as right associative.
    pub fn as_right_associative(mut self) -> Self {
        self.is_right_associative = true;
        self
    }

    /// Return a copy of this operator, flagged as a modified-assignment operator.
    pub fn as_modifier(mut self) -> Self {
        self.is_modifier = true;
        self
    }

    /// Return a copy of this operator, flagged as special.
    pub fn as_special(mut self) -> Self {
        self.is_special = true;
        self
    }

    /// Return a copy of this operator, flagged as the token-joiner operator.
    pub fn as_token_joiner(mut self) -> Self {
        self.is_token_joiner = true;
        self
    }

    /// Return a copy of this operator with the specified LHS operator-section/template-function
    /// behavior. Accepts either a `SectionTermination` or an `Option` of one.
    pub fn with_lhs_section_termination<T>(mut self, lhs_section_termination: T) -> Self
    where T: Into<Option<crate::syntax::operator::SectionTermination>> {
        self.lhs_section_termination = lhs_section_termination.into();
        self
    }

    /// Return a copy of this operator, flagged as a type annotation operator.
    pub fn as_type_annotation(mut self) -> Self {
        self.is_type_annotation = true;
        self
    }

    /// Return a copy of this operator, flagged as an assignment operator.
    pub fn as_assignment(mut self) -> Self {
        self.is_assignment = true;
        self
    }

    /// Return a copy of this operator, flagged as an arrow operator.
    pub fn as_arrow(mut self) -> Self {
        self.is_arrow = true;
        self
    }

    /// Return a copy of this operator, flagged as the sequence operator.
    pub fn as_sequence(mut self) -> Self {
        self.is_sequence = true;
        self
    }

    /// Return a copy of this operator, flagged as the annotation operator.
    pub fn as_annotation(mut self) -> Self {
        self.is_annotation = true;
        self
    }

    /// Return a copy of this operator, flagged as the execution-suspension operator.
    pub fn as_suspension(mut self) -> Self {
        self.is_suspension = true;
        self
    }

    /// Return a copy of this operator, flagged as the autoscope operator.
    pub fn as_autoscope(mut self) -> Self {
        self.is_autoscope = true;
        self
    }

    /// Return a copy of this operator, flagged as the dot operator.
    pub fn as_dot(mut self) -> Self {
        self.is_dot = true;
        self
    }

    /// Return a copy of this operator, to be interpreted as a decimal point.
    pub fn as_decimal(mut self) -> Self {
        self.is_decimal = true;
        self
    }

    // === Accessors ===

    /// Return the precedence of this operator in binary infix position, if it can be used there.
    pub fn binary_infix_precedence(&self) -> Option<Precedence> {
        self.binary_infix_precedence
    }

    /// Return the precedence of this operator in unary prefix position, if it can be used there.
    pub fn unary_prefix_precedence(&self) -> Option<Precedence> {
        self.unary_prefix_precedence
    }

    /// Return whether this operator can form operator sections; compile time operations cannot.
    pub fn can_form_section(&self) -> bool {
        !self.is_compile_time_operation
    }

    /// Return whether this operator is the type annotation operator.
    pub fn is_type_annotation(&self) -> bool {
        self.is_type_annotation
    }

    /// Return the LHS operator-section/template-function behavior of this operator.
    pub fn lhs_section_termination(&self) -> Option<crate::syntax::operator::SectionTermination> {
        self.lhs_section_termination
    }

    /// Return whether this operator is illegal outside special uses.
    pub fn is_special(&self) -> bool {
        self.is_special
    }

    /// Return whether this operator is the assignment operator.
    pub fn is_assignment(&self) -> bool {
        self.is_assignment
    }

    /// Return whether this operator is a modified-assignment operator.
    pub fn is_modifier(&self) -> bool {
        self.is_modifier
    }

    /// Return whether this operator is the arrow operator.
    pub fn is_arrow(&self) -> bool {
        self.is_arrow
    }

    /// Return whether this operator is the sequence operator.
    pub fn is_sequence(&self) -> bool {
        self.is_sequence
    }

    /// Return whether this operator is the execution-suspension operator.
    pub fn is_suspension(&self) -> bool {
        self.is_suspension
    }

    /// Return whether this operator is the autoscope operator.
    pub fn is_autoscope(&self) -> bool {
        self.is_autoscope
    }

    /// Return whether this operator is the annotation operator.
    pub fn is_annotation(&self) -> bool {
        self.is_annotation
    }

    /// Return whether this operator is the dot operator.
    pub fn is_dot(&self) -> bool {
        self.is_dot
    }

    /// Return whether this operator is the token-joiner operator.
    pub fn is_token_joiner(&self) -> bool {
        self.is_token_joiner
    }

    /// Return this operator's associativity: right if flagged as such, left otherwise.
    pub fn associativity(&self) -> Associativity {
        if self.is_right_associative {
            Associativity::Right
        } else {
            Associativity::Left
        }
    }

    /// Return whether this operator is a decimal point.
    pub fn is_decimal(&self) -> bool {
        self.is_decimal
    }
}
/// Value that can be compared to determine which operator will bind more tightly within an
/// expression.
///
/// Ordering is derived from the single numeric field, so a larger `value` compares greater.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Reflect, Deserialize, PartialOrd, Ord)]
pub struct Precedence {
/// A numeric value determining precedence order.
value: u32,
}
impl Precedence {
/// Return a precedence that is lower than the precedence of any operator.
pub fn min() -> Self {
Precedence { value: 0 }
}
/// Return the precedence for any operator.
pub fn min_valid() -> Self {
Precedence { value: 1 }
}
/// Return a precedence that is not lower than any other precedence.
pub fn max() -> Self {
Precedence { value: 100 }
}
/// Return the precedence of application.
pub fn application() -> Self {
Precedence { value: 80 }
}
/// Return the precedence of unary minus.
pub fn unary_minus() -> Self {
Precedence { value: 79 }
}
/// Return the precedence of unary minus when applied to a numeric literal.
pub fn unary_minus_numeric_literal() -> Self {
Precedence { value: 80 }
}
}
/// Associativity (left or right).
///
/// Produced by `OperatorProperties::associativity`, which yields `Right` only for operators
/// flagged as right associative.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Associativity {
/// Left-associative.
Left,
/// Right-associative.
Right,
}
// === Numbers ===
/// Alternate numeric bases (decimal is the default).
@ -721,4 +479,5 @@ macro_rules! define_token_type {
}
with_token_definition!(define_token_type());
use crate::syntax::token::operator::is_syntactic_binary_operator;
pub use variant::Variant;

View File

@ -0,0 +1,55 @@
use enso_prelude::*;
use crate::source;
use crate::syntax::token;
use crate::syntax::BlockHierarchyConsumer;
use crate::syntax::Finish;
use crate::syntax::GroupHierarchyConsumer;
use crate::syntax::NewlineConsumer;
use crate::syntax::Token;
use crate::syntax::TokenConsumer;
// =========================
// === Collecting Tokens ===
// =========================
/// A plain vector collects tokens in the order they are received.
impl<'s> TokenConsumer<'s> for Vec<Token<'s>> {
    /// Append the token to the end of the vector.
    fn push_token(&mut self, token: Token<'s>) {
        Vec::push(self, token);
    }
}
/// Newlines are stored in the vector as ordinary tokens.
impl<'s> NewlineConsumer<'s> for Vec<Token<'s>> {
    /// Convert the newline into a general token and append it.
    fn push_newline(&mut self, token: token::Newline<'s>) {
        let newline: Token<'s> = token.into();
        self.push(newline);
    }
}
/// Block boundaries are recorded as marker tokens built from a default offset and default code.
impl<'s> BlockHierarchyConsumer for Vec<Token<'s>> {
    /// Append a block-start marker token.
    fn start_block(&mut self) {
        let offset = source::Offset::default();
        let marker = Token(offset, default(), token::Variant::block_start());
        self.push(marker);
    }

    /// Append a block-end marker token.
    fn end_block(&mut self) {
        let offset = source::Offset::default();
        let marker = Token(offset, default(), token::Variant::block_end());
        self.push(marker);
    }
}
/// Group delimiters are stored in the vector as ordinary tokens.
impl<'s> GroupHierarchyConsumer<'s> for Vec<Token<'s>> {
    /// Append the opening symbol of a group.
    fn start_group(&mut self, open: token::OpenSymbol<'s>) {
        let delimiter: Token<'s> = open.into();
        self.push(delimiter)
    }

    /// Append the closing symbol of a group.
    fn end_group(&mut self, close: token::CloseSymbol<'s>) {
        let delimiter: Token<'s> = close.into();
        self.push(delimiter)
    }
}
/// Finishing a vector of tokens yields the collected tokens themselves.
impl<'s> Finish for Vec<Token<'s>> {
    type Result = Self;

    /// Move the collected tokens out, leaving an empty vector behind.
    fn finish(&mut self) -> Self::Result {
        mem::take(self)
    }
}

View File

@ -0,0 +1,331 @@
use crate::syntax::token::*;
use crate::lexer::analyze_non_syntactic_operator;
use crate::syntax::operator::SectionTermination;
/// Properties of an operator that are identified when lexing.
///
/// The derived `Default` leaves every option `None` and every flag `false`.
///
/// Note: the derived `PartialOrd`/`Ord` compare fields in declaration order, so reordering the
/// fields below changes how two values compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default)]
pub struct OperatorProperties {
// Precedence / associativity
binary_infix_precedence: Option<Precedence>,
unary_prefix_precedence: Option<Precedence>,
is_value_operation: bool,
is_right_associative: bool,
// Special properties
lhs_section_termination: Option<SectionTermination>,
is_modifier: bool,
// When set, the operator cannot form operator sections (see `can_form_section`).
is_compile_time: bool,
// Stored negated so that the default (`false`) means "the RHS is an expression"; read through
// `rhs_is_expression`.
rhs_is_non_expression: bool,
}
pub fn is_syntactic_binary_operator(variant: &Variant) -> bool {
use Variant::*;
matches!(
variant,
AssignmentOperator(_) | TypeAnnotationOperator(_) | ArrowOperator(_) | CommaOperator(_)
)
}
impl OperatorProperties {
    /// Create operator properties with every field at its default value.
    pub fn new() -> Self {
        Self::default()
    }

    /// Create the properties of a value-level operator; all other fields are defaults.
    pub fn value() -> Self {
        let mut properties = Self::default();
        properties.is_value_operation = true;
        properties
    }

    /// Create the properties of a functional operator; all other fields are defaults.
    pub fn functional() -> Self {
        let mut properties = Self::default();
        properties.is_value_operation = false;
        properties
    }

    /// Return a copy of this operator that can be used as a binary infix operator with the given
    /// precedence.
    ///
    /// In debug builds this asserts that the precedence is greater than [`Precedence::min`] and
    /// that neither `value` nor `value + 1` has its high bit set.
    pub fn with_binary_infix_precedence(mut self, value: u8) -> Self {
        let precedence = Precedence { value };
        debug_assert!(precedence > Precedence::min());
        debug_assert!(value & 0x80 == 0);
        debug_assert!((value + 1) & 0x80 == 0);
        self.binary_infix_precedence = Some(precedence);
        self
    }

    /// Return a copy of this operator that can be used as a unary prefix operator with the given
    /// precedence. The precedence must be greater than [`Precedence::min`].
    pub fn with_unary_prefix_mode(mut self, precedence: Precedence) -> Self {
        debug_assert!(precedence > Precedence::min());
        self.unary_prefix_precedence = Some(precedence);
        self
    }

    /// Return a copy of this operator, flagged as a value-level (as opposed to functional)
    /// operation.
    pub fn as_value_operation(mut self) -> Self {
        self.is_value_operation = true;
        self
    }

    /// Return whether this operator is a value-level (as opposed to functional) operation.
    pub fn is_value_operation(&self) -> bool {
        self.is_value_operation
    }

    /// Return a copy of this operator, flagged as right associative.
    pub fn as_right_associative(mut self) -> Self {
        self.is_right_associative = true;
        self
    }

    /// Return a copy of this operator, flagged as a modified-assignment operator.
    pub fn as_modifier(mut self) -> Self {
        self.is_modifier = true;
        self
    }

    /// Return a copy of this operator with the specified LHS operator-section/template-function
    /// behavior. Accepts either a `SectionTermination` or an `Option` of one.
    fn with_lhs_section_termination<T>(mut self, lhs_section_termination: T) -> Self
    where T: Into<Option<SectionTermination>> {
        self.lhs_section_termination = lhs_section_termination.into();
        self
    }

    /// Return the precedence of this operator in binary infix position, if it can be used there.
    pub fn binary_infix_precedence(&self) -> Option<Precedence> {
        self.binary_infix_precedence
    }

    /// Return the precedence of this operator in unary prefix position, if it can be used there.
    pub fn unary_prefix_precedence(&self) -> Option<Precedence> {
        self.unary_prefix_precedence
    }

    /// Return whether this operator can form operator sections; compile-time operators cannot.
    pub fn can_form_section(&self) -> bool {
        !self.is_compile_time
    }

    /// Return the LHS operator-section/template-function behavior of this operator.
    pub fn lhs_section_termination(&self) -> Option<SectionTermination> {
        self.lhs_section_termination
    }

    /// Return whether this operator is a modified-assignment operator.
    pub fn is_modifier(&self) -> bool {
        self.is_modifier
    }

    /// Return this operator's associativity: right if flagged as such, left otherwise.
    pub fn associativity(&self) -> Associativity {
        if self.is_right_associative {
            Associativity::Right
        } else {
            Associativity::Left
        }
    }

    /// Whether the RHS is an expression; if true, the operator may introduce a body block.
    pub fn rhs_is_expression(&self) -> bool {
        let non_expression = self.rhs_is_non_expression;
        !non_expression
    }
}
/// Operator-like tokens have operator properties, including normal operators and syntactic
/// operators.
///
/// Unlike `TokenOperatorProperties`, the result is not optional: this trait is only implemented
/// for types that always have properties.
trait HasOperatorProperties {
/// Return the properties of this operator.
fn operator_properties(&self) -> OperatorProperties;
}
/// If a token is operator-like, it has associated properties.
///
/// This is the public entry point for querying a token whose variant is not statically known.
pub trait TokenOperatorProperties {
/// Return a value if this token is operator-like; otherwise return `None`.
fn operator_properties(&self) -> Option<OperatorProperties>;
}
/// A token whose variant type has operator properties exposes the properties of its variant.
impl<'s, V> HasOperatorProperties for Token<'s, V>
where V: HasOperatorProperties
{
    /// Delegate to the token's variant.
    fn operator_properties(&self) -> OperatorProperties {
        V::operator_properties(&self.variant)
    }
}
impl<'s> TokenOperatorProperties for Token<'s> {
    /// Return the operator properties of this token, or `None` if its variant is not one of the
    /// operator variants listed below.
    ///
    /// Properties of a general `Operator` token are computed from its source text; every
    /// syntactic operator variant supplies fixed properties of its own.
    fn operator_properties(&self) -> Option<OperatorProperties> {
        let properties = match self.variant {
            Variant::Operator(_) => analyze_non_syntactic_operator(self.code.repr.0),
            Variant::AssignmentOperator(op) => op.operator_properties(),
            Variant::TypeAnnotationOperator(op) => op.operator_properties(),
            Variant::ArrowOperator(op) => op.operator_properties(),
            Variant::AnnotationOperator(op) => op.operator_properties(),
            Variant::AutoscopeOperator(op) => op.operator_properties(),
            Variant::NegationOperator(op) => op.operator_properties(),
            Variant::LambdaOperator(op) => op.operator_properties(),
            Variant::DotOperator(op) => op.operator_properties(),
            Variant::SuspensionOperator(op) => op.operator_properties(),
            Variant::CommaOperator(op) => op.operator_properties(),
            // Not an operator-like token.
            _ => return None,
        };
        Some(properties)
    }
}
impl HasOperatorProperties for variant::AssignmentOperator {
    /// The assignment operator is a right-associative, compile-time binary operator with
    /// precedence 1 whose LHS section termination is `Unwrap`.
    fn operator_properties(&self) -> OperatorProperties {
        let precedence = Precedence { value: 1 };
        OperatorProperties {
            is_compile_time: true,
            is_right_associative: true,
            lhs_section_termination: Some(SectionTermination::Unwrap),
            binary_infix_precedence: Some(precedence),
            ..Default::default()
        }
    }
}
impl HasOperatorProperties for variant::TypeAnnotationOperator {
    /// The type-annotation operator is a right-associative, compile-time binary operator with
    /// precedence 2 whose LHS section termination is `Reify`; its RHS is not an expression.
    fn operator_properties(&self) -> OperatorProperties {
        let precedence = Precedence { value: 2 };
        OperatorProperties {
            rhs_is_non_expression: true,
            is_compile_time: true,
            is_right_associative: true,
            lhs_section_termination: Some(SectionTermination::Reify),
            binary_infix_precedence: Some(precedence),
            ..Default::default()
        }
    }
}
impl HasOperatorProperties for variant::ArrowOperator {
    /// The arrow operator is a right-associative, compile-time binary operator with precedence 2
    /// whose LHS section termination is `Unwrap`.
    fn operator_properties(&self) -> OperatorProperties {
        let precedence = Precedence { value: 2 };
        OperatorProperties {
            is_compile_time: true,
            is_right_associative: true,
            lhs_section_termination: Some(SectionTermination::Unwrap),
            binary_infix_precedence: Some(precedence),
            ..Default::default()
        }
    }
}
impl HasOperatorProperties for variant::AnnotationOperator {
    /// The annotation operator is a right-associative, compile-time unary prefix operator with
    /// maximum precedence; its RHS is not an expression.
    fn operator_properties(&self) -> OperatorProperties {
        let precedence = Precedence::max();
        OperatorProperties {
            rhs_is_non_expression: true,
            is_compile_time: true,
            is_right_associative: true,
            unary_prefix_precedence: Some(precedence),
            ..Default::default()
        }
    }
}
impl HasOperatorProperties for variant::AutoscopeOperator {
    /// The autoscope operator is a compile-time unary prefix operator with the minimum valid
    /// precedence; its RHS is not an expression.
    fn operator_properties(&self) -> OperatorProperties {
        let precedence = Precedence::min_valid();
        OperatorProperties {
            rhs_is_non_expression: true,
            is_compile_time: true,
            unary_prefix_precedence: Some(precedence),
            ..Default::default()
        }
    }
}
impl HasOperatorProperties for variant::NegationOperator {
    /// The negation operator is a value-level unary prefix operator with the precedence of unary
    /// minus.
    fn operator_properties(&self) -> OperatorProperties {
        let precedence = Precedence::unary_minus();
        OperatorProperties {
            unary_prefix_precedence: Some(precedence),
            is_value_operation: true,
            ..Default::default()
        }
    }
}
impl HasOperatorProperties for variant::LambdaOperator {
    /// The lambda operator is a compile-time unary prefix operator with the minimum valid
    /// precedence.
    fn operator_properties(&self) -> OperatorProperties {
        let precedence = Precedence::min_valid();
        OperatorProperties {
            is_compile_time: true,
            unary_prefix_precedence: Some(precedence),
            ..Default::default()
        }
    }
}
impl HasOperatorProperties for variant::DotOperator {
    /// The dot operator is a binary operator with precedence 80; all other properties are
    /// defaults.
    fn operator_properties(&self) -> OperatorProperties {
        let precedence = Precedence { value: 80 };
        OperatorProperties {
            binary_infix_precedence: Some(precedence),
            ..Default::default()
        }
    }
}
impl HasOperatorProperties for variant::SuspensionOperator {
    /// The suspension operator is a compile-time unary prefix operator with maximum precedence;
    /// its RHS is not an expression.
    fn operator_properties(&self) -> OperatorProperties {
        let precedence = Precedence::max();
        OperatorProperties {
            rhs_is_non_expression: true,
            is_compile_time: true,
            unary_prefix_precedence: Some(precedence),
            ..Default::default()
        }
    }
}
impl HasOperatorProperties for variant::CommaOperator {
    /// The comma operator is a compile-time binary operator with precedence 1; its RHS is not an
    /// expression.
    fn operator_properties(&self) -> OperatorProperties {
        let precedence = Precedence { value: 1 };
        OperatorProperties {
            rhs_is_non_expression: true,
            is_compile_time: true,
            binary_infix_precedence: Some(precedence),
            ..Default::default()
        }
    }
}
/// Value that can be compared to determine which operator will bind more tightly within an
/// expression.
///
/// Ordering is derived from the single numeric field, so a larger `value` compares greater.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Precedence {
/// A numeric value determining precedence order.
pub(super) value: u8,
}
impl Precedence {
/// Return a precedence that is lower than the precedence of any operator.
pub fn min() -> Self {
Precedence { value: 0 }
}
/// Return the precedence for any operator.
pub fn min_valid() -> Self {
Precedence { value: 1 }
}
/// Return a precedence that is not lower than any other precedence.
pub fn max() -> Self {
Precedence { value: 100 }
}
/// Return the precedence of application.
pub fn application() -> Self {
Precedence { value: 80 }
}
/// Return the precedence of @annotations.
pub fn annotation() -> Self {
Precedence { value: 79 }
}
/// Return the precedence of unary minus.
pub fn unary_minus() -> Self {
Precedence { value: 79 }
}
/// Return the precedence of unary minus when applied to a numeric literal.
pub fn unary_minus_numeric_literal() -> Self {
Precedence { value: 80 }
}
/// Return the value as a number.
pub fn into_u8(self) -> u8 {
self.value
}
}
/// Associativity (left or right).
///
/// Produced by `OperatorProperties::associativity`, which yields `Right` only for operators
/// flagged as right associative.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Associativity {
/// Left-associative.
Left,
/// Right-associative.
Right,
}

View File

@ -5,11 +5,14 @@ use crate::source::*;
use crate::syntax::*;
use crate::span_builder;
use crate::syntax::token::TokenOperatorProperties;
use crate::syntax::treebuilding::Spacing;
#[cfg(feature = "debug")]
use enso_parser_syntax_tree_visitor::Visitor;
// ==============
// === Export ===
// ==============
@ -32,13 +35,13 @@ pub struct Tree<'s> {
#[deref]
#[deref_mut]
#[reflect(subtype)]
pub variant: Box<Variant<'s>>,
pub variant: Variant<'s>,
}
/// Constructor.
#[allow(non_snake_case)]
pub fn Tree<'s>(span: Span<'s>, variant: impl Into<Variant<'s>>) -> Tree<'s> {
let variant = Box::new(variant.into());
let variant = variant.into();
Tree { variant, span, warnings: default() }
}
@ -51,7 +54,7 @@ impl<'s> AsRef<Span<'s>> for Tree<'s> {
impl<'s> Default for Tree<'s> {
fn default() -> Self {
Self {
variant: Box::new(Variant::Ident(Ident { token: Default::default() })),
variant: Variant::Ident(Box::new(Ident { token: Default::default() })),
span: Span::empty_without_offset(),
warnings: default(),
}
@ -63,10 +66,9 @@ impl<'s> Default for Tree<'s> {
#[macro_export]
macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)*
/// [`Tree`] variants definition. See its docs to learn more.
#[tagged_enum]
#[tagged_enum(boxed)]
#[cfg_attr(feature = "debug", derive(Visitor))]
#[derive(Clone, Eq, PartialEq, Serialize, Reflect, Deserialize)]
#[allow(clippy::large_enum_variant)] // Inefficient. Will be fixed in #182878443.
#[tagged_enum(apply_attributes_to = "variants")]
#[reflect(inline)]
pub enum Variant<'s> {
@ -144,7 +146,7 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
pub func: Tree<'s>,
pub open: Option<token::OpenSymbol<'s>>,
pub name: token::Ident<'s>,
pub equals: token::Operator<'s>,
pub equals: token::AssignmentOperator<'s>,
pub arg: Tree<'s>,
pub close: Option<token::CloseSymbol<'s>>,
},
@ -159,12 +161,12 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
/// Application of a unary operator, like `-a` or `~handler`. It is a syntax error for `rhs`
/// to be `None`.
UnaryOprApp {
pub opr: token::Operator<'s>,
pub opr: token::UnaryOperator<'s>,
pub rhs: Option<Tree<'s>>,
},
/// Application of the autoscope operator to an identifier, e.g. `..True`.
AutoscopedIdentifier {
pub opr: token::Operator<'s>,
pub opr: token::AutoscopeOperator<'s>,
pub ident: token::Ident<'s>,
},
/// Defines the point where operator sections should be expanded to lambdas. Let's consider
@ -196,7 +198,7 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
/// - Type constructors definitions.
/// - Bindings, defining either methods or type methods.
TypeDef {
pub keyword: token::Ident<'s>,
pub keyword: token::TypeKeyword<'s>,
pub name: token::Ident<'s>,
pub params: Vec<ArgumentDefinition<'s>>,
pub body: Vec<block::Line<'s>>,
@ -206,7 +208,7 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
/// The pattern which should be unified with the expression.
pub pattern: Tree<'s>,
/// The `=` token.
pub equals: token::Operator<'s>,
pub equals: token::AssignmentOperator<'s>,
/// The expression initializing the value(s) in the pattern.
pub expr: Tree<'s>,
},
@ -219,7 +221,7 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
/// An optional specification of return type, like `-> Integer`.
pub returns: Option<ReturnSpecification<'s>>,
/// The `=` token.
pub equals: token::Operator<'s>,
pub equals: token::AssignmentOperator<'s>,
/// The body, which will typically be an inline expression or a `BodyBlock` expression.
/// It is an error for this to be empty.
pub body: Option<Tree<'s>>,
@ -227,7 +229,7 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
/// A foreign function definition.
ForeignFunction {
/// The `foreign` keyword.
pub foreign: token::Ident<'s>,
pub foreign: token::ForeignKeyword<'s>,
/// The function's language.
pub language: token::Ident<'s>,
/// The name to which the function should be bound.
@ -235,7 +237,7 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
/// The argument patterns.
pub args: Vec<ArgumentDefinition<'s>>,
/// The `=` token.
pub equals: token::Operator<'s>,
pub equals: token::AssignmentOperator<'s>,
/// The body, which is source code for the specified language.
pub body: Tree<'s>,
},
@ -244,7 +246,7 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
pub polyglot: Option<MultiSegmentAppSegment<'s>>,
pub from: Option<MultiSegmentAppSegment<'s>>,
pub import: MultiSegmentAppSegment<'s>,
pub all: Option<token::Ident<'s>>,
pub all: Option<token::AllKeyword<'s>>,
#[reflect(rename = "as")]
pub as_: Option<MultiSegmentAppSegment<'s>>,
pub hiding: Option<MultiSegmentAppSegment<'s>>,
@ -253,10 +255,8 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
Export {
pub from: Option<MultiSegmentAppSegment<'s>>,
pub export: MultiSegmentAppSegment<'s>,
pub all: Option<token::Ident<'s>>,
#[reflect(rename = "as")]
pub as_: Option<MultiSegmentAppSegment<'s>>,
pub hiding: Option<MultiSegmentAppSegment<'s>>,
},
/// An expression grouped by matched parentheses.
Group {
@ -269,7 +269,7 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
/// (Qualified) name of the item whose type is being declared.
pub variable: Tree<'s>,
/// The `:` token.
pub operator: token::Operator<'s>,
pub operator: token::TypeAnnotationOperator<'s>,
/// The variable's type.
#[reflect(rename = "type")]
pub type_: Tree<'s>,
@ -279,21 +279,21 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
/// The expression whose type is being annotated.
pub expression: Tree<'s>,
/// The `:` token.
pub operator: token::Operator<'s>,
pub operator: token::TypeAnnotationOperator<'s>,
/// The expression's type.
#[reflect(rename = "type")]
pub type_: Tree<'s>,
},
/// A `case _ of` pattern-matching expression.
CaseOf {
pub case: token::Ident<'s>,
pub case: token::CaseKeyword<'s>,
pub expression: Option<Tree<'s>>,
pub of: token::Ident<'s>,
pub of: token::OfKeyword<'s>,
pub cases: Vec<CaseLine<'s>>,
},
/// A lambda expression.
Lambda {
pub operator: token::Operator<'s>,
pub operator: token::LambdaOperator<'s>,
pub arrow: Option<Tree<'s>>,
},
/// An array literal.
@ -316,7 +316,7 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
/// Table.select_columns : Vector Text | Column_Selector -> Boolean -> Problem_Behavior -> Table
/// ```
Annotated {
pub token: token::Operator<'s>,
pub token: token::AnnotationOperator<'s>,
pub annotation: token::Ident<'s>,
pub argument: Option<Tree<'s>>,
pub newlines: Vec<token::Newline<'s>>,
@ -324,7 +324,7 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
},
/// An expression preceded by a special built-in annotation, e.g. `@Tail_Call foo 4`.
AnnotatedBuiltin {
pub token: token::Operator<'s>,
pub token: token::AnnotationOperator<'s>,
pub annotation: token::Ident<'s>,
pub newlines: Vec<token::Newline<'s>>,
pub expression: Option<Tree<'s>>,
@ -510,7 +510,7 @@ impl<'s> span::Builder<'s> for DocComment<'s> {
#[allow(missing_docs)]
pub struct FractionalDigits<'s> {
/// The dot operator.
pub dot: token::Operator<'s>,
pub dot: token::DotOperator<'s>,
/// The decimal digits after the dot.
pub digits: token::Digits<'s>,
}
@ -533,7 +533,7 @@ pub struct ArgumentDefinition<'s> {
/// Opening parenthesis (inner).
pub open2: Option<token::OpenSymbol<'s>>,
/// An optional execution-suspension unary operator (~).
pub suspension: Option<token::Operator<'s>>,
pub suspension: Option<token::SuspensionOperator<'s>>,
/// The pattern being bound to an argument.
pub pattern: Tree<'s>,
/// An optional type ascribed to an argument.
@ -565,7 +565,7 @@ impl<'s> span::Builder<'s> for ArgumentDefinition<'s> {
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Reflect, Deserialize)]
pub struct ArgumentDefault<'s> {
/// The `=` token.
pub equals: token::Operator<'s>,
pub equals: token::AssignmentOperator<'s>,
/// The default value.
pub expression: Tree<'s>,
}
@ -581,7 +581,7 @@ impl<'s> span::Builder<'s> for ArgumentDefault<'s> {
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Reflect, Deserialize)]
pub struct ArgumentType<'s> {
/// The `:` token.
pub operator: token::Operator<'s>,
pub operator: token::TypeAnnotationOperator<'s>,
/// The type.
#[reflect(rename = "type")]
pub type_: Tree<'s>,
@ -598,7 +598,7 @@ impl<'s> span::Builder<'s> for ArgumentType<'s> {
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Reflect, Deserialize)]
pub struct ReturnSpecification<'s> {
/// The `->` operator.
pub arrow: token::Operator<'s>,
pub arrow: token::ArrowOperator<'s>,
/// The function's return type.
#[reflect(rename = "type")]
pub r#type: Tree<'s>,
@ -650,7 +650,7 @@ pub struct Case<'s> {
/// The pattern being matched. It is an error for this to be absent.
pub pattern: Option<Tree<'s>>,
/// Token.
pub arrow: Option<token::Operator<'s>>,
pub arrow: Option<token::ArrowOperator<'s>>,
/// The expression associated with the pattern. It is an error for this to be empty.
pub expression: Option<Tree<'s>>,
}
@ -686,7 +686,7 @@ pub type OperatorOrError<'s> = Result<token::Operator<'s>, MultipleOperatorError
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Reflect, Deserialize)]
#[allow(missing_docs)]
pub struct MultipleOperatorError<'s> {
pub operators: NonEmptyVec<token::Operator<'s>>,
pub operators: Box<NonEmptyVec<token::Operator<'s>>>,
}
impl<'s> span::Builder<'s> for MultipleOperatorError<'s> {
@ -799,6 +799,72 @@ enum WarningId {
pub const WARNINGS: [&str; WarningId::NUM_WARNINGS as usize] =
["Spacing is inconsistent with operator precedence"];
#[allow(missing_copy_implementations)] // Future errors may have attached information.
#[derive(Debug)]
#[allow(missing_docs)] // See associated messages defined below.
pub enum SyntaxError {
    ArgDefUnexpectedOpInParenClause,
    ArgDefSpuriousParens,
    ArgDefExpectedPattern,
    ExpectedExpression,
    ExpectedPattern,
    ExpectedQualifiedName,
    ExpectedType,
    ForeignFnExpectedLanguage,
    ForeignFnExpectedName,
    ForeignFnExpectedStringBody,
    StmtInvalidAssignmentOrMethod,
    StmtLhsInvalidOperatorSpacing,
    StmtUnexpectedPrivateUsage,
    TypeBodyUnexpectedPrivateUsage,
    TypeDefExpectedTypeName,
    ExprUnexpectedAssignment,
    ExprUnclosedParen,
    UnexpectedExpressionInTypeBody,
    ImportsNoAllInExport,
    ImportsNoHidingInExport,
    ImportsExpectedNameInExport,
    AnnotationOpMustBeAppliedToIdent,
    PatternUnexpectedExpression,
    PatternUnexpectedDot,
}

impl SyntaxError {
    /// The user-facing message for this error. Arms are listed in the same order as the
    /// variants are declared, so a missing or misplaced message is easy to spot.
    fn message(&self) -> &'static str {
        match self {
            Self::ArgDefUnexpectedOpInParenClause =>
                "Unexpected operator in parenthesized argument definition clause",
            Self::ArgDefSpuriousParens => "Invalid parentheses in argument definition",
            Self::ArgDefExpectedPattern => "Expected identifier or wildcard in argument binding",
            Self::ExpectedExpression => "Expected expression",
            Self::ExpectedPattern => "Expected pattern",
            Self::ExpectedQualifiedName => "Expected qualified name.",
            Self::ExpectedType => "Expected type",
            Self::ForeignFnExpectedLanguage =>
                "Expected language name in foreign function definition",
            Self::ForeignFnExpectedName => "Expected function name in foreign function definition",
            Self::ForeignFnExpectedStringBody =>
                "The body of a foreign function must be a text literal",
            Self::StmtInvalidAssignmentOrMethod => "Invalid assignment or method definition",
            Self::StmtLhsInvalidOperatorSpacing =>
                "Each operator on the left side of an assignment operator must be applied to two operands, with the same spacing on each side",
            Self::StmtUnexpectedPrivateUsage =>
                "In a body block, the `private` keyword can only be applied to a function definition",
            Self::TypeBodyUnexpectedPrivateUsage =>
                "In a type definition, the `private` keyword can only be applied to a constructor or function definition",
            Self::TypeDefExpectedTypeName => "Expected type identifier in type declaration",
            Self::ExprUnexpectedAssignment => "Unexpected use of assignment operator in expression",
            Self::ExprUnclosedParen => "Unclosed parenthesis in expression",
            Self::UnexpectedExpressionInTypeBody => "Expression unexpected in type definition",
            Self::ImportsNoAllInExport => "`all` not allowed in `export` statement",
            Self::ImportsNoHidingInExport => "`hiding` not allowed in `export` statement",
            Self::ImportsExpectedNameInExport => "Expected name following `export` keyword",
            Self::AnnotationOpMustBeAppliedToIdent =>
                "The annotation operator must be applied to an identifier",
            Self::PatternUnexpectedExpression => "Expression invalid in a pattern",
            Self::PatternUnexpectedDot =>
                "In a pattern, the dot operator can only be used in a qualified name",
        }
    }
}

/// Converts a syntax error into its message; the result always borrows a static string.
impl From<SyntaxError> for Cow<'static, str> {
    fn from(error: SyntaxError) -> Self {
        Cow::Borrowed(error.message())
    }
}
// ====================================
@ -810,25 +876,7 @@ pub const WARNINGS: [&str; WarningId::NUM_WARNINGS as usize] =
/// For most input types, this simply constructs an `App`; however, for some operand types
/// application has special semantics.
pub fn apply<'s>(mut func: Tree<'s>, mut arg: Tree<'s>) -> Tree<'s> {
match (&mut *func.variant, &mut *arg.variant) {
(Variant::Annotated(func_ @ Annotated { argument: None, .. }), _) => {
func.span.code_length += arg.span.length_including_whitespace();
func_.argument = maybe_apply(mem::take(&mut func_.argument), arg).into();
func
}
(Variant::AnnotatedBuiltin(func_), _) => {
func.span.code_length += arg.span.length_including_whitespace();
func_.expression = maybe_apply(mem::take(&mut func_.expression), arg).into();
func
}
(
Variant::OprApp(OprApp { lhs: Some(_), opr: Ok(_), rhs: rhs @ None }),
Variant::ArgumentBlockApplication(ArgumentBlockApplication { lhs: None, arguments }),
) => {
func.span.code_length += arg.span.length_including_whitespace();
*rhs = block::body_from_lines(mem::take(arguments)).into();
func
}
match (&mut func.variant, &mut arg.variant) {
(_, Variant::ArgumentBlockApplication(block)) if block.lhs.is_none() => {
let code =
func.span.code_length + arg.span.left_offset.code.length() + arg.span.code_length;
@ -839,7 +887,7 @@ pub fn apply<'s>(mut func: Tree<'s>, mut arg: Tree<'s>) -> Tree<'s> {
first.newline.left_offset += arg_left_offset;
}
block.lhs = Some(func);
arg
return arg;
}
(_, Variant::OperatorBlockApplication(block)) if block.lhs.is_none() => {
let code =
@ -851,30 +899,16 @@ pub fn apply<'s>(mut func: Tree<'s>, mut arg: Tree<'s>) -> Tree<'s> {
first.newline.left_offset += arg_left_offset;
}
block.lhs = Some(func);
arg
return arg;
}
(_, Variant::OprApp(OprApp { lhs: Some(lhs), opr: Ok(opr), rhs: Some(rhs) }))
if opr.properties.is_assignment()
&& let Variant::Ident(lhs) = &*lhs.variant =>
{
let mut lhs = lhs.token.clone();
lhs.left_offset += arg.span.left_offset;
Tree::named_app(func, None, lhs, opr.clone(), rhs.clone(), None)
}
(_, Variant::Group(Group { open: Some(open), body: Some(body), close: Some(close) }))
if let box Variant::OprApp(OprApp { lhs: Some(lhs), opr: Ok(opr), rhs: Some(rhs) }) =
&body.variant
&& opr.properties.is_assignment()
&& let Variant::Ident(lhs) = &*lhs.variant =>
{
let mut open = open.clone();
open.left_offset += arg.span.left_offset;
let open = Some(open);
let close = Some(close.clone());
Tree::named_app(func, open, lhs.token.clone(), opr.clone(), rhs.clone(), close)
}
_ => Tree::app(func, arg),
_ => {}
}
let error = match Spacing::of_tree(&arg) {
Spacing::Spaced => None,
Spacing::Unspaced if matches!(arg.variant, Variant::SuspendedDefaultArguments(_)) => None,
Spacing::Unspaced => Some("Space required between terms."),
};
maybe_with_error(Tree::app(func, arg), error)
}
fn maybe_apply<'s>(f: Option<Tree<'s>>, x: Tree<'s>) -> Tree<'s> {
@ -890,142 +924,59 @@ fn maybe_apply<'s>(f: Option<Tree<'s>>, x: Tree<'s>) -> Tree<'s> {
/// `ArgumentBlock`) is reinterpreted as a `BodyBlock` when it appears in the RHS of an operator
/// expression.
pub fn apply_operator<'s>(
mut lhs: Option<Tree<'s>>,
opr: Vec<token::Operator<'s>>,
mut rhs: Option<Tree<'s>>,
lhs: Option<Tree<'s>>,
opr: Vec<Token<'s>>,
rhs: Option<Tree<'s>>,
) -> Tree<'s> {
let opr = match opr.len() {
0 => return apply(lhs.unwrap(), rhs.unwrap()),
0 => unreachable!(),
1 => Ok(opr.into_iter().next().unwrap()),
_ => Err(MultipleOperatorError { operators: NonEmptyVec::try_from(opr).unwrap() }),
};
if let Ok(opr_) = &opr
&& opr_.properties.is_token_joiner()
&& let Some(lhs_) = lhs.as_mut()
&& let Some(rhs_) = rhs.as_mut()
{
return match (&mut *lhs_.variant, &mut *rhs_.variant) {
(
Variant::Number(func_ @ Number { base: _, integer: None, fractional_digits: None }),
Variant::Number(Number { base: None, integer, fractional_digits }),
) => {
func_.integer = mem::take(integer);
func_.fractional_digits = mem::take(fractional_digits);
lhs_.span.code_length += rhs_.span.code_length;
lhs.take().unwrap()
}
_ => {
debug_assert!(false, "Unexpected use of token-joiner operator!");
apply(lhs.take().unwrap(), rhs.take().unwrap())
}
};
}
if let Ok(opr_) = &opr
&& opr_.properties.is_special()
{
let tree = Tree::opr_app(lhs, opr, rhs);
return tree.with_error("Invalid use of special operator.");
}
if let Ok(opr_) = &opr
&& opr_.properties.is_type_annotation()
{
return match (lhs, rhs) {
(Some(lhs), Some(rhs)) => Tree::type_annotated(lhs, opr.unwrap(), rhs),
(lhs, rhs) => {
let invalid = Tree::opr_app(lhs, opr, rhs);
invalid.with_error("`:` operator must be applied to two operands.")
}
};
}
if let Ok(opr_) = &opr
&& !opr_.properties.can_form_section()
&& lhs.is_none()
&& rhs.is_none()
{
let error = format!("Operator `{opr:?}` must be applied to two operands.");
let invalid = Tree::opr_app(lhs, opr, rhs);
return invalid.with_error(error);
}
if let Ok(opr) = &opr
&& opr.properties.is_decimal()
&& let Some(lhs) = lhs.as_mut()
&& let box Variant::Number(lhs_) = &mut lhs.variant
&& lhs_.fractional_digits.is_none()
&& let Some(rhs) = rhs.as_mut()
&& let box Variant::Number(Number {
base: None,
integer: Some(digits),
fractional_digits: None,
}) = &mut rhs.variant
{
let dot = opr.clone();
let digits = digits.clone();
lhs.span.code_length += dot.code.length() + rhs.span.code_length;
lhs_.fractional_digits = Some(FractionalDigits { dot, digits });
return lhs.clone();
}
if let Some(rhs_) = rhs.as_mut() {
if let Variant::ArgumentBlockApplication(block) = &mut *rhs_.variant {
if block.lhs.is_none() {
if let Some(first) = block.arguments.first_mut() {
first.newline.left_offset += rhs_.span.left_offset.take_as_prefix();
}
let ArgumentBlockApplication { lhs: _, arguments } = block;
let arguments = mem::take(arguments);
*rhs_ = block::body_from_lines(arguments);
}
}
}
Tree::opr_app(lhs, opr, rhs)
}
/// Apply a unary operator to an operand.
///
/// For most inputs this will simply construct a `UnaryOprApp`; however, some operators are special.
pub fn apply_unary_operator<'s>(opr: token::Operator<'s>, rhs: Option<Tree<'s>>) -> Tree<'s> {
if opr.properties.is_annotation()
&& let Some(Tree { variant: box Variant::Ident(Ident { token }), .. }) = rhs
{
return match token.is_type {
true => Tree::annotated_builtin(opr, token, vec![], None),
false => Tree::annotated(opr, token, None, vec![], None),
};
}
if opr.properties.is_autoscope()
&& let Some(rhs) = rhs
{
return if let box Variant::Ident(Ident { mut token }) = rhs.variant {
let applied_to_type = token.variant.is_type;
token.left_offset = rhs.span.left_offset;
let autoscope_application = Tree::autoscoped_identifier(opr, token);
return if applied_to_type {
autoscope_application
} else {
autoscope_application.with_error(
"The auto-scope operator may only be applied to a capitalized identifier.",
_ => Err(MultipleOperatorError {
operators: Box::new(
NonEmptyVec::try_from(
opr.into_iter()
.map(|opr| opr.with_variant(token::variant::Operator()))
.collect::<Vec<_>>(),
)
.unwrap(),
),
}),
};
match opr {
Ok(opr) => {
let error = match (&opr.variant, lhs.as_ref().map(|tree| &tree.variant), &rhs) {
(_, Some(Variant::AutoscopedIdentifier(_)), _) if !opr.is_spaced() =>
Some("Space required between term and operator."),
(_, _, None) | (_, None, _) if opr.is_syntactic_binary_operator() =>
Some("Operator must be applied to two operands."),
(
token::Variant::Operator(_)
| token::Variant::DotOperator(_)
| token::Variant::ArrowOperator(_)
| token::Variant::TypeAnnotationOperator(_)
// Old lambda syntax: (a = b) -> a
| token::Variant::AssignmentOperator(_),
_,
_,
) => None,
_ => Some("Invalid use of syntactic operator in expression"),
};
} else {
Tree::unary_opr_app(opr, Some(rhs))
.with_error("The auto-scope operator (..) may only be applied to an identifier.")
};
let tree = match (opr.variant, lhs, rhs) {
(token::Variant::TypeAnnotationOperator(annotation), Some(lhs), Some(rhs)) =>
Tree::type_annotated(lhs, opr.with_variant(annotation), rhs),
(_, lhs, rhs) =>
Tree::opr_app(lhs, Ok(opr.with_variant(token::variant::Operator())), rhs),
};
maybe_with_error(tree, error)
}
_ => Tree::opr_app(lhs, opr.map(|opr| opr.with_variant(token::variant::Operator())), rhs),
}
if !opr.properties.can_form_section() && rhs.is_none() {
let error = format!("Operator `{opr:?}` must be applied to an operand.");
let invalid = Tree::unary_opr_app(opr, rhs);
return invalid.with_error(error);
}
Tree::unary_opr_app(opr, rhs)
}
/// Create an AST node for a token.
pub fn to_ast(token: Token) -> Tree {
match token.variant {
token::Variant::Ident(ident) => token.with_variant(ident).into(),
token::Variant::Digits(number) =>
Tree::number(None, Some(token.with_variant(number)), None),
token::Variant::NumberBase(base) =>
Tree::number(Some(token.with_variant(base)), None, None),
token::Variant::Wildcard(wildcard) => Tree::wildcard(token.with_variant(wildcard), default()),
token::Variant::SuspendedDefaultArguments(t) => Tree::suspended_default_arguments(token.with_variant(t)),
token::Variant::OpenSymbol(s) =>
@ -1039,7 +990,24 @@ pub fn to_ast(token: Token) -> Tree {
| token::Variant::BlockEnd(_)
// This should be unreachable: `Precedence::resolve` doesn't call `to_ast` for operators.
| token::Variant::Operator(_)
| token::Variant::AssignmentOperator(_)
| token::Variant::TypeAnnotationOperator(_)
| token::Variant::ArrowOperator(_)
| token::Variant::AutoscopeOperator(_)
| token::Variant::UnaryOperator(_)
| token::Variant::NegationOperator(_)
| token::Variant::LambdaOperator(_)
| token::Variant::DotOperator(_)
| token::Variant::SuspensionOperator(_)
| token::Variant::AnnotationOperator(_)
| token::Variant::CommaOperator(_)
// Keywords are handled by macros.
| token::Variant::Private(_)
| token::Variant::TypeKeyword(_)
| token::Variant::ForeignKeyword(_)
| token::Variant::AllKeyword(_)
| token::Variant::CaseKeyword(_)
| token::Variant::OfKeyword(_)
// Handled during compound-token assembly.
| token::Variant::TextStart(_)
| token::Variant::TextSection(_)
@ -1047,6 +1015,8 @@ pub fn to_ast(token: Token) -> Tree {
| token::Variant::TextEnd(_)
| token::Variant::TextInitialNewline(_)
| token::Variant::TextNewline(_)
| token::Variant::Digits(_)
| token::Variant::NumberBase(_)
// Map an error case in the lexer to an error in the AST.
| token::Variant::Invalid(_) => {
let message = format!("Unexpected token: {token:?}");
@ -1184,6 +1154,7 @@ spanless_leaf_impls!(Cow<'static, str>);
// === ItemVisitable special cases ===
#[cfg(feature = "debug")]
impl<'s, 'a> ItemVisitable<'s, 'a> for Tree<'s> {
fn visit_item<V: ItemVisitor<'s, 'a>>(&'a self, visitor: &mut V) {
@ -1202,6 +1173,13 @@ where &'a Token<'s, T>: Into<token::Ref<'s, 'a>>
}
}
/// Visiting a boxed item is the same as visiting the value stored inside the box.
#[cfg(feature = "debug")]
impl<'s, 'a, T: ItemVisitable<'s, 'a>> ItemVisitable<'s, 'a> for Box<T> {
    fn visit_item<V: ItemVisitor<'s, 'a>>(&'a self, visitor: &mut V) {
        // Reborrow the boxed value for the whole `'a` lifetime, then delegate to its own impl.
        let boxed_value: &'a T = &**self;
        boxed_value.visit_item(visitor)
    }
}
// ==========================
@ -1295,3 +1273,14 @@ impl<'s> Tree<'s> {
self.variant.visit_item(&mut ItemFnVisitor { f });
}
}
// === Helper ===
/// Attach an optional error to a tree.
///
/// When `error` is `None`, `tree` is returned untouched; otherwise the result of
/// `tree.with_error(..)` with the given message is returned.
pub fn maybe_with_error(tree: Tree, error: Option<impl Into<Cow<'static, str>>>) -> Tree {
    if let Some(message) = error {
        tree.with_error(message.into())
    } else {
        tree
    }
}

View File

@ -2,6 +2,8 @@
use crate::syntax::tree::*;
use crate::syntax::statement::BodyBlockParser;
// =============
@ -46,19 +48,16 @@ impl<'s> span::Builder<'s> for Line<'s> {
// === Body Block ===
// ==================
/// Build a body block from a sequence of lines; this includes:
/// - Reinterpret the input expressions in statement context (i.e. expressions at the top-level of
/// the block that involve the `=` operator will be reinterpreted as function/variable bindings).
/// - Combine sibling lines in case of multi-line statements, such as annotated statements and
/// documented statements.
pub fn body_from_lines<'s>(lines: impl IntoIterator<Item = Line<'s>>) -> Tree<'s> {
use crate::expression_to_statement;
let lines = lines.into_iter().map(|l| l.map_expression(expression_to_statement));
let statements: Vec<_> = compound_lines(lines).collect();
Tree::body_block(statements)
/// Parse the top-level of a module.
pub fn parse_module<'s>(
lines: impl IntoIterator<Item = item::Line<'s>>,
precedence: &mut operator::Precedence<'s>,
) -> Tree<'s> {
BodyBlockParser::default().parse_module(lines, precedence)
}
// === Multi-line expression construction ===
/// Adapts a sequence of lines by combining sibling lines in case of multi-line statements, such as
@ -133,19 +132,19 @@ where I: Iterator<Item = Line<'s>>
/// Representation used to build multi-line statements.
#[derive(Debug)]
enum Prefix<'s> {
Annotation { node: Annotated<'s>, span: Span<'s> },
BuiltinAnnotation { node: AnnotatedBuiltin<'s>, span: Span<'s> },
Documentation { node: Documented<'s>, span: Span<'s> },
Annotation { node: Box<Annotated<'s>>, span: Span<'s> },
BuiltinAnnotation { node: Box<AnnotatedBuiltin<'s>>, span: Span<'s> },
Documentation { node: Box<Documented<'s>>, span: Span<'s> },
}
impl<'s> TryFrom<Tree<'s>> for Prefix<'s> {
type Error = Tree<'s>;
fn try_from(tree: Tree<'s>) -> Result<Self, Self::Error> {
match tree.variant {
box Variant::Annotated(node) => Ok(Prefix::Annotation { node, span: tree.span }),
box Variant::AnnotatedBuiltin(node @ AnnotatedBuiltin { expression: None, .. }) =>
Variant::Annotated(node) => Ok(Prefix::Annotation { node, span: tree.span }),
Variant::AnnotatedBuiltin(node @ box AnnotatedBuiltin { expression: None, .. }) =>
Ok(Prefix::BuiltinAnnotation { node, span: tree.span }),
box Variant::Documented(node) => Ok(Prefix::Documentation { node, span: tree.span }),
Variant::Documented(node) => Ok(Prefix::Documentation { node, span: tree.span }),
_ => Err(tree),
}
}
@ -154,10 +153,10 @@ impl<'s> TryFrom<Tree<'s>> for Prefix<'s> {
impl<'s> Prefix<'s> {
fn push_newline(&mut self, newline: token::Newline<'s>) {
let (newlines, span) = match self {
Prefix::Annotation { node: Annotated { newlines, .. }, span }
| Prefix::BuiltinAnnotation { node: AnnotatedBuiltin { newlines, .. }, span }
Prefix::Annotation { node: box Annotated { newlines, .. }, span }
| Prefix::BuiltinAnnotation { node: box AnnotatedBuiltin { newlines, .. }, span }
| Prefix::Documentation {
node: Documented { documentation: DocComment { newlines, .. }, .. },
node: box Documented { documentation: DocComment { newlines, .. }, .. },
span,
} => (newlines, span),
};
@ -181,14 +180,11 @@ impl<'s> From<Prefix<'s>> for Tree<'s> {
fn from(prefix: Prefix<'s>) -> Self {
match prefix {
Prefix::Annotation { node, span } =>
Tree { variant: Box::new(Variant::Annotated(node)), span, warnings: default() },
Prefix::BuiltinAnnotation { node, span } => Tree {
variant: Box::new(Variant::AnnotatedBuiltin(node)),
span,
warnings: default(),
},
Tree { variant: Variant::Annotated(node), span, warnings: default() },
Prefix::BuiltinAnnotation { node, span } =>
Tree { variant: Variant::AnnotatedBuiltin(node), span, warnings: default() },
Prefix::Documentation { node, span } =>
Tree { variant: Box::new(Variant::Documented(node)), span, warnings: default() },
Tree { variant: Variant::Documented(node), span, warnings: default() },
}
}
}
@ -217,9 +213,10 @@ fn to_operator_block_expression<'s>(
if let Some(b) = items.get(1)
&& b.left_visible_offset().width_in_spaces != 0
&& let Some(Item::Token(a)) = items.first()
&& let token::Variant::Operator(op) = &a.variant
&& let Some(properties) = &a.operator_properties()
&& properties.can_form_section()
{
let operator = Ok(Token(a.left_offset.clone(), a.code.clone(), *op));
let operator = Ok(Token(a.left_offset.clone(), a.code.clone(), token::variant::Operator()));
let mut items = items.into_iter();
items.next();
let expression = precedence.resolve(items).unwrap();
@ -266,12 +263,7 @@ impl<'s> span::Builder<'s> for OperatorLine<'s> {
// === Block Builder ===
// =====================
/// Builds an AST block type from a sequence of lines.
///
/// Note that the block type is not fully determined at this stage: We apply context information
/// later (see `apply_operator`) to distinguish the two non-operator block types, `BodyBlock` and
/// `ArgumentBlockApplication`. Here we treat every non-operator block as an argument block,
/// because creating a body block involves re-interpreting the expressions in statement context.
/// Builds an argument block or operator block from a sequence of lines.
///
/// The implementation is a state machine. The only top-level transitions are:
/// - `Indeterminate` -> `Operator`
@ -279,61 +271,36 @@ impl<'s> span::Builder<'s> for OperatorLine<'s> {
///
/// The `Operator` state has two substates, and one possible transition:
/// - `body_lines is empty` -> `body_lines is not empty`
#[derive(Debug)]
pub enum Builder<'s> {
#[derive(Debug, Default)]
pub struct Builder<'s> {
state: State,
empty_lines: Vec<token::Newline<'s>>,
operator_lines: Vec<OperatorLine<'s>>,
body_lines: Vec<Line<'s>>,
}
#[derive(Debug, Default)]
enum State {
/// The builder is in an indeterminate state until a non-empty line has been encountered, which
/// would distinguish an operator-block from a non-operator block.
Indeterminate {
/// The `Newline` token introducing the block, and `Newline` tokens for any empty lines
/// that have been encountered.
empty_lines: Vec<token::Newline<'s>>,
},
// `empty_lines` contains the `Newline` token introducing the block, and `Newline` tokens for
// any empty lines that have been encountered.
#[default]
Indeterminate,
/// Building an operator block. If any line doesn't fit the operator-block syntax, that line
/// and all following will be placed in `body_lines`.
Operator {
/// Valid operator-block expressions.
operator_lines: Vec<OperatorLine<'s>>,
/// Any lines violating the expected operator-block syntax.
body_lines: Vec<Line<'s>>,
},
/// Building a non-operator block (either a body block or an argument block).
NonOperator {
/// The block content.
body_lines: Vec<Line<'s>>,
},
// `operator_lines` contains valid operator-block expressions.
// `body_lines` contains any lines violating the expected operator-block syntax.
Operator,
/// Building an argument block.
// `body_lines` contains the block content.
Argument,
}
impl<'s> Builder<'s> {
/// Create a new instance, in initial state.
pub fn new() -> Self {
Self::Indeterminate { empty_lines: default() }
}
/// Create a new instance, in a state appropriate for the given expression.
fn new_with_expression(
empty_lines: impl IntoIterator<Item = token::Newline<'s>>,
newline: token::Newline<'s>,
items: Vec<Item<'s>>,
precedence: &mut operator::Precedence<'s>,
) -> Self {
let empty_lines = empty_lines.into_iter();
let new_lines = 1;
match to_operator_block_expression(items, precedence) {
Ok(expression) => {
let expression = Some(expression);
let mut operator_lines = Vec::with_capacity(empty_lines.size_hint().0 + new_lines);
operator_lines.extend(empty_lines.map(OperatorLine::from));
operator_lines.push(OperatorLine { newline, expression });
Self::Operator { operator_lines, body_lines: default() }
}
Err(expression) => {
let expression = Some(expression);
let mut body_lines = Vec::with_capacity(empty_lines.size_hint().0 + new_lines);
body_lines.extend(empty_lines.map(Line::from));
body_lines.push(Line { newline, expression });
Self::NonOperator { body_lines }
}
}
Self::default()
}
/// Apply a new line to the state.
@ -343,48 +310,52 @@ impl<'s> Builder<'s> {
items: Vec<Item<'s>>,
precedence: &mut operator::Precedence<'s>,
) {
match self {
Builder::Indeterminate { empty_lines } if items.is_empty() => empty_lines.push(newline),
Builder::Indeterminate { empty_lines } =>
*self = Self::new_with_expression(empty_lines.drain(..), newline, items, precedence),
Builder::NonOperator { body_lines, .. } =>
body_lines.push(Line { newline, expression: precedence.resolve(items) }),
Builder::Operator { body_lines, .. } if !body_lines.is_empty() => {
body_lines.push(Line { newline, expression: precedence.resolve(items) });
}
Builder::Operator { operator_lines, body_lines, .. } if !items.is_empty() =>
match to_operator_block_expression(items, precedence) {
match &mut self.state {
State::Indeterminate if items.is_empty() => self.empty_lines.push(newline),
State::Indeterminate => {
self.state = match to_operator_block_expression(items, precedence) {
Ok(expression) => {
let expression = Some(expression);
operator_lines.push(OperatorLine { newline, expression });
self.operator_lines
.push(OperatorLine { newline, expression: Some(expression) });
State::Operator
}
Err(expression) => {
let expression = Some(expression);
body_lines.push(Line { newline, expression })
self.body_lines.push(Line { newline, expression: Some(expression) });
State::Argument
}
},
Builder::Operator { operator_lines, .. } => operator_lines.push(newline.into()),
};
}
State::Argument =>
self.body_lines.push(Line { newline, expression: precedence.resolve(items) }),
State::Operator if !self.body_lines.is_empty() =>
self.body_lines.push(Line { newline, expression: precedence.resolve(items) }),
State::Operator if items.is_empty() => self.operator_lines.push(newline.into()),
State::Operator => match to_operator_block_expression(items, precedence) {
Ok(expression) =>
self.operator_lines.push(OperatorLine { newline, expression: Some(expression) }),
Err(expression) =>
self.body_lines.push(Line { newline, expression: Some(expression) }),
},
}
}
/// Produce an AST node from the state.
pub fn build(self) -> Tree<'s> {
match self {
Builder::Indeterminate { empty_lines } => {
let empty_lines = empty_lines.into_iter();
let lines = empty_lines.map(Line::from).collect();
Tree::argument_block_application(None, lines)
pub fn build(&mut self) -> Tree<'s> {
match self.state {
State::Operator => {
let mut operator_lines =
Vec::with_capacity(self.empty_lines.len() + self.operator_lines.len());
operator_lines.extend(self.empty_lines.drain(..).map(OperatorLine::from));
operator_lines.append(&mut self.operator_lines);
Tree::operator_block_application(None, operator_lines, self.body_lines.split_off(0))
}
State::Argument | State::Indeterminate => {
let mut body_lines =
Vec::with_capacity(self.empty_lines.len() + self.body_lines.len());
body_lines.extend(self.empty_lines.drain(..).map(Line::from));
body_lines.append(&mut self.body_lines);
Tree::argument_block_application(None, body_lines)
}
Builder::Operator { operator_lines, body_lines } =>
Tree::operator_block_application(None, operator_lines, body_lines),
Builder::NonOperator { body_lines } =>
Tree::argument_block_application(None, body_lines),
}
}
}
impl<'s> Default for Builder<'s> {
fn default() -> Self {
Self::new()
}
}

View File

@ -1,3 +1,4 @@
use crate::syntax::tree;
use crate::syntax::Token;
use crate::syntax::Tree;
@ -5,7 +6,7 @@ use crate::syntax::Tree;
mod block;
mod compound_token;
mod consumer;
mod numbers;
mod whitespace;
@ -14,21 +15,21 @@ mod whitespace;
// ===============
pub use block::FlattenBlockTrees;
pub use compound_token::AssembleCompoundTokens;
pub use consumer::Finish;
pub use consumer::ItemConsumer;
pub use consumer::TreeConsumer;
pub use compound_token::CompoundTokens;
pub use numbers::ParseNumbers;
pub use whitespace::PeekSpacing;
pub use whitespace::Spacing;
pub use whitespace::SpacingLookaheadTokenConsumer;
pub use whitespace::SpacingLookaheadTreeConsumer;
// ===================
// === TokenOrTree ===
// ===================
#[allow(missing_docs)]
#[derive(Debug)]
enum TokenOrTree<'s> {
pub enum TokenOrTree<'s> {
Token(Token<'s>),
Tree(Tree<'s>),
}
@ -44,3 +45,12 @@ impl<'s> From<Tree<'s>> for TokenOrTree<'s> {
TokenOrTree::Tree(tree)
}
}
/// Collapse a [`TokenOrTree`] into a [`Tree`].
impl<'s> From<TokenOrTree<'s>> for Tree<'s> {
    fn from(t: TokenOrTree<'s>) -> Self {
        match t {
            // Already a tree: pass it through unchanged.
            TokenOrTree::Tree(node) => node,
            // A bare token is converted to its AST node by `tree::to_ast`.
            TokenOrTree::Token(leaf) => tree::to_ast(leaf),
        }
    }
}

View File

@ -1,9 +1,15 @@
use crate::syntax;
use crate::prelude::*;
use crate::syntax::consumer::Finish;
use crate::syntax::consumer::ItemConsumer;
use crate::syntax::consumer::TokenConsumer;
use crate::syntax::consumer::TreeConsumer;
use crate::syntax::item;
use crate::syntax::operator;
use crate::syntax::treebuilding::consumer::Finish;
use crate::syntax::treebuilding::consumer::ItemConsumer;
use crate::syntax::treebuilding::consumer::TokenConsumer;
use crate::syntax::treebuilding::consumer::TreeConsumer;
use crate::syntax::statement::BodyBlockParser;
use crate::syntax::token::TokenOperatorProperties;
use crate::syntax::tree::block;
use crate::syntax::GroupHierarchyConsumer;
use crate::syntax::Item;
@ -15,30 +21,95 @@ use crate::syntax::Item;
/// Consumes `Item`s and passes their content to a token/tree consumer, using an
/// [`operator::Precedence`] parser to flatten blocks.
#[derive(Debug, Default)]
pub struct FlattenBlockTrees<'s, T> {
inner: T,
pub struct FlattenBlockTrees<'s, Inner> {
/// Consumes child blocks. Stores no semantic state, but is reused for performance.
child: Option<Box<operator::Precedence<'s>>>,
child: Option<Box<operator::Precedence<'s>>>,
block_context: BlockContext,
block_builder: block::Builder<'s>,
block_parser: BodyBlockParser<'s>,
inner: Inner,
}
impl<'s, T: TokenConsumer<'s> + TreeConsumer<'s>> ItemConsumer<'s> for FlattenBlockTrees<'s, T> {
/// Determines how the next `Item::Block` pushed to `FlattenBlockTrees` is parsed. It is updated
/// after every pushed item, based on the item that was just consumed.
#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)]
enum BlockContext {
/// The block is parsed as a body block. This is the initial context, and the context after an
/// operator token whose properties report `rhs_is_expression()`.
#[default]
Body,
/// The block is built as an argument block or an operator block. This is the context after any
/// other token, and after a tree, a group, or another block.
ArgumentOrOperator,
}
impl<'s, Inner> ItemConsumer<'s> for FlattenBlockTrees<'s, Inner>
where Inner: TokenConsumer<'s> + TreeConsumer<'s> + GroupHierarchyConsumer<'s>
{
fn push_item(&mut self, item: Item<'s>) {
match item {
self.block_context = match item {
Item::Block(lines) => {
let mut child = self.child.take().unwrap_or_default();
self.inner.push_tree(syntax::item::build_block(lines, &mut child));
self.inner.push_tree(match self.block_context {
BlockContext::Body =>
self.block_parser.parse_body_block(lines.into_vec(), &mut child),
BlockContext::ArgumentOrOperator => {
for item::Line { newline, items } in lines.into_vec() {
self.block_builder.push(newline, items, &mut child);
}
self.block_builder.build()
}
});
self.child = Some(child);
BlockContext::ArgumentOrOperator
}
Item::Token(token) => self.inner.push_token(token),
Item::Tree(tree) => self.inner.push_tree(tree),
}
Item::Token(token) => {
let properties = token.operator_properties();
self.inner.push_token(token);
match properties {
Some(properties) if properties.rhs_is_expression() => BlockContext::Body,
_ => BlockContext::ArgumentOrOperator,
}
}
Item::Tree(tree) => {
self.inner.push_tree(tree);
BlockContext::ArgumentOrOperator
}
Item::Group(item::Group { open, body, mut close }) => {
self.inner.start_group(open);
let mut stack = vec![];
let mut body = body.into_vec().into_iter();
loop {
while let Some(item) = body.next() {
match item {
Item::Token(token) => self.inner.push_token(token),
Item::Tree(tree) => self.inner.push_tree(tree),
Item::Group(group) => {
self.inner.start_group(group.open);
let outer_body =
mem::replace(&mut body, group.body.into_vec().into_iter());
let outer_close = mem::replace(&mut close, group.close);
stack.push((outer_body, outer_close));
continue;
}
Item::Block(_) => unreachable!(),
}
}
if let Some(close) = close {
self.inner.end_group(close);
}
if let Some((outer_body, outer_close)) = stack.pop() {
body = outer_body;
close = outer_close;
} else {
break;
}
}
BlockContext::ArgumentOrOperator
}
};
}
}
impl<'s, T: Finish> Finish for FlattenBlockTrees<'s, T> {
type Result = T::Result;
impl<'s, Inner: Finish> Finish for FlattenBlockTrees<'s, Inner> {
type Result = Inner::Result;
fn finish(&mut self) -> Self::Result {
self.block_context = default();
self.inner.finish()
}
}

View File

@ -1,11 +1,14 @@
use enso_prelude::*;
use crate::syntax;
use crate::syntax::consumer::Finish;
use crate::syntax::consumer::TokenConsumer;
use crate::syntax::consumer::TreeConsumer;
use crate::syntax::maybe_with_error;
use crate::syntax::token;
use crate::syntax::treebuilding::consumer::Finish;
use crate::syntax::treebuilding::consumer::TokenConsumer;
use crate::syntax::treebuilding::consumer::TreeConsumer;
use crate::syntax::GroupHierarchyConsumer;
use crate::syntax::Token;
use crate::syntax::Tree;
@ -15,88 +18,50 @@ use crate::syntax::Token;
/// Recognizes lexical tokens that are indivisible, and assembles them into trees.
#[derive(Default, Debug)]
pub struct AssembleCompoundTokens<'s, T> {
pub struct CompoundTokens<'s, Inner> {
compounding: Option<CompoundToken<'s>>,
inner: T,
inner: Inner,
}
#[derive(Debug)]
enum CompoundToken<'s> {
TextLiteral(TextLiteralBuilder<'s>),
}
impl<'s, T: TreeConsumer<'s> + TokenConsumer<'s>> TokenConsumer<'s>
for AssembleCompoundTokens<'s, T>
{
fn push_token(&mut self, token: Token<'s>) {
match (&mut self.compounding, token.variant) {
(this @ None, token::Variant::TextStart(variant)) => {
let token = token.with_variant(variant);
*this = Some(CompoundToken::TextLiteral(TextLiteralBuilder {
open: token,
newline: default(),
elements: default(),
}));
impl<'s, Inner: TreeConsumer<'s> + TokenConsumer<'s>> CompoundTokens<'s, Inner> {
fn try_start(&mut self, token: Token<'s>) {
match CompoundToken::start(token) {
StartStep::Start(compounding) => self.compounding = Some(compounding),
StartStep::RejectButStart(compounding, token) => {
self.inner.push_token(token);
self.compounding = Some(compounding)
}
(
Some(CompoundToken::TextLiteral(TextLiteralBuilder {
newline: newline @ None,
..
})),
token::Variant::TextInitialNewline(_),
) => {
let token = token::newline(token.left_offset, token.code);
*newline = Some(token);
}
(
Some(CompoundToken::TextLiteral(TextLiteralBuilder { elements, .. })),
token::Variant::TextSection(variant),
) => {
let token = token.with_variant(variant);
let element = syntax::tree::TextElement::Section { text: token };
elements.push(element);
}
(
Some(CompoundToken::TextLiteral(TextLiteralBuilder { elements, .. })),
token::Variant::TextEscape(variant),
) => {
let token = token.with_variant(variant);
let element = syntax::tree::TextElement::Escape { token };
elements.push(element);
}
(
Some(CompoundToken::TextLiteral(TextLiteralBuilder { elements, .. })),
token::Variant::TextNewline(_),
) => {
let token = token::newline(token.left_offset, token.code);
let element = syntax::tree::TextElement::Newline { newline: token };
elements.push(element);
}
(this @ Some(CompoundToken::TextLiteral(_)), token::Variant::TextEnd(variant)) => {
let builder = match mem::take(this) {
Some(CompoundToken::TextLiteral(builder)) => builder,
_ => unreachable!(),
};
let close = token.with_variant(variant);
self.inner.push_tree(builder.finish(Some(close)));
}
(_, token::Variant::TextStart(_)) => unreachable!(),
(_, token::Variant::TextInitialNewline(_)) => unreachable!(),
(_, token::Variant::TextSection(_)) => unreachable!(),
(_, token::Variant::TextEscape(_)) => unreachable!(),
(_, token::Variant::TextNewline(_)) => unreachable!(),
(_, token::Variant::TextEnd(_)) => unreachable!(),
_ => self.inner.push_token(token),
StartStep::Reject(token) => self.inner.push_token(token),
}
}
}
impl<'s, T: TreeConsumer<'s>> TreeConsumer<'s> for AssembleCompoundTokens<'s, T> {
fn push_tree(&mut self, mut tree: syntax::Tree<'s>) {
impl<'s, Inner: TreeConsumer<'s> + TokenConsumer<'s>> TokenConsumer<'s>
for CompoundTokens<'s, Inner>
{
fn push_token(&mut self, token: Token<'s>) {
if let Some(compounding) = self.compounding.take() {
match compounding.step(token) {
Step::Complete(tree) => self.inner.push_tree(tree),
Step::Accept(compounding) => self.compounding = Some(compounding),
Step::Reject(tree, token) => {
self.inner.push_tree(tree);
self.try_start(token);
}
Step::Return(token) => self.inner.push_token(token),
}
} else {
self.try_start(token);
}
}
}
impl<'s, Inner: TreeConsumer<'s>> TreeConsumer<'s> for CompoundTokens<'s, Inner> {
fn push_tree(&mut self, mut tree: Tree<'s>) {
match (&mut self.compounding, &mut tree.variant) {
(
Some(CompoundToken::TextLiteral(TextLiteralBuilder { elements, .. })),
box syntax::tree::Variant::TextLiteral(syntax::tree::TextLiteral {
syntax::tree::Variant::TextLiteral(box syntax::tree::TextLiteral {
open: None,
newline: None,
elements: rhs_elements,
@ -118,16 +83,16 @@ impl<'s, T: TreeConsumer<'s>> TreeConsumer<'s> for AssembleCompoundTokens<'s, T>
}
}
impl<'s, T: TreeConsumer<'s>> AssembleCompoundTokens<'s, T> {
impl<'s, Inner: TreeConsumer<'s>> CompoundTokens<'s, Inner> {
fn flush(&mut self) {
if let Some(CompoundToken::TextLiteral(builder)) = mem::take(&mut self.compounding) {
self.inner.push_tree(builder.finish(None))
if let Some(tree) = self.compounding.take().and_then(|builder| builder.flush()) {
self.inner.push_tree(tree);
}
}
}
impl<'s, T: TreeConsumer<'s> + Finish> Finish for AssembleCompoundTokens<'s, T> {
type Result = T::Result;
impl<'s, Inner: TreeConsumer<'s> + Finish> Finish for CompoundTokens<'s, Inner> {
type Result = Inner::Result;
fn finish(&mut self) -> Self::Result {
self.flush();
@ -135,8 +100,112 @@ impl<'s, T: TreeConsumer<'s> + Finish> Finish for AssembleCompoundTokens<'s, T>
}
}
/// Forwards group boundaries to the inner consumer, flushing any compound token that is still
/// being assembled before each boundary is passed on.
impl<'s, Inner> GroupHierarchyConsumer<'s> for CompoundTokens<'s, Inner>
where Inner: TreeConsumer<'s> + GroupHierarchyConsumer<'s>
{
fn start_group(&mut self, open: token::OpenSymbol<'s>) {
// Flush first, so a compound token in progress is not carried across the group opening.
self.flush();
self.inner.start_group(open);
}
// === Text literal builder ===
fn end_group(&mut self, close: token::CloseSymbol<'s>) {
// Flush first, so a compound token in progress is not carried across the group closing.
self.flush();
self.inner.end_group(close);
}
}
// ==============================
// === Compound token builder ===
// ==============================
/// Common interface of the state machines that assemble a compound token from a token sequence.
trait CompoundTokenBuilder<'s>: Sized {
/// Offers `token` as the potential first token of a compound token. The returned [`StartStep`]
/// says whether a builder was started and whether the token is handed back.
fn start(token: Token<'s>) -> StartStep<Self, Token<'s>>;
/// Offers the next `token` to a builder in progress, consuming the builder. The returned
/// [`Step`] carries the new builder state, a finished tree, and/or the unconsumed token.
fn step(self, token: Token<'s>) -> Step<Self, Token<'s>, Tree<'s>>;
/// Ends the builder without further input, returning a tree for what was collected so far, if
/// the builder has one to produce.
fn flush(self) -> Option<Tree<'s>>;
}
/// Outcome of offering an input to a builder that has not been started yet.
enum StartStep<State, Input> {
/// A builder was started; the input is not handed back.
Start(State),
/// A builder was started, and the input is also handed back for normal processing.
RejectButStart(State, Input),
/// No builder was started; the input is handed back for normal processing.
Reject(Input),
}
/// Outcome of feeding one more input to a compound-token builder that is in progress.
enum Step<State, Input, Output> {
    /// The input was taken by the builder; continue with the updated state.
    Accept(State),
    /// The builder finished without taking the input: carries the finished output first, then
    /// the input that still has to be processed.
    Reject(Output, Input),
    /// The builder finished; the input is not handed back.
    Complete(Output),
    /// The builder produced no output; the input is handed back.
    Return(Input),
}

impl<State, Input, Output> Step<State, Input, Output> {
    /// Converts the builder state carried by [`Step::Accept`] using `f`.
    ///
    /// All other variants carry no state and are passed through with their payloads unchanged,
    /// so `f` is called at most once.
    fn map_state<State2>(self, f: impl FnOnce(State) -> State2) -> Step<State2, Input, Output> {
        match self {
            Self::Complete(output) => Step::Complete(output),
            // Field order of `Reject` is (output, input).
            Self::Reject(output, input) => Step::Reject(output, input),
            Self::Return(input) => Step::Return(input),
            Self::Accept(state) => Step::Accept(f(state)),
        }
    }
}
/// A compound token that is currently being assembled; each variant wraps the state of one kind
/// of builder.
#[derive(Debug)]
enum CompoundToken<'s> {
/// State of a [`TextLiteralBuilder`].
TextLiteral(TextLiteralBuilder<'s>),
/// State of an [`OperatorIdentifierBuilder`].
OperatorIdentifier(OperatorIdentifierBuilder),
/// State of an [`AutoscopeBuilder`].
Autoscope(AutoscopeBuilder<'s>),
}
impl<State, Input> StartStep<State, Input> {
    /// Converts the builder state with `f` in the variants that carry one (`Start` and
    /// `RejectButStart`); any handed-back input is kept as it is.
    fn map_state<State1>(self, f: impl FnOnce(State) -> State1) -> StartStep<State1, Input> {
        match self {
            Self::Reject(input) => StartStep::Reject(input),
            Self::RejectButStart(state, input) => StartStep::RejectButStart(f(state), input),
            Self::Start(state) => StartStep::Start(f(state)),
        }
    }

    /// Tries the alternative `f` only if nothing has been started yet.
    ///
    /// A plain `Reject` hands its input to `f` and returns whatever `f` produces; a result that
    /// already started a builder is returned unchanged and `f` is not called.
    fn or_else(self, f: impl FnOnce(Input) -> StartStep<State, Input>) -> StartStep<State, Input> {
        match self {
            Self::Reject(input) => f(input),
            started @ (Self::Start(_) | Self::RejectButStart(..)) => started,
        }
    }
}
impl<'s> CompoundTokenBuilder<'s> for CompoundToken<'s> {
fn start(token: Token<'s>) -> StartStep<Self, Token<'s>> {
use CompoundToken::*;
StartStep::Reject(token)
.or_else(|token| TextLiteralBuilder::start(token).map_state(TextLiteral))
.or_else(|token| OperatorIdentifierBuilder::start(token).map_state(OperatorIdentifier))
.or_else(|token| AutoscopeBuilder::start(token).map_state(Autoscope))
}
fn step(self, token: Token<'s>) -> Step<Self, Token<'s>, Tree<'s>> {
use CompoundToken::*;
match self {
TextLiteral(builder) => builder.step(token).map_state(TextLiteral),
OperatorIdentifier(builder) => builder.step(token).map_state(OperatorIdentifier),
Autoscope(builder) => builder.step(token).map_state(Autoscope),
}
}
fn flush(self) -> Option<Tree<'s>> {
use CompoundToken::*;
match self {
TextLiteral(builder) => builder.flush(),
OperatorIdentifier(builder) => builder.flush(),
Autoscope(builder) => builder.flush(),
}
}
}
// =====================
// === Text literals ===
// =====================
#[derive(Debug)]
struct TextLiteralBuilder<'s> {
@ -145,17 +214,158 @@ struct TextLiteralBuilder<'s> {
elements: Vec<syntax::tree::TextElement<'s>>,
}
impl<'s> CompoundTokenBuilder<'s> for TextLiteralBuilder<'s> {
    /// Starts a text literal when `token` is a `TextStart`; any other token is rejected.
    fn start(token: Token<'s>) -> StartStep<Self, Token<'s>> {
        if let token::Variant::TextStart(variant) = token.variant {
            let open = token.with_variant(variant);
            StartStep::Start(Self { open, newline: default(), elements: default() })
        } else {
            StartStep::Reject(token)
        }
    }

    /// Adds one token to the literal.
    ///
    /// An initial newline is stored separately, sections/escapes/newlines are appended as
    /// elements, and a `TextEnd` completes the literal.
    ///
    /// # Panics
    /// Panics (`unreachable!`) on any other token variant.
    fn step(mut self, token: Token<'s>) -> Step<Self, Token<'s>, Tree<'s>> {
        let element = match token.variant {
            token::Variant::TextEnd(variant) =>
                return Step::Complete(self.finish(token.with_variant(variant))),
            token::Variant::TextInitialNewline(_) => {
                self.newline = Some(token::newline(token.left_offset, token.code));
                return Step::Accept(self);
            }
            token::Variant::TextSection(variant) =>
                syntax::tree::TextElement::Section { text: token.with_variant(variant) },
            token::Variant::TextEscape(variant) =>
                syntax::tree::TextElement::Escape { token: token.with_variant(variant) },
            token::Variant::TextNewline(_) => syntax::tree::TextElement::Newline {
                newline: token::newline(token.left_offset, token.code),
            },
            _ => unreachable!(),
        };
        self.elements.push(element);
        Step::Accept(self)
    }

    /// Emits the literal collected so far as a text literal with no closing delimiter.
    fn flush(self) -> Option<Tree<'s>> {
        let unterminated = Tree::text_literal(Some(self.open), self.newline, self.elements, None);
        Some(unterminated)
    }
}
impl<'s> TextLiteralBuilder<'s> {
fn finish(self, close: Option<token::TextEnd<'s>>) -> syntax::Tree<'s> {
fn finish(self, close: token::TextEnd<'s>) -> Tree<'s> {
let Self { open, newline, elements } = self;
if open.code.starts_with('#') {
assert_eq!(newline, None);
let doc = syntax::tree::DocComment { open, elements, newlines: default() };
syntax::Tree::documented(doc, default())
Tree::documented(doc, default())
} else {
let close =
close.and_then(|close| if close.code.is_empty() { None } else { Some(close) });
syntax::Tree::text_literal(Some(open), newline, elements, close)
let close = if close.code.is_empty() { None } else { Some(close) };
Tree::text_literal(Some(open), newline, elements, close)
}
}
}
// ============================
// === Operator-identifiers ===
// ============================
/// Stateless builder entered after a dot operator; it inspects the single token that follows.
#[derive(Debug)]
struct OperatorIdentifierBuilder;

impl<'s> CompoundTokenBuilder<'s> for OperatorIdentifierBuilder {
    /// Starts on a dot operator while still handing the dot itself back to the caller.
    fn start(token: Token<'s>) -> StartStep<Self, Token<'s>> {
        if matches!(token.variant, token::Variant::DotOperator(_)) {
            StartStep::RejectButStart(Self, token)
        } else {
            StartStep::Reject(token)
        }
    }

    /// Hands `token` back in every case. An operator, negation operator or unary operator with
    /// no whitespace before it is first re-tagged with the operator-identifier variant.
    fn step(self, token: Token<'s>) -> Step<Self, Token<'s>, Tree<'s>> {
        let is_operator = matches!(
            token.variant,
            token::Variant::Operator(_)
                | token::Variant::NegationOperator(_)
                | token::Variant::UnaryOperator(_)
        );
        let is_adjacent = token.left_offset.visible.width_in_spaces == 0;
        if is_operator && is_adjacent {
            Step::Return(token.with_variant(token::Variant::operator_ident().into()))
        } else {
            Step::Return(token)
        }
    }

    /// Holds no tokens, so there is never anything to emit.
    fn flush(self) -> Option<Tree<'s>> {
        None
    }
}
// =================
// === Autoscope ===
// =================
#[derive(Debug)]
struct AutoscopeBuilder<'s> {
operator: token::AutoscopeOperator<'s>,
}
impl<'s> CompoundTokenBuilder<'s> for AutoscopeBuilder<'s> {
fn start(token: Token<'s>) -> StartStep<Self, Token<'s>> {
match token.variant {
token::Variant::AutoscopeOperator(variant) => {
let operator = token.with_variant(variant);
StartStep::Start(Self { operator })
}
_ => StartStep::Reject(token),
}
}
fn step(self, token: Token<'s>) -> Step<Self, Token<'s>, Tree<'s>> {
match token.variant {
token::Variant::Ident(ident) if !token.is_spaced() => {
let Self { operator } = self;
let token = token.with_variant(ident);
let error = (!token.variant.is_type).then_some(
"The auto-scope operator may only be applied to a capitalized identifier.",
);
let autoscope_application = Tree::autoscoped_identifier(operator, token);
Step::Complete(maybe_with_error(autoscope_application, error))
}
_ => Step::Reject(self.into_error(), token),
}
}
fn flush(self) -> Option<Tree<'s>> {
Some(self.into_error())
}
}
impl<'s> AutoscopeBuilder<'s> {
fn into_error(self) -> Tree<'s> {
let Self { operator } = self;
token_to_error(operator, "The autoscope operator must be applied to an identifier.")
}
}
// ===============
// === Helpers ===
// ===============
/// Converts `token` to a tree via `syntax::tree::to_ast` and attaches `error` to that tree.
fn token_to_error<'s>(
    token: impl Into<Token<'s>>,
    error: impl Into<Cow<'static, str>>,
) -> Tree<'s> {
    let token: Token<'s> = token.into();
    let tree = syntax::tree::to_ast(token);
    tree.with_error(error)
}

View File

@ -1,23 +0,0 @@
use crate::syntax::Item;
use crate::syntax::Token;
use crate::syntax::Tree;
/// A sink that accepts [`Item`]s one at a time.
pub trait ItemConsumer<'s> {
    /// Passes the next `item` to the consumer.
    fn push_item(&mut self, item: Item<'s>);
}
/// A sink that accepts [`Tree`]s one at a time.
pub trait TreeConsumer<'s> {
/// Passes the next `tree` to the consumer.
fn push_tree(&mut self, tree: Tree<'s>);
}
/// A sink that accepts [`Token`]s one at a time.
pub trait TokenConsumer<'s> {
/// Passes the next `token` to the consumer.
fn push_token(&mut self, token: Token<'s>);
}
/// Implemented by consumers that produce a final result once all input has been pushed.
pub trait Finish {
/// The value produced by [`Finish::finish`].
type Result;
/// Signals the end of input and returns the result. Takes `&mut self`, so the consumer is not
/// consumed by the call.
fn finish(&mut self) -> Self::Result;
}

View File

@ -0,0 +1,178 @@
use crate::prelude::*;
use crate::syntax::token;
use crate::syntax::tree;
use crate::syntax::Finish;
use crate::syntax::GroupHierarchyConsumer;
use crate::syntax::Token;
use crate::syntax::TokenConsumer;
use crate::syntax::Tree;
use crate::syntax::TreeConsumer;
/// Token/tree consumer that assembles number-literal tokens (base, digits, fractional part, and
/// a leading minus) into number trees before passing everything on to `inner`.
#[derive(Debug, Default)]
pub struct ParseNumbers<'s, Inner> {
// Pending pieces of a number that has not been emitted yet.
state: State<'s>,
// Downstream consumer receiving finished trees and all other tokens.
inner: Inner,
}
/// Buffered input of [`ParseNumbers`].
#[derive(Debug, Default)]
struct State<'s> {
// Set to `true` after every pushed token or tree and reset to `false` by `finish`; consulted
// when deciding whether an unspaced `-` can begin a negative literal.
prev_item_in_expression: bool,
// A `-` token held back because it may turn out to negate the number that follows.
negation: Option<Token<'s>>,
// The part of a number literal seen so far.
number: Option<Number<'s>>,
}
/// A number literal that has been started but not yet emitted.
#[derive(Debug)]
enum Number<'s> {
/// A base prefix has been seen; its digits have not arrived yet.
Based { base: token::NumberBase<'s> },
/// Integer digits have been seen, optionally followed by a dot that may introduce fractional
/// digits.
Fractional { digits: token::Digits<'s>, dot: Option<token::DotOperator<'s>> },
}
impl<'s, Inner: TokenConsumer<'s> + TreeConsumer<'s>> TokenConsumer<'s>
for ParseNumbers<'s, Inner>
{
/// Feeds one token through the number-assembly state machine.
///
/// The arms are tried in order, so the specific "continue a pending number" cases come before
/// the general "start a new number" cases. Tokens that match no arm flush the pending state
/// and are forwarded unchanged.
fn push_token(&mut self, token: Token<'s>) {
match (token.variant, &mut self.state) {
// Digits following a base prefix: the based number is complete.
(token::Variant::Digits(variant), State { number: Some(Number::Based { .. }), .. }) => {
let State { negation, number: Some(Number::Based { base }), .. } =
mem::take(&mut self.state)
else {
unreachable!()
};
self.inner.push_tree(maybe_negated(
negation,
Tree::number(Some(base), Some(token.with_variant(variant)), None),
));
}
// Unspaced digits following `digits.`: the fractional number is complete.
(
token::Variant::Digits(variant),
State { number: Some(Number::Fractional { digits: _, dot: Some(_) }), .. },
) if token.left_offset.visible.width_in_spaces == 0 => {
let State {
negation,
number: Some(Number::Fractional { digits, dot: Some(dot) }),
..
} = mem::take(&mut self.state)
else {
unreachable!()
};
self.inner.push_tree(maybe_negated(
negation,
Tree::number(
None,
Some(digits),
Some(tree::FractionalDigits { dot, digits: token.with_variant(variant) }),
),
));
}
// A `-` that is spaced, or that has no preceding item in the expression, may negate a
// following number: emit anything pending and hold the minus back.
(
token::Variant::Operator(_) | token::Variant::NegationOperator(_),
State { prev_item_in_expression, negation, number },
) if (token.is_spaced() || !*prev_item_in_expression) && token.code.repr.0 == "-" => {
if negation.is_some() || number.is_some() {
flush(&mut self.inner, negation, number);
}
self.state.negation = Some(token);
}
// A base prefix starts a new based number. A pending number is emitted first; otherwise,
// if whitespace separates a held-back minus from this token, the minus is forwarded as an
// ordinary operator instead of being applied to the number.
(token::Variant::NumberBase(variant), State { negation, number, .. }) => {
if number.is_some() {
flush(&mut self.inner, negation, number);
} else if token.left_offset.visible.width_in_spaces != 0
&& let Some(minus) = negation.take()
{
self.inner.push_token(minus.with_variant(token::Variant::operator()));
}
*number = Some(Number::Based { base: token.with_variant(variant) })
}
// Digits that did not continue a pending number start a new one; pending number and
// held-back minus are handled as in the base-prefix case above.
(token::Variant::Digits(variant), State { negation, number, .. }) => {
if number.is_some() {
flush(&mut self.inner, negation, number);
} else if token.left_offset.visible.width_in_spaces != 0
&& let Some(minus) = negation.take()
{
self.inner.push_token(minus.with_variant(token::Variant::operator()));
}
*number =
Some(Number::Fractional { digits: token.with_variant(variant), dot: None });
}
// An unspaced dot directly after integer digits is kept, in case fractional digits follow.
(
token::Variant::DotOperator(_),
State { number: Some(Number::Fractional { digits: _, dot: dot @ None }), .. },
) if token.left_offset.visible.width_in_spaces == 0 =>
*dot = Some(token.with_variant(token::variant::DotOperator())),
// Anything else ends number assembly and is forwarded as-is.
_ => {
self.flush();
self.inner.push_token(token)
}
}
self.state.prev_item_in_expression = true;
}
}
impl<'s, Inner: TokenConsumer<'s> + TreeConsumer<'s>> TreeConsumer<'s> for ParseNumbers<'s, Inner> {
/// Emits any pending number/minus, forwards `tree` unchanged, and records that the expression
/// now has a preceding item.
fn push_tree(&mut self, tree: Tree<'s>) {
self.flush();
self.inner.push_tree(tree);
self.state.prev_item_in_expression = true;
}
}
impl<'s, Inner: TokenConsumer<'s> + TreeConsumer<'s>> ParseNumbers<'s, Inner> {
/// Emits the pending number and/or held-back minus to `inner` via the free function `flush`.
/// `prev_item_in_expression` is left as it is.
fn flush(&mut self) {
// Destructure so that `inner` and the state fields can be borrowed mutably side by side.
let State { negation, number, prev_item_in_expression: _ } = &mut self.state;
flush(&mut self.inner, negation, number);
}
}
/// Drains the pending `number` and `negation` into `inner`.
///
/// A pending number is emitted as a number tree (negated when a minus is held), followed by its
/// trailing dot token if it has one. With no pending number, a held minus is forwarded as a
/// plain token. Both options are `None` afterwards.
fn flush<'s, Inner: TokenConsumer<'s> + TreeConsumer<'s>>(
    inner: &mut Inner,
    negation: &mut Option<Token<'s>>,
    number: &mut Option<Number<'s>>,
) {
    let Some(pending) = number.take() else {
        if let Some(minus) = negation.take() {
            inner.push_token(minus);
        }
        return;
    };
    let (literal, trailing_dot) = match pending {
        Number::Based { base } => (Tree::number(Some(base), None, None), None),
        Number::Fractional { digits, dot } => (Tree::number(None, Some(digits), None), dot),
    };
    inner.push_tree(maybe_negated(negation.take(), literal));
    // A dot that was kept for fractional digits which never arrived goes out as its own token.
    if let Some(dot) = trailing_dot {
        inner.push_token(dot.into());
    }
}
/// Wraps `tree` in a unary-operator application of `minus` when a minus token is supplied;
/// otherwise returns `tree` unchanged.
fn maybe_negated<'s>(minus: Option<Token<'s>>, tree: Tree<'s>) -> Tree<'s> {
    let Some(minus) = minus else {
        return tree;
    };
    let operator = minus.with_variant(token::variant::UnaryOperator());
    Tree::unary_opr_app(operator, Some(tree))
}
impl<'s, Inner: TokenConsumer<'s> + TreeConsumer<'s> + Finish> Finish for ParseNumbers<'s, Inner> {
type Result = Inner::Result;
/// Emits any pending number/minus, resets `prev_item_in_expression` so the next expression
/// starts fresh, and returns the inner consumer's result.
fn finish(&mut self) -> Self::Result {
self.flush();
self.state.prev_item_in_expression = false;
self.inner.finish()
}
}
// Group delimiters end number assembly: pending state is emitted before the delimiter is
// forwarded to the inner consumer.
impl<'s, Inner> GroupHierarchyConsumer<'s> for ParseNumbers<'s, Inner>
where Inner: TokenConsumer<'s> + TreeConsumer<'s> + GroupHierarchyConsumer<'s>
{
/// Emits any pending number/minus, then opens the group in the inner consumer.
fn start_group(&mut self, open: token::OpenSymbol<'s>) {
self.flush();
self.inner.start_group(open);
}
/// Emits any pending number/minus, then closes the group in the inner consumer.
fn end_group(&mut self, close: token::CloseSymbol<'s>) {
self.flush();
self.inner.end_group(close);
}
}

View File

@ -1,9 +1,11 @@
use crate::syntax::consumer::Finish;
use crate::syntax::consumer::TokenConsumer;
use crate::syntax::consumer::TreeConsumer;
use crate::syntax::token;
use crate::syntax::tree;
use crate::syntax::treebuilding::consumer::Finish;
use crate::syntax::treebuilding::consumer::TokenConsumer;
use crate::syntax::treebuilding::consumer::TreeConsumer;
use crate::syntax::treebuilding::TokenOrTree;
use crate::syntax::GroupHierarchyConsumer;
use crate::syntax::Item;
use crate::syntax::Token;
use crate::syntax::Tree;
@ -35,25 +37,32 @@ impl Spacing {
true => Spacing::Spaced,
}
}
pub fn of_item(item: &Item) -> Self {
match item {
Item::Token(token) => Spacing::of_token(token),
Item::Tree(tree) => Spacing::of_tree(tree),
Item::Group(group) => Spacing::of_token(&group.open),
Item::Block(_) => Spacing::Spaced,
}
}
}
// Returns `true` for an item if that item should not follow any other item in a no-space group
// (i.e. the item has "space" before it).
fn token_starts_new_no_space_group<'a: 'b, 'b, T: Into<token::Ref<'a, 'b>>>(token: T) -> bool {
let token = token.into();
match &token.data {
token::Variant::Operator(opr) if opr.properties.is_sequence() => true,
_ => token.left_offset.visible.width_in_spaces != 0,
}
token.left_offset.visible.width_in_spaces != 0
|| matches!(token.data, token::Variant::CommaOperator(_))
}
fn tree_starts_new_no_space_group(tree: &Tree) -> bool {
tree.span.left_offset.visible.width_in_spaces != 0
|| matches!(
&tree.variant,
box tree::Variant::BodyBlock(_)
| box tree::Variant::OperatorBlockApplication(_)
| box tree::Variant::ArgumentBlockApplication(_)
tree::Variant::BodyBlock(_)
| tree::Variant::OperatorBlockApplication(_)
| tree::Variant::ArgumentBlockApplication(_)
)
}
@ -72,13 +81,13 @@ pub trait SpacingLookaheadTokenConsumer<'s> {
/// Maintains 1-token whitespace lookahead.
#[derive(Debug, Default)]
pub struct PeekSpacing<'s, T> {
pub struct PeekSpacing<'s, Inner> {
current: Option<TokenOrTree<'s>>,
inner: T,
inner: Inner,
}
impl<'s, T: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s>>
PeekSpacing<'s, T>
impl<'s, Inner: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s>>
PeekSpacing<'s, Inner>
{
fn emit(&mut self, tt: Option<TokenOrTree<'s>>, rhs: Option<Spacing>) {
match tt {
@ -87,22 +96,26 @@ impl<'s, T: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s>
None => {}
}
}
}
impl<'s, T: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s> + Finish> Finish
for PeekSpacing<'s, T>
{
type Result = T::Result;
fn finish(&mut self) -> T::Result {
fn flush(&mut self) {
let last = self.current.take();
self.emit(last, None);
}
}
impl<'s, Inner: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s> + Finish>
Finish for PeekSpacing<'s, Inner>
{
type Result = Inner::Result;
/// Calls `self.flush()` so the buffered item is passed on before the inner consumer is
/// finished, then returns the inner consumer's result.
fn finish(&mut self) -> Inner::Result {
self.flush();
self.inner.finish()
}
}
impl<'s, T: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s>> TokenConsumer<'s>
for PeekSpacing<'s, T>
impl<'s, Inner: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s>>
TokenConsumer<'s> for PeekSpacing<'s, Inner>
{
fn push_token(&mut self, token: Token<'s>) {
let rhs = Spacing::of_token(&token);
@ -111,8 +124,8 @@ impl<'s, T: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s>
}
}
impl<'s, T: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s>> TreeConsumer<'s>
for PeekSpacing<'s, T>
impl<'s, Inner: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s>>
TreeConsumer<'s> for PeekSpacing<'s, Inner>
{
fn push_tree(&mut self, tree: Tree<'s>) {
let rhs = Spacing::of_tree(&tree);
@ -121,8 +134,19 @@ impl<'s, T: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s>
}
}
impl<'s, T: TreeConsumer<'s>> SpacingLookaheadTreeConsumer<'s> for T {
fn push_tree(&mut self, tree: Tree<'s>, _: Option<Spacing>) {
self.push_tree(tree);
impl<'s, Inner> GroupHierarchyConsumer<'s> for PeekSpacing<'s, Inner>
where Inner: GroupHierarchyConsumer<'s>
+ SpacingLookaheadTreeConsumer<'s>
+ SpacingLookaheadTokenConsumer<'s>
{
/// Emits the buffered item, using the spacing of the opening symbol as its right-hand
/// lookahead, then opens the group in the inner consumer.
fn start_group(&mut self, open: token::OpenSymbol<'s>) {
let prev = self.current.take();
self.emit(prev, Spacing::of_token(&open).into());
self.inner.start_group(open);
}
/// Calls `self.flush()` to pass on the buffered item, then closes the group in the inner
/// consumer.
fn end_group(&mut self, close: token::CloseSymbol<'s>) {
self.flush();
self.inner.end_group(close);
}
}

View File

@ -143,10 +143,13 @@ pub struct ColdVec<T> {
impl<T> ColdVec<T> {
pub fn push(&mut self, element: T) {
if self.elements.is_none() {
self.elements = Some(Default::default());
self.elements_mut().push(element);
}
pub fn append(&mut self, other: &mut Self) {
if let Some(other_elements) = other.elements.as_mut() {
self.elements_mut().append(other_elements)
}
self.elements.as_mut().unwrap().push(element);
}
pub fn iter(&self) -> std::slice::Iter<T> {
@ -155,6 +158,13 @@ impl<T> ColdVec<T> {
None => [].iter(),
}
}
/// Returns mutable access to the backing vector, creating an empty one on first use.
fn elements_mut(&mut self) -> &mut Vec<T> {
    // `get_or_insert_with` initializes the `None` case and borrows in one step, so no `unwrap`
    // is needed.
    self.elements.get_or_insert_with(Default::default)
}
}
impl<T: PartialEq<T>> PartialEq<ColdVec<T>> for ColdVec<T> {

View File

@ -40,7 +40,7 @@ add_specs suite_builder = suite_builder.group "List" group_builder->
Test.expect_panic_with (l.any "invalid arg") Type_Error
group_builder.specify "should allow checking if all elements satisfy a predicate with `.all`" <|
all_even = l.all(x -> x % 2 == 0)
all_even = l.all x-> x % 2 == 0
all_less_than_four = l.all (< 4)
all_even . should_be_false
all_less_than_four . should_be_true

View File

@ -107,60 +107,60 @@ add_specs suite_builder =
(10.div 0).should_fail_with Arithmetic_Error
group_builder.specify "should support integral binary literals" <|
lit = 2_01101101
lit = 0b01101101
lit . should_equal 109
group_builder.specify "should support integral hexadecimal literals" <|
lit = 16_6D
lit = 0x6D
lit . should_equal 109
group_builder.specify "should support bitwise and" <|
left = 2_01101101
right = 2_11000100
big_left = 16_17ffffffffffffffa
big_right = 16_17ffffffffffffffc
left.bit_and right . should_equal 2_01000100
left.bit_and big_right . should_equal 2_01101100
big_left.bit_and right . should_equal 2_11000000
big_left.bit_and big_right . should_equal 16_17ffffffffffffff8
left = 0b01101101
right = 0b11000100
big_left = 0x17ffffffffffffffa
big_right = 0x17ffffffffffffffc
left.bit_and right . should_equal 0b01000100
left.bit_and big_right . should_equal 0b01101100
big_left.bit_and right . should_equal 0b11000000
big_left.bit_and big_right . should_equal 0x17ffffffffffffff8
group_builder.specify "should support bitwise or" <|
left = 2_01101101
right = 2_11000100
big_left = 16_17ffffffffffffffa
big_right = 16_17ffffffffffffffc
left.bit_or right . should_equal 2_11101101
left.bit_or big_right . should_equal 16_17ffffffffffffffd
big_left.bit_or right . should_equal 16_17ffffffffffffffe
big_left.bit_or right . should_equal 16_17ffffffffffffffe
left = 0b01101101
right = 0b11000100
big_left = 0x17ffffffffffffffa
big_right = 0x17ffffffffffffffc
left.bit_or right . should_equal 0b11101101
left.bit_or big_right . should_equal 0x17ffffffffffffffd
big_left.bit_or right . should_equal 0x17ffffffffffffffe
big_left.bit_or right . should_equal 0x17ffffffffffffffe
group_builder.specify "should support bitwise exclusive or" <|
left = 2_01101101
right = 2_11000100
big_left = 16_17ffffffffffffffa
big_right = 16_17ffffffffffffffc
left.bit_xor right . should_equal 2_10101001
left.bit_xor big_right . should_equal 16_17fffffffffffff91
big_left.bit_xor right . should_equal 16_17fffffffffffff3e
big_left.bit_xor big_right . should_equal 2_00000110
left = 0b01101101
right = 0b11000100
big_left = 0x17ffffffffffffffa
big_right = 0x17ffffffffffffffc
left.bit_xor right . should_equal 0b10101001
left.bit_xor big_right . should_equal 0x17fffffffffffff91
big_left.bit_xor right . should_equal 0x17fffffffffffff3e
big_left.bit_xor big_right . should_equal 0b00000110
group_builder.specify "should support bitwise negation" <|
bits = 2_01101101
big_bits = 16_17ffffffffffffffa
bits.bit_not . should_equal -2_01101110
bits = 0b01101101
big_bits = 0x17ffffffffffffffa
bits.bit_not . should_equal -0b01101110
bits.bit_not.bit_not . should_equal bits
big_bits.bit_not . should_equal -16_17ffffffffffffffb
big_bits.bit_not . should_equal -0x17ffffffffffffffb
big_bits.bit_not.bit_not . should_equal big_bits
group_builder.specify "should support left bit shifts" <|
positive_bits = 2_01101101
negative_bits = -2_01101101
positive_bits = 0b01101101
negative_bits = -0b01101101
positive_big_bits = almost_max_long_times_three
negative_big_bits = -almost_max_long_times_three
positive_bits.bit_shift_l 2 . should_equal 2_0110110100
positive_bits.bit_shift_l 64 . should_equal 16_6d0000000000000000
positive_bits.bit_shift_l -2 . should_equal 2_011011
positive_bits.bit_shift_l 2 . should_equal 0b0110110100
positive_bits.bit_shift_l 64 . should_equal 0x6d0000000000000000
positive_bits.bit_shift_l -2 . should_equal 0b011011
positive_bits.bit_shift_l -64 . should_equal 0
(positive_bits.bit_shift_l positive_big_bits).should_fail_with Arithmetic_Error
positive_bits.bit_shift_l negative_big_bits . should_equal 0
@ -187,8 +187,8 @@ add_specs suite_builder =
negative_big_bits.bit_shift_l negative_big_bits . should_equal -1
group_builder.specify "should support right bit shifts, preserving sign" <|
positive_bits = 2_01101101
negative_bits = -2_01101101
positive_bits = 0b01101101
negative_bits = -0b01101101
positive_big_bits = almost_max_long_times_three
negative_big_bits = -almost_max_long_times_three

View File

@ -525,7 +525,7 @@ add_specs suite_builder =
suite_builder.group "caching" group_builder->
group_builder.specify "Replacer cache drops old values" <|
pattern = Regex.compile('([a-c])')
pattern = Regex.compile '([a-c])'
# Add enough values to flush out the first values.
0.up_to get_lru_size+1 . map i->

View File

@ -552,7 +552,7 @@ js_date year month=1 day=1 =
js_array_date year month=1 day=1 =
arr = Panic.catch Any (js_array_dateCreate year month day) (err -> Error.throw (Time_Error.Error err.payload))
js_set_zone arr.at(0)
js_set_zone (arr.at 0)
java_parse date_text pattern=Nothing =
Panic.catch Any handler=(err -> Error.throw (Time_Error.Error err.payload.getMessage)) <|

View File

@ -812,7 +812,7 @@ js_parse text format=Date_Time_Formatter.default_enso_zoned_date_time =
js_array_datetime year month=1 day=1 hour=0 minute=0 second=0 nanosecond=0 zone=Time_Zone.system =
arr = Panic.catch Any (js_array_datetimeCreate year month day hour minute second nanosecond) (err -> Error.throw (Time_Error.Error err.payload))
js_set_zone arr.at(0) zone
js_set_zone (arr.at 0) zone
foreign js js_array_datetimeCreate year month day hour minute second nanosecond = """
if (month > 12 || month < 1) {
@ -852,7 +852,7 @@ java_parse date_text_raw pattern=Nothing =
utc_replaced = date_text_raw.replace "[UTC]" "Z"
date_text = if utc_replaced.ends_with "ZZ" then date_text_raw else utc_replaced
if pattern == Nothing then Panic.catch Any (maybe_parse_java_zoned date_text) (cause -> parse_java_local cause.payload date_text pattern) else
formatter = DateTimeFormatter.ofPattern(pattern)
formatter = DateTimeFormatter.ofPattern pattern
Panic.catch Any (maybe_parse_java_zoned date_text formatter) (cause -> parse_java_local cause.payload date_text pattern)
main filter=Nothing =

View File

@ -439,13 +439,13 @@ add_specs suite_builder = suite_builder.group "Dataflow Warnings" group_builder-
Warning.get_all result_4 . map (x-> x.value.to_text) . should_equal ["Baz!", "Baz!", "Baz!"]
group_builder.specify "should only report the first 100 unique warnings" <|
vec = (0.up_to 500).map(e -> Warning.attach "Foo!" e)
vec_plus_1 = vec.map(e -> e+1)
vec = (0.up_to 500).map e-> Warning.attach "Foo!" e
vec_plus_1 = vec.map e-> e+1
Warning.get_all vec_plus_1 . length . should_equal 100
Warning.limit_reached vec . should_equal True
warn = Warning.attach "Boo!" 42
vec_2 = (0.up_to 500).map(e -> if (e < 30) then Warning.attach "Foo!" e else (warn + e))
vec_2 = (0.up_to 500).map e-> if e<30 then Warning.attach "Foo!" e else warn+e
Warning.get_all vec_2 . length . should_equal 31
Warning.limit_reached vec_2 . should_equal False

View File

@ -55,7 +55,7 @@ type Int_Comparator
compare a:Int b:Int = Ordering.compare a.v b.v
hash i:Int = 3721 + Ordering.hash i.v
Comparable.from(that:Int) = Comparable.new that Int_Comparator
Comparable.from that:Int = Comparable.new that Int_Comparator
# The Benchmarks ==============================================================

View File

@ -198,13 +198,13 @@ add_specs suite_builder detailed setup =
expression_test "#2020-12-23 12:34:56Z[UTC]#" (Date_Time.new 2020 12 23 12 34 56 zone=Time_Zone.utc)
expression_test "#2020-12-23 12:34:56+02:30[UTC]#" (Date_Time.new 2020 12 23 10 04 56 zone=Time_Zone.utc)
expression_test "#2020-12-23 12:34:56.157+01[UTC]#" (Date_Time.new 2020 12 23 11 34 56 157 zone=Time_Zone.utc)
expression_test "#2020-12-23T12:34[Europe/Warsaw]#" (Date_Time.new 2020 12 23 12 34 zone=Time_Zone.parse("Europe/Warsaw"))
expression_test "#2020-12-23T12:34[Europe/Warsaw]#" (Date_Time.new 2020 12 23 12 34 zone=(Time_Zone.parse "Europe/Warsaw"))
group_builder.specify "should correctly handle timezones" pending=pending_datetime <|
## We cannot just test equality as the Database may change the timezone,
so all we can do is check that the values are accepted and can be compared
with other values in the database.
t = table_builder [["X", [Date_Time.new 2020 12 23 12 34 56 zone=Time_Zone.utc, Date_Time.new 2010 1 2 12 34 56 zone=Time_Zone.parse("Europe/Warsaw")]]]
t = table_builder [["X", [Date_Time.new 2020 12 23 12 34 56 zone=Time_Zone.utc, Date_Time.new 2010 1 2 12 34 56 zone=(Time_Zone.parse "Europe/Warsaw")]]]
c1 = t.evaluate_expression "#2020-12-23 12:34:56Z[UTC]# == [X]"
c1.to_vector . should_equal [True, False]
c2 = t.evaluate_expression "#2010-01-02 12:34:56[Europe/Warsaw]# == [X]"