From 4cff789b696e0cc84573b2bbc3129e5c58a40a71 Mon Sep 17 00:00:00 2001 From: Kaz Wesley Date: Thu, 18 Jul 2024 08:48:20 -0700 Subject: [PATCH] Refactor precedence for whitespace changes (#10569) Single-phase whitespace-aware precedence resolution. #### Performance ![newplot(4)](https://github.com/user-attachments/assets/9822b0dc-17c3-4d2d-adf7-eb8b1c240522) Since this is a major refactor of the core of the parser, I benchmarked it; it's about 3% faster. # Important Notes - Move operator-identifier recognition to lexer. - Move compound-token assembly out of precedence resolver --- app/gui2/shared/ast/tree.ts | 16 +- .../org/enso/compiler/ErrorCompilerTest.java | 7 +- lib/rust/parser/debug/tests/parse.rs | 91 ++- lib/rust/parser/src/lexer.rs | 88 ++- lib/rust/parser/src/macros/built_in.rs | 2 +- lib/rust/parser/src/syntax.rs | 4 + lib/rust/parser/src/syntax/operator.rs | 646 ++++-------------- .../parser/src/syntax/operator/application.rs | 79 +++ lib/rust/parser/src/syntax/operator/arity.rs | 213 ++++++ .../parser/src/syntax/operator/operand.rs | 87 +++ .../parser/src/syntax/operator/reducer.rs | 113 +++ lib/rust/parser/src/syntax/operator/types.rs | 73 ++ lib/rust/parser/src/syntax/token.rs | 10 + lib/rust/parser/src/syntax/tree.rs | 55 +- lib/rust/parser/src/syntax/treebuilding.rs | 46 ++ .../parser/src/syntax/treebuilding/block.rs | 44 ++ .../src/syntax/treebuilding/compound_token.rs | 161 +++++ .../src/syntax/treebuilding/consumer.rs | 23 + .../src/syntax/treebuilding/whitespace.rs | 128 ++++ 19 files changed, 1248 insertions(+), 638 deletions(-) create mode 100644 lib/rust/parser/src/syntax/operator/application.rs create mode 100644 lib/rust/parser/src/syntax/operator/arity.rs create mode 100644 lib/rust/parser/src/syntax/operator/operand.rs create mode 100644 lib/rust/parser/src/syntax/operator/reducer.rs create mode 100644 lib/rust/parser/src/syntax/operator/types.rs create mode 100644 lib/rust/parser/src/syntax/treebuilding.rs create mode 100644 lib/rust/parser/src/syntax/treebuilding/block.rs create mode 100644 lib/rust/parser/src/syntax/treebuilding/compound_token.rs create mode 100644 lib/rust/parser/src/syntax/treebuilding/consumer.rs create mode 100644 lib/rust/parser/src/syntax/treebuilding/whitespace.rs diff --git a/app/gui2/shared/ast/tree.ts b/app/gui2/shared/ast/tree.ts index 1f90f1b84c3..429b4c7eb72 100644 --- a/app/gui2/shared/ast/tree.ts +++ b/app/gui2/shared/ast/tree.ts @@ -668,20 +668,12 @@ export class App extends Ast { : ensureSpaced(nameSpecification.name, verbatim) yield ensureSpacedOnlyIf(nameSpecification.equals, spacedEquals, verbatim) } - yield ensureSpacedOnlyIf(argument, !nameSpecification || spacedEquals, verbatim) + // Some syntax trees, including many error conditions, involve unspaced applications. + // If a parsed input lacked a space before the argument, reproduce it as-is. + const verbatimArgument = true + yield ensureSpacedOnlyIf(argument, !nameSpecification || spacedEquals, verbatimArgument) if (useParens) yield preferUnspaced(parens.close) } - - printSubtree( - info: SpanMap, - offset: number, - parentIndent: string | undefined, - verbatim?: boolean, - ): string { - const verbatim_ = - verbatim ?? (this.function instanceof Invalid || this.argument instanceof Invalid) - return super.printSubtree(info, offset, parentIndent, verbatim_) - } } function ensureSpacedOnlyIf( child: NodeChild, diff --git a/engine/runtime-integration-tests/src/test/java/org/enso/compiler/ErrorCompilerTest.java b/engine/runtime-integration-tests/src/test/java/org/enso/compiler/ErrorCompilerTest.java index c3dacedf565..2fd86a4d032 100644 --- a/engine/runtime-integration-tests/src/test/java/org/enso/compiler/ErrorCompilerTest.java +++ b/engine/runtime-integration-tests/src/test/java/org/enso/compiler/ErrorCompilerTest.java @@ -52,12 +52,7 @@ public class ErrorCompilerTest extends CompilerTest { main = Date.new day=- """); - assertSingleSyntaxError( - ir, - new Syntax.UnsupportedSyntax("Strange unary -"), - "Syntax is not supported yet: Strange unary -", - 51, - 52); + assertSingleSyntaxError(ir, Syntax.UnrecognizedToken$.MODULE$, "Unrecognized token", 51, 52); } @Test diff --git a/lib/rust/parser/debug/tests/parse.rs b/lib/rust/parser/debug/tests/parse.rs index b9d7e0489ee..04fa4d63b4c 100644 --- a/lib/rust/parser/debug/tests/parse.rs +++ b/lib/rust/parser/debug/tests/parse.rs @@ -343,6 +343,9 @@ fn type_methods() { "=" (BodyBlock #((Ident self)))))) ]; test(&code.join("\n"), expected); + test!("[foo., bar.]", + (Array (OprSectionBoundary 1 (OprApp (Ident foo) (Ok ".") ())) + #(("," (OprSectionBoundary 1 (OprApp (Ident bar) (Ok ".") ())))))); } #[test] @@ -365,6 +368,22 @@ fn type_operator_methods() { (Function (OprApp (Ident Foo) (Ok ".") (Ident #"+")) #((() (Ident self) () ()) (() (Ident b) () ())) () "=" (Ident b))))]; test(&code.join("\n"), expected); + test!("Any.==", (OprApp (Ident Any) (Ok ".") (Ident #"=="))); + expect_invalid_node("x.-y"); + expect_invalid_node("x.-1"); + expect_invalid_node("x.+y"); + expect_invalid_node("x.+1"); + expect_invalid_node("x.+'a'"); + // Compile-time operators are never operator-identifiers. + test!("x.~y", (OprApp (Ident x) (Ok ".") (UnaryOprApp "~" (Ident y)))); + test!("x.~1", (OprApp (Ident x) (Ok ".") (UnaryOprApp "~" (Number () "1" ())))); +} + +#[test] +fn unspaced_app() { + test!("js_set_zone arr.at(0)", (App (Ident js_set_zone) + (App (OprApp (Ident arr) (Ok ".") (Ident at)) + (Group (Number () "0" ()))))); } #[test] @@ -727,16 +746,13 @@ fn first_line_indented() { #[test] fn multiple_operator_error() { - let code = ["x + + x"]; - let expected = block![ - (OprApp (Ident x) (Err (#("+" "+"))) (Ident x)) - ]; - test(&code.join("\n"), expected); - let code = ["x + + + x"]; - let expected = block![ - (OprApp (Ident x) (Err (#("+" "+" "+"))) (Ident x)) - ]; - test(&code.join("\n"), expected); + expect_multiple_operator_error("x + + x"); + expect_multiple_operator_error("x + + + x"); + expect_multiple_operator_error("x + +"); + expect_multiple_operator_error("+ + x"); + expect_multiple_operator_error("+ +"); + expect_multiple_operator_error("+ -"); + expect_multiple_operator_error("x + -"); } #[test] @@ -779,12 +795,9 @@ fn pipeline_operators() { #[test] fn accessor_operator() { // Test that the accessor operator `.` is treated like any other operator. - let cases = [ - ("Console.", block![(OprSectionBoundary 1 (OprApp (Ident Console) (Ok ".") ()))]), - (".", block![(OprSectionBoundary 2 (OprApp () (Ok ".") ()))]), - (".log", block![(OprSectionBoundary 1 (OprApp () (Ok ".") (Ident log)))]), - ]; - cases.into_iter().for_each(|(code, expected)| test(code, expected)); + test!("Console.", (OprSectionBoundary 1 (OprApp (Ident Console) (Ok ".") ()))); + test!(".", (OprSectionBoundary 2 (OprApp () (Ok ".") ()))); + test!(".log", (OprSectionBoundary 1 (OprApp () (Ok ".") (Ident log)))); } #[test] @@ -808,6 +821,21 @@ fn operator_sections() { test("increment = 1 +", block![ (Assignment (Ident increment) "=" (OprSectionBoundary 1 (OprApp (Number () "1" ()) (Ok "+") ())))]); + test!("1+ << 2*", + (OprSectionBoundary 1 + (OprApp (OprApp (Number () "1" ()) (Ok "+") ()) + (Ok "<<") + (OprSectionBoundary 1 (OprApp (Number () "2" ()) (Ok "*") ()))))); + test!("+1 << *2", + (OprSectionBoundary 1 + (OprApp (OprApp () (Ok "+") (Number () "1" ())) + (Ok "<<") + (OprSectionBoundary 1 (OprApp () (Ok "*") (Number () "2" ())))))); + test!("+1+1 << *2*2", + (OprSectionBoundary 1 + (OprApp (OprApp (OprApp () (Ok "+") (Number () "1" ())) (Ok "+") (Number () "1" ())) + (Ok "<<") + (OprSectionBoundary 1 (OprApp (OprApp () (Ok "*") (Number () "2" ())) (Ok "*") (Number () "2" ())))))); } #[test] @@ -873,13 +901,8 @@ fn unspaced_operator_sequence() { #[test] fn minus_binary() { - let cases = [ - ("x - x", block![(OprApp (Ident x) (Ok "-") (Ident x))]), - ("x-x", block![(OprApp (Ident x) (Ok "-") (Ident x))]), - ("x.-y", block![(OprApp (Ident x) (Ok ".") (UnaryOprApp "-" (Ident y)))]), - ("x.~y", block![(OprApp (Ident x) (Ok ".") (UnaryOprApp "~" (Ident y)))]), - ]; - cases.into_iter().for_each(|(code, expected)| test(code, expected)); + test!("x - x", (OprApp (Ident x) (Ok "-") (Ident x))); + test!("x-x", (OprApp (Ident x) (Ok "-") (Ident x))); } #[test] @@ -939,6 +962,8 @@ fn autoscope_operator() { expect_invalid_node("x = f(.. ..)"); expect_invalid_node("x = f(.. *)"); expect_invalid_node("x = f(.. True)"); + expect_invalid_node("x = True.."); + expect_invalid_node("x = True..True"); expect_multiple_operator_error("x = .."); expect_multiple_operator_error("x = .. True"); expect_multiple_operator_error("x : .. True"); @@ -1231,6 +1256,7 @@ fn old_lambdas() { test("x -> y", block![(OprApp (Ident x) (Ok "->") (Ident y))]); test("x->y", block![(OprApp (Ident x) (Ok "->") (Ident y))]); test("x-> y", block![(OprApp (Ident x) (Ok "->") (Ident y))]); + test("x-> x + y", block![(OprApp (Ident x) (Ok "->") (OprApp (Ident x) (Ok "+") (Ident y)))]); test("x->\n y", block![(OprApp (Ident x) (Ok "->") (BodyBlock #((Ident y))))]); test("x ->\n y", block![(OprApp (Ident x) (Ok "->") (BodyBlock #((Ident y))))]); test("f x->\n y", block![ @@ -1815,9 +1841,8 @@ struct Errors { } impl Errors { - fn collect(code: &str) -> Self { - let ast = parse(code); - expect_tree_representing_code(code, &ast); + fn collect(ast: &enso_parser::syntax::Tree, code: &str) -> Self { + expect_tree_representing_code(code, ast); let errors = core::cell::Cell::new(Errors::default()); ast.visit_trees(|tree| match &*tree.variant { enso_parser::syntax::tree::Variant::Invalid(_) => { @@ -1834,18 +1859,22 @@ impl Errors { /// Checks that an input contains an `Invalid` node somewhere. fn expect_invalid_node(code: &str) { - let errors = Errors::collect(code); - assert!(errors.invalid_node, "{:?}", enso_parser::Parser::new().run(code)); + let ast = enso_parser::Parser::new().run(code); + let errors = Errors::collect(&ast, code); + assert!(errors.invalid_node, "{}", to_s_expr(&ast, code)); } /// Checks that an input contains a multiple-operator error somewhere. fn expect_multiple_operator_error(code: &str) { - let errors = Errors::collect(code); - assert!(errors.multiple_operator, "{:?}", enso_parser::Parser::new().run(code)); + let ast = enso_parser::Parser::new().run(code); + let errors = Errors::collect(&ast, code); + assert!(errors.multiple_operator || errors.invalid_node, "{}", to_s_expr(&ast, code)); + assert!(errors.multiple_operator, "{:?}", ast); } /// Check that the input can be parsed, and doesn't yield any `Invalid` nodes. fn expect_valid(code: &str) { - let errors = Errors::collect(code); + let ast = enso_parser::Parser::new().run(code); + let errors = Errors::collect(&ast, code); assert!(!errors.invalid_node); } diff --git a/lib/rust/parser/src/lexer.rs b/lib/rust/parser/src/lexer.rs index 70fc1875adf..c6d7388e7c0 100644 --- a/lib/rust/parser/src/lexer.rs +++ b/lib/rust/parser/src/lexer.rs @@ -44,7 +44,7 @@ trait Pattern { impl bool> Pattern for T { #[inline(always)] fn match_pattern(&mut self, input: char) -> bool { - (self)(input) + self(input) } } @@ -236,6 +236,12 @@ impl<'s> Lexer<'s> { self.output.push(token); } + /// Push the [`tokens`] to the result stream. + #[inline(always)] + fn submit_tokens>>(&mut self, tokens: T) { + self.output.extend(tokens); + } + /// Start a new block. #[inline(always)] fn start_block(&mut self, new_indent: VisibleOffset) { @@ -600,6 +606,9 @@ impl<'s> Lexer<'s> { this.take_while_1(is_ident_char); } }) { + if token.left_offset.is_empty() { + self.unspaced_term(); + } let tp = token::Variant::new_ident_or_wildcard_unchecked(&token.code); let token = token.with_variant(tp); self.submit_token(token); @@ -672,6 +681,17 @@ impl<'s> Lexer<'s> { let token = token.with_variant(token::Variant::operator(opr)); self.submit_token(token); } + // Operator-identifiers. + _ if self.prev_token_is_dot_operator() => { + let properties = analyze_operator(&token.code); + if properties.is_compile_time_operation() { + self.submit_token(token.with_variant(token::Variant::operator(properties))); + } else { + self.submit_token( + token.with_variant(token::Variant::operator_ident().into()), + ); + } + } // The unary-negation operator binds tighter to numeric literals than other // expressions. "-" if self.last_spaces_visible_offset.width_in_spaces == 0 @@ -693,6 +713,28 @@ impl<'s> Lexer<'s> { } } } + + fn prev_token_is_dot_operator(&self) -> bool { + match self.output.last() { + Some(Token { variant: token::Variant::Operator(operator), .. }) => + operator.properties.is_dot(), + _ => false, + } + } + + fn unspaced_term(&mut self) { + if let Some(Token { + variant: + variant @ token::Variant::Ident(token::variant::Ident { + is_operator_lexically: true, + .. + }), + .. + }) = self.output.last_mut() + { + *variant = token::Variant::invalid(); + } + } } @@ -881,6 +923,9 @@ impl<'s> Lexer<'s> { } }); if let Some(token) = token { + if token.left_offset.is_empty() { + self.unspaced_term(); + } if let Some(base) = base { self.submit_token(token.with_variant(token::Variant::number_base())); let after_base = self.current_offset; @@ -933,6 +978,9 @@ impl<'s> Lexer<'s> { } _ => return, }; + if self.last_spaces_visible_offset == VisibleOffset(0) { + self.unspaced_term(); + } let indent = self.current_block_indent; let open_quote_start = self.mark(); self.take_next(); @@ -963,17 +1011,17 @@ impl<'s> Lexer<'s> { close_quote_start.clone(), token::Variant::text_start(), ); - self.output.push(token); + self.submit_token(token); let token = self.make_token(close_quote_start, close_quote_end, token::Variant::text_end()); - self.output.push(token); + self.submit_token(token); } } else { // One quote followed by non-quote character: Inline quote. let open_quote_end = self.mark_without_whitespace(); let token = self.make_token(open_quote_start, open_quote_end, token::Variant::text_start()); - self.output.push(token); + self.submit_token(token); self.inline_quote(quote_char, text_type); } self.spaces_after_lexeme(); @@ -987,12 +1035,12 @@ impl<'s> Lexer<'s> { ) { let open_quote_end = self.mark_without_whitespace(); let token = self.make_token(open_quote_start, open_quote_end, token::Variant::text_start()); - self.output.push(token); + self.submit_token(token); let mut initial_indent = None; if text_type.expects_initial_newline() && let Some(newline) = self.line_break() { - self.output.push(newline.with_variant(token::Variant::text_initial_newline())); + self.submit_token(newline.with_variant(token::Variant::text_initial_newline())); if self.last_spaces_visible_offset > block_indent { initial_indent = self.last_spaces_visible_offset.into(); } @@ -1014,7 +1062,7 @@ impl<'s> Lexer<'s> { let splice_quote_end = self.mark_without_whitespace(); let token = self.make_token(splice_quote_start, splice_quote_end, token::Variant::close_symbol()); - self.output.push(token); + self.submit_token(token); match state { State::InlineText => self.inline_quote('\'', TextType::Interpolated), State::MultilineText { .. } => { @@ -1061,8 +1109,8 @@ impl<'s> Lexer<'s> { ); // If `token.code.is_empty()`, we ignore the `token.left_offset` here even if // it is non-empty, because it will be attached to the newline token. - if !(token.code.is_empty()) { - self.output.push(token); + if !token.code.is_empty() { + self.submit_token(token); } else { before_newline = text_start; } @@ -1097,9 +1145,9 @@ impl<'s> Lexer<'s> { let offset = Offset(VisibleOffset(0), location.clone()); Token(offset, location, token::Variant::text_end()) }; - self.output.push(text_end); + self.submit_token(text_end); self.end_blocks(indent, newlines.first().as_ref().unwrap()); - self.output.extend(newlines); + self.submit_tokens(newlines); if self.current_offset == text_start.location { self.last_spaces_visible_offset = text_start.offset.visible; self.last_spaces_offset = text_start.offset.code.range().start; @@ -1109,7 +1157,7 @@ impl<'s> Lexer<'s> { let newlines = newlines .into_iter() .map(|token| token.with_variant(token::Variant::text_newline())); - self.output.extend(newlines); + self.submit_tokens(newlines); continue; } } @@ -1125,7 +1173,7 @@ impl<'s> Lexer<'s> { if token.code.is_empty() { backslash_start = text_start.clone(); } else { - self.output.push(token); + self.submit_token(token); } self.last_spaces_offset = self.current_offset; text_start = self.text_escape(backslash_start, char); @@ -1144,7 +1192,7 @@ impl<'s> Lexer<'s> { if token.code.is_empty() { splice_quote_start = text_start; } else { - self.output.push(token); + self.submit_token(token); } self.take_next(); let splice_quote_end = self.mark_without_whitespace(); @@ -1153,7 +1201,7 @@ impl<'s> Lexer<'s> { splice_quote_end, token::Variant::open_symbol(), ); - self.output.push(token); + self.submit_token(token); self.stack.push(state); self.last_spaces_offset = self.current_offset; return TextEndedAt::Splice; @@ -1163,7 +1211,7 @@ impl<'s> Lexer<'s> { let text_end = self.mark_without_whitespace(); let token = self.make_token(text_start, text_end.clone(), token::Variant::text_section()); if !(token.code.is_empty() && token.left_offset.code.is_empty()) { - self.output.push(token); + self.submit_token(token); } let end_token = if self.current_char == closing_char { self.take_next(); @@ -1175,7 +1223,7 @@ impl<'s> Lexer<'s> { Code::empty(self.current_offset), )) }; - self.output.push(end_token); + self.submit_token(end_token); TextEndedAt::End } @@ -1213,7 +1261,7 @@ impl<'s> Lexer<'s> { sequence_end.clone(), token::Variant::text_escape(value.map(Codepoint::from_u32).unwrap_or_default()), ); - self.output.push(token); + self.submit_token(token); sequence_end } else { let value = match char { @@ -1239,7 +1287,7 @@ impl<'s> Lexer<'s> { escape_end.clone(), token::Variant::text_escape(value.map(Codepoint::from_char).unwrap_or_default()), ); - self.output.push(token); + self.submit_token(token); escape_end } } @@ -1486,7 +1534,7 @@ pub fn run(input: &'_ str) -> ParseResult>> { // === Tests === // ============= -/// Test utils for fast mock tokens creation. +/// Test utils for fast mock token creation. pub mod test { use super::*; pub use token::*; diff --git a/lib/rust/parser/src/macros/built_in.rs b/lib/rust/parser/src/macros/built_in.rs index f06804ac61b..1ae7172d343 100644 --- a/lib/rust/parser/src/macros/built_in.rs +++ b/lib/rust/parser/src/macros/built_in.rs @@ -726,7 +726,7 @@ fn splice_body<'s>( let expression = segment.result.tokens(); let expression = precedence.resolve(expression); let splice = syntax::tree::TextElement::Splice { open, expression, close }; - syntax::Tree::text_literal(default(), default(), vec![splice], default(), default()) + syntax::Tree::text_literal(default(), default(), vec![splice], default()) } fn foreign<'s>() -> Definition<'s> { diff --git a/lib/rust/parser/src/syntax.rs b/lib/rust/parser/src/syntax.rs index aee1b6ecff9..163343a1d3e 100644 --- a/lib/rust/parser/src/syntax.rs +++ b/lib/rust/parser/src/syntax.rs @@ -11,6 +11,10 @@ pub mod operator; pub mod token; pub mod tree; + + +mod treebuilding; + pub use item::Item; pub use token::Token; pub use tree::Tree; diff --git a/lib/rust/parser/src/syntax/operator.rs b/lib/rust/parser/src/syntax/operator.rs index dc4d1f4d20e..79b6f37e84a 100644 --- a/lib/rust/parser/src/syntax/operator.rs +++ b/lib/rust/parser/src/syntax/operator.rs @@ -1,11 +1,29 @@ //! Operator related functionalities. + + +mod application; +mod arity; +mod operand; +mod reducer; +mod types; + use crate::prelude::*; use crate::syntax; +use crate::syntax::operator::application::InsertApps; +use crate::syntax::operator::arity::ClassifyArity; +use crate::syntax::operator::operand::Operand; +use crate::syntax::operator::reducer::Reduce; +use crate::syntax::operator::types::Arity; +use crate::syntax::operator::types::BinaryOperand; +use crate::syntax::operator::types::ModifiedPrecedence; +use crate::syntax::operator::types::Operator; use crate::syntax::token; -use crate::syntax::token::Token; - +use crate::syntax::treebuilding; +use crate::syntax::treebuilding::Finish; +use crate::syntax::treebuilding::ItemConsumer; +use crate::syntax::Tree; // ================== @@ -13,28 +31,28 @@ use crate::syntax::token::Token; // ================== /// Operator precedence resolver. -#[derive(Debug)] +#[derive(Debug, Default)] pub struct Precedence<'s> { - nospace_builder: ExpressionBuilder<'s>, - builder: ExpressionBuilder<'s>, - /// Parses child blocks. Stores no semantic state, but is reused for performance. - child: Option>>, -} - -impl<'s> Default for Precedence<'s> { - fn default() -> Self { - Self::new() - } + #[rustfmt::skip] + resolver: + // Items -> Tokens/Trees + treebuilding::FlattenBlockTrees<'s, + // Tokens/Trees -> Tokens/Trees (proper tokens only) + treebuilding::AssembleCompoundTokens<'s, + // Tokens/Trees -> Tokens/Trees + Spacing-lookahead + treebuilding::PeekSpacing<'s, + // Tokens/Trees + Spacing-lookahead -> Operators/Operands + ClassifyArity<'s, + // Operators/Operands -> Operators/Operands (balanced) + InsertApps< + // Operators/Operands -> Tree + Reduce<'s>>>>>>, } impl<'s> Precedence<'s> { /// Return a new operator precedence resolver. pub fn new() -> Self { - Self { - nospace_builder: ExpressionBuilder { nospace: true, ..default() }, - builder: ExpressionBuilder { nospace: false, ..default() }, - child: default(), - } + Self::default() } /// Resolve precedence in a context where the result cannot be an operator section or template @@ -42,50 +60,28 @@ impl<'s> Precedence<'s> { pub fn resolve_non_section( &mut self, items: impl IntoIterator>, - ) -> Option> { + ) -> Option> { items.into_iter().for_each(|i| self.push(i)); - self.finish_().map(|op| op.value) + self.resolver.finish().map(|op| op.value) } /// Resolve precedence. pub fn resolve( &mut self, items: impl IntoIterator>, - ) -> Option> { - items.into_iter().for_each(|i| self.push(i)); + ) -> Option> { + self.extend(items); self.finish() } /// Extend the expression with a token. pub fn push(&mut self, item: syntax::Item<'s>) { - if starts_new_no_space_group(&item) { - self.builder.extend_from(&mut self.nospace_builder); - } - match item { - syntax::Item::Token(Token { - variant: token::Variant::Operator(opr), - left_offset, - code, - }) => self.nospace_builder.operator(Token(left_offset, code, opr)), - syntax::Item::Token(token) => - self.nospace_builder.operand(syntax::tree::to_ast(token).into()), - syntax::Item::Tree(tree) => self.nospace_builder.operand(tree.into()), - syntax::Item::Block(lines) => { - let mut child = self.child.take().unwrap_or_default(); - self.nospace_builder.operand(syntax::item::build_block(lines, &mut child).into()); - self.child = Some(child); - } - } - } - - fn finish_(&mut self) -> Option>> { - self.builder.extend_from(&mut self.nospace_builder); - self.builder.finish() + self.resolver.push_item(item); } /// Return the result. - pub fn finish(&mut self) -> Option> { - self.finish_().map(syntax::Tree::from) + pub fn finish(&mut self) -> Option> { + self.resolver.finish().map(Tree::from) } } @@ -97,486 +93,101 @@ impl<'s> Extend> for Precedence<'s> { } } -// Returns `true` for an item if that item should not follow any other item in a no-space group -// (i.e. the item has "space" before it). -fn starts_new_no_space_group(item: &syntax::item::Item) -> bool { - if item.left_visible_offset().width_in_spaces != 0 { - return true; + +// === Operator or Operand === + +#[derive(Debug)] +enum OperatorOrOperand<'s> { + Operand(Operand>), + Operator(Operator<'s>), +} + +impl<'s> From>> for OperatorOrOperand<'s> { + fn from(operand: Operand>) -> Self { + OperatorOrOperand::Operand(operand) } - if let syntax::item::Item::Block(_) = item { - return true; +} + +impl<'s> From> for OperatorOrOperand<'s> { + fn from(operator: Operator<'s>) -> Self { + OperatorOrOperand::Operator(operator) } - if let syntax::item::Item::Token(Token { variant: token::Variant::Operator(opr), .. }) = item - && opr.properties.is_sequence() +} + + +// === Applying operators === + +fn apply_operator<'s>( + tokens: Vec>, + lhs_section_termination: Option, + reify_rhs_section: bool, + lhs: Option>>, + rhs_: Option>>, +) -> Operand> { + if let Some(lhs_termination) = lhs_section_termination { + let lhs = match lhs_termination { + SectionTermination::Reify => lhs.map(Tree::from), + SectionTermination::Unwrap => lhs.map(|op| op.value), + }; + let rhs = rhs_.map(Tree::from); + let ast = syntax::tree::apply_operator(lhs, tokens, rhs); + Operand::from(ast) + } else if tokens.len() < 2 + && let Some(opr) = tokens.first() + && opr.properties.can_form_section() { - return true; - } - false -} - - -// === Expression builder === - -/// Stack machine that builds an expression from syntax nodes. -/// -/// The operator-precedence algorithm[1] used is based on the shunting yard algorithm[2], extended -/// to support *operator sections*, function application, and unary operators, and correctly report -/// errors relating to consecutive operators. -/// -/// [^1](https://en.wikipedia.org/wiki/Operator-precedence_parser) -/// [^2](https://en.wikipedia.org/wiki/Shunting_yard_algorithm) -#[derive(Default, Debug, PartialEq, Eq)] -struct ExpressionBuilder<'s> { - output: Vec>>, - operator_stack: Vec>, - prev_type: Option, - nospace: bool, -} - -impl<'s> ExpressionBuilder<'s> { - /// Extend the expression with an operand. - pub fn operand(&mut self, operand: Operand>) { - if self.prev_type == Some(ItemType::Ast) { - if let Some(Operand { - value: - syntax::Tree { - variant: box syntax::tree::Variant::TextLiteral(ref mut lhs), - span: lhs_span, - }, - .. - }) = self.output.last_mut() - && !lhs.closed - && let box syntax::tree::Variant::TextLiteral(mut rhs) = operand.value.variant - { - syntax::tree::join_text_literals(lhs, &mut rhs, lhs_span, operand.value.span); - if let syntax::tree::TextLiteral { - open: Some(open), - newline: None, - elements, - closed: true, - close: None, - } = lhs - && open.code.starts_with('#') - { - let elements = mem::take(elements); - let mut open = open.clone(); - let lhs_tree = self.output.pop().unwrap().value; - open.left_offset += lhs_tree.span.left_offset; - let doc = syntax::tree::DocComment { open, elements, newlines: default() }; - self.output.push(syntax::Tree::documented(doc, default()).into()); - } - return; - } - self.application(); - } - self.output.push(operand); - self.prev_type = Some(ItemType::Ast); - } - - fn application(&mut self) { - let precedence = token::Precedence::application(); - let associativity = token::Associativity::Left; - let arity = Arity::Binary { - tokens: default(), - lhs_section_termination: default(), - }; - self.push_operator(precedence, associativity, arity); - } - - /// Extend the expression with an operator. - pub fn operator(&mut self, opr: token::Operator<'s>) { - use ItemType::*; - let assoc = opr.properties.associativity(); - match ( - self.nospace, - opr.properties.binary_infix_precedence(), - opr.properties.unary_prefix_precedence(), - ) { - // If an operator has a binary role, and a LHS is available, it's acting as binary. - (_, Some(prec), _) if self.prev_type == Some(Ast) => - self.binary_operator(prec, assoc, opr), - // Otherwise, if the operator is inside a nospace group, and it has a unary role, - // it's acting as unary. - (true, _, Some(prec)) => self.unary_operator(prec, assoc, Unary::Simple(opr)), - // Outside of a nospace group, a unary-only operator is missing an operand. - (false, None, Some(_)) => self.unary_operator_section(opr), - // Binary operator section (no LHS). - (_, Some(prec), _) => self.binary_operator(prec, assoc, opr), - // Failed to compute a role for the operator; this should not be possible. - (_, None, None) => unreachable!(), - } - } - - fn unary_operator( - &mut self, - prec: token::Precedence, - assoc: token::Associativity, - mut arity: Unary<'s>, - ) { - if self.prev_type == Some(ItemType::Opr) - && let Some(prev_opr) = self.operator_stack.last_mut() - && let Arity::Binary { tokens, .. } = &mut prev_opr.opr - && !self.nospace - && let Unary::Simple(opr) = arity - { - tokens.push(opr); - return; - } - if self.prev_type == Some(ItemType::Ast) { - self.application(); - if self.nospace { - if let Unary::Simple(token) = arity { - let error = "Space required between term and unary-operator expression.".into(); - arity = Unary::Invalid { token, error }; - } + let mut rhs = None; + let mut elided = 0; + let mut wildcards = 0; + if let Some(rhs_) = rhs_ { + if reify_rhs_section { + rhs = Some(Tree::from(rhs_)); + } else { + rhs = Some(rhs_.value); + elided += rhs_.elided; + wildcards += rhs_.wildcards; } } - self.push_operator(prec, assoc, Arity::Unary(arity)); - } - - fn unary_operator_section(&mut self, opr: token::Operator<'s>) { - if self.prev_type == Some(ItemType::Opr) - && let Some(prev_opr) = self.operator_stack.last_mut() - && let Arity::Binary { tokens, .. } = &mut prev_opr.opr - { - // Multiple-operator error. - tokens.push(opr); - } else { - self.operand(Operand { - elided: 1, - ..Operand::from(syntax::tree::apply_unary_operator(opr, None)) - }); - } - } - - /// Extend the expression with a binary operator, by pushing it to the `operator_stack` or - /// emitting a multiple-operator error. - fn binary_operator( - &mut self, - prec: token::Precedence, - assoc: token::Associativity, - opr: token::Operator<'s>, - ) { - if self.prev_type == Some(ItemType::Opr) - && let Some(prev_opr) = self.operator_stack.last_mut() - && let Arity::Binary { tokens, .. } = &mut prev_opr.opr - { - if tokens.len() == 1 && tokens[0].properties.is_dot() { - let Token { left_offset, code, .. } = opr; - let is_operator = true; - let opr_ident = token::ident( - left_offset, - code, - default(), - default(), - default(), - is_operator, - default(), - ); - self.output.push(Operand::from(syntax::Tree::ident(opr_ident))); - self.prev_type = Some(ItemType::Ast); - return; - } - tokens.push(opr); - return; - } - self.push_operator(prec, assoc, Arity::binary(opr)); - } - - /// Add an operator to the stack; [`reduce`] the stack first, as appropriate for the specified - /// precedence. - fn push_operator( - &mut self, - precedence: token::Precedence, - associativity: token::Associativity, - opr: Arity<'s>, - ) { - let opr = Operator { precedence, associativity, opr }; - // When a unary operator follows another operator, we defer reducing the stack because a - // unary operator's affinity for its operand is stronger than any operator precedence. - let defer_reducing_stack = match (&self.prev_type, &opr.opr) { - (Some(ItemType::Opr), Arity::Unary(Unary::Simple(_))) if self.nospace => true, - (Some(ItemType::Opr), Arity::Unary(Unary::Fragment { .. })) => true, - _ => false, - }; - if !defer_reducing_stack { - let mut rhs = self.output.pop(); - self.reduce(precedence, &mut rhs); - if let Some(rhs) = rhs { - self.output.push(rhs); - } - } - self.operator_stack.push(opr); - self.prev_type = Some(ItemType::Opr); - } - - /// Given a starting value, replace it with the result of successively applying to it all - /// operators in the `operator_stack` that have precedence greater than or equal to the - /// specified value, consuming LHS values from the `output` stack as needed. - fn reduce(&mut self, prec: token::Precedence, rhs: &mut Option>>) { - while let Some(opr) = self.operator_stack.pop_if(|opr| { - opr.precedence > prec - || (opr.precedence == prec && opr.associativity == token::Associativity::Left) - }) { - let rhs_ = rhs.take(); - let ast = match opr.opr { - Arity::Unary(Unary::Simple(opr)) => - Operand::new(rhs_).map(|item| syntax::tree::apply_unary_operator(opr, item)), - Arity::Unary(Unary::Invalid { token, error }) => Operand::from(rhs_) - .map(|item| syntax::tree::apply_unary_operator(token, item).with_error(error)), - Arity::Unary(Unary::Fragment { mut fragment }) => { - if let Some(rhs_) = rhs_ { - fragment.operand(rhs_); - } - fragment.finish().unwrap() - } - Arity::Binary { tokens, lhs_section_termination } => { - let lhs = self.output.pop(); - if let Some(lhs_termination) = lhs_section_termination { - let lhs = match lhs_termination { - SectionTermination::Reify => lhs.map(syntax::Tree::from), - SectionTermination::Unwrap => lhs.map(|op| op.value), - }; - let rhs = rhs_.map(syntax::Tree::from); - let ast = syntax::tree::apply_operator(lhs, tokens, rhs); - Operand::from(ast) - } else if self.nospace - && tokens.len() < 2 - && let Some(opr) = tokens.first() - && opr.properties.can_form_section() - { - let mut rhs = None; - let mut elided = 0; - let mut wildcards = 0; - if let Some(rhs_) = rhs_ { - rhs = Some(rhs_.value); - elided += rhs_.elided; - wildcards += rhs_.wildcards; - } - elided += lhs.is_none() as u32 + rhs.is_none() as u32; - let mut operand = Operand::from(lhs) - .map(|lhs| syntax::tree::apply_operator(lhs, tokens, rhs)); - operand.elided += elided; - operand.wildcards += wildcards; - operand - } else { - let rhs = rhs_.map(syntax::Tree::from); - let mut elided = 0; - if tokens.len() != 1 || tokens[0].properties.can_form_section() { - elided += lhs.is_none() as u32 + rhs.is_none() as u32; - } - let mut operand = Operand::from(lhs) - .map(|lhs| syntax::tree::apply_operator(lhs, tokens, rhs)); - operand.elided += elided; - operand - } - } - }; - *rhs = Some(ast); - } - } - - /// Return an expression constructed from the accumulated state. Will return `None` only if no - /// inputs were provided. `self` will be reset to its initial state. - pub fn finish(&mut self) -> Option>> { - use ItemType::*; - let mut out = (self.prev_type == Some(Ast)).and_option_from(|| self.output.pop()); - self.reduce(token::Precedence::min(), &mut out); - debug_assert!(self.operator_stack.is_empty()); - debug_assert_eq!( - &self.output, - &[], - "Internal error. Not all tokens were consumed while constructing the expression." - ); - self.prev_type = None; - out - } - - /// Extend the expression with the contents of a [`Self`] built from a subexpression that - /// contains no spaces. - pub fn extend_from(&mut self, child: &mut Self) { - if child.output.is_empty() { - // If the unspaced subexpression doesn't contain any non-operators, promote each - // operator in the (unspaced) child to an operator in the (spaced) parent. - // - // The case where `child.operator_stack.len() > 1` is subtle: - // - // A sequence of operator characters without intervening whitespace is lexed as multiple - // operators in some cases where the last character is `-`. - // - // In such a case, an unspaced expression-builder will: - // 1. Push the first operator to the operator stack (composed of all the operator - // characters except the trailing `-`). - // 2. Push `-` to the operator stack, without reducing the expression (because the `-` - // should be interpreted as a unary operator if a value follows it within the - // unspaced subexpression). - // - // Thus, if we encounter an unspaced subexpression consisting only of multiple - // operators: When we append each operator to the parent (spaced) expression-builder, it - // will be reinterpreted in a *spaced* context. In a spaced context, the sequence of - // operators will cause a multiple-operator error. - for op in child.operator_stack.drain(..) { - match op.opr { - Arity::Unary(Unary::Simple(un)) => self.operator(un), - Arity::Unary(Unary::Invalid { .. }) => unreachable!(), - Arity::Unary(Unary::Fragment { .. }) => unreachable!(), - Arity::Binary { tokens, .. } => - tokens.into_iter().for_each(|op| self.operator(op)), - } - } - child.prev_type = None; - return; - } - if child.prev_type == Some(ItemType::Opr) - && let Arity::Binary { tokens, .. } = &child.operator_stack.last().unwrap().opr - && let Some(token) = tokens.last() - && token.properties.is_arrow() - { - let precedence = token::Precedence::min_valid(); - let associativity = token::Associativity::Right; - let fragment = ExpressionBuilder { - output: mem::take(&mut child.output), - operator_stack: mem::take(&mut child.operator_stack), - prev_type: mem::take(&mut child.prev_type), - nospace: child.nospace, - }; - let arity = Unary::Fragment { fragment }; - self.unary_operator(precedence, associativity, arity); - return; - } - if let Some(o) = child.finish() { - self.operand(o); + elided += lhs.is_none() as u32 + rhs.is_none() as u32; + let mut operand = + Operand::from(lhs).map(|lhs| syntax::tree::apply_operator(lhs, tokens, rhs)); + operand.elided += elided; + operand.wildcards += wildcards; + operand + } else { + let rhs = rhs_.map(Tree::from); + let mut elided = 0; + if tokens.len() != 1 || tokens[0].properties.can_form_section() { + elided += lhs.is_none() as u32 + rhs.is_none() as u32; } + let mut operand = + Operand::from(lhs).map(|lhs| syntax::tree::apply_operator(lhs, tokens, rhs)); + operand.elided += elided; + operand } } -/// Classify an item as an operator, or operand; this is used in [`Precedence::resolve`] to -/// merge consecutive nodes of the same type. -#[derive(PartialEq, Eq, Debug)] -enum ItemType { - Ast, - Opr, -} - - -// === Operator === - -/// An operator, whose arity and precedence have been determined. -#[derive(Debug, PartialEq, Eq)] -struct Operator<'s> { - precedence: token::Precedence, - associativity: token::Associativity, - opr: Arity<'s>, -} - -/// Classifies the role of an operator. -#[derive(Debug, PartialEq, Eq)] -enum Arity<'s> { - Unary(Unary<'s>), - Binary { - tokens: Vec>, - lhs_section_termination: Option, - }, -} - -impl<'s> Arity<'s> { - fn binary(tok: token::Operator<'s>) -> Self { - let lhs_section_termination = tok.properties.lhs_section_termination(); - let tokens = vec![tok]; - Self::Binary { tokens, lhs_section_termination } - } - - fn unary(tok: token::Operator<'s>) -> Self { - Self::Unary(Unary::Simple(tok)) +fn apply_unary_operator<'s>( + token: token::Operator<'s>, + rhs: Option>>, + error: Option>, +) -> Operand> { + match error { + None => Operand::new(rhs).map(|item| syntax::tree::apply_unary_operator(token, item)), + Some(error) => Operand::from(rhs) + .map(|item| syntax::tree::apply_unary_operator(token, item).with_error(error)), } } -#[derive(Debug, PartialEq, Eq)] -enum Unary<'s> { - Simple(token::Operator<'s>), - Invalid { token: token::Operator<'s>, error: Cow<'static, str> }, - Fragment { fragment: ExpressionBuilder<'s> }, + +// === Operator and Operand Consumers === + +trait OperandConsumer<'s> { + fn push_operand(&mut self, operand: Operand>); } - -// === Operand === - -/// Wraps a value, tracking the number of wildcards or elided operands within it. -#[derive(Default, Debug, PartialEq, Eq)] -struct Operand { - value: T, - /// Number of elided operands in the subtree, potentially forming an *operator section*. - elided: u32, - /// Number of wildcards in the subtree, potentially forming a *template function*. - wildcards: u32, -} - -/// Transpose. Note that an absent input will not be treated as an elided value; for that -/// conversion, use [`Operand::new`]. -impl From>> for Operand> { - fn from(operand: Option>) -> Self { - match operand { - Some(Operand { value, elided, wildcards }) => - Self { value: Some(value), elided, wildcards }, - None => default(), - } - } -} - -/// Unit. Creates an Operand from a node. -impl<'s> From> for Operand> { - fn from(mut value: syntax::Tree<'s>) -> Self { - let elided = 0; - let wildcards = if let syntax::Tree { - variant: - box syntax::tree::Variant::Wildcard(syntax::tree::Wildcard { de_bruijn_index, .. }), - .. - } = &mut value - { - debug_assert_eq!(*de_bruijn_index, None); - *de_bruijn_index = Some(0); - 1 - } else { - 0 - }; - Self { value, wildcards, elided } - } -} - -/// Counit. Bakes any information about elided operands into the tree. -impl<'s> From>> for syntax::Tree<'s> { - fn from(operand: Operand>) -> Self { - let Operand { mut value, elided, wildcards } = operand; - if elided != 0 { - value = syntax::Tree::opr_section_boundary(elided, value); - } - if wildcards != 0 { - value = syntax::Tree::template_function(wildcards, value); - } - value - } -} - -impl Operand> { - /// Lift an option value to a potentially-elided operand. - fn new(value: Option>) -> Self { - match value { - None => Self { value: None, elided: 1, wildcards: default() }, - Some(value) => { - let Operand { value, elided, wildcards } = value; - Self { value: Some(value), elided, wildcards } - } - } - } -} - -impl Operand { - /// Operate on the contained value without altering the elided-operand information. - fn map(self, f: impl FnOnce(T) -> U) -> Operand { - let Self { value, elided, wildcards } = self; - let value = f(value); - Operand { value, elided, wildcards } - } +trait OperatorConsumer<'s> { + fn push_operator(&mut self, operator: Operator<'s>); } @@ -584,17 +195,12 @@ impl Operand { /// Operator-section/template-function termination behavior of an operator with regard to an /// operand. -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default)] pub enum SectionTermination { /// If the operand is an operator-section/template-function, indicate it by wrapping it in a /// suitable node. + #[default] Reify, /// Discard any operator-section/template-function properties associated with the operand. Unwrap, } - -impl Default for SectionTermination { - fn default() -> Self { - Self::Reify - } -} diff --git a/lib/rust/parser/src/syntax/operator/application.rs b/lib/rust/parser/src/syntax/operator/application.rs new file mode 100644 index 00000000000..a13ca181737 --- /dev/null +++ b/lib/rust/parser/src/syntax/operator/application.rs @@ -0,0 +1,79 @@ +use enso_prelude::*; + +use crate::syntax::operator::operand::Operand; +use crate::syntax::operator::types::Arity; +use crate::syntax::operator::types::BinaryOperand; +use crate::syntax::operator::types::ModifiedPrecedence; +use crate::syntax::operator::types::Operator; +use crate::syntax::operator::OperandConsumer; +use crate::syntax::operator::OperatorConsumer; +use crate::syntax::token; +use crate::syntax::treebuilding::Finish; +use crate::syntax::treebuilding::Spacing; +use crate::syntax::Tree; + + + +// =================== +// === Insert Apps === +// =================== + +/// Inserts applications between terms as needed. +#[derive(Default, Debug)] +pub struct InsertApps { + prev_applicable: bool, + inner: Inner, +} + +impl<'s, Inner: OperatorConsumer<'s> + OperandConsumer<'s>> OperandConsumer<'s> + for InsertApps +{ + fn push_operand(&mut self, operand: Operand>) { + if mem::replace(&mut self.prev_applicable, true) { + self.inner.push_operator(application(Spacing::of_tree(&operand.value))); + } + self.inner.push_operand(operand) + } +} + +impl<'s, Inner: OperatorConsumer<'s>> OperatorConsumer<'s> for InsertApps { + fn push_operator(&mut self, operator: Operator<'s>) { + let prev_applicable = mem::replace( + &mut self.prev_applicable, + matches!(operator.arity, Arity::Binary { missing: Some(BinaryOperand::Right), .. }), + ); + if prev_applicable + && matches!( + operator.arity, + Arity::Unary { .. } | Arity::Binary { missing: Some(BinaryOperand::Left), .. } + ) + { + self.inner.push_operator(application(Spacing::Spaced)); + } + self.inner.push_operator(operator) + } +} + +impl Finish for InsertApps { + type Result = Inner::Result; + + fn finish(&mut self) -> Self::Result { + self.prev_applicable = false; + self.inner.finish() + } +} + +fn application<'s>(spacing: Spacing) -> Operator<'s> { + let precedence = ModifiedPrecedence { spacing, precedence: token::Precedence::application() }; + Operator { + left_precedence: Some(precedence), + right_precedence: precedence, + associativity: token::Associativity::Left, + arity: Arity::Binary { + tokens: default(), + lhs_section_termination: default(), + missing: None, + reify_rhs_section: true, + }, + } +} diff --git a/lib/rust/parser/src/syntax/operator/arity.rs b/lib/rust/parser/src/syntax/operator/arity.rs new file mode 100644 index 00000000000..bfc8196ddda --- /dev/null +++ b/lib/rust/parser/src/syntax/operator/arity.rs @@ -0,0 +1,213 @@ +use enso_prelude::*; + +use crate::syntax::operator::apply_operator; +use crate::syntax::operator::apply_unary_operator; +use crate::syntax::operator::operand::Operand; +use crate::syntax::operator::types::Arity; +use crate::syntax::operator::types::BinaryOperand; +use crate::syntax::operator::types::ModifiedPrecedence; +use crate::syntax::operator::types::Operator; +use crate::syntax::operator::OperandConsumer; +use crate::syntax::operator::OperatorConsumer; +use crate::syntax::operator::OperatorOrOperand; +use crate::syntax::token; +use crate::syntax::tree; +use crate::syntax::treebuilding::Finish; +use crate::syntax::treebuilding::Spacing; +use crate::syntax::treebuilding::SpacingLookaheadTokenConsumer; +use crate::syntax::treebuilding::TreeConsumer; +use crate::syntax::Token; +use crate::syntax::Tree; + + + +// ====================== +// === Classify Arity === +// ====================== + +/// Determines the number of operands consumed by each term. +#[derive(Default, Debug)] +pub struct ClassifyArity<'s, Inner> { + /// Next item that will be emitted. If it is an operator, it may still be extended with + /// additional operators to become a multiple-operator error. + lhs_item: Option>, + inner: Inner, +} + +impl<'s, Inner: OperandConsumer<'s> + OperatorConsumer<'s>> SpacingLookaheadTokenConsumer<'s> + for ClassifyArity<'s, Inner> +{ + fn push_token(&mut self, tt: Token<'s>, rhs: Option) { + match tt { + Token { variant: token::Variant::Operator(opr), left_offset, code } => + self.operator(Token(left_offset, code, opr), rhs), + token => self.push_tree(tree::to_ast(token)), + } + } +} + +impl<'s, Inner: OperandConsumer<'s> + OperatorConsumer<'s>> TreeConsumer<'s> + for ClassifyArity<'s, Inner> +{ + fn push_tree(&mut self, tree: Tree<'s>) { + self.emit(Operand::from(tree)) + } +} + +impl<'s, Inner: OperandConsumer<'s> + OperatorConsumer<'s> + Finish> Finish + for ClassifyArity<'s, Inner> +{ + type Result = Inner::Result; + + fn finish(&mut self) -> Self::Result { + self.step(None); + self.inner.finish() + } +} + +impl<'s, Inner: OperandConsumer<'s> + OperatorConsumer<'s>> ClassifyArity<'s, Inner> { + fn emit>>(&mut self, item: T) { + self.step(Some(item.into())); + } + + fn step(&mut self, item: Option>) { + match mem::replace(&mut self.lhs_item, item) { + Some(OperatorOrOperand::Operand(item)) => self.inner.push_operand(item), + Some(OperatorOrOperand::Operator(item)) => self.inner.push_operator(item), + None => (), + } + } + + fn operator(&mut self, token: token::Operator<'s>, rhs: Option) { + let properties = &token.variant.properties; + let lhs = match self.lhs_item { + Some( + OperatorOrOperand::Operand(_) + | OperatorOrOperand::Operator(Operator { + arity: Arity::Binary { missing: Some(BinaryOperand::Right), .. }, + .. + }), + ) => Some(Spacing::of_token(&token)), + _ => None, + }; + // Asymmetric whitespace creates operator sections. + // Exception: If an operator cannot form sections, and its LHS is unspaced, a spaced RHS is + // accepted. + let (lhs, rhs) = match (properties.can_form_section(), lhs, rhs) { + (true, Some(Spacing::Unspaced), Some(Spacing::Spaced)) => + (Some(Spacing::Unspaced), None), + (_, Some(Spacing::Spaced), Some(Spacing::Unspaced)) => (None, Some(Spacing::Unspaced)), + (_, lhs, rhs) => (lhs, rhs), + }; + let assoc = properties.associativity(); + let binary = properties.binary_infix_precedence(); + let unary = properties.unary_prefix_precedence(); + match (binary, unary, lhs, rhs) { + (_, Some(unary), None, Some(Spacing::Unspaced)) => + self.unary_operator_applied(unary, assoc, token), + (Some(binary), _, _, _) => self.binary_operator(binary, assoc, token, lhs, rhs), + (_, Some(_), _, _) => self.unary_operator_section(token, rhs), + (None, None, _, _) => unreachable!(), + } + } + + fn unary_operator_applied( + &mut self, + precedence: token::Precedence, + associativity: token::Associativity, + token: token::Operator<'s>, + ) { + let error = match self.lhs_item { + Some(OperatorOrOperand::Operand(_)) + if token.left_offset.visible.width_in_spaces == 0 => + Some("Space required between term and unary-operator expression.".into()), + _ => None, + }; + self.emit(Operator { + left_precedence: None, + right_precedence: ModifiedPrecedence { spacing: Spacing::Unspaced, precedence }, + associativity, + arity: Arity::Unary { token, error }, + }); + } + + fn unary_operator_section(&mut self, token: token::Operator<'s>, rhs: Option) { + match &mut self.lhs_item { + Some(OperatorOrOperand::Operator(Operator { + arity: Arity::Binary { tokens, .. }, + .. + })) if !(tokens.first().unwrap().left_offset.visible.width_in_spaces == 0 + && token.left_offset.visible.width_in_spaces == 0) => + self.multiple_operator_error(token, rhs), + _ => self.emit(apply_unary_operator(token, None, None)), + } + } + + fn binary_operator( + &mut self, + precedence: token::Precedence, + associativity: token::Associativity, + token: token::Operator<'s>, + lhs: Option, + rhs: Option, + ) { + if let Some(OperatorOrOperand::Operator(Operator { + arity: Arity::Binary { missing: None | Some(BinaryOperand::Left), .. }, + .. + })) = &self.lhs_item + && !matches!(rhs, Some(Spacing::Unspaced)) + { + self.multiple_operator_error(token, rhs); + return; + } + let lhs_section_termination = token.properties.lhs_section_termination(); + let missing = match (lhs, rhs) { + (None, None) => { + self.emit(apply_operator(vec![token], lhs_section_termination, false, None, None)); + return; + } + (Some(_), None) => Some(BinaryOperand::Right), + (None, Some(_)) => Some(BinaryOperand::Left), + (Some(_), Some(_)) => None, + }; + let reify_rhs_section = token.properties.can_form_section() + && (lhs == Some(Spacing::Spaced) || rhs == Some(Spacing::Spaced)); + self.emit(Operator { + left_precedence: lhs.map(|spacing| ModifiedPrecedence { spacing, precedence }), + right_precedence: ModifiedPrecedence { spacing: rhs.or(lhs).unwrap(), precedence }, + associativity, + arity: Arity::Binary { + tokens: vec![token], + lhs_section_termination, + missing, + reify_rhs_section, + }, + }); + } + + fn multiple_operator_error(&mut self, token: token::Operator<'s>, rhs: Option) { + match &mut self.lhs_item { + Some(OperatorOrOperand::Operator(Operator { + arity: Arity::Binary { tokens, lhs_section_termination, missing, reify_rhs_section }, + .. + })) => { + tokens.push(token); + if rhs.is_none() { + match missing { + None => *missing = Some(BinaryOperand::Right), + Some(BinaryOperand::Left) => + self.lhs_item = Some(OperatorOrOperand::Operand(apply_operator( + mem::take(tokens), + *lhs_section_termination, + *reify_rhs_section, + None, + None, + ))), + Some(BinaryOperand::Right) => unreachable!(), + } + } + } + _ => unreachable!(), + } + } +} diff --git a/lib/rust/parser/src/syntax/operator/operand.rs b/lib/rust/parser/src/syntax/operator/operand.rs new file mode 100644 index 00000000000..9853d8a4c80 --- /dev/null +++ b/lib/rust/parser/src/syntax/operator/operand.rs @@ -0,0 +1,87 @@ +use crate::syntax::tree; +use crate::syntax::Tree; + +use enso_prelude::default; + + + +// =============== +// === Operand === +// =============== + +/// Wraps a value, tracking the number of wildcards or elided operands within it. +#[derive(Default, Debug, PartialEq, Eq)] +pub struct Operand { + pub value: T, + /// Number of elided operands in the subtree, potentially forming an *operator section*. + pub elided: u32, + /// Number of wildcards in the subtree, potentially forming a *template function*. + pub wildcards: u32, +} + +/// Transpose. Note that an absent input will not be treated as an elided value; for that +/// conversion, use [`Operand::new`]. +impl From>> for Operand> { + fn from(operand: Option>) -> Self { + match operand { + Some(Operand { value, elided, wildcards }) => + Self { value: Some(value), elided, wildcards }, + None => default(), + } + } +} + +/// Unit. Creates an Operand from a node. +impl<'s> From> for Operand> { + fn from(mut value: Tree<'s>) -> Self { + let elided = 0; + let wildcards = if let Tree { + variant: box tree::Variant::Wildcard(tree::Wildcard { de_bruijn_index, .. }), + .. + } = &mut value + { + debug_assert_eq!(*de_bruijn_index, None); + *de_bruijn_index = Some(0); + 1 + } else { + 0 + }; + Self { value, wildcards, elided } + } +} + +/// Counit. Bakes any information about elided operands into the tree. +impl<'s> From>> for Tree<'s> { + fn from(operand: Operand>) -> Self { + let Operand { mut value, elided, wildcards } = operand; + if elided != 0 { + value = Tree::opr_section_boundary(elided, value); + } + if wildcards != 0 { + value = Tree::template_function(wildcards, value); + } + value + } +} + +impl Operand> { + /// Lift an option value to a potentially-elided operand. + pub fn new(value: Option>) -> Self { + match value { + None => Self { value: None, elided: 1, wildcards: default() }, + Some(value) => { + let Operand { value, elided, wildcards } = value; + Self { value: Some(value), elided, wildcards } + } + } + } +} + +impl Operand { + /// Operate on the contained value without altering the elided-operand information. + pub fn map(self, f: impl FnOnce(T) -> U) -> Operand { + let Self { value, elided, wildcards } = self; + let value = f(value); + Operand { value, elided, wildcards } + } +} diff --git a/lib/rust/parser/src/syntax/operator/reducer.rs b/lib/rust/parser/src/syntax/operator/reducer.rs new file mode 100644 index 00000000000..e278f45ff6d --- /dev/null +++ b/lib/rust/parser/src/syntax/operator/reducer.rs @@ -0,0 +1,113 @@ +use crate::syntax::operator::apply_operator; +use crate::syntax::operator::apply_unary_operator; +use crate::syntax::operator::Arity; +use crate::syntax::operator::BinaryOperand; +use crate::syntax::operator::ModifiedPrecedence; +use crate::syntax::operator::Operand; +use crate::syntax::operator::OperandConsumer; +use crate::syntax::operator::Operator; +use crate::syntax::operator::OperatorConsumer; +use crate::syntax::token; +use crate::syntax::treebuilding::Finish; +use crate::syntax::treebuilding::Spacing; +use crate::syntax::Tree; + +use enso_prelude::VecOps; + + + +// =============== +// === Reducer === +// =============== + +/// Stack machine that builds an expression from syntax nodes. +/// +/// The operator-precedence algorithm[1] used is based on the shunting yard algorithm[2], extended +/// to support *operator sections*, function application, and unary operators, and correctly report +/// errors relating to consecutive operators. +/// +/// [^1](https://en.wikipedia.org/wiki/Operator-precedence_parser) +/// [^2](https://en.wikipedia.org/wiki/Shunting_yard_algorithm) +#[derive(Default, Debug)] +pub struct Reduce<'s> { + output: Vec>>, + operator_stack: Vec>, +} + +impl<'s> OperandConsumer<'s> for Reduce<'s> { + fn push_operand(&mut self, operand: Operand>) { + self.output.push(operand) + } +} + +impl<'s> OperatorConsumer<'s> for Reduce<'s> { + fn push_operator(&mut self, operator: Operator<'s>) { + if let Some(precedence) = operator.left_precedence { + self.reduce(precedence); + } + self.operator_stack.push(operator); + } +} + +impl<'s> Finish for Reduce<'s> { + type Result = Option>>; + + fn finish(&mut self) -> Self::Result { + self.reduce(ModifiedPrecedence { + spacing: Spacing::Spaced, + precedence: token::Precedence::min(), + }); + let out = self.output.pop(); + debug_assert!(self.operator_stack.is_empty()); + debug_assert_eq!( + &self.output, + &[], + "Internal error. Not all tokens were consumed while constructing the expression." + ); + out + } +} + +impl<'s> Reduce<'s> { + /// Given a starting value, replace it with the result of successively applying to it all + /// operators in the `operator_stack` that have precedence greater than or equal to the + /// specified value, consuming LHS values from the `output` stack as needed. + fn reduce(&mut self, prec: ModifiedPrecedence) { + let mut rhs = self.output.pop(); + while let Some(opr) = self.operator_stack.pop_if(|opr| { + opr.right_precedence > prec + || (opr.right_precedence == prec && opr.associativity == token::Associativity::Left) + }) { + match opr.arity { + Arity::Unary { token, error } => { + let rhs_ = rhs.take(); + debug_assert_ne!(rhs_, None); + rhs = Some(apply_unary_operator(token, rhs_, error)); + } + Arity::Binary { tokens, lhs_section_termination, missing, reify_rhs_section } => { + let operand = rhs.take(); + debug_assert_ne!(operand, None); + let (lhs, rhs_) = match missing { + Some(BinaryOperand::Left) => (None, operand), + Some(BinaryOperand::Right) => (operand, None), + None => { + let lhs = self.output.pop(); + debug_assert_ne!(lhs, None); + (lhs, operand) + } + }; + rhs = Some(apply_operator( + tokens, + lhs_section_termination, + reify_rhs_section, + lhs, + rhs_, + )); + } + }; + } + if let Some(rhs) = rhs { + self.output.push(rhs); + } + } +} diff --git a/lib/rust/parser/src/syntax/operator/types.rs b/lib/rust/parser/src/syntax/operator/types.rs new file mode 100644 index 00000000000..bb698089db1 --- /dev/null +++ b/lib/rust/parser/src/syntax/operator/types.rs @@ -0,0 +1,73 @@ +use crate::syntax::operator::SectionTermination; +use crate::syntax::token; +use crate::syntax::treebuilding::Spacing; + +use std::borrow::Cow; +use std::cmp::Ordering; + + + +// ================ +// === Operator === +// ================ + +/// An operator, whose arity and precedence have been determined. +#[derive(Debug)] +pub struct Operator<'s> { + pub left_precedence: Option, + pub right_precedence: ModifiedPrecedence, + pub associativity: token::Associativity, + pub arity: Arity<'s>, +} + + +// === Arity === + +/// Classifies the role of an operator. +#[derive(Debug)] +pub enum Arity<'s> { + Unary { + token: token::Operator<'s>, + error: Option>, + }, + Binary { + tokens: Vec>, + lhs_section_termination: Option, + missing: Option, + reify_rhs_section: bool, + }, +} + +impl<'s> Arity<'s> { + fn unary(token: token::Operator<'s>) -> Self { + Self::Unary { token, error: None } + } +} + + +// === Binary operand === + +#[derive(Debug)] +pub enum BinaryOperand { + Left, + Right, +} + + +// === Modified precedence === + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct ModifiedPrecedence { + pub spacing: Spacing, + pub precedence: token::Precedence, +} + +impl PartialOrd for ModifiedPrecedence { + fn partial_cmp(&self, other: &Self) -> Option { + match (self.spacing, other.spacing) { + (Spacing::Spaced, Spacing::Unspaced) => Some(Ordering::Less), + (Spacing::Unspaced, Spacing::Spaced) => Some(Ordering::Greater), + _ => self.precedence.partial_cmp(&other.precedence), + } + } +} diff --git a/lib/rust/parser/src/syntax/token.rs b/lib/rust/parser/src/syntax/token.rs index 5212c734393..3ba4e797008 100644 --- a/lib/rust/parser/src/syntax/token.rs +++ b/lib/rust/parser/src/syntax/token.rs @@ -305,6 +305,11 @@ impl Variant { | Variant::Invalid(_) ) } + + /// Return a token variant for an identifier composed of operator characters. + pub fn operator_ident() -> variant::Ident { + variant::Ident(false, 0, false, true, false) + } } impl Default for Variant { @@ -366,6 +371,11 @@ impl OperatorProperties { Self { is_compile_time_operation: true, ..self } } + /// Return whether this operator is flagged as a compile time operation. + pub fn is_compile_time_operation(&self) -> bool { + self.is_compile_time_operation + } + /// Return a copy of this operator, modified to be flagged as right associative. pub fn as_right_associative(self) -> Self { Self { is_right_associative: true, ..self } diff --git a/lib/rust/parser/src/syntax/tree.rs b/lib/rust/parser/src/syntax/tree.rs index 54d50fb32ef..d2575944ef9 100644 --- a/lib/rust/parser/src/syntax/tree.rs +++ b/lib/rust/parser/src/syntax/tree.rs @@ -131,9 +131,6 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args) pub newline: Option>, pub elements: Vec>, pub close: Option>, - #[serde(skip)] - #[reflect(skip)] - pub closed: bool, }, /// A simple application, like `print "hello"`. App { @@ -841,29 +838,6 @@ fn maybe_apply<'s>(f: Option>, x: Tree<'s>) -> Tree<'s> { } } -/// Join two text literals, merging contents as appropriate to each field. -pub fn join_text_literals<'s>( - lhs: &mut TextLiteral<'s>, - rhs: &mut TextLiteral<'s>, - lhs_span: &mut Span<'s>, - rhs_span: Span<'s>, -) { - lhs_span.code_length += rhs_span.length_including_whitespace(); - match rhs.elements.first_mut() { - Some(TextElement::Section { text }) => text.left_offset += rhs_span.left_offset, - Some(TextElement::Escape { token }) => token.left_offset += rhs_span.left_offset, - Some(TextElement::Splice { open, .. }) => open.left_offset += rhs_span.left_offset, - Some(TextElement::Newline { newline }) => newline.left_offset += rhs_span.left_offset, - None => (), - } - if let Some(newline) = rhs.newline.take() { - lhs.newline = newline.into(); - } - lhs.elements.append(&mut rhs.elements); - lhs.close = rhs.close.take(); - lhs.closed = rhs.closed; -} - /// Join two nodes with an operator, in a way appropriate for their types. /// /// For most operands this will simply construct an `OprApp`; however, a non-operator block (i.e. an @@ -1006,28 +980,6 @@ pub fn to_ast(token: Token) -> Tree { Tree::number(None, Some(token.with_variant(number)), None), token::Variant::NumberBase(base) => Tree::number(Some(token.with_variant(base)), None, None), - token::Variant::TextStart(open) => - Tree::text_literal(Some(token.with_variant(open)), default(), default(), default(), default()), - token::Variant::TextSection(section) => { - let section = TextElement::Section { text: token.with_variant(section) }; - Tree::text_literal(default(), default(), vec![section], default(), default()) - } - token::Variant::TextEscape(escape) => { - let token = token.with_variant(escape); - let section = TextElement::Escape { token }; - Tree::text_literal(default(), default(), vec![section], default(), default()) - } - token::Variant::TextEnd(_) if token.code.is_empty() => - Tree::text_literal(default(), default(), default(), default(), true), - token::Variant::TextEnd(close) => - Tree::text_literal(default(), default(), default(), Some(token.with_variant(close)), true), - token::Variant::TextInitialNewline(_) => - Tree::text_literal(default(), Some(token::newline(token.left_offset, token.code)), default(), default(), default()), - token::Variant::TextNewline(_) => { - let newline = token::newline(token.left_offset, token.code); - let newline = TextElement::Newline { newline }; - Tree::text_literal(default(), default(), vec![newline], default(), default()) - } token::Variant::Wildcard(wildcard) => Tree::wildcard(token.with_variant(wildcard), default()), token::Variant::SuspendedDefaultArguments(t) => Tree::suspended_default_arguments(token.with_variant(t)), token::Variant::OpenSymbol(s) => @@ -1042,6 +994,13 @@ pub fn to_ast(token: Token) -> Tree { // This should be unreachable: `Precedence::resolve` doesn't calls `to_ast` for operators. | token::Variant::Operator(_) | token::Variant::Private(_) + // Handled during compound-token assembly. + | token::Variant::TextStart(_) + | token::Variant::TextSection(_) + | token::Variant::TextEscape(_) + | token::Variant::TextEnd(_) + | token::Variant::TextInitialNewline(_) + | token::Variant::TextNewline(_) // Map an error case in the lexer to an error in the AST. | token::Variant::Invalid(_) => { let message = format!("Unexpected token: {token:?}"); diff --git a/lib/rust/parser/src/syntax/treebuilding.rs b/lib/rust/parser/src/syntax/treebuilding.rs new file mode 100644 index 00000000000..9100d881366 --- /dev/null +++ b/lib/rust/parser/src/syntax/treebuilding.rs @@ -0,0 +1,46 @@ +use crate::syntax::Token; +use crate::syntax::Tree; + + + +mod block; +mod compound_token; +mod consumer; +mod whitespace; + + +// =============== +// === Exports === +// =============== + +pub use block::FlattenBlockTrees; +pub use compound_token::AssembleCompoundTokens; +pub use consumer::Finish; +pub use consumer::ItemConsumer; +pub use consumer::TreeConsumer; +pub use whitespace::PeekSpacing; +pub use whitespace::Spacing; +pub use whitespace::SpacingLookaheadTokenConsumer; + + +// =================== +// === TokenOrTree === +// =================== + +#[derive(Debug)] +enum TokenOrTree<'s> { + Token(Token<'s>), + Tree(Tree<'s>), +} + +impl<'s> From> for TokenOrTree<'s> { + fn from(token: Token<'s>) -> Self { + TokenOrTree::Token(token) + } +} + +impl<'s> From> for TokenOrTree<'s> { + fn from(tree: Tree<'s>) -> Self { + TokenOrTree::Tree(tree) + } +} diff --git a/lib/rust/parser/src/syntax/treebuilding/block.rs b/lib/rust/parser/src/syntax/treebuilding/block.rs new file mode 100644 index 00000000000..b6f3239918f --- /dev/null +++ b/lib/rust/parser/src/syntax/treebuilding/block.rs @@ -0,0 +1,44 @@ +use crate::syntax; +use crate::syntax::operator; +use crate::syntax::treebuilding::consumer::Finish; +use crate::syntax::treebuilding::consumer::ItemConsumer; +use crate::syntax::treebuilding::consumer::TokenConsumer; +use crate::syntax::treebuilding::consumer::TreeConsumer; +use crate::syntax::Item; + + + +// ========================== +// === BlockTreeFlattener === +// ========================== + +/// Consumes `Item`s and passes their content to a token/tree consumer, using an +/// [`operator::Precedence`] parser to flatten blocks. +#[derive(Debug, Default)] +pub struct FlattenBlockTrees<'s, T> { + inner: T, + /// Consumes child blocks. Stores no semantic state, but is reused for performance. + child: Option>>, +} + +impl<'s, T: TokenConsumer<'s> + TreeConsumer<'s>> ItemConsumer<'s> for FlattenBlockTrees<'s, T> { + fn push_item(&mut self, item: Item<'s>) { + match item { + Item::Block(lines) => { + let mut child = self.child.take().unwrap_or_default(); + self.inner.push_tree(syntax::item::build_block(lines, &mut child)); + self.child = Some(child); + } + Item::Token(token) => self.inner.push_token(token), + Item::Tree(tree) => self.inner.push_tree(tree), + } + } +} + +impl<'s, T: Finish> Finish for FlattenBlockTrees<'s, T> { + type Result = T::Result; + + fn finish(&mut self) -> Self::Result { + self.inner.finish() + } +} diff --git a/lib/rust/parser/src/syntax/treebuilding/compound_token.rs b/lib/rust/parser/src/syntax/treebuilding/compound_token.rs new file mode 100644 index 00000000000..1c3c2c19389 --- /dev/null +++ b/lib/rust/parser/src/syntax/treebuilding/compound_token.rs @@ -0,0 +1,161 @@ +use enso_prelude::*; + +use crate::syntax; +use crate::syntax::token; +use crate::syntax::treebuilding::consumer::Finish; +use crate::syntax::treebuilding::consumer::TokenConsumer; +use crate::syntax::treebuilding::consumer::TreeConsumer; +use crate::syntax::Token; + + + +// ================================ +// === Compound token assembler === +// ================================ + +/// Recognizes lexical tokens that are indivisible, and assembles them into trees. +#[derive(Default, Debug)] +pub struct AssembleCompoundTokens<'s, T> { + compounding: Option>, + inner: T, +} + +#[derive(Debug)] +enum CompoundToken<'s> { + TextLiteral(TextLiteralBuilder<'s>), +} + +impl<'s, T: TreeConsumer<'s> + TokenConsumer<'s>> TokenConsumer<'s> + for AssembleCompoundTokens<'s, T> +{ + fn push_token(&mut self, token: Token<'s>) { + match (&mut self.compounding, token.variant) { + (this @ None, token::Variant::TextStart(variant)) => { + let token = token.with_variant(variant); + *this = Some(CompoundToken::TextLiteral(TextLiteralBuilder { + open: token, + newline: default(), + elements: default(), + })); + } + ( + Some(CompoundToken::TextLiteral(TextLiteralBuilder { + newline: newline @ None, + .. + })), + token::Variant::TextInitialNewline(_), + ) => { + let token = token::newline(token.left_offset, token.code); + *newline = Some(token); + } + ( + Some(CompoundToken::TextLiteral(TextLiteralBuilder { elements, .. })), + token::Variant::TextSection(variant), + ) => { + let token = token.with_variant(variant); + let element = syntax::tree::TextElement::Section { text: token }; + elements.push(element); + } + ( + Some(CompoundToken::TextLiteral(TextLiteralBuilder { elements, .. })), + token::Variant::TextEscape(variant), + ) => { + let token = token.with_variant(variant); + let element = syntax::tree::TextElement::Escape { token }; + elements.push(element); + } + ( + Some(CompoundToken::TextLiteral(TextLiteralBuilder { elements, .. })), + token::Variant::TextNewline(_), + ) => { + let token = token::newline(token.left_offset, token.code); + let element = syntax::tree::TextElement::Newline { newline: token }; + elements.push(element); + } + (this @ Some(CompoundToken::TextLiteral(_)), token::Variant::TextEnd(variant)) => { + let builder = match mem::take(this) { + Some(CompoundToken::TextLiteral(builder)) => builder, + _ => unreachable!(), + }; + let close = token.with_variant(variant); + self.inner.push_tree(builder.finish(Some(close))); + } + (_, token::Variant::TextStart(_)) => unreachable!(), + (_, token::Variant::TextInitialNewline(_)) => unreachable!(), + (_, token::Variant::TextSection(_)) => unreachable!(), + (_, token::Variant::TextEscape(_)) => unreachable!(), + (_, token::Variant::TextNewline(_)) => unreachable!(), + (_, token::Variant::TextEnd(_)) => unreachable!(), + _ => self.inner.push_token(token), + } + } +} + +impl<'s, T: TreeConsumer<'s>> TreeConsumer<'s> for AssembleCompoundTokens<'s, T> { + fn push_tree(&mut self, mut tree: syntax::Tree<'s>) { + match (&mut self.compounding, &mut tree.variant) { + ( + Some(CompoundToken::TextLiteral(TextLiteralBuilder { elements, .. })), + box syntax::tree::Variant::TextLiteral(syntax::tree::TextLiteral { + open: None, + newline: None, + elements: rhs_elements, + close: None, + }), + ) => { + match rhs_elements.first_mut() { + Some(syntax::tree::TextElement::Splice { open, .. }) => + open.left_offset += tree.span.left_offset, + _ => unreachable!(), + } + elements.append(rhs_elements); + } + _ => { + self.flush(); + self.inner.push_tree(tree); + } + } + } +} + +impl<'s, T: TreeConsumer<'s>> AssembleCompoundTokens<'s, T> { + fn flush(&mut self) { + if let Some(CompoundToken::TextLiteral(builder)) = mem::take(&mut self.compounding) { + self.inner.push_tree(builder.finish(None)) + } + } +} + +impl<'s, T: TreeConsumer<'s> + Finish> Finish for AssembleCompoundTokens<'s, T> { + type Result = T::Result; + + fn finish(&mut self) -> Self::Result { + self.flush(); + self.inner.finish() + } +} + + +// === Text literal builder === + +#[derive(Debug)] +struct TextLiteralBuilder<'s> { + open: token::TextStart<'s>, + newline: Option>, + elements: Vec>, +} + +impl<'s> TextLiteralBuilder<'s> { + fn finish(self, close: Option>) -> syntax::Tree<'s> { + let Self { open, newline, elements } = self; + if open.code.starts_with('#') { + assert_eq!(newline, None); + let doc = syntax::tree::DocComment { open, elements, newlines: default() }; + syntax::Tree::documented(doc, default()) + } else { + let close = + close.and_then(|close| if close.code.is_empty() { None } else { Some(close) }); + syntax::Tree::text_literal(Some(open), newline, elements, close) + } + } +} diff --git a/lib/rust/parser/src/syntax/treebuilding/consumer.rs b/lib/rust/parser/src/syntax/treebuilding/consumer.rs new file mode 100644 index 00000000000..a9e0b8b84a8 --- /dev/null +++ b/lib/rust/parser/src/syntax/treebuilding/consumer.rs @@ -0,0 +1,23 @@ +use crate::syntax::Item; +use crate::syntax::Token; +use crate::syntax::Tree; + + + +pub trait ItemConsumer<'s> { + fn push_item(&mut self, tree: Item<'s>); +} + +pub trait TreeConsumer<'s> { + fn push_tree(&mut self, tree: Tree<'s>); +} + +pub trait TokenConsumer<'s> { + fn push_token(&mut self, token: Token<'s>); +} + +pub trait Finish { + type Result; + + fn finish(&mut self) -> Self::Result; +} diff --git a/lib/rust/parser/src/syntax/treebuilding/whitespace.rs b/lib/rust/parser/src/syntax/treebuilding/whitespace.rs new file mode 100644 index 00000000000..b881df062ab --- /dev/null +++ b/lib/rust/parser/src/syntax/treebuilding/whitespace.rs @@ -0,0 +1,128 @@ +use crate::syntax::token; +use crate::syntax::tree; +use crate::syntax::treebuilding::consumer::Finish; +use crate::syntax::treebuilding::consumer::TokenConsumer; +use crate::syntax::treebuilding::consumer::TreeConsumer; +use crate::syntax::treebuilding::TokenOrTree; +use crate::syntax::Token; +use crate::syntax::Tree; + + + +// =============== +// === Spacing === +// =============== + +/// Whether a term is logically separated from the previous term by whitespace. +#[derive(Debug, Default, PartialEq, Eq, Copy, Clone)] +pub enum Spacing { + #[default] + Spaced, + Unspaced, +} + +impl Spacing { + pub fn of_tree(tree: &Tree) -> Self { + match tree_starts_new_no_space_group(tree) { + false => Spacing::Unspaced, + true => Spacing::Spaced, + } + } + + pub fn of_token<'a: 'b, 'b, T: Into>>(token: T) -> Self { + match token_starts_new_no_space_group(token) { + false => Spacing::Unspaced, + true => Spacing::Spaced, + } + } +} + +// Returns `true` for an item if that item should not follow any other item in a no-space group +// (i.e. the item has "space" before it). +fn token_starts_new_no_space_group<'a: 'b, 'b, T: Into>>(token: T) -> bool { + let token = token.into(); + match &token.data { + token::Variant::Operator(opr) if opr.properties.is_sequence() => true, + _ => token.left_offset.visible.width_in_spaces != 0, + } +} + +fn tree_starts_new_no_space_group(tree: &Tree) -> bool { + tree.span.left_offset.visible.width_in_spaces != 0 + || matches!( + &tree.variant, + box tree::Variant::BodyBlock(_) + | box tree::Variant::OperatorBlockApplication(_) + | box tree::Variant::ArgumentBlockApplication(_) + ) +} + + +// ============================ +// === Whitespace Lookahead === +// ============================ + +pub trait SpacingLookaheadTreeConsumer<'s> { + fn push_tree(&mut self, tree: Tree<'s>, following_spacing: Option); +} + +pub trait SpacingLookaheadTokenConsumer<'s> { + fn push_token(&mut self, token: Token<'s>, following_spacing: Option); +} + +/// Maintains 1-token whitespace lookahead. +#[derive(Debug, Default)] +pub struct PeekSpacing<'s, T> { + current: Option>, + inner: T, +} + +impl<'s, T: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s>> + PeekSpacing<'s, T> +{ + fn emit(&mut self, tt: Option>, rhs: Option) { + match tt { + Some(TokenOrTree::Token(token)) => self.inner.push_token(token, rhs), + Some(TokenOrTree::Tree(tree)) => self.inner.push_tree(tree, rhs), + None => {} + } + } +} + +impl<'s, T: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s> + Finish> Finish + for PeekSpacing<'s, T> +{ + type Result = T::Result; + + fn finish(&mut self) -> T::Result { + let last = self.current.take(); + self.emit(last, None); + self.inner.finish() + } +} + +impl<'s, T: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s>> TokenConsumer<'s> + for PeekSpacing<'s, T> +{ + fn push_token(&mut self, token: Token<'s>) { + let rhs = Spacing::of_token(&token); + let next = self.current.replace(token.into()); + self.emit(next, Some(rhs)) + } +} + +impl<'s, T: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s>> TreeConsumer<'s> + for PeekSpacing<'s, T> +{ + fn push_tree(&mut self, tree: Tree<'s>) { + let rhs = Spacing::of_tree(&tree); + let next = self.current.replace(tree.into()); + self.emit(next, Some(rhs)); + } +} + +impl<'s, T: TreeConsumer<'s>> SpacingLookaheadTreeConsumer<'s> for T { + fn push_tree(&mut self, tree: Tree<'s>, _: Option) { + self.push_tree(tree); + } +}