mirror of
https://github.com/enso-org/enso.git
synced 2024-12-22 23:31:42 +03:00
Refactor precedence for whitespace changes (#10569)
Single-phase whitespace-aware precedence resolution.

#### Performance

![newplot(4)](https://github.com/user-attachments/assets/9822b0dc-17c3-4d2d-adf7-eb8b1c240522)

Since this is a major refactor of the core of the parser, I benchmarked it; it's about 3% faster.

# Important Notes

- Move operator-identifier recognition to the lexer.
- Move compound-token assembly out of the precedence resolver.
This commit is contained in:
parent
22495e0592
commit
4cff789b69
@ -668,20 +668,12 @@ export class App extends Ast {
|
||||
: ensureSpaced(nameSpecification.name, verbatim)
|
||||
yield ensureSpacedOnlyIf(nameSpecification.equals, spacedEquals, verbatim)
|
||||
}
|
||||
yield ensureSpacedOnlyIf(argument, !nameSpecification || spacedEquals, verbatim)
|
||||
// Some syntax trees, including many error conditions, involve unspaced applications.
|
||||
// If a parsed input lacked a space before the argument, reproduce it as-is.
|
||||
const verbatimArgument = true
|
||||
yield ensureSpacedOnlyIf(argument, !nameSpecification || spacedEquals, verbatimArgument)
|
||||
if (useParens) yield preferUnspaced(parens.close)
|
||||
}
|
||||
|
||||
printSubtree(
|
||||
info: SpanMap,
|
||||
offset: number,
|
||||
parentIndent: string | undefined,
|
||||
verbatim?: boolean,
|
||||
): string {
|
||||
const verbatim_ =
|
||||
verbatim ?? (this.function instanceof Invalid || this.argument instanceof Invalid)
|
||||
return super.printSubtree(info, offset, parentIndent, verbatim_)
|
||||
}
|
||||
}
|
||||
function ensureSpacedOnlyIf<T>(
|
||||
child: NodeChild<T>,
|
||||
|
@ -52,12 +52,7 @@ public class ErrorCompilerTest extends CompilerTest {
|
||||
main = Date.new day=-
|
||||
""");
|
||||
|
||||
assertSingleSyntaxError(
|
||||
ir,
|
||||
new Syntax.UnsupportedSyntax("Strange unary -"),
|
||||
"Syntax is not supported yet: Strange unary -",
|
||||
51,
|
||||
52);
|
||||
assertSingleSyntaxError(ir, Syntax.UnrecognizedToken$.MODULE$, "Unrecognized token", 51, 52);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -343,6 +343,9 @@ fn type_methods() {
|
||||
"=" (BodyBlock #((Ident self))))))
|
||||
];
|
||||
test(&code.join("\n"), expected);
|
||||
test!("[foo., bar.]",
|
||||
(Array (OprSectionBoundary 1 (OprApp (Ident foo) (Ok ".") ()))
|
||||
#(("," (OprSectionBoundary 1 (OprApp (Ident bar) (Ok ".") ()))))));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -365,6 +368,22 @@ fn type_operator_methods() {
|
||||
(Function (OprApp (Ident Foo) (Ok ".") (Ident #"+"))
|
||||
#((() (Ident self) () ()) (() (Ident b) () ())) () "=" (Ident b))))];
|
||||
test(&code.join("\n"), expected);
|
||||
test!("Any.==", (OprApp (Ident Any) (Ok ".") (Ident #"==")));
|
||||
expect_invalid_node("x.-y");
|
||||
expect_invalid_node("x.-1");
|
||||
expect_invalid_node("x.+y");
|
||||
expect_invalid_node("x.+1");
|
||||
expect_invalid_node("x.+'a'");
|
||||
// Compile-time operators are never operator-identifiers.
|
||||
test!("x.~y", (OprApp (Ident x) (Ok ".") (UnaryOprApp "~" (Ident y))));
|
||||
test!("x.~1", (OprApp (Ident x) (Ok ".") (UnaryOprApp "~" (Number () "1" ()))));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unspaced_app() {
|
||||
test!("js_set_zone arr.at(0)", (App (Ident js_set_zone)
|
||||
(App (OprApp (Ident arr) (Ok ".") (Ident at))
|
||||
(Group (Number () "0" ())))));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -727,16 +746,13 @@ fn first_line_indented() {
|
||||
|
||||
#[test]
|
||||
fn multiple_operator_error() {
|
||||
let code = ["x + + x"];
|
||||
let expected = block![
|
||||
(OprApp (Ident x) (Err (#("+" "+"))) (Ident x))
|
||||
];
|
||||
test(&code.join("\n"), expected);
|
||||
let code = ["x + + + x"];
|
||||
let expected = block![
|
||||
(OprApp (Ident x) (Err (#("+" "+" "+"))) (Ident x))
|
||||
];
|
||||
test(&code.join("\n"), expected);
|
||||
expect_multiple_operator_error("x + + x");
|
||||
expect_multiple_operator_error("x + + + x");
|
||||
expect_multiple_operator_error("x + +");
|
||||
expect_multiple_operator_error("+ + x");
|
||||
expect_multiple_operator_error("+ +");
|
||||
expect_multiple_operator_error("+ -");
|
||||
expect_multiple_operator_error("x + -");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -779,12 +795,9 @@ fn pipeline_operators() {
|
||||
#[test]
|
||||
fn accessor_operator() {
|
||||
// Test that the accessor operator `.` is treated like any other operator.
|
||||
let cases = [
|
||||
("Console.", block![(OprSectionBoundary 1 (OprApp (Ident Console) (Ok ".") ()))]),
|
||||
(".", block![(OprSectionBoundary 2 (OprApp () (Ok ".") ()))]),
|
||||
(".log", block![(OprSectionBoundary 1 (OprApp () (Ok ".") (Ident log)))]),
|
||||
];
|
||||
cases.into_iter().for_each(|(code, expected)| test(code, expected));
|
||||
test!("Console.", (OprSectionBoundary 1 (OprApp (Ident Console) (Ok ".") ())));
|
||||
test!(".", (OprSectionBoundary 2 (OprApp () (Ok ".") ())));
|
||||
test!(".log", (OprSectionBoundary 1 (OprApp () (Ok ".") (Ident log))));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -808,6 +821,21 @@ fn operator_sections() {
|
||||
test("increment = 1 +", block![
|
||||
(Assignment (Ident increment) "="
|
||||
(OprSectionBoundary 1 (OprApp (Number () "1" ()) (Ok "+") ())))]);
|
||||
test!("1+ << 2*",
|
||||
(OprSectionBoundary 1
|
||||
(OprApp (OprApp (Number () "1" ()) (Ok "+") ())
|
||||
(Ok "<<")
|
||||
(OprSectionBoundary 1 (OprApp (Number () "2" ()) (Ok "*") ())))));
|
||||
test!("+1 << *2",
|
||||
(OprSectionBoundary 1
|
||||
(OprApp (OprApp () (Ok "+") (Number () "1" ()))
|
||||
(Ok "<<")
|
||||
(OprSectionBoundary 1 (OprApp () (Ok "*") (Number () "2" ()))))));
|
||||
test!("+1+1 << *2*2",
|
||||
(OprSectionBoundary 1
|
||||
(OprApp (OprApp (OprApp () (Ok "+") (Number () "1" ())) (Ok "+") (Number () "1" ()))
|
||||
(Ok "<<")
|
||||
(OprSectionBoundary 1 (OprApp (OprApp () (Ok "*") (Number () "2" ())) (Ok "*") (Number () "2" ()))))));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -873,13 +901,8 @@ fn unspaced_operator_sequence() {
|
||||
|
||||
#[test]
|
||||
fn minus_binary() {
|
||||
let cases = [
|
||||
("x - x", block![(OprApp (Ident x) (Ok "-") (Ident x))]),
|
||||
("x-x", block![(OprApp (Ident x) (Ok "-") (Ident x))]),
|
||||
("x.-y", block![(OprApp (Ident x) (Ok ".") (UnaryOprApp "-" (Ident y)))]),
|
||||
("x.~y", block![(OprApp (Ident x) (Ok ".") (UnaryOprApp "~" (Ident y)))]),
|
||||
];
|
||||
cases.into_iter().for_each(|(code, expected)| test(code, expected));
|
||||
test!("x - x", (OprApp (Ident x) (Ok "-") (Ident x)));
|
||||
test!("x-x", (OprApp (Ident x) (Ok "-") (Ident x)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -939,6 +962,8 @@ fn autoscope_operator() {
|
||||
expect_invalid_node("x = f(.. ..)");
|
||||
expect_invalid_node("x = f(.. *)");
|
||||
expect_invalid_node("x = f(.. True)");
|
||||
expect_invalid_node("x = True..");
|
||||
expect_invalid_node("x = True..True");
|
||||
expect_multiple_operator_error("x = ..");
|
||||
expect_multiple_operator_error("x = .. True");
|
||||
expect_multiple_operator_error("x : .. True");
|
||||
@ -1231,6 +1256,7 @@ fn old_lambdas() {
|
||||
test("x -> y", block![(OprApp (Ident x) (Ok "->") (Ident y))]);
|
||||
test("x->y", block![(OprApp (Ident x) (Ok "->") (Ident y))]);
|
||||
test("x-> y", block![(OprApp (Ident x) (Ok "->") (Ident y))]);
|
||||
test("x-> x + y", block![(OprApp (Ident x) (Ok "->") (OprApp (Ident x) (Ok "+") (Ident y)))]);
|
||||
test("x->\n y", block![(OprApp (Ident x) (Ok "->") (BodyBlock #((Ident y))))]);
|
||||
test("x ->\n y", block![(OprApp (Ident x) (Ok "->") (BodyBlock #((Ident y))))]);
|
||||
test("f x->\n y", block![
|
||||
@ -1815,9 +1841,8 @@ struct Errors {
|
||||
}
|
||||
|
||||
impl Errors {
|
||||
fn collect(code: &str) -> Self {
|
||||
let ast = parse(code);
|
||||
expect_tree_representing_code(code, &ast);
|
||||
fn collect(ast: &enso_parser::syntax::Tree, code: &str) -> Self {
|
||||
expect_tree_representing_code(code, ast);
|
||||
let errors = core::cell::Cell::new(Errors::default());
|
||||
ast.visit_trees(|tree| match &*tree.variant {
|
||||
enso_parser::syntax::tree::Variant::Invalid(_) => {
|
||||
@ -1834,18 +1859,22 @@ impl Errors {
|
||||
|
||||
/// Checks that an input contains an `Invalid` node somewhere.
|
||||
fn expect_invalid_node(code: &str) {
|
||||
let errors = Errors::collect(code);
|
||||
assert!(errors.invalid_node, "{:?}", enso_parser::Parser::new().run(code));
|
||||
let ast = enso_parser::Parser::new().run(code);
|
||||
let errors = Errors::collect(&ast, code);
|
||||
assert!(errors.invalid_node, "{}", to_s_expr(&ast, code));
|
||||
}
|
||||
|
||||
/// Checks that an input contains a multiple-operator error somewhere.
|
||||
fn expect_multiple_operator_error(code: &str) {
|
||||
let errors = Errors::collect(code);
|
||||
assert!(errors.multiple_operator, "{:?}", enso_parser::Parser::new().run(code));
|
||||
let ast = enso_parser::Parser::new().run(code);
|
||||
let errors = Errors::collect(&ast, code);
|
||||
assert!(errors.multiple_operator || errors.invalid_node, "{}", to_s_expr(&ast, code));
|
||||
assert!(errors.multiple_operator, "{:?}", ast);
|
||||
}
|
||||
|
||||
/// Check that the input can be parsed, and doesn't yield any `Invalid` nodes.
|
||||
fn expect_valid(code: &str) {
|
||||
let errors = Errors::collect(code);
|
||||
let ast = enso_parser::Parser::new().run(code);
|
||||
let errors = Errors::collect(&ast, code);
|
||||
assert!(!errors.invalid_node);
|
||||
}
|
||||
|
@ -44,7 +44,7 @@ trait Pattern {
|
||||
impl<T: FnMut(char) -> bool> Pattern for T {
|
||||
#[inline(always)]
|
||||
fn match_pattern(&mut self, input: char) -> bool {
|
||||
(self)(input)
|
||||
self(input)
|
||||
}
|
||||
}
|
||||
|
||||
@ -236,6 +236,12 @@ impl<'s> Lexer<'s> {
|
||||
self.output.push(token);
|
||||
}
|
||||
|
||||
/// Push the [`tokens`] to the result stream.
|
||||
#[inline(always)]
|
||||
fn submit_tokens<T: IntoIterator<Item = Token<'s>>>(&mut self, tokens: T) {
|
||||
self.output.extend(tokens);
|
||||
}
|
||||
|
||||
/// Start a new block.
|
||||
#[inline(always)]
|
||||
fn start_block(&mut self, new_indent: VisibleOffset) {
|
||||
@ -600,6 +606,9 @@ impl<'s> Lexer<'s> {
|
||||
this.take_while_1(is_ident_char);
|
||||
}
|
||||
}) {
|
||||
if token.left_offset.is_empty() {
|
||||
self.unspaced_term();
|
||||
}
|
||||
let tp = token::Variant::new_ident_or_wildcard_unchecked(&token.code);
|
||||
let token = token.with_variant(tp);
|
||||
self.submit_token(token);
|
||||
@ -672,6 +681,17 @@ impl<'s> Lexer<'s> {
|
||||
let token = token.with_variant(token::Variant::operator(opr));
|
||||
self.submit_token(token);
|
||||
}
|
||||
// Operator-identifiers.
|
||||
_ if self.prev_token_is_dot_operator() => {
|
||||
let properties = analyze_operator(&token.code);
|
||||
if properties.is_compile_time_operation() {
|
||||
self.submit_token(token.with_variant(token::Variant::operator(properties)));
|
||||
} else {
|
||||
self.submit_token(
|
||||
token.with_variant(token::Variant::operator_ident().into()),
|
||||
);
|
||||
}
|
||||
}
|
||||
// The unary-negation operator binds tighter to numeric literals than other
|
||||
// expressions.
|
||||
"-" if self.last_spaces_visible_offset.width_in_spaces == 0
|
||||
@ -693,6 +713,28 @@ impl<'s> Lexer<'s> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn prev_token_is_dot_operator(&self) -> bool {
|
||||
match self.output.last() {
|
||||
Some(Token { variant: token::Variant::Operator(operator), .. }) =>
|
||||
operator.properties.is_dot(),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn unspaced_term(&mut self) {
|
||||
if let Some(Token {
|
||||
variant:
|
||||
variant @ token::Variant::Ident(token::variant::Ident {
|
||||
is_operator_lexically: true,
|
||||
..
|
||||
}),
|
||||
..
|
||||
}) = self.output.last_mut()
|
||||
{
|
||||
*variant = token::Variant::invalid();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -881,6 +923,9 @@ impl<'s> Lexer<'s> {
|
||||
}
|
||||
});
|
||||
if let Some(token) = token {
|
||||
if token.left_offset.is_empty() {
|
||||
self.unspaced_term();
|
||||
}
|
||||
if let Some(base) = base {
|
||||
self.submit_token(token.with_variant(token::Variant::number_base()));
|
||||
let after_base = self.current_offset;
|
||||
@ -933,6 +978,9 @@ impl<'s> Lexer<'s> {
|
||||
}
|
||||
_ => return,
|
||||
};
|
||||
if self.last_spaces_visible_offset == VisibleOffset(0) {
|
||||
self.unspaced_term();
|
||||
}
|
||||
let indent = self.current_block_indent;
|
||||
let open_quote_start = self.mark();
|
||||
self.take_next();
|
||||
@ -963,17 +1011,17 @@ impl<'s> Lexer<'s> {
|
||||
close_quote_start.clone(),
|
||||
token::Variant::text_start(),
|
||||
);
|
||||
self.output.push(token);
|
||||
self.submit_token(token);
|
||||
let token =
|
||||
self.make_token(close_quote_start, close_quote_end, token::Variant::text_end());
|
||||
self.output.push(token);
|
||||
self.submit_token(token);
|
||||
}
|
||||
} else {
|
||||
// One quote followed by non-quote character: Inline quote.
|
||||
let open_quote_end = self.mark_without_whitespace();
|
||||
let token =
|
||||
self.make_token(open_quote_start, open_quote_end, token::Variant::text_start());
|
||||
self.output.push(token);
|
||||
self.submit_token(token);
|
||||
self.inline_quote(quote_char, text_type);
|
||||
}
|
||||
self.spaces_after_lexeme();
|
||||
@ -987,12 +1035,12 @@ impl<'s> Lexer<'s> {
|
||||
) {
|
||||
let open_quote_end = self.mark_without_whitespace();
|
||||
let token = self.make_token(open_quote_start, open_quote_end, token::Variant::text_start());
|
||||
self.output.push(token);
|
||||
self.submit_token(token);
|
||||
let mut initial_indent = None;
|
||||
if text_type.expects_initial_newline()
|
||||
&& let Some(newline) = self.line_break()
|
||||
{
|
||||
self.output.push(newline.with_variant(token::Variant::text_initial_newline()));
|
||||
self.submit_token(newline.with_variant(token::Variant::text_initial_newline()));
|
||||
if self.last_spaces_visible_offset > block_indent {
|
||||
initial_indent = self.last_spaces_visible_offset.into();
|
||||
}
|
||||
@ -1014,7 +1062,7 @@ impl<'s> Lexer<'s> {
|
||||
let splice_quote_end = self.mark_without_whitespace();
|
||||
let token =
|
||||
self.make_token(splice_quote_start, splice_quote_end, token::Variant::close_symbol());
|
||||
self.output.push(token);
|
||||
self.submit_token(token);
|
||||
match state {
|
||||
State::InlineText => self.inline_quote('\'', TextType::Interpolated),
|
||||
State::MultilineText { .. } => {
|
||||
@ -1061,8 +1109,8 @@ impl<'s> Lexer<'s> {
|
||||
);
|
||||
// If `token.code.is_empty()`, we ignore the `token.left_offset` here even if
|
||||
// it is non-empty, because it will be attached to the newline token.
|
||||
if !(token.code.is_empty()) {
|
||||
self.output.push(token);
|
||||
if !token.code.is_empty() {
|
||||
self.submit_token(token);
|
||||
} else {
|
||||
before_newline = text_start;
|
||||
}
|
||||
@ -1097,9 +1145,9 @@ impl<'s> Lexer<'s> {
|
||||
let offset = Offset(VisibleOffset(0), location.clone());
|
||||
Token(offset, location, token::Variant::text_end())
|
||||
};
|
||||
self.output.push(text_end);
|
||||
self.submit_token(text_end);
|
||||
self.end_blocks(indent, newlines.first().as_ref().unwrap());
|
||||
self.output.extend(newlines);
|
||||
self.submit_tokens(newlines);
|
||||
if self.current_offset == text_start.location {
|
||||
self.last_spaces_visible_offset = text_start.offset.visible;
|
||||
self.last_spaces_offset = text_start.offset.code.range().start;
|
||||
@ -1109,7 +1157,7 @@ impl<'s> Lexer<'s> {
|
||||
let newlines = newlines
|
||||
.into_iter()
|
||||
.map(|token| token.with_variant(token::Variant::text_newline()));
|
||||
self.output.extend(newlines);
|
||||
self.submit_tokens(newlines);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@ -1125,7 +1173,7 @@ impl<'s> Lexer<'s> {
|
||||
if token.code.is_empty() {
|
||||
backslash_start = text_start.clone();
|
||||
} else {
|
||||
self.output.push(token);
|
||||
self.submit_token(token);
|
||||
}
|
||||
self.last_spaces_offset = self.current_offset;
|
||||
text_start = self.text_escape(backslash_start, char);
|
||||
@ -1144,7 +1192,7 @@ impl<'s> Lexer<'s> {
|
||||
if token.code.is_empty() {
|
||||
splice_quote_start = text_start;
|
||||
} else {
|
||||
self.output.push(token);
|
||||
self.submit_token(token);
|
||||
}
|
||||
self.take_next();
|
||||
let splice_quote_end = self.mark_without_whitespace();
|
||||
@ -1153,7 +1201,7 @@ impl<'s> Lexer<'s> {
|
||||
splice_quote_end,
|
||||
token::Variant::open_symbol(),
|
||||
);
|
||||
self.output.push(token);
|
||||
self.submit_token(token);
|
||||
self.stack.push(state);
|
||||
self.last_spaces_offset = self.current_offset;
|
||||
return TextEndedAt::Splice;
|
||||
@ -1163,7 +1211,7 @@ impl<'s> Lexer<'s> {
|
||||
let text_end = self.mark_without_whitespace();
|
||||
let token = self.make_token(text_start, text_end.clone(), token::Variant::text_section());
|
||||
if !(token.code.is_empty() && token.left_offset.code.is_empty()) {
|
||||
self.output.push(token);
|
||||
self.submit_token(token);
|
||||
}
|
||||
let end_token = if self.current_char == closing_char {
|
||||
self.take_next();
|
||||
@ -1175,7 +1223,7 @@ impl<'s> Lexer<'s> {
|
||||
Code::empty(self.current_offset),
|
||||
))
|
||||
};
|
||||
self.output.push(end_token);
|
||||
self.submit_token(end_token);
|
||||
TextEndedAt::End
|
||||
}
|
||||
|
||||
@ -1213,7 +1261,7 @@ impl<'s> Lexer<'s> {
|
||||
sequence_end.clone(),
|
||||
token::Variant::text_escape(value.map(Codepoint::from_u32).unwrap_or_default()),
|
||||
);
|
||||
self.output.push(token);
|
||||
self.submit_token(token);
|
||||
sequence_end
|
||||
} else {
|
||||
let value = match char {
|
||||
@ -1239,7 +1287,7 @@ impl<'s> Lexer<'s> {
|
||||
escape_end.clone(),
|
||||
token::Variant::text_escape(value.map(Codepoint::from_char).unwrap_or_default()),
|
||||
);
|
||||
self.output.push(token);
|
||||
self.submit_token(token);
|
||||
escape_end
|
||||
}
|
||||
}
|
||||
@ -1486,7 +1534,7 @@ pub fn run(input: &'_ str) -> ParseResult<Vec<Token<'_>>> {
|
||||
// === Tests ===
|
||||
// =============
|
||||
|
||||
/// Test utils for fast mock tokens creation.
|
||||
/// Test utils for fast mock token creation.
|
||||
pub mod test {
|
||||
use super::*;
|
||||
pub use token::*;
|
||||
|
@ -726,7 +726,7 @@ fn splice_body<'s>(
|
||||
let expression = segment.result.tokens();
|
||||
let expression = precedence.resolve(expression);
|
||||
let splice = syntax::tree::TextElement::Splice { open, expression, close };
|
||||
syntax::Tree::text_literal(default(), default(), vec![splice], default(), default())
|
||||
syntax::Tree::text_literal(default(), default(), vec![splice], default())
|
||||
}
|
||||
|
||||
fn foreign<'s>() -> Definition<'s> {
|
||||
|
@ -11,6 +11,10 @@ pub mod operator;
|
||||
pub mod token;
|
||||
pub mod tree;
|
||||
|
||||
|
||||
|
||||
mod treebuilding;
|
||||
|
||||
pub use item::Item;
|
||||
pub use token::Token;
|
||||
pub use tree::Tree;
|
||||
|
@ -1,11 +1,29 @@
|
||||
//! Operator related functionalities.
|
||||
|
||||
|
||||
|
||||
mod application;
|
||||
mod arity;
|
||||
mod operand;
|
||||
mod reducer;
|
||||
mod types;
|
||||
|
||||
use crate::prelude::*;
|
||||
|
||||
use crate::syntax;
|
||||
use crate::syntax::operator::application::InsertApps;
|
||||
use crate::syntax::operator::arity::ClassifyArity;
|
||||
use crate::syntax::operator::operand::Operand;
|
||||
use crate::syntax::operator::reducer::Reduce;
|
||||
use crate::syntax::operator::types::Arity;
|
||||
use crate::syntax::operator::types::BinaryOperand;
|
||||
use crate::syntax::operator::types::ModifiedPrecedence;
|
||||
use crate::syntax::operator::types::Operator;
|
||||
use crate::syntax::token;
|
||||
use crate::syntax::token::Token;
|
||||
|
||||
use crate::syntax::treebuilding;
|
||||
use crate::syntax::treebuilding::Finish;
|
||||
use crate::syntax::treebuilding::ItemConsumer;
|
||||
use crate::syntax::Tree;
|
||||
|
||||
|
||||
// ==================
|
||||
@ -13,28 +31,28 @@ use crate::syntax::token::Token;
|
||||
// ==================
|
||||
|
||||
/// Operator precedence resolver.
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Precedence<'s> {
|
||||
nospace_builder: ExpressionBuilder<'s>,
|
||||
builder: ExpressionBuilder<'s>,
|
||||
/// Parses child blocks. Stores no semantic state, but is reused for performance.
|
||||
child: Option<Box<Precedence<'s>>>,
|
||||
}
|
||||
|
||||
impl<'s> Default for Precedence<'s> {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
#[rustfmt::skip]
|
||||
resolver:
|
||||
// Items -> Tokens/Trees
|
||||
treebuilding::FlattenBlockTrees<'s,
|
||||
// Tokens/Trees -> Tokens/Trees (proper tokens only)
|
||||
treebuilding::AssembleCompoundTokens<'s,
|
||||
// Tokens/Trees -> Tokens/Trees + Spacing-lookahead
|
||||
treebuilding::PeekSpacing<'s,
|
||||
// Tokens/Trees + Spacing-lookahead -> Operators/Operands
|
||||
ClassifyArity<'s,
|
||||
// Operators/Operands -> Operators/Operands (balanced)
|
||||
InsertApps<
|
||||
// Operators/Operands -> Tree
|
||||
Reduce<'s>>>>>>,
|
||||
}
|
||||
|
||||
impl<'s> Precedence<'s> {
|
||||
/// Return a new operator precedence resolver.
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
nospace_builder: ExpressionBuilder { nospace: true, ..default() },
|
||||
builder: ExpressionBuilder { nospace: false, ..default() },
|
||||
child: default(),
|
||||
}
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Resolve precedence in a context where the result cannot be an operator section or template
|
||||
@ -42,50 +60,28 @@ impl<'s> Precedence<'s> {
|
||||
pub fn resolve_non_section(
|
||||
&mut self,
|
||||
items: impl IntoIterator<Item = syntax::Item<'s>>,
|
||||
) -> Option<syntax::Tree<'s>> {
|
||||
) -> Option<Tree<'s>> {
|
||||
items.into_iter().for_each(|i| self.push(i));
|
||||
self.finish_().map(|op| op.value)
|
||||
self.resolver.finish().map(|op| op.value)
|
||||
}
|
||||
|
||||
/// Resolve precedence.
|
||||
pub fn resolve(
|
||||
&mut self,
|
||||
items: impl IntoIterator<Item = syntax::Item<'s>>,
|
||||
) -> Option<syntax::Tree<'s>> {
|
||||
items.into_iter().for_each(|i| self.push(i));
|
||||
) -> Option<Tree<'s>> {
|
||||
self.extend(items);
|
||||
self.finish()
|
||||
}
|
||||
|
||||
/// Extend the expression with a token.
|
||||
pub fn push(&mut self, item: syntax::Item<'s>) {
|
||||
if starts_new_no_space_group(&item) {
|
||||
self.builder.extend_from(&mut self.nospace_builder);
|
||||
}
|
||||
match item {
|
||||
syntax::Item::Token(Token {
|
||||
variant: token::Variant::Operator(opr),
|
||||
left_offset,
|
||||
code,
|
||||
}) => self.nospace_builder.operator(Token(left_offset, code, opr)),
|
||||
syntax::Item::Token(token) =>
|
||||
self.nospace_builder.operand(syntax::tree::to_ast(token).into()),
|
||||
syntax::Item::Tree(tree) => self.nospace_builder.operand(tree.into()),
|
||||
syntax::Item::Block(lines) => {
|
||||
let mut child = self.child.take().unwrap_or_default();
|
||||
self.nospace_builder.operand(syntax::item::build_block(lines, &mut child).into());
|
||||
self.child = Some(child);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn finish_(&mut self) -> Option<Operand<syntax::Tree<'s>>> {
|
||||
self.builder.extend_from(&mut self.nospace_builder);
|
||||
self.builder.finish()
|
||||
self.resolver.push_item(item);
|
||||
}
|
||||
|
||||
/// Return the result.
|
||||
pub fn finish(&mut self) -> Option<syntax::Tree<'s>> {
|
||||
self.finish_().map(syntax::Tree::from)
|
||||
pub fn finish(&mut self) -> Option<Tree<'s>> {
|
||||
self.resolver.finish().map(Tree::from)
|
||||
}
|
||||
}
|
||||
|
||||
@ -97,251 +93,46 @@ impl<'s> Extend<syntax::Item<'s>> for Precedence<'s> {
|
||||
}
|
||||
}
|
||||
|
||||
// Returns `true` for an item if that item should not follow any other item in a no-space group
|
||||
// (i.e. the item has "space" before it).
|
||||
fn starts_new_no_space_group(item: &syntax::item::Item) -> bool {
|
||||
if item.left_visible_offset().width_in_spaces != 0 {
|
||||
return true;
|
||||
}
|
||||
if let syntax::item::Item::Block(_) = item {
|
||||
return true;
|
||||
}
|
||||
if let syntax::item::Item::Token(Token { variant: token::Variant::Operator(opr), .. }) = item
|
||||
&& opr.properties.is_sequence()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
false
|
||||
|
||||
// === Operator or Operand ===
|
||||
|
||||
#[derive(Debug)]
|
||||
enum OperatorOrOperand<'s> {
|
||||
Operand(Operand<Tree<'s>>),
|
||||
Operator(Operator<'s>),
|
||||
}
|
||||
|
||||
|
||||
// === Expression builder ===
|
||||
|
||||
/// Stack machine that builds an expression from syntax nodes.
|
||||
///
|
||||
/// The operator-precedence algorithm[1] used is based on the shunting yard algorithm[2], extended
|
||||
/// to support *operator sections*, function application, and unary operators, and correctly report
|
||||
/// errors relating to consecutive operators.
|
||||
///
|
||||
/// [^1](https://en.wikipedia.org/wiki/Operator-precedence_parser)
|
||||
/// [^2](https://en.wikipedia.org/wiki/Shunting_yard_algorithm)
|
||||
#[derive(Default, Debug, PartialEq, Eq)]
|
||||
struct ExpressionBuilder<'s> {
|
||||
output: Vec<Operand<syntax::Tree<'s>>>,
|
||||
operator_stack: Vec<Operator<'s>>,
|
||||
prev_type: Option<ItemType>,
|
||||
nospace: bool,
|
||||
}
|
||||
|
||||
impl<'s> ExpressionBuilder<'s> {
|
||||
/// Extend the expression with an operand.
|
||||
pub fn operand(&mut self, operand: Operand<syntax::Tree<'s>>) {
|
||||
if self.prev_type == Some(ItemType::Ast) {
|
||||
if let Some(Operand {
|
||||
value:
|
||||
syntax::Tree {
|
||||
variant: box syntax::tree::Variant::TextLiteral(ref mut lhs),
|
||||
span: lhs_span,
|
||||
},
|
||||
..
|
||||
}) = self.output.last_mut()
|
||||
&& !lhs.closed
|
||||
&& let box syntax::tree::Variant::TextLiteral(mut rhs) = operand.value.variant
|
||||
{
|
||||
syntax::tree::join_text_literals(lhs, &mut rhs, lhs_span, operand.value.span);
|
||||
if let syntax::tree::TextLiteral {
|
||||
open: Some(open),
|
||||
newline: None,
|
||||
elements,
|
||||
closed: true,
|
||||
close: None,
|
||||
} = lhs
|
||||
&& open.code.starts_with('#')
|
||||
{
|
||||
let elements = mem::take(elements);
|
||||
let mut open = open.clone();
|
||||
let lhs_tree = self.output.pop().unwrap().value;
|
||||
open.left_offset += lhs_tree.span.left_offset;
|
||||
let doc = syntax::tree::DocComment { open, elements, newlines: default() };
|
||||
self.output.push(syntax::Tree::documented(doc, default()).into());
|
||||
}
|
||||
return;
|
||||
}
|
||||
self.application();
|
||||
}
|
||||
self.output.push(operand);
|
||||
self.prev_type = Some(ItemType::Ast);
|
||||
}
|
||||
|
||||
fn application(&mut self) {
|
||||
let precedence = token::Precedence::application();
|
||||
let associativity = token::Associativity::Left;
|
||||
let arity = Arity::Binary {
|
||||
tokens: default(),
|
||||
lhs_section_termination: default(),
|
||||
};
|
||||
self.push_operator(precedence, associativity, arity);
|
||||
}
|
||||
|
||||
/// Extend the expression with an operator.
|
||||
pub fn operator(&mut self, opr: token::Operator<'s>) {
|
||||
use ItemType::*;
|
||||
let assoc = opr.properties.associativity();
|
||||
match (
|
||||
self.nospace,
|
||||
opr.properties.binary_infix_precedence(),
|
||||
opr.properties.unary_prefix_precedence(),
|
||||
) {
|
||||
// If an operator has a binary role, and a LHS is available, it's acting as binary.
|
||||
(_, Some(prec), _) if self.prev_type == Some(Ast) =>
|
||||
self.binary_operator(prec, assoc, opr),
|
||||
// Otherwise, if the operator is inside a nospace group, and it has a unary role,
|
||||
// it's acting as unary.
|
||||
(true, _, Some(prec)) => self.unary_operator(prec, assoc, Unary::Simple(opr)),
|
||||
// Outside of a nospace group, a unary-only operator is missing an operand.
|
||||
(false, None, Some(_)) => self.unary_operator_section(opr),
|
||||
// Binary operator section (no LHS).
|
||||
(_, Some(prec), _) => self.binary_operator(prec, assoc, opr),
|
||||
// Failed to compute a role for the operator; this should not be possible.
|
||||
(_, None, None) => unreachable!(),
|
||||
impl<'s> From<Operand<Tree<'s>>> for OperatorOrOperand<'s> {
|
||||
fn from(operand: Operand<Tree<'s>>) -> Self {
|
||||
OperatorOrOperand::Operand(operand)
|
||||
}
|
||||
}
|
||||
|
||||
fn unary_operator(
|
||||
&mut self,
|
||||
prec: token::Precedence,
|
||||
assoc: token::Associativity,
|
||||
mut arity: Unary<'s>,
|
||||
) {
|
||||
if self.prev_type == Some(ItemType::Opr)
|
||||
&& let Some(prev_opr) = self.operator_stack.last_mut()
|
||||
&& let Arity::Binary { tokens, .. } = &mut prev_opr.opr
|
||||
&& !self.nospace
|
||||
&& let Unary::Simple(opr) = arity
|
||||
{
|
||||
tokens.push(opr);
|
||||
return;
|
||||
}
|
||||
if self.prev_type == Some(ItemType::Ast) {
|
||||
self.application();
|
||||
if self.nospace {
|
||||
if let Unary::Simple(token) = arity {
|
||||
let error = "Space required between term and unary-operator expression.".into();
|
||||
arity = Unary::Invalid { token, error };
|
||||
}
|
||||
}
|
||||
}
|
||||
self.push_operator(prec, assoc, Arity::Unary(arity));
|
||||
}
|
||||
|
||||
fn unary_operator_section(&mut self, opr: token::Operator<'s>) {
|
||||
if self.prev_type == Some(ItemType::Opr)
|
||||
&& let Some(prev_opr) = self.operator_stack.last_mut()
|
||||
&& let Arity::Binary { tokens, .. } = &mut prev_opr.opr
|
||||
{
|
||||
// Multiple-operator error.
|
||||
tokens.push(opr);
|
||||
} else {
|
||||
self.operand(Operand {
|
||||
elided: 1,
|
||||
..Operand::from(syntax::tree::apply_unary_operator(opr, None))
|
||||
});
|
||||
impl<'s> From<Operator<'s>> for OperatorOrOperand<'s> {
|
||||
fn from(operator: Operator<'s>) -> Self {
|
||||
OperatorOrOperand::Operator(operator)
|
||||
}
|
||||
}
|
||||
|
||||
/// Extend the expression with a binary operator, by pushing it to the `operator_stack` or
|
||||
/// emitting a multiple-operator error.
|
||||
fn binary_operator(
|
||||
&mut self,
|
||||
prec: token::Precedence,
|
||||
assoc: token::Associativity,
|
||||
opr: token::Operator<'s>,
|
||||
) {
|
||||
if self.prev_type == Some(ItemType::Opr)
|
||||
&& let Some(prev_opr) = self.operator_stack.last_mut()
|
||||
&& let Arity::Binary { tokens, .. } = &mut prev_opr.opr
|
||||
{
|
||||
if tokens.len() == 1 && tokens[0].properties.is_dot() {
|
||||
let Token { left_offset, code, .. } = opr;
|
||||
let is_operator = true;
|
||||
let opr_ident = token::ident(
|
||||
left_offset,
|
||||
code,
|
||||
default(),
|
||||
default(),
|
||||
default(),
|
||||
is_operator,
|
||||
default(),
|
||||
);
|
||||
self.output.push(Operand::from(syntax::Tree::ident(opr_ident)));
|
||||
self.prev_type = Some(ItemType::Ast);
|
||||
return;
|
||||
}
|
||||
tokens.push(opr);
|
||||
return;
|
||||
}
|
||||
self.push_operator(prec, assoc, Arity::binary(opr));
|
||||
}
|
||||
|
||||
/// Add an operator to the stack; [`reduce`] the stack first, as appropriate for the specified
|
||||
/// precedence.
|
||||
fn push_operator(
|
||||
&mut self,
|
||||
precedence: token::Precedence,
|
||||
associativity: token::Associativity,
|
||||
opr: Arity<'s>,
|
||||
) {
|
||||
let opr = Operator { precedence, associativity, opr };
|
||||
// When a unary operator follows another operator, we defer reducing the stack because a
|
||||
// unary operator's affinity for its operand is stronger than any operator precedence.
|
||||
let defer_reducing_stack = match (&self.prev_type, &opr.opr) {
|
||||
(Some(ItemType::Opr), Arity::Unary(Unary::Simple(_))) if self.nospace => true,
|
||||
(Some(ItemType::Opr), Arity::Unary(Unary::Fragment { .. })) => true,
|
||||
_ => false,
|
||||
};
|
||||
if !defer_reducing_stack {
|
||||
let mut rhs = self.output.pop();
|
||||
self.reduce(precedence, &mut rhs);
|
||||
if let Some(rhs) = rhs {
|
||||
self.output.push(rhs);
|
||||
}
|
||||
}
|
||||
self.operator_stack.push(opr);
|
||||
self.prev_type = Some(ItemType::Opr);
|
||||
}
|
||||
// === Applying operators ===
|
||||
|
||||
/// Given a starting value, replace it with the result of successively applying to it all
|
||||
/// operators in the `operator_stack` that have precedence greater than or equal to the
|
||||
/// specified value, consuming LHS values from the `output` stack as needed.
|
||||
fn reduce(&mut self, prec: token::Precedence, rhs: &mut Option<Operand<syntax::Tree<'s>>>) {
|
||||
while let Some(opr) = self.operator_stack.pop_if(|opr| {
|
||||
opr.precedence > prec
|
||||
|| (opr.precedence == prec && opr.associativity == token::Associativity::Left)
|
||||
}) {
|
||||
let rhs_ = rhs.take();
|
||||
let ast = match opr.opr {
|
||||
Arity::Unary(Unary::Simple(opr)) =>
|
||||
Operand::new(rhs_).map(|item| syntax::tree::apply_unary_operator(opr, item)),
|
||||
Arity::Unary(Unary::Invalid { token, error }) => Operand::from(rhs_)
|
||||
.map(|item| syntax::tree::apply_unary_operator(token, item).with_error(error)),
|
||||
Arity::Unary(Unary::Fragment { mut fragment }) => {
|
||||
if let Some(rhs_) = rhs_ {
|
||||
fragment.operand(rhs_);
|
||||
}
|
||||
fragment.finish().unwrap()
|
||||
}
|
||||
Arity::Binary { tokens, lhs_section_termination } => {
|
||||
let lhs = self.output.pop();
|
||||
fn apply_operator<'s>(
|
||||
tokens: Vec<token::Operator<'s>>,
|
||||
lhs_section_termination: Option<SectionTermination>,
|
||||
reify_rhs_section: bool,
|
||||
lhs: Option<Operand<Tree<'s>>>,
|
||||
rhs_: Option<Operand<Tree<'s>>>,
|
||||
) -> Operand<Tree<'s>> {
|
||||
if let Some(lhs_termination) = lhs_section_termination {
|
||||
let lhs = match lhs_termination {
|
||||
SectionTermination::Reify => lhs.map(syntax::Tree::from),
|
||||
SectionTermination::Reify => lhs.map(Tree::from),
|
||||
SectionTermination::Unwrap => lhs.map(|op| op.value),
|
||||
};
|
||||
let rhs = rhs_.map(syntax::Tree::from);
|
||||
let rhs = rhs_.map(Tree::from);
|
||||
let ast = syntax::tree::apply_operator(lhs, tokens, rhs);
|
||||
Operand::from(ast)
|
||||
} else if self.nospace
|
||||
&& tokens.len() < 2
|
||||
} else if tokens.len() < 2
|
||||
&& let Some(opr) = tokens.first()
|
||||
&& opr.properties.can_form_section()
|
||||
{
|
||||
@ -349,234 +140,54 @@ impl<'s> ExpressionBuilder<'s> {
|
||||
let mut elided = 0;
|
||||
let mut wildcards = 0;
|
||||
if let Some(rhs_) = rhs_ {
|
||||
if reify_rhs_section {
|
||||
rhs = Some(Tree::from(rhs_));
|
||||
} else {
|
||||
rhs = Some(rhs_.value);
|
||||
elided += rhs_.elided;
|
||||
wildcards += rhs_.wildcards;
|
||||
}
|
||||
}
|
||||
elided += lhs.is_none() as u32 + rhs.is_none() as u32;
|
||||
let mut operand = Operand::from(lhs)
|
||||
.map(|lhs| syntax::tree::apply_operator(lhs, tokens, rhs));
|
||||
let mut operand =
|
||||
Operand::from(lhs).map(|lhs| syntax::tree::apply_operator(lhs, tokens, rhs));
|
||||
operand.elided += elided;
|
||||
operand.wildcards += wildcards;
|
||||
operand
|
||||
} else {
|
||||
let rhs = rhs_.map(syntax::Tree::from);
|
||||
let rhs = rhs_.map(Tree::from);
|
||||
let mut elided = 0;
|
||||
if tokens.len() != 1 || tokens[0].properties.can_form_section() {
|
||||
elided += lhs.is_none() as u32 + rhs.is_none() as u32;
|
||||
}
|
||||
let mut operand = Operand::from(lhs)
|
||||
.map(|lhs| syntax::tree::apply_operator(lhs, tokens, rhs));
|
||||
let mut operand =
|
||||
Operand::from(lhs).map(|lhs| syntax::tree::apply_operator(lhs, tokens, rhs));
|
||||
operand.elided += elided;
|
||||
operand
|
||||
}
|
||||
}
|
||||
};
|
||||
*rhs = Some(ast);
|
||||
|
||||
fn apply_unary_operator<'s>(
|
||||
token: token::Operator<'s>,
|
||||
rhs: Option<Operand<Tree<'s>>>,
|
||||
error: Option<Cow<'static, str>>,
|
||||
) -> Operand<Tree<'s>> {
|
||||
match error {
|
||||
None => Operand::new(rhs).map(|item| syntax::tree::apply_unary_operator(token, item)),
|
||||
Some(error) => Operand::from(rhs)
|
||||
.map(|item| syntax::tree::apply_unary_operator(token, item).with_error(error)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return an expression constructed from the accumulated state. Will return `None` only if no
|
||||
/// inputs were provided. `self` will be reset to its initial state.
|
||||
pub fn finish(&mut self) -> Option<Operand<syntax::Tree<'s>>> {
|
||||
use ItemType::*;
|
||||
let mut out = (self.prev_type == Some(Ast)).and_option_from(|| self.output.pop());
|
||||
self.reduce(token::Precedence::min(), &mut out);
|
||||
debug_assert!(self.operator_stack.is_empty());
|
||||
debug_assert_eq!(
|
||||
&self.output,
|
||||
&[],
|
||||
"Internal error. Not all tokens were consumed while constructing the expression."
|
||||
);
|
||||
self.prev_type = None;
|
||||
out
|
||||
|
||||
// === Operator and Operand Consumers ===
|
||||
|
||||
/// A consumer of operands; one stage of the operator-resolution pipeline.
trait OperandConsumer<'s> {
|
||||
/// Accept the next operand of the expression being built.
fn push_operand(&mut self, operand: Operand<Tree<'s>>);
|
||||
}
|
||||
|
||||
/// Extend the expression with the contents of a [`Self`] built from a subexpression that
|
||||
/// contains no spaces.
|
||||
pub fn extend_from(&mut self, child: &mut Self) {
|
||||
if child.output.is_empty() {
|
||||
// If the unspaced subexpression doesn't contain any non-operators, promote each
|
||||
// operator in the (unspaced) child to an operator in the (spaced) parent.
|
||||
//
|
||||
// The case where `child.operator_stack.len() > 1` is subtle:
|
||||
//
|
||||
// A sequence of operator characters without intervening whitespace is lexed as multiple
|
||||
// operators in some cases where the last character is `-`.
|
||||
//
|
||||
// In such a case, an unspaced expression-builder will:
|
||||
// 1. Push the first operator to the operator stack (composed of all the operator
|
||||
// characters except the trailing `-`).
|
||||
// 2. Push `-` to the operator stack, without reducing the expression (because the `-`
|
||||
// should be interpreted as a unary operator if a value follows it within the
|
||||
// unspaced subexpression).
|
||||
//
|
||||
// Thus, if we encounter an unspaced subexpression consisting only of multiple
|
||||
// operators: When we append each operator to the parent (spaced) expression-builder, it
|
||||
// will be reinterpreted in a *spaced* context. In a spaced context, the sequence of
|
||||
// operators will cause a multiple-operator error.
|
||||
for op in child.operator_stack.drain(..) {
|
||||
match op.opr {
|
||||
Arity::Unary(Unary::Simple(un)) => self.operator(un),
|
||||
Arity::Unary(Unary::Invalid { .. }) => unreachable!(),
|
||||
Arity::Unary(Unary::Fragment { .. }) => unreachable!(),
|
||||
Arity::Binary { tokens, .. } =>
|
||||
tokens.into_iter().for_each(|op| self.operator(op)),
|
||||
}
|
||||
}
|
||||
child.prev_type = None;
|
||||
return;
|
||||
}
|
||||
if child.prev_type == Some(ItemType::Opr)
|
||||
&& let Arity::Binary { tokens, .. } = &child.operator_stack.last().unwrap().opr
|
||||
&& let Some(token) = tokens.last()
|
||||
&& token.properties.is_arrow()
|
||||
{
|
||||
let precedence = token::Precedence::min_valid();
|
||||
let associativity = token::Associativity::Right;
|
||||
let fragment = ExpressionBuilder {
|
||||
output: mem::take(&mut child.output),
|
||||
operator_stack: mem::take(&mut child.operator_stack),
|
||||
prev_type: mem::take(&mut child.prev_type),
|
||||
nospace: child.nospace,
|
||||
};
|
||||
let arity = Unary::Fragment { fragment };
|
||||
self.unary_operator(precedence, associativity, arity);
|
||||
return;
|
||||
}
|
||||
if let Some(o) = child.finish() {
|
||||
self.operand(o);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Coarse classification of an item as operand or operator; used in [`Precedence::resolve`] to
/// merge consecutive nodes of the same type.
#[derive(Debug, PartialEq, Eq)]
enum ItemType {
    /// An operand (a syntax tree).
    Ast,
    /// An operator.
    Opr,
}
|
||||
|
||||
|
||||
// === Operator ===
|
||||
|
||||
/// An operator, whose arity and precedence have been determined.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
struct Operator<'s> {
|
||||
precedence: token::Precedence,
|
||||
associativity: token::Associativity,
|
||||
opr: Arity<'s>,
|
||||
}
|
||||
|
||||
/// Classifies the role of an operator.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
enum Arity<'s> {
|
||||
Unary(Unary<'s>),
|
||||
Binary {
|
||||
tokens: Vec<token::Operator<'s>>,
|
||||
lhs_section_termination: Option<SectionTermination>,
|
||||
},
|
||||
}
|
||||
|
||||
impl<'s> Arity<'s> {
|
||||
fn binary(tok: token::Operator<'s>) -> Self {
|
||||
let lhs_section_termination = tok.properties.lhs_section_termination();
|
||||
let tokens = vec![tok];
|
||||
Self::Binary { tokens, lhs_section_termination }
|
||||
}
|
||||
|
||||
fn unary(tok: token::Operator<'s>) -> Self {
|
||||
Self::Unary(Unary::Simple(tok))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
enum Unary<'s> {
|
||||
Simple(token::Operator<'s>),
|
||||
Invalid { token: token::Operator<'s>, error: Cow<'static, str> },
|
||||
Fragment { fragment: ExpressionBuilder<'s> },
|
||||
}
|
||||
|
||||
|
||||
// === Operand ===
|
||||
|
||||
/// A value together with counts of the wildcards and elided operands found within it.
#[derive(Debug, Default, Eq, PartialEq)]
struct Operand<T> {
    /// The wrapped value.
    value:     T,
    /// Number of elided operands in the subtree, potentially forming an *operator section*.
    elided:    u32,
    /// Number of wildcards in the subtree, potentially forming a *template function*.
    wildcards: u32,
}
|
||||
|
||||
/// Transpose. Note that an absent input will not be treated as an elided value; for that
|
||||
/// conversion, use [`Operand::new`].
|
||||
impl<T> From<Option<Operand<T>>> for Operand<Option<T>> {
|
||||
fn from(operand: Option<Operand<T>>) -> Self {
|
||||
match operand {
|
||||
Some(Operand { value, elided, wildcards }) =>
|
||||
Self { value: Some(value), elided, wildcards },
|
||||
None => default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Unit. Creates an Operand from a node.
|
||||
impl<'s> From<syntax::Tree<'s>> for Operand<syntax::Tree<'s>> {
|
||||
fn from(mut value: syntax::Tree<'s>) -> Self {
|
||||
let elided = 0;
|
||||
let wildcards = if let syntax::Tree {
|
||||
variant:
|
||||
box syntax::tree::Variant::Wildcard(syntax::tree::Wildcard { de_bruijn_index, .. }),
|
||||
..
|
||||
} = &mut value
|
||||
{
|
||||
debug_assert_eq!(*de_bruijn_index, None);
|
||||
*de_bruijn_index = Some(0);
|
||||
1
|
||||
} else {
|
||||
0
|
||||
};
|
||||
Self { value, wildcards, elided }
|
||||
}
|
||||
}
|
||||
|
||||
/// Counit. Bakes any information about elided operands into the tree.
|
||||
impl<'s> From<Operand<syntax::Tree<'s>>> for syntax::Tree<'s> {
|
||||
fn from(operand: Operand<syntax::Tree<'s>>) -> Self {
|
||||
let Operand { mut value, elided, wildcards } = operand;
|
||||
if elided != 0 {
|
||||
value = syntax::Tree::opr_section_boundary(elided, value);
|
||||
}
|
||||
if wildcards != 0 {
|
||||
value = syntax::Tree::template_function(wildcards, value);
|
||||
}
|
||||
value
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Operand<Option<T>> {
|
||||
/// Lift an option value to a potentially-elided operand.
|
||||
fn new(value: Option<Operand<T>>) -> Self {
|
||||
match value {
|
||||
None => Self { value: None, elided: 1, wildcards: default() },
|
||||
Some(value) => {
|
||||
let Operand { value, elided, wildcards } = value;
|
||||
Self { value: Some(value), elided, wildcards }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Operand<T> {
|
||||
/// Operate on the contained value without altering the elided-operand information.
|
||||
fn map<U>(self, f: impl FnOnce(T) -> U) -> Operand<U> {
|
||||
let Self { value, elided, wildcards } = self;
|
||||
let value = f(value);
|
||||
Operand { value, elided, wildcards }
|
||||
}
|
||||
/// A consumer of operators; one stage of the operator-resolution pipeline.
trait OperatorConsumer<'s> {
|
||||
/// Accept the next operator of the expression being built.
fn push_operator(&mut self, operator: Operator<'s>);
|
||||
}
|
||||
|
||||
|
||||
@ -584,17 +195,12 @@ impl<T> Operand<T> {
|
||||
|
||||
/// Operator-section/template-function termination behavior of an operator with regard to an
/// operand.
///
/// The default is [`SectionTermination::Reify`]. `Default` is derived via the `#[default]`
/// variant attribute, so no separate `impl Default` is needed (a second derive line or a manual
/// impl alongside this one would be a conflicting implementation).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default)]
pub enum SectionTermination {
    /// If the operand is an operator-section/template-function, indicate it by wrapping it in a
    /// suitable node.
    #[default]
    Reify,
    /// Discard any operator-section/template-function properties associated with the operand.
    Unwrap,
}
|
||||
|
79
lib/rust/parser/src/syntax/operator/application.rs
Normal file
79
lib/rust/parser/src/syntax/operator/application.rs
Normal file
@ -0,0 +1,79 @@
|
||||
use enso_prelude::*;
|
||||
|
||||
use crate::syntax::operator::operand::Operand;
|
||||
use crate::syntax::operator::types::Arity;
|
||||
use crate::syntax::operator::types::BinaryOperand;
|
||||
use crate::syntax::operator::types::ModifiedPrecedence;
|
||||
use crate::syntax::operator::types::Operator;
|
||||
use crate::syntax::operator::OperandConsumer;
|
||||
use crate::syntax::operator::OperatorConsumer;
|
||||
use crate::syntax::token;
|
||||
use crate::syntax::treebuilding::Finish;
|
||||
use crate::syntax::treebuilding::Spacing;
|
||||
use crate::syntax::Tree;
|
||||
|
||||
|
||||
|
||||
// ===================
|
||||
// === Insert Apps ===
|
||||
// ===================
|
||||
|
||||
/// Pipeline stage that inserts application operators between adjacent terms where needed, and
/// forwards everything to `inner`.
#[derive(Debug, Default)]
pub struct InsertApps<Inner> {
    /// Whether the most recently pushed item can be applied to a following term: set after an
    /// operand, and after a binary operator that is missing its right operand.
    prev_applicable: bool,
    /// The next stage of the pipeline.
    inner:           Inner,
}
|
||||
|
||||
impl<'s, Inner: OperatorConsumer<'s> + OperandConsumer<'s>> OperandConsumer<'s>
|
||||
for InsertApps<Inner>
|
||||
{
|
||||
fn push_operand(&mut self, operand: Operand<Tree<'s>>) {
|
||||
if mem::replace(&mut self.prev_applicable, true) {
|
||||
self.inner.push_operator(application(Spacing::of_tree(&operand.value)));
|
||||
}
|
||||
self.inner.push_operand(operand)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, Inner: OperatorConsumer<'s>> OperatorConsumer<'s> for InsertApps<Inner> {
|
||||
fn push_operator(&mut self, operator: Operator<'s>) {
|
||||
let prev_applicable = mem::replace(
|
||||
&mut self.prev_applicable,
|
||||
matches!(operator.arity, Arity::Binary { missing: Some(BinaryOperand::Right), .. }),
|
||||
);
|
||||
if prev_applicable
|
||||
&& matches!(
|
||||
operator.arity,
|
||||
Arity::Unary { .. } | Arity::Binary { missing: Some(BinaryOperand::Left), .. }
|
||||
)
|
||||
{
|
||||
self.inner.push_operator(application(Spacing::Spaced));
|
||||
}
|
||||
self.inner.push_operator(operator)
|
||||
}
|
||||
}
|
||||
|
||||
impl<Inner: Finish> Finish for InsertApps<Inner> {
    type Result = Inner::Result;

    /// Finish the inner stage and reset this stage to its initial state.
    fn finish(&mut self) -> Self::Result {
        let result = self.inner.finish();
        self.prev_applicable = false;
        result
    }
}
|
||||
|
||||
fn application<'s>(spacing: Spacing) -> Operator<'s> {
|
||||
let precedence = ModifiedPrecedence { spacing, precedence: token::Precedence::application() };
|
||||
Operator {
|
||||
left_precedence: Some(precedence),
|
||||
right_precedence: precedence,
|
||||
associativity: token::Associativity::Left,
|
||||
arity: Arity::Binary {
|
||||
tokens: default(),
|
||||
lhs_section_termination: default(),
|
||||
missing: None,
|
||||
reify_rhs_section: true,
|
||||
},
|
||||
}
|
||||
}
|
213
lib/rust/parser/src/syntax/operator/arity.rs
Normal file
213
lib/rust/parser/src/syntax/operator/arity.rs
Normal file
@ -0,0 +1,213 @@
|
||||
use enso_prelude::*;
|
||||
|
||||
use crate::syntax::operator::apply_operator;
|
||||
use crate::syntax::operator::apply_unary_operator;
|
||||
use crate::syntax::operator::operand::Operand;
|
||||
use crate::syntax::operator::types::Arity;
|
||||
use crate::syntax::operator::types::BinaryOperand;
|
||||
use crate::syntax::operator::types::ModifiedPrecedence;
|
||||
use crate::syntax::operator::types::Operator;
|
||||
use crate::syntax::operator::OperandConsumer;
|
||||
use crate::syntax::operator::OperatorConsumer;
|
||||
use crate::syntax::operator::OperatorOrOperand;
|
||||
use crate::syntax::token;
|
||||
use crate::syntax::tree;
|
||||
use crate::syntax::treebuilding::Finish;
|
||||
use crate::syntax::treebuilding::Spacing;
|
||||
use crate::syntax::treebuilding::SpacingLookaheadTokenConsumer;
|
||||
use crate::syntax::treebuilding::TreeConsumer;
|
||||
use crate::syntax::Token;
|
||||
use crate::syntax::Tree;
|
||||
|
||||
|
||||
|
||||
// ======================
|
||||
// === Classify Arity ===
|
||||
// ======================
|
||||
|
||||
/// Determines the number of operands consumed by each term.
|
||||
#[derive(Default, Debug)]
|
||||
pub struct ClassifyArity<'s, Inner> {
|
||||
/// Next item that will be emitted. If it is an operator, it may still be extended with
|
||||
/// additional operators to become a multiple-operator error.
|
||||
lhs_item: Option<OperatorOrOperand<'s>>,
|
||||
inner: Inner,
|
||||
}
|
||||
|
||||
impl<'s, Inner: OperandConsumer<'s> + OperatorConsumer<'s>> SpacingLookaheadTokenConsumer<'s>
|
||||
for ClassifyArity<'s, Inner>
|
||||
{
|
||||
fn push_token(&mut self, tt: Token<'s>, rhs: Option<Spacing>) {
|
||||
match tt {
|
||||
Token { variant: token::Variant::Operator(opr), left_offset, code } =>
|
||||
self.operator(Token(left_offset, code, opr), rhs),
|
||||
token => self.push_tree(tree::to_ast(token)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, Inner: OperandConsumer<'s> + OperatorConsumer<'s>> TreeConsumer<'s>
|
||||
for ClassifyArity<'s, Inner>
|
||||
{
|
||||
fn push_tree(&mut self, tree: Tree<'s>) {
|
||||
self.emit(Operand::from(tree))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, Inner: OperandConsumer<'s> + OperatorConsumer<'s> + Finish> Finish
|
||||
for ClassifyArity<'s, Inner>
|
||||
{
|
||||
type Result = Inner::Result;
|
||||
|
||||
fn finish(&mut self) -> Self::Result {
|
||||
self.step(None);
|
||||
self.inner.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, Inner: OperandConsumer<'s> + OperatorConsumer<'s>> ClassifyArity<'s, Inner> {
|
||||
fn emit<T: Into<OperatorOrOperand<'s>>>(&mut self, item: T) {
|
||||
self.step(Some(item.into()));
|
||||
}
|
||||
|
||||
fn step(&mut self, item: Option<OperatorOrOperand<'s>>) {
|
||||
match mem::replace(&mut self.lhs_item, item) {
|
||||
Some(OperatorOrOperand::Operand(item)) => self.inner.push_operand(item),
|
||||
Some(OperatorOrOperand::Operator(item)) => self.inner.push_operator(item),
|
||||
None => (),
|
||||
}
|
||||
}
|
||||
|
||||
fn operator(&mut self, token: token::Operator<'s>, rhs: Option<Spacing>) {
|
||||
let properties = &token.variant.properties;
|
||||
let lhs = match self.lhs_item {
|
||||
Some(
|
||||
OperatorOrOperand::Operand(_)
|
||||
| OperatorOrOperand::Operator(Operator {
|
||||
arity: Arity::Binary { missing: Some(BinaryOperand::Right), .. },
|
||||
..
|
||||
}),
|
||||
) => Some(Spacing::of_token(&token)),
|
||||
_ => None,
|
||||
};
|
||||
// Asymmetric whitespace creates operator sections.
|
||||
// Exception: If an operator cannot form sections, and its LHS is unspaced, a spaced RHS is
|
||||
// accepted.
|
||||
let (lhs, rhs) = match (properties.can_form_section(), lhs, rhs) {
|
||||
(true, Some(Spacing::Unspaced), Some(Spacing::Spaced)) =>
|
||||
(Some(Spacing::Unspaced), None),
|
||||
(_, Some(Spacing::Spaced), Some(Spacing::Unspaced)) => (None, Some(Spacing::Unspaced)),
|
||||
(_, lhs, rhs) => (lhs, rhs),
|
||||
};
|
||||
let assoc = properties.associativity();
|
||||
let binary = properties.binary_infix_precedence();
|
||||
let unary = properties.unary_prefix_precedence();
|
||||
match (binary, unary, lhs, rhs) {
|
||||
(_, Some(unary), None, Some(Spacing::Unspaced)) =>
|
||||
self.unary_operator_applied(unary, assoc, token),
|
||||
(Some(binary), _, _, _) => self.binary_operator(binary, assoc, token, lhs, rhs),
|
||||
(_, Some(_), _, _) => self.unary_operator_section(token, rhs),
|
||||
(None, None, _, _) => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn unary_operator_applied(
|
||||
&mut self,
|
||||
precedence: token::Precedence,
|
||||
associativity: token::Associativity,
|
||||
token: token::Operator<'s>,
|
||||
) {
|
||||
let error = match self.lhs_item {
|
||||
Some(OperatorOrOperand::Operand(_))
|
||||
if token.left_offset.visible.width_in_spaces == 0 =>
|
||||
Some("Space required between term and unary-operator expression.".into()),
|
||||
_ => None,
|
||||
};
|
||||
self.emit(Operator {
|
||||
left_precedence: None,
|
||||
right_precedence: ModifiedPrecedence { spacing: Spacing::Unspaced, precedence },
|
||||
associativity,
|
||||
arity: Arity::Unary { token, error },
|
||||
});
|
||||
}
|
||||
|
||||
fn unary_operator_section(&mut self, token: token::Operator<'s>, rhs: Option<Spacing>) {
|
||||
match &mut self.lhs_item {
|
||||
Some(OperatorOrOperand::Operator(Operator {
|
||||
arity: Arity::Binary { tokens, .. },
|
||||
..
|
||||
})) if !(tokens.first().unwrap().left_offset.visible.width_in_spaces == 0
|
||||
&& token.left_offset.visible.width_in_spaces == 0) =>
|
||||
self.multiple_operator_error(token, rhs),
|
||||
_ => self.emit(apply_unary_operator(token, None, None)),
|
||||
}
|
||||
}
|
||||
|
||||
fn binary_operator(
|
||||
&mut self,
|
||||
precedence: token::Precedence,
|
||||
associativity: token::Associativity,
|
||||
token: token::Operator<'s>,
|
||||
lhs: Option<Spacing>,
|
||||
rhs: Option<Spacing>,
|
||||
) {
|
||||
if let Some(OperatorOrOperand::Operator(Operator {
|
||||
arity: Arity::Binary { missing: None | Some(BinaryOperand::Left), .. },
|
||||
..
|
||||
})) = &self.lhs_item
|
||||
&& !matches!(rhs, Some(Spacing::Unspaced))
|
||||
{
|
||||
self.multiple_operator_error(token, rhs);
|
||||
return;
|
||||
}
|
||||
let lhs_section_termination = token.properties.lhs_section_termination();
|
||||
let missing = match (lhs, rhs) {
|
||||
(None, None) => {
|
||||
self.emit(apply_operator(vec![token], lhs_section_termination, false, None, None));
|
||||
return;
|
||||
}
|
||||
(Some(_), None) => Some(BinaryOperand::Right),
|
||||
(None, Some(_)) => Some(BinaryOperand::Left),
|
||||
(Some(_), Some(_)) => None,
|
||||
};
|
||||
let reify_rhs_section = token.properties.can_form_section()
|
||||
&& (lhs == Some(Spacing::Spaced) || rhs == Some(Spacing::Spaced));
|
||||
self.emit(Operator {
|
||||
left_precedence: lhs.map(|spacing| ModifiedPrecedence { spacing, precedence }),
|
||||
right_precedence: ModifiedPrecedence { spacing: rhs.or(lhs).unwrap(), precedence },
|
||||
associativity,
|
||||
arity: Arity::Binary {
|
||||
tokens: vec![token],
|
||||
lhs_section_termination,
|
||||
missing,
|
||||
reify_rhs_section,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
fn multiple_operator_error(&mut self, token: token::Operator<'s>, rhs: Option<Spacing>) {
|
||||
match &mut self.lhs_item {
|
||||
Some(OperatorOrOperand::Operator(Operator {
|
||||
arity: Arity::Binary { tokens, lhs_section_termination, missing, reify_rhs_section },
|
||||
..
|
||||
})) => {
|
||||
tokens.push(token);
|
||||
if rhs.is_none() {
|
||||
match missing {
|
||||
None => *missing = Some(BinaryOperand::Right),
|
||||
Some(BinaryOperand::Left) =>
|
||||
self.lhs_item = Some(OperatorOrOperand::Operand(apply_operator(
|
||||
mem::take(tokens),
|
||||
*lhs_section_termination,
|
||||
*reify_rhs_section,
|
||||
None,
|
||||
None,
|
||||
))),
|
||||
Some(BinaryOperand::Right) => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
87
lib/rust/parser/src/syntax/operator/operand.rs
Normal file
87
lib/rust/parser/src/syntax/operator/operand.rs
Normal file
@ -0,0 +1,87 @@
|
||||
use crate::syntax::tree;
|
||||
use crate::syntax::Tree;
|
||||
|
||||
use enso_prelude::default;
|
||||
|
||||
|
||||
|
||||
// ===============
|
||||
// === Operand ===
|
||||
// ===============
|
||||
|
||||
/// A value together with counts of the wildcards and elided operands found within it.
#[derive(Debug, Default, Eq, PartialEq)]
pub struct Operand<T> {
    /// The wrapped value.
    pub value:     T,
    /// Number of elided operands in the subtree, potentially forming an *operator section*.
    pub elided:    u32,
    /// Number of wildcards in the subtree, potentially forming a *template function*.
    pub wildcards: u32,
}
|
||||
|
||||
/// Transpose. Note that an absent input will not be treated as an elided value; for that
|
||||
/// conversion, use [`Operand::new`].
|
||||
impl<T> From<Option<Operand<T>>> for Operand<Option<T>> {
|
||||
fn from(operand: Option<Operand<T>>) -> Self {
|
||||
match operand {
|
||||
Some(Operand { value, elided, wildcards }) =>
|
||||
Self { value: Some(value), elided, wildcards },
|
||||
None => default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Unit. Creates an Operand from a node.
|
||||
impl<'s> From<Tree<'s>> for Operand<Tree<'s>> {
|
||||
fn from(mut value: Tree<'s>) -> Self {
|
||||
let elided = 0;
|
||||
let wildcards = if let Tree {
|
||||
variant: box tree::Variant::Wildcard(tree::Wildcard { de_bruijn_index, .. }),
|
||||
..
|
||||
} = &mut value
|
||||
{
|
||||
debug_assert_eq!(*de_bruijn_index, None);
|
||||
*de_bruijn_index = Some(0);
|
||||
1
|
||||
} else {
|
||||
0
|
||||
};
|
||||
Self { value, wildcards, elided }
|
||||
}
|
||||
}
|
||||
|
||||
/// Counit. Bakes any information about elided operands into the tree.
|
||||
impl<'s> From<Operand<Tree<'s>>> for Tree<'s> {
|
||||
fn from(operand: Operand<Tree<'s>>) -> Self {
|
||||
let Operand { mut value, elided, wildcards } = operand;
|
||||
if elided != 0 {
|
||||
value = Tree::opr_section_boundary(elided, value);
|
||||
}
|
||||
if wildcards != 0 {
|
||||
value = Tree::template_function(wildcards, value);
|
||||
}
|
||||
value
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Operand<Option<T>> {
|
||||
/// Lift an option value to a potentially-elided operand.
|
||||
pub fn new(value: Option<Operand<T>>) -> Self {
|
||||
match value {
|
||||
None => Self { value: None, elided: 1, wildcards: default() },
|
||||
Some(value) => {
|
||||
let Operand { value, elided, wildcards } = value;
|
||||
Self { value: Some(value), elided, wildcards }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Operand<T> {
|
||||
/// Operate on the contained value without altering the elided-operand information.
|
||||
pub fn map<U>(self, f: impl FnOnce(T) -> U) -> Operand<U> {
|
||||
let Self { value, elided, wildcards } = self;
|
||||
let value = f(value);
|
||||
Operand { value, elided, wildcards }
|
||||
}
|
||||
}
|
113
lib/rust/parser/src/syntax/operator/reducer.rs
Normal file
113
lib/rust/parser/src/syntax/operator/reducer.rs
Normal file
@ -0,0 +1,113 @@
|
||||
use crate::syntax::operator::apply_operator;
|
||||
use crate::syntax::operator::apply_unary_operator;
|
||||
use crate::syntax::operator::Arity;
|
||||
use crate::syntax::operator::BinaryOperand;
|
||||
use crate::syntax::operator::ModifiedPrecedence;
|
||||
use crate::syntax::operator::Operand;
|
||||
use crate::syntax::operator::OperandConsumer;
|
||||
use crate::syntax::operator::Operator;
|
||||
use crate::syntax::operator::OperatorConsumer;
|
||||
use crate::syntax::token;
|
||||
use crate::syntax::treebuilding::Finish;
|
||||
use crate::syntax::treebuilding::Spacing;
|
||||
use crate::syntax::Tree;
|
||||
|
||||
use enso_prelude::VecOps;
|
||||
|
||||
|
||||
|
||||
// ===============
|
||||
// === Reducer ===
|
||||
// ===============
|
||||
|
||||
/// Stack machine that builds an expression from syntax nodes.
|
||||
///
|
||||
/// The operator-precedence algorithm[1] used is based on the shunting yard algorithm[2], extended
|
||||
/// to support *operator sections*, function application, and unary operators, and correctly report
|
||||
/// errors relating to consecutive operators.
|
||||
///
|
||||
/// [^1](https://en.wikipedia.org/wiki/Operator-precedence_parser)
|
||||
/// [^2](https://en.wikipedia.org/wiki/Shunting_yard_algorithm)
|
||||
#[derive(Default, Debug)]
|
||||
pub struct Reduce<'s> {
|
||||
output: Vec<Operand<Tree<'s>>>,
|
||||
operator_stack: Vec<Operator<'s>>,
|
||||
}
|
||||
|
||||
impl<'s> OperandConsumer<'s> for Reduce<'s> {
|
||||
fn push_operand(&mut self, operand: Operand<Tree<'s>>) {
|
||||
self.output.push(operand)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> OperatorConsumer<'s> for Reduce<'s> {
|
||||
fn push_operator(&mut self, operator: Operator<'s>) {
|
||||
if let Some(precedence) = operator.left_precedence {
|
||||
self.reduce(precedence);
|
||||
}
|
||||
self.operator_stack.push(operator);
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Finish for Reduce<'s> {
|
||||
type Result = Option<Operand<Tree<'s>>>;
|
||||
|
||||
fn finish(&mut self) -> Self::Result {
|
||||
self.reduce(ModifiedPrecedence {
|
||||
spacing: Spacing::Spaced,
|
||||
precedence: token::Precedence::min(),
|
||||
});
|
||||
let out = self.output.pop();
|
||||
debug_assert!(self.operator_stack.is_empty());
|
||||
debug_assert_eq!(
|
||||
&self.output,
|
||||
&[],
|
||||
"Internal error. Not all tokens were consumed while constructing the expression."
|
||||
);
|
||||
out
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Reduce<'s> {
|
||||
/// Given a starting value, replace it with the result of successively applying to it all
|
||||
/// operators in the `operator_stack` that have precedence greater than or equal to the
|
||||
/// specified value, consuming LHS values from the `output` stack as needed.
|
||||
fn reduce(&mut self, prec: ModifiedPrecedence) {
|
||||
let mut rhs = self.output.pop();
|
||||
while let Some(opr) = self.operator_stack.pop_if(|opr| {
|
||||
opr.right_precedence > prec
|
||||
|| (opr.right_precedence == prec && opr.associativity == token::Associativity::Left)
|
||||
}) {
|
||||
match opr.arity {
|
||||
Arity::Unary { token, error } => {
|
||||
let rhs_ = rhs.take();
|
||||
debug_assert_ne!(rhs_, None);
|
||||
rhs = Some(apply_unary_operator(token, rhs_, error));
|
||||
}
|
||||
Arity::Binary { tokens, lhs_section_termination, missing, reify_rhs_section } => {
|
||||
let operand = rhs.take();
|
||||
debug_assert_ne!(operand, None);
|
||||
let (lhs, rhs_) = match missing {
|
||||
Some(BinaryOperand::Left) => (None, operand),
|
||||
Some(BinaryOperand::Right) => (operand, None),
|
||||
None => {
|
||||
let lhs = self.output.pop();
|
||||
debug_assert_ne!(lhs, None);
|
||||
(lhs, operand)
|
||||
}
|
||||
};
|
||||
rhs = Some(apply_operator(
|
||||
tokens,
|
||||
lhs_section_termination,
|
||||
reify_rhs_section,
|
||||
lhs,
|
||||
rhs_,
|
||||
));
|
||||
}
|
||||
};
|
||||
}
|
||||
if let Some(rhs) = rhs {
|
||||
self.output.push(rhs);
|
||||
}
|
||||
}
|
||||
}
|
73
lib/rust/parser/src/syntax/operator/types.rs
Normal file
73
lib/rust/parser/src/syntax/operator/types.rs
Normal file
@ -0,0 +1,73 @@
|
||||
use crate::syntax::operator::SectionTermination;
|
||||
use crate::syntax::token;
|
||||
use crate::syntax::treebuilding::Spacing;
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::cmp::Ordering;
|
||||
|
||||
|
||||
|
||||
// ================
|
||||
// === Operator ===
|
||||
// ================
|
||||
|
||||
/// An operator, whose arity and precedence have been determined.
|
||||
#[derive(Debug)]
|
||||
pub struct Operator<'s> {
|
||||
pub left_precedence: Option<ModifiedPrecedence>,
|
||||
pub right_precedence: ModifiedPrecedence,
|
||||
pub associativity: token::Associativity,
|
||||
pub arity: Arity<'s>,
|
||||
}
|
||||
|
||||
|
||||
// === Arity ===
|
||||
|
||||
/// Classifies the role of an operator.
|
||||
#[derive(Debug)]
|
||||
pub enum Arity<'s> {
|
||||
Unary {
|
||||
token: token::Operator<'s>,
|
||||
error: Option<Cow<'static, str>>,
|
||||
},
|
||||
Binary {
|
||||
tokens: Vec<token::Operator<'s>>,
|
||||
lhs_section_termination: Option<SectionTermination>,
|
||||
missing: Option<BinaryOperand>,
|
||||
reify_rhs_section: bool,
|
||||
},
|
||||
}
|
||||
|
||||
impl<'s> Arity<'s> {
|
||||
fn unary(token: token::Operator<'s>) -> Self {
|
||||
Self::Unary { token, error: None }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// === Binary operand ===
|
||||
|
||||
/// Identifies one of the two operands of a binary operator.
#[derive(Debug)]
pub enum BinaryOperand {
    /// The left-hand operand.
    Left,
    /// The right-hand operand.
    Right,
}
|
||||
|
||||
|
||||
// === Modified precedence ===
|
||||
|
||||
/// An operator precedence paired with the spacing it was written with.
///
/// The `PartialOrd` implementation compares spacing first: an unspaced value orders above a
/// spaced one regardless of `precedence`; values with equal spacing compare by `precedence`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||
pub struct ModifiedPrecedence {
|
||||
/// Spacing associated with the operator.
pub spacing: Spacing,
|
||||
/// The operator's own precedence.
pub precedence: token::Precedence,
|
||||
}
|
||||
|
||||
impl PartialOrd for ModifiedPrecedence {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
match (self.spacing, other.spacing) {
|
||||
(Spacing::Spaced, Spacing::Unspaced) => Some(Ordering::Less),
|
||||
(Spacing::Unspaced, Spacing::Spaced) => Some(Ordering::Greater),
|
||||
_ => self.precedence.partial_cmp(&other.precedence),
|
||||
}
|
||||
}
|
||||
}
|
@ -305,6 +305,11 @@ impl Variant {
|
||||
| Variant::Invalid(_)
|
||||
)
|
||||
}
|
||||
|
||||
/// Return a token variant for an identifier composed of operator characters.
|
||||
pub fn operator_ident() -> variant::Ident {
|
||||
variant::Ident(false, 0, false, true, false)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Variant {
|
||||
@ -366,6 +371,11 @@ impl OperatorProperties {
|
||||
Self { is_compile_time_operation: true, ..self }
|
||||
}
|
||||
|
||||
/// Return whether this operator is flagged as a compile time operation.
|
||||
pub fn is_compile_time_operation(&self) -> bool {
|
||||
self.is_compile_time_operation
|
||||
}
|
||||
|
||||
/// Return a copy of this operator, modified to be flagged as right associative.
|
||||
pub fn as_right_associative(self) -> Self {
|
||||
Self { is_right_associative: true, ..self }
|
||||
|
@ -131,9 +131,6 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
|
||||
pub newline: Option<token::Newline<'s>>,
|
||||
pub elements: Vec<TextElement<'s>>,
|
||||
pub close: Option<token::TextEnd<'s>>,
|
||||
#[serde(skip)]
|
||||
#[reflect(skip)]
|
||||
pub closed: bool,
|
||||
},
|
||||
/// A simple application, like `print "hello"`.
|
||||
App {
|
||||
@ -841,29 +838,6 @@ fn maybe_apply<'s>(f: Option<Tree<'s>>, x: Tree<'s>) -> Tree<'s> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Join two text literals, merging contents as appropriate to each field.
|
||||
pub fn join_text_literals<'s>(
|
||||
lhs: &mut TextLiteral<'s>,
|
||||
rhs: &mut TextLiteral<'s>,
|
||||
lhs_span: &mut Span<'s>,
|
||||
rhs_span: Span<'s>,
|
||||
) {
|
||||
lhs_span.code_length += rhs_span.length_including_whitespace();
|
||||
match rhs.elements.first_mut() {
|
||||
Some(TextElement::Section { text }) => text.left_offset += rhs_span.left_offset,
|
||||
Some(TextElement::Escape { token }) => token.left_offset += rhs_span.left_offset,
|
||||
Some(TextElement::Splice { open, .. }) => open.left_offset += rhs_span.left_offset,
|
||||
Some(TextElement::Newline { newline }) => newline.left_offset += rhs_span.left_offset,
|
||||
None => (),
|
||||
}
|
||||
if let Some(newline) = rhs.newline.take() {
|
||||
lhs.newline = newline.into();
|
||||
}
|
||||
lhs.elements.append(&mut rhs.elements);
|
||||
lhs.close = rhs.close.take();
|
||||
lhs.closed = rhs.closed;
|
||||
}
|
||||
|
||||
/// Join two nodes with an operator, in a way appropriate for their types.
|
||||
///
|
||||
/// For most operands this will simply construct an `OprApp`; however, a non-operator block (i.e. an
|
||||
@ -1006,28 +980,6 @@ pub fn to_ast(token: Token) -> Tree {
|
||||
Tree::number(None, Some(token.with_variant(number)), None),
|
||||
token::Variant::NumberBase(base) =>
|
||||
Tree::number(Some(token.with_variant(base)), None, None),
|
||||
token::Variant::TextStart(open) =>
|
||||
Tree::text_literal(Some(token.with_variant(open)), default(), default(), default(), default()),
|
||||
token::Variant::TextSection(section) => {
|
||||
let section = TextElement::Section { text: token.with_variant(section) };
|
||||
Tree::text_literal(default(), default(), vec![section], default(), default())
|
||||
}
|
||||
token::Variant::TextEscape(escape) => {
|
||||
let token = token.with_variant(escape);
|
||||
let section = TextElement::Escape { token };
|
||||
Tree::text_literal(default(), default(), vec![section], default(), default())
|
||||
}
|
||||
token::Variant::TextEnd(_) if token.code.is_empty() =>
|
||||
Tree::text_literal(default(), default(), default(), default(), true),
|
||||
token::Variant::TextEnd(close) =>
|
||||
Tree::text_literal(default(), default(), default(), Some(token.with_variant(close)), true),
|
||||
token::Variant::TextInitialNewline(_) =>
|
||||
Tree::text_literal(default(), Some(token::newline(token.left_offset, token.code)), default(), default(), default()),
|
||||
token::Variant::TextNewline(_) => {
|
||||
let newline = token::newline(token.left_offset, token.code);
|
||||
let newline = TextElement::Newline { newline };
|
||||
Tree::text_literal(default(), default(), vec![newline], default(), default())
|
||||
}
|
||||
token::Variant::Wildcard(wildcard) => Tree::wildcard(token.with_variant(wildcard), default()),
|
||||
token::Variant::SuspendedDefaultArguments(t) => Tree::suspended_default_arguments(token.with_variant(t)),
|
||||
token::Variant::OpenSymbol(s) =>
|
||||
@ -1042,6 +994,13 @@ pub fn to_ast(token: Token) -> Tree {
|
||||
// This should be unreachable: `Precedence::resolve` doesn't call `to_ast` for operators.
|
||||
| token::Variant::Operator(_)
|
||||
| token::Variant::Private(_)
|
||||
// Handled during compound-token assembly.
|
||||
| token::Variant::TextStart(_)
|
||||
| token::Variant::TextSection(_)
|
||||
| token::Variant::TextEscape(_)
|
||||
| token::Variant::TextEnd(_)
|
||||
| token::Variant::TextInitialNewline(_)
|
||||
| token::Variant::TextNewline(_)
|
||||
// Map an error case in the lexer to an error in the AST.
|
||||
| token::Variant::Invalid(_) => {
|
||||
let message = format!("Unexpected token: {token:?}");
|
||||
|
46
lib/rust/parser/src/syntax/treebuilding.rs
Normal file
46
lib/rust/parser/src/syntax/treebuilding.rs
Normal file
@ -0,0 +1,46 @@
|
||||
use crate::syntax::Token;
|
||||
use crate::syntax::Tree;
|
||||
|
||||
|
||||
|
||||
mod block;
|
||||
mod compound_token;
|
||||
mod consumer;
|
||||
mod whitespace;
|
||||
|
||||
|
||||
// ===============
|
||||
// === Exports ===
|
||||
// ===============
|
||||
|
||||
pub use block::FlattenBlockTrees;
|
||||
pub use compound_token::AssembleCompoundTokens;
|
||||
pub use consumer::Finish;
|
||||
pub use consumer::ItemConsumer;
|
||||
pub use consumer::TreeConsumer;
|
||||
pub use whitespace::PeekSpacing;
|
||||
pub use whitespace::Spacing;
|
||||
pub use whitespace::SpacingLookaheadTokenConsumer;
|
||||
|
||||
|
||||
// ===================
|
||||
// === TokenOrTree ===
|
||||
// ===================
|
||||
|
||||
/// Either a [`Token`] or a [`Tree`], allowing both to be stored in a single slot.
#[derive(Debug)]
|
||||
enum TokenOrTree<'s> {
|
||||
/// A token.
Token(Token<'s>),
|
||||
/// A tree.
Tree(Tree<'s>),
|
||||
}
|
||||
|
||||
impl<'s> From<Token<'s>> for TokenOrTree<'s> {
    /// Wrap a token in the [`TokenOrTree::Token`] variant.
    fn from(token: Token<'s>) -> Self {
        Self::Token(token)
    }
}
|
||||
|
||||
impl<'s> From<Tree<'s>> for TokenOrTree<'s> {
    /// Wrap a tree in the [`TokenOrTree::Tree`] variant.
    fn from(tree: Tree<'s>) -> Self {
        Self::Tree(tree)
    }
}
|
44
lib/rust/parser/src/syntax/treebuilding/block.rs
Normal file
44
lib/rust/parser/src/syntax/treebuilding/block.rs
Normal file
@ -0,0 +1,44 @@
|
||||
use crate::syntax;
|
||||
use crate::syntax::operator;
|
||||
use crate::syntax::treebuilding::consumer::Finish;
|
||||
use crate::syntax::treebuilding::consumer::ItemConsumer;
|
||||
use crate::syntax::treebuilding::consumer::TokenConsumer;
|
||||
use crate::syntax::treebuilding::consumer::TreeConsumer;
|
||||
use crate::syntax::Item;
|
||||
|
||||
|
||||
|
||||
// ==========================
|
||||
// === BlockTreeFlattener ===
|
||||
// ==========================
|
||||
|
||||
/// Consumes `Item`s and passes their content to a token/tree consumer, using an
|
||||
/// [`operator::Precedence`] parser to flatten blocks.
|
||||
#[derive(Debug, Default)]
|
||||
pub struct FlattenBlockTrees<'s, T> {
|
||||
/// The downstream consumer that receives the tokens and trees.
inner: T,
|
||||
/// Consumes child blocks. Stores no semantic state, but is reused for performance.
|
||||
child: Option<Box<operator::Precedence<'s>>>,
|
||||
}
|
||||
|
||||
impl<'s, T: TokenConsumer<'s> + TreeConsumer<'s>> ItemConsumer<'s> for FlattenBlockTrees<'s, T> {
|
||||
fn push_item(&mut self, item: Item<'s>) {
|
||||
match item {
|
||||
Item::Block(lines) => {
|
||||
let mut child = self.child.take().unwrap_or_default();
|
||||
self.inner.push_tree(syntax::item::build_block(lines, &mut child));
|
||||
self.child = Some(child);
|
||||
}
|
||||
Item::Token(token) => self.inner.push_token(token),
|
||||
Item::Tree(tree) => self.inner.push_tree(tree),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T: Finish> Finish for FlattenBlockTrees<'s, T> {
    type Result = T::Result;

    /// Finish the inner consumer and return its result.
    fn finish(&mut self) -> Self::Result {
        let Self { inner, .. } = self;
        inner.finish()
    }
}
|
161
lib/rust/parser/src/syntax/treebuilding/compound_token.rs
Normal file
161
lib/rust/parser/src/syntax/treebuilding/compound_token.rs
Normal file
@ -0,0 +1,161 @@
|
||||
use enso_prelude::*;
|
||||
|
||||
use crate::syntax;
|
||||
use crate::syntax::token;
|
||||
use crate::syntax::treebuilding::consumer::Finish;
|
||||
use crate::syntax::treebuilding::consumer::TokenConsumer;
|
||||
use crate::syntax::treebuilding::consumer::TreeConsumer;
|
||||
use crate::syntax::Token;
|
||||
|
||||
|
||||
|
||||
// ================================
|
||||
// === Compound token assembler ===
|
||||
// ================================
|
||||
|
||||
/// Recognizes lexical tokens that are indivisible, and assembles them into trees.
|
||||
#[derive(Default, Debug)]
|
||||
pub struct AssembleCompoundTokens<'s, T> {
|
||||
/// The compound token currently being assembled, if any.
compounding: Option<CompoundToken<'s>>,
|
||||
/// The downstream consumer that receives assembled trees and all other input.
inner: T,
|
||||
}
|
||||
|
||||
/// A compound token that is in the process of being assembled.
#[derive(Debug)]
|
||||
enum CompoundToken<'s> {
|
||||
/// A text literal whose parts are still being collected.
TextLiteral(TextLiteralBuilder<'s>),
|
||||
}
|
||||
|
||||
impl<'s, T: TreeConsumer<'s> + TokenConsumer<'s>> TokenConsumer<'s>
|
||||
for AssembleCompoundTokens<'s, T>
|
||||
{
|
||||
fn push_token(&mut self, token: Token<'s>) {
|
||||
match (&mut self.compounding, token.variant) {
|
||||
(this @ None, token::Variant::TextStart(variant)) => {
|
||||
let token = token.with_variant(variant);
|
||||
*this = Some(CompoundToken::TextLiteral(TextLiteralBuilder {
|
||||
open: token,
|
||||
newline: default(),
|
||||
elements: default(),
|
||||
}));
|
||||
}
|
||||
(
|
||||
Some(CompoundToken::TextLiteral(TextLiteralBuilder {
|
||||
newline: newline @ None,
|
||||
..
|
||||
})),
|
||||
token::Variant::TextInitialNewline(_),
|
||||
) => {
|
||||
let token = token::newline(token.left_offset, token.code);
|
||||
*newline = Some(token);
|
||||
}
|
||||
(
|
||||
Some(CompoundToken::TextLiteral(TextLiteralBuilder { elements, .. })),
|
||||
token::Variant::TextSection(variant),
|
||||
) => {
|
||||
let token = token.with_variant(variant);
|
||||
let element = syntax::tree::TextElement::Section { text: token };
|
||||
elements.push(element);
|
||||
}
|
||||
(
|
||||
Some(CompoundToken::TextLiteral(TextLiteralBuilder { elements, .. })),
|
||||
token::Variant::TextEscape(variant),
|
||||
) => {
|
||||
let token = token.with_variant(variant);
|
||||
let element = syntax::tree::TextElement::Escape { token };
|
||||
elements.push(element);
|
||||
}
|
||||
(
|
||||
Some(CompoundToken::TextLiteral(TextLiteralBuilder { elements, .. })),
|
||||
token::Variant::TextNewline(_),
|
||||
) => {
|
||||
let token = token::newline(token.left_offset, token.code);
|
||||
let element = syntax::tree::TextElement::Newline { newline: token };
|
||||
elements.push(element);
|
||||
}
|
||||
(this @ Some(CompoundToken::TextLiteral(_)), token::Variant::TextEnd(variant)) => {
|
||||
let builder = match mem::take(this) {
|
||||
Some(CompoundToken::TextLiteral(builder)) => builder,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let close = token.with_variant(variant);
|
||||
self.inner.push_tree(builder.finish(Some(close)));
|
||||
}
|
||||
(_, token::Variant::TextStart(_)) => unreachable!(),
|
||||
(_, token::Variant::TextInitialNewline(_)) => unreachable!(),
|
||||
(_, token::Variant::TextSection(_)) => unreachable!(),
|
||||
(_, token::Variant::TextEscape(_)) => unreachable!(),
|
||||
(_, token::Variant::TextNewline(_)) => unreachable!(),
|
||||
(_, token::Variant::TextEnd(_)) => unreachable!(),
|
||||
_ => self.inner.push_token(token),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T: TreeConsumer<'s>> TreeConsumer<'s> for AssembleCompoundTokens<'s, T> {
|
||||
fn push_tree(&mut self, mut tree: syntax::Tree<'s>) {
|
||||
match (&mut self.compounding, &mut tree.variant) {
|
||||
(
|
||||
Some(CompoundToken::TextLiteral(TextLiteralBuilder { elements, .. })),
|
||||
box syntax::tree::Variant::TextLiteral(syntax::tree::TextLiteral {
|
||||
open: None,
|
||||
newline: None,
|
||||
elements: rhs_elements,
|
||||
close: None,
|
||||
}),
|
||||
) => {
|
||||
match rhs_elements.first_mut() {
|
||||
Some(syntax::tree::TextElement::Splice { open, .. }) =>
|
||||
open.left_offset += tree.span.left_offset,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
elements.append(rhs_elements);
|
||||
}
|
||||
_ => {
|
||||
self.flush();
|
||||
self.inner.push_tree(tree);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T: TreeConsumer<'s>> AssembleCompoundTokens<'s, T> {
|
||||
fn flush(&mut self) {
|
||||
if let Some(CompoundToken::TextLiteral(builder)) = mem::take(&mut self.compounding) {
|
||||
self.inner.push_tree(builder.finish(None))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T: TreeConsumer<'s> + Finish> Finish for AssembleCompoundTokens<'s, T> {
    type Result = T::Result;

    /// Flush any literal still being assembled, then finish the inner consumer.
    fn finish(&mut self) -> Self::Result {
        self.flush();
        let Self { inner, .. } = self;
        inner.finish()
    }
}
|
||||
|
||||
|
||||
// === Text literal builder ===
|
||||
|
||||
/// Accumulates the parts of a text literal until it can be turned into a tree.
#[derive(Debug)]
|
||||
struct TextLiteralBuilder<'s> {
|
||||
/// The token that opened the literal.
open: token::TextStart<'s>,
|
||||
/// The initial newline of the literal, if one has been seen.
newline: Option<token::Newline<'s>>,
|
||||
/// The elements collected so far, in input order.
elements: Vec<syntax::tree::TextElement<'s>>,
|
||||
}
|
||||
|
||||
impl<'s> TextLiteralBuilder<'s> {
|
||||
fn finish(self, close: Option<token::TextEnd<'s>>) -> syntax::Tree<'s> {
|
||||
let Self { open, newline, elements } = self;
|
||||
if open.code.starts_with('#') {
|
||||
assert_eq!(newline, None);
|
||||
let doc = syntax::tree::DocComment { open, elements, newlines: default() };
|
||||
syntax::Tree::documented(doc, default())
|
||||
} else {
|
||||
let close =
|
||||
close.and_then(|close| if close.code.is_empty() { None } else { Some(close) });
|
||||
syntax::Tree::text_literal(Some(open), newline, elements, close)
|
||||
}
|
||||
}
|
||||
}
|
23
lib/rust/parser/src/syntax/treebuilding/consumer.rs
Normal file
23
lib/rust/parser/src/syntax/treebuilding/consumer.rs
Normal file
@ -0,0 +1,23 @@
|
||||
use crate::syntax::Item;
|
||||
use crate::syntax::Token;
|
||||
use crate::syntax::Tree;
|
||||
|
||||
|
||||
|
||||
pub trait ItemConsumer<'s> {
|
||||
fn push_item(&mut self, tree: Item<'s>);
|
||||
}
|
||||
|
||||
/// A consumer that accepts [`Tree`]s one at a time.
pub trait TreeConsumer<'s> {
|
||||
/// Accept the next tree.
fn push_tree(&mut self, tree: Tree<'s>);
|
||||
}
|
||||
|
||||
/// A consumer that accepts [`Token`]s one at a time.
pub trait TokenConsumer<'s> {
|
||||
/// Accept the next token.
fn push_token(&mut self, token: Token<'s>);
|
||||
}
|
||||
|
||||
/// A consumer that can be completed to produce a result.
pub trait Finish {
|
||||
/// The type of value produced by [`Finish::finish`].
type Result;
|
||||
|
||||
/// Complete processing of the input received so far and return the result.
fn finish(&mut self) -> Self::Result;
|
||||
}
|
128
lib/rust/parser/src/syntax/treebuilding/whitespace.rs
Normal file
128
lib/rust/parser/src/syntax/treebuilding/whitespace.rs
Normal file
@ -0,0 +1,128 @@
|
||||
use crate::syntax::token;
|
||||
use crate::syntax::tree;
|
||||
use crate::syntax::treebuilding::consumer::Finish;
|
||||
use crate::syntax::treebuilding::consumer::TokenConsumer;
|
||||
use crate::syntax::treebuilding::consumer::TreeConsumer;
|
||||
use crate::syntax::treebuilding::TokenOrTree;
|
||||
use crate::syntax::Token;
|
||||
use crate::syntax::Tree;
|
||||
|
||||
|
||||
|
||||
// ===============
|
||||
// === Spacing ===
|
||||
// ===============
|
||||
|
||||
/// Whether a term is logically separated from the previous term by whitespace.
|
||||
#[derive(Debug, Default, PartialEq, Eq, Copy, Clone)]
|
||||
pub enum Spacing {
|
||||
/// The term starts a new no-space group. This is the default.
#[default]
|
||||
Spaced,
|
||||
/// The term continues the no-space group of the previous term.
Unspaced,
|
||||
}
|
||||
|
||||
impl Spacing {
|
||||
pub fn of_tree(tree: &Tree) -> Self {
|
||||
match tree_starts_new_no_space_group(tree) {
|
||||
false => Spacing::Unspaced,
|
||||
true => Spacing::Spaced,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn of_token<'a: 'b, 'b, T: Into<token::Ref<'a, 'b>>>(token: T) -> Self {
|
||||
match token_starts_new_no_space_group(token) {
|
||||
false => Spacing::Unspaced,
|
||||
true => Spacing::Spaced,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns `true` for an item if that item should not follow any other item in a no-space group
|
||||
// (i.e. the item has "space" before it).
|
||||
fn token_starts_new_no_space_group<'a: 'b, 'b, T: Into<token::Ref<'a, 'b>>>(token: T) -> bool {
|
||||
let token = token.into();
|
||||
match &token.data {
|
||||
token::Variant::Operator(opr) if opr.properties.is_sequence() => true,
|
||||
_ => token.left_offset.visible.width_in_spaces != 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn tree_starts_new_no_space_group(tree: &Tree) -> bool {
|
||||
tree.span.left_offset.visible.width_in_spaces != 0
|
||||
|| matches!(
|
||||
&tree.variant,
|
||||
box tree::Variant::BodyBlock(_)
|
||||
| box tree::Variant::OperatorBlockApplication(_)
|
||||
| box tree::Variant::ArgumentBlockApplication(_)
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
// ============================
|
||||
// === Whitespace Lookahead ===
|
||||
// ============================
|
||||
|
||||
/// A tree consumer that is also told the spacing of the item following each tree.
pub trait SpacingLookaheadTreeConsumer<'s> {
|
||||
/// Accept a tree. `following_spacing` is the spacing of the next item, or `None` if there is
/// no next item.
fn push_tree(&mut self, tree: Tree<'s>, following_spacing: Option<Spacing>);
|
||||
}
|
||||
|
||||
/// A token consumer that is also told the spacing of the item following each token.
pub trait SpacingLookaheadTokenConsumer<'s> {
|
||||
/// Accept a token. `following_spacing` is the spacing of the next item, or `None` if there is
/// no next item.
fn push_token(&mut self, token: Token<'s>, following_spacing: Option<Spacing>);
|
||||
}
|
||||
|
||||
/// Maintains 1-token whitespace lookahead.
|
||||
#[derive(Debug, Default)]
|
||||
pub struct PeekSpacing<'s, T> {
|
||||
/// The most recently received item, held back until the spacing of its successor is known.
current: Option<TokenOrTree<'s>>,
|
||||
/// The downstream consumer that receives each item together with the following spacing.
inner: T,
|
||||
}
|
||||
|
||||
impl<'s, T: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s>>
|
||||
PeekSpacing<'s, T>
|
||||
{
|
||||
fn emit(&mut self, tt: Option<TokenOrTree<'s>>, rhs: Option<Spacing>) {
|
||||
match tt {
|
||||
Some(TokenOrTree::Token(token)) => self.inner.push_token(token, rhs),
|
||||
Some(TokenOrTree::Tree(tree)) => self.inner.push_tree(tree, rhs),
|
||||
None => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s> + Finish> Finish
|
||||
for PeekSpacing<'s, T>
|
||||
{
|
||||
type Result = T::Result;
|
||||
|
||||
fn finish(&mut self) -> T::Result {
|
||||
let last = self.current.take();
|
||||
self.emit(last, None);
|
||||
self.inner.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s>> TokenConsumer<'s>
|
||||
for PeekSpacing<'s, T>
|
||||
{
|
||||
fn push_token(&mut self, token: Token<'s>) {
|
||||
let rhs = Spacing::of_token(&token);
|
||||
let next = self.current.replace(token.into());
|
||||
self.emit(next, Some(rhs))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T: SpacingLookaheadTreeConsumer<'s> + SpacingLookaheadTokenConsumer<'s>> TreeConsumer<'s>
|
||||
for PeekSpacing<'s, T>
|
||||
{
|
||||
fn push_tree(&mut self, tree: Tree<'s>) {
|
||||
let rhs = Spacing::of_tree(&tree);
|
||||
let next = self.current.replace(tree.into());
|
||||
self.emit(next, Some(rhs));
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, T: TreeConsumer<'s>> SpacingLookaheadTreeConsumer<'s> for T {
|
||||
fn push_tree(&mut self, tree: Tree<'s>, _: Option<Spacing>) {
|
||||
self.push_tree(tree);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user