Lex doc comments and attach text to AST (#3795)

- New `Documented` node attaches documentation, lexed as a raw text literal, to a statement.
- Handle a case of lambdas with body blocks.
This commit is contained in:
Kaz Wesley 2022-10-14 23:13:32 -07:00 committed by GitHub
parent e9260227c4
commit 2740406f93
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 383 additions and 175 deletions

View File

@ -666,7 +666,6 @@ public class EnsoCompilerTest {
}
@Test
@Ignore
public void testLambdaBody() throws Exception {
parseTest("""
list =

View File

@ -0,0 +1,32 @@
//! Run the lexer from the command line, for understanding the early stages of the parser.
// === Features ===
#![feature(exact_size_is_empty)]
#![feature(let_chains)]
#![feature(if_let_guard)]
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![warn(unsafe_code)]
#![allow(clippy::bool_to_int_with_if)]
#![allow(clippy::let_and_return)]
// === Non-Standard Linter Configuration ===
#![allow(clippy::option_map_unit_fn)]
#![allow(clippy::precedence)]
#![allow(dead_code)]
#![deny(unconditional_recursion)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
#![warn(unused_qualifications)]
/// Ad-hoc lexer driver for development: reads source code from standard
/// input and pretty-prints the resulting token stream.
pub fn main() {
    use std::io::Read;
    let mut source = String::new();
    let stdin = std::io::stdin();
    stdin.lock().read_to_string(&mut source).unwrap();
    println!("{:#?}", enso_parser::lexer::run(&source));
}

View File

@ -71,11 +71,6 @@ fn section_simple() {
test("a +", expected_rhs);
}
#[test]
fn comments() {
test("# a b c", block![()()]);
}
#[test]
fn inline_if() {
#[rustfmt::skip]
@ -102,6 +97,43 @@ fn else_block() {
}
// === Comments ===
/// A line starting with a single `#` is a plain (non-doc) comment: the parsed
/// block contains only empty lines — no `Documented` node is produced.
#[test]
fn plain_comments() {
    test("# a b c", block![()()]);
}
/// A `##` comment attaches to the following statement as a `Documented` node.
/// The doc text is lexed like a multiline text literal: continuation lines are
/// trimmed to the doc block's indent, and blank lines between the comment and
/// the documented item are recorded separately.
#[test]
fn doc_comments() {
    // Multi-line doc comment over a function definition.
    #[rustfmt::skip]
    let lines = vec![
        "## The Identity Function",
        "",
        " Arguments:",
        " - x: value to do nothing to",
        "id x = x",
    ];
    #[rustfmt::skip]
    test(&lines.join("\n"), block![
        (Documented
         #((Section "# The Identity Function\n")
           (Section "\n")
           (Section "Arguments:\n")
           (Section "- x: value to do nothing to"))
         #(())
         (Function (Ident id) #((() (Ident x) () ())) "=" (Ident x)))]);
    // A doc comment inside an indented block attaches to the statement at
    // the same indent level.
    #[rustfmt::skip]
    let lines = vec![
        " ## Test indent handling",
        " foo",
    ];
    #[rustfmt::skip]
    test(&lines.join("\n"), block![
        (Documented #((Section "# Test indent handling")) #(()) (Ident foo))]);
}
// === Type Definitions ===
#[test]
@ -491,11 +523,15 @@ fn multiple_operator_error() {
#[test]
fn precedence() {
let code = ["x * y + z"];
let expected = block![
(OprApp (OprApp (Ident x) (Ok "*") (Ident y)) (Ok "+") (Ident z))
#[rustfmt::skip]
let cases = [
("x * y + z", block![(OprApp (OprApp (Ident x) (Ok "*") (Ident y)) (Ok "+") (Ident z))]),
("x + y * z", block![(OprApp (Ident x) (Ok "+") (OprApp (Ident y) (Ok "*") (Ident z)))]),
("w + x + y * z", block![
(OprApp (OprApp (Ident w) (Ok "+") (Ident x)) (Ok "+")
(OprApp (Ident y) (Ok "*") (Ident z)))]),
];
test(&code.join("\n"), expected);
cases.into_iter().for_each(|(code, expected)| test(code, expected));
}
#[test]
@ -507,6 +543,15 @@ fn right_associative_operators() {
test(&code.join("\n"), expected);
}
/// `+` is left-associative: `x + y + z` groups as `(x + y) + z`.
#[test]
fn left_associative_operators() {
    #[rustfmt::skip]
    let expected = block![
        (OprApp (OprApp (Ident x) (Ok "+") (Ident y)) (Ok "+") (Ident z))
    ];
    test("x + y + z", expected);
}
#[test]
fn pipeline_operators() {
test("f <| a", block![(OprApp (Ident f) (Ok "<|") (Ident a))]);
@ -841,6 +886,27 @@ x"#;
(Ident x)
];
test(code, expected);
let code = " x = \"\"\"\n Indented multiline\n x";
#[rustfmt::skip]
let expected = block![
(Assignment (Ident x) "=" (TextLiteral #((Section "Indented multiline"))))
(Ident x)
];
test(code, expected);
let code = "'''\n \\nEscape at start\n";
#[rustfmt::skip]
let expected = block![
(TextLiteral #((Escape '\n') (Section "Escape at start\n")))
];
test(code, expected);
let code = "x =\n x = '''\n x\nx";
#[rustfmt::skip]
let expected = block![
(Function (Ident x) #() "="
(BodyBlock #((Assignment (Ident x) "=" (TextLiteral #((Section "x")))))))
(Ident x)
];
test(code, expected);
}
#[test]
@ -910,6 +976,8 @@ fn old_lambdas() {
("x-> y", block![(OprApp (Ident x) (Ok "->") (Ident y))]),
("x->\n y", block![(OprApp (Ident x) (Ok "->") (BodyBlock #((Ident y))))]),
("x ->\n y", block![(OprApp (Ident x) (Ok "->") (BodyBlock #((Ident y))))]),
("f x->\n y", block![
(App (Ident f) (OprApp (Ident x) (Ok "->") (BodyBlock #((Ident y)))))]),
];
cases.into_iter().for_each(|(code, expected)| test(code, expected));
}

View File

@ -622,7 +622,7 @@ fn analyze_operator(token: &str) -> token::OperatorProperties {
// Operators that can be unary.
"\\" =>
return operator
.with_unary_prefix_mode(token::Precedence::min())
.with_unary_prefix_mode(token::Precedence::min_valid())
.as_compile_time_operation(),
"~" =>
return operator
@ -778,8 +778,9 @@ impl<'s> Lexer<'s> {
impl<'s> Lexer<'s> {
/// Read a text literal.
fn text(&mut self) {
let quote_char = match self.current_char {
Some(char @ ('"' | '\'')) => char,
let (quote_char, text_type) = match self.current_char {
Some(char @ '"') => (char, TextType::Raw),
Some(char @ '\'') => (char, TextType::Interpolated),
Some('`') => {
if let Some(state) = self.stack.pop() {
self.end_splice(state);
@ -791,86 +792,85 @@ impl<'s> Lexer<'s> {
}
_ => return,
};
let indent = self.last_spaces_visible_offset;
let indent = self.current_block_indent;
let open_quote_start = self.mark();
self.last_spaces_visible_offset = VisibleOffset(0);
self.last_spaces_offset = Bytes(0);
self.take_next();
let mut multiline = false;
// At least two quote characters.
if let Some(char) = self.current_char && char == quote_char {
let close_quote_start = self.mark();
self.take_next();
let mut multiline = false;
// If more than two quote characters: Start a multiline quote.
while let Some(char) = self.current_char && char == quote_char {
multiline = true;
self.take_next();
}
if multiline {
while self.current_char.is_some() {
let mut newline = self.take_1('\r');
newline = newline || self.take_1('\n');
if newline {
break;
}
}
let before_space = self.mark();
self.spaces_after_lexeme();
let text_start = self.mark();
let token = self.make_token(open_quote_start, before_space,
token::Variant::TextStart(token::variant::TextStart()));
self.output.push(token);
let interpolate = quote_char == '\'';
self.text_content(Some(text_start), None, interpolate, State::MultilineText { indent }, Some(indent));
self.multiline_text(open_quote_start, indent, text_type);
return;
} else {
// Exactly two quote characters: Open and shut case.
let close_quote_end = self.mark();
let token = self.make_token(open_quote_start, close_quote_start.clone(),
token::Variant::TextStart(token::variant::TextStart()));
token::Variant::text_start());
self.output.push(token);
let token = self.make_token(close_quote_start, close_quote_end,
token::Variant::TextEnd(token::variant::TextEnd()));
token::Variant::text_end());
self.output.push(token);
}
} else {
// One quote followed by non-quote character: Inline quote.
let open_quote_end = self.mark();
let token = self.make_token(open_quote_start, open_quote_end,
token::Variant::TextStart(token::variant::TextStart()));
token::Variant::text_start());
self.output.push(token);
self.inline_quote(quote_char);
self.inline_quote(quote_char, text_type);
}
self.spaces_after_lexeme();
}
fn inline_quote(&mut self, quote_char: char) {
if self.text_content(None, Some(quote_char), quote_char == '\'', State::InlineText, None) {
return;
}
if let Some(char) = self.current_char && char == quote_char {
let text_end = self.mark();
self.take_next();
let close_quote_end = self.mark();
let token = self.make_token(text_end, close_quote_end,
token::Variant::TextEnd(token::variant::TextEnd()));
self.output.push(token);
fn multiline_text(
&mut self,
open_quote_start: (Bytes, Offset<'s>),
indent: VisibleOffset,
text_type: TextType,
) {
let open_quote_end = self.mark();
let token =
self.make_token(open_quote_start, open_quote_end.clone(), token::Variant::text_start());
self.output.push(token);
if text_type.expects_initial_newline() && let Some(newline) = self.line_break() {
self.output.push(newline.with_variant(token::Variant::text_initial_newline()));
}
let text_start = self.mark();
self.text_content(
Some(text_start),
None,
text_type.is_interpolated(),
State::MultilineText { indent },
Some(indent),
);
}
fn inline_quote(&mut self, quote_char: char, text_type: TextType) {
let is_interpolated = text_type.is_interpolated();
self.text_content(None, quote_char.into(), is_interpolated, State::InlineText, None);
}
fn end_splice(&mut self, state: State) {
let splice_quote_start = self.mark();
self.take_next();
let splice_quote_end = self.mark();
let token = self.make_token(
splice_quote_start,
splice_quote_end,
token::Variant::CloseSymbol(token::variant::CloseSymbol()),
);
let token =
self.make_token(splice_quote_start, splice_quote_end, token::Variant::close_symbol());
self.output.push(token);
match state {
State::InlineText => self.inline_quote('\''),
State::MultilineText { indent } => self.text_lines(indent, true),
State::InlineText => self.inline_quote('\'', TextType::Interpolated),
State::MultilineText { indent } => {
self.text_content(None, None, true, State::MultilineText { indent }, Some(indent));
}
}
}
@ -881,7 +881,7 @@ impl<'s> Lexer<'s> {
interpolate: bool,
state: State,
multiline: Option<VisibleOffset>,
) -> bool {
) -> TextEndedAt {
let mut text_start = start.unwrap_or_else(|| self.mark());
while let Some(char) = self.current_char {
if closing_char == Some(char) || (multiline.is_none() && is_newline_char(char)) {
@ -894,31 +894,27 @@ impl<'s> Lexer<'s> {
let indent = multiline.unwrap();
let text_end = self.mark();
self.spaces_after_lexeme();
if let Some(char) = self.current_char {
if self.last_spaces_visible_offset <= indent && !is_newline_char(char) {
if let Some(char) = self.current_char && !is_newline_char(char) {
let block_indent = self.last_spaces_visible_offset;
if block_indent <= indent {
let token = self.make_token(
text_start,
before_newline.clone(),
token::Variant::TextSection(token::variant::TextSection()),
token::Variant::text_section(),
);
if !(token.code.is_empty() && token.left_offset.code.is_empty()) {
self.output.push(token);
}
let token = self.make_token(
before_newline,
text_end,
token::Variant::Newline(token::variant::Newline()),
);
self.output.push(Token::from(token::text_end("", "")));
self.end_blocks(block_indent);
let token =
self.make_token(before_newline, text_end, token::Variant::newline());
self.output.push(token);
self.spaces_after_lexeme();
return false;
return TextEndedAt::End;
}
};
let token = self.make_token(
text_start,
text_end.clone(),
token::Variant::TextSection(token::variant::TextSection()),
);
let token =
self.make_token(text_start, text_end.clone(), token::Variant::text_section());
if !(token.code.is_empty() && token.left_offset.code.is_empty()) {
self.output.push(token);
}
@ -926,27 +922,30 @@ impl<'s> Lexer<'s> {
continue;
}
if interpolate && char == '\\' {
let backslash_start = self.mark();
let mut backslash_start = self.mark();
self.take_next();
if let Some(char) = self.current_char {
let token = self.make_token(
text_start,
text_start.clone(),
backslash_start.clone(),
token::Variant::TextSection(token::variant::TextSection()),
token::Variant::text_section(),
);
if !token.code.is_empty() {
if token.code.is_empty() {
backslash_start = text_start.clone();
} else {
self.output.push(token);
}
text_start = self.text_escape(backslash_start, char);
continue;
}
continue;
}
if interpolate && char == '`' {
let splice_quote_start = self.mark();
let token = self.make_token(
text_start,
splice_quote_start.clone(),
token::Variant::TextSection(token::variant::TextSection()),
token::Variant::text_section(),
);
if !(token.code.is_empty() && token.left_offset.code.is_empty()) {
self.output.push(token);
@ -956,24 +955,28 @@ impl<'s> Lexer<'s> {
let token = self.make_token(
splice_quote_start,
splice_quote_end.clone(),
token::Variant::OpenSymbol(token::variant::OpenSymbol()),
token::Variant::open_symbol(),
);
self.output.push(token);
self.stack.push(state);
return true;
return TextEndedAt::Splice;
}
self.take_next();
}
let text_end = self.mark();
let token = self.make_token(
text_start,
text_end,
token::Variant::TextSection(token::variant::TextSection()),
);
let token = self.make_token(text_start, text_end.clone(), token::Variant::text_section());
if !(token.code.is_empty() && token.left_offset.code.is_empty()) {
self.output.push(token);
}
false
let end_token = if self.current_char == closing_char {
self.take_next();
let close_quote_end = self.mark();
self.make_token(text_end, close_quote_end, token::Variant::text_end())
} else {
Token::from(token::text_end("", ""))
};
self.output.push(end_token);
TextEndedAt::End
}
fn text_escape(
@ -1011,7 +1014,7 @@ impl<'s> Lexer<'s> {
let token = self.make_token(
backslash_start,
sequence_end.clone(),
token::Variant::TextEscape(token::variant::TextEscape(value)),
token::Variant::text_escape(value),
);
self.output.push(token);
sequence_end
@ -1025,6 +1028,7 @@ impl<'s> Lexer<'s> {
'r' => Some('\x0D'),
't' => Some('\x09'),
'v' => Some('\x0B'),
'e' => Some('\x1B'),
'\\' => Some('\\'),
'"' => Some('"'),
'\'' => Some('\''),
@ -1036,24 +1040,13 @@ impl<'s> Lexer<'s> {
let token = self.make_token(
backslash_start,
escape_end.clone(),
token::Variant::TextEscape(token::variant::TextEscape(value)),
token::Variant::text_escape(value),
);
self.output.push(token);
escape_end
}
}
/// Read the lines of a text literal.
fn text_lines(&mut self, indent: VisibleOffset, is_interpolated: bool) {
self.text_content(
None,
None,
is_interpolated,
State::MultilineText { indent },
Some(indent),
);
}
fn mark(&mut self) -> (Bytes, Offset<'s>) {
let start = self.current_offset;
let left_offset_start = start - self.last_spaces_offset;
@ -1078,6 +1071,29 @@ impl<'s> Lexer<'s> {
}
}
/// Why `text_content` stopped consuming input.
#[derive(PartialEq, Eq)]
enum TextEndedAt {
    /// Lexing paused at a backtick splice; the lexer state was pushed so
    /// lexing of the literal can resume after the spliced expression.
    Splice,
    /// The literal ended: closing delimiter, end of line/block, or end of
    /// input.
    End,
}
/// Distinguishes the flavors of text-like literals the lexer handles.
#[derive(PartialEq, Eq, Copy, Clone)]
enum TextType {
    Raw,
    Interpolated,
    Documentation,
}
impl TextType {
    /// Whether escape sequences and backtick splices are recognized inside
    /// the literal.
    fn is_interpolated(self) -> bool {
        matches!(self, TextType::Interpolated)
    }
    /// Whether a line break immediately after the opening delimiter is
    /// non-semantic and emitted as a `TextInitialNewline` token. Doc comments
    /// have no such newline, since `##` is followed directly by text.
    fn expects_initial_newline(self) -> bool {
        !matches!(self, TextType::Documentation)
    }
}
/// Move whitespace characters from the end of `left` to the beginning of `right` until the visible
/// length of `left` is not longer than `target`.
#[allow(unsafe_code)]
@ -1137,14 +1153,16 @@ impl<'s> Lexer<'s> {
}
fn comment(&mut self) {
if let Some(current) = self.current_char {
if current == '#' {
self.submit_line_as(token::Variant::newline());
let initial_ident = self.current_block_indent;
let check_indent = |this: &mut Self| this.current_block_indent > initial_ident;
while self.run_and_check_if_progressed(|t| t.newline()) && check_indent(self) {
self.submit_line_as(token::Variant::newline());
}
if let Some('#') = self.current_char {
let indent = self.current_block_indent;
let start = self.mark();
self.take_next();
if let Some('#') = self.current_char {
self.multiline_text(start, indent, TextType::Documentation);
} else {
self.take_rest_of_line();
let end_line = self.mark();
self.output.push(self.make_token(start, end_line, token::Variant::newline()));
}
}
}
@ -1180,23 +1198,32 @@ impl<'s> Lexer<'s> {
self.submit_token(block_start);
self.start_block(block_indent);
}
while block_indent < self.current_block_indent {
let previous_indent = self.block_indent_stack.last().copied().unwrap_or_default();
if block_indent > previous_indent {
// The new line indent is smaller than current block but bigger than the
// previous one. We are treating the line as belonging to the
// block. The warning should be reported by parser.
break;
}
self.end_block();
let block_end = self.marker_token(token::Variant::block_end());
self.submit_token(block_end);
}
self.end_blocks(block_indent);
self.submit_token(token.with_variant(token::Variant::newline()));
newlines.drain(..).for_each(|token| self.submit_token(token));
self.token_storage.set_from(newlines);
}
}
/// Close every open block whose indentation is deeper than `block_indent`,
/// emitting a `BlockEnd` token for each one.
fn end_blocks(&mut self, block_indent: VisibleOffset) {
    while block_indent < self.current_block_indent {
        let Some(previous_indent) = self.block_indent_stack.last().copied() else {
            // If the file starts at indent > 0, we treat that as the root indent level
            // instead of creating a sub-block. If indent then decreases below that level,
            // there's no block to exit.
            break
        };
        if block_indent > previous_indent {
            // The new line indent is smaller than current block but bigger than the
            // previous one. We are treating the line as belonging to the
            // block. The warning should be reported by parser.
            break;
        }
        self.end_block();
        let block_end = self.marker_token(token::Variant::block_end());
        self.submit_token(block_end);
    }
}
}
@ -1230,6 +1257,7 @@ impl<'s> Lexer<'s> {
/// as start and end tokens).
pub fn run_flat(mut self) -> ParseResult<Vec<Token<'s>>> {
self.spaces_after_lexeme();
self.current_block_indent = self.last_spaces_visible_offset;
let mut any_parser_matched = true;
while any_parser_matched {
any_parser_matched = false;
@ -1303,11 +1331,6 @@ pub fn build_block_hierarchy(tokens: Vec<Token<'_>>) -> Vec<Item<'_>> {
// === Tests ===
// =============
/// Lexer main function used for ad-hoc testing during development.
pub fn main() {
println!("{:#?}", run_flat("\n foo\n bar"));
}
/// Test utils for fast mock tokens creation.
pub mod test {
use super::*;

View File

@ -475,7 +475,7 @@ fn splice_body(segments: NonEmptyVec<MatchedSegment>) -> syntax::Tree {
let expression = segment.result.tokens();
let expression = operator::resolve_operator_precedence_if_non_empty(expression);
let splice = syntax::tree::TextElement::Splice { open, expression, close };
syntax::Tree::text_literal(default(), vec![splice], default(), default())
syntax::Tree::text_literal(default(), default(), vec![splice], default(), default(), default())
}
fn into_open_symbol(token: syntax::token::Token) -> syntax::token::OpenSymbol {

View File

@ -57,7 +57,18 @@ fn check_file(path: &str, mut code: &str) {
let errors = RefCell::new(vec![]);
ast.map(|tree| {
if let enso_parser::syntax::tree::Variant::Invalid(err) = &*tree.variant {
errors.borrow_mut().push((err.clone(), tree.span.clone()));
let error = format!("{}: {}", err.error.message, tree.code());
errors.borrow_mut().push((error, tree.span.clone()));
} else if let enso_parser::syntax::tree::Variant::TextLiteral(text) = &*tree.variant {
for element in &text.elements {
if let enso_parser::syntax::tree::TextElement::Escape { token } = element {
if token.variant.value.is_none() {
let escape = token.code.to_string();
let error = format!("Invalid escape sequence: {escape}");
errors.borrow_mut().push((error, tree.span.clone()));
}
}
}
}
});
for (error, span) in &*errors.borrow() {
@ -77,9 +88,9 @@ fn check_file(path: &str, mut code: &str) {
char += 1;
}
}
eprintln!("{path}:{line}:{char}: {}", &error.error.message);
eprintln!("{path}:{line}:{char}: {}", &error);
} else {
eprintln!("{path}:?:?: {}", &error.error.message);
eprintln!("{path}:?:?: {}", &error);
};
}
for (parsed, original) in ast.code().lines().zip(code.lines()) {

View File

@ -146,27 +146,35 @@ struct ExpressionBuilder<'s> {
impl<'s> ExpressionBuilder<'s> {
/// Extend the expression with an operand.
pub fn operand(&mut self, mut operand: Operand<syntax::Tree<'s>>) {
if self.prev_type.replace(ItemType::Ast) == Some(ItemType::Ast) {
if let syntax::tree::Variant::OprApp(
syntax::tree::OprApp { lhs: Some(_), opr: Ok(opr), rhs: None })
= &*self.output.last().unwrap().value.variant
&& opr.properties.associativity() == token::Associativity::Right
&& opr.left_offset.is_empty() {
let syntax::Tree { span, variant: box syntax::tree::Variant::OprApp(
syntax::tree::OprApp { lhs: Some(mut lhs), opr: Ok(operator), rhs: None }) }
= self.output.pop().unwrap().value
else { unreachable!() };
lhs.span.left_offset += span.left_offset;
let precedence = operator.properties.binary_infix_precedence().unwrap();
let associativity = operator.properties.associativity();
let opr = Arity::Unary(Unary::LeftCurriedBinary { lhs, operator });
self.operator_stack.push(Operator { precedence, associativity, opr });
} else {
operand =
self.output.pop().unwrap().map(|lhs| syntax::tree::apply(lhs, operand.into()));
}
if self.prev_type == Some(ItemType::Ast) {
// Application is a token-less operator implied by juxtaposition of operands.
let precedence = token::Precedence::application();
let associativity = token::Associativity::Left;
let arity = Arity::Binary {
tokens: default(),
lhs_section_termination: default(),
};
self.push_operator(precedence, associativity, arity);
}
if let box syntax::tree::Variant::OprApp(
syntax::tree::OprApp { lhs, opr: Ok(operator), rhs: None })
= &mut operand.value.variant
&& lhs.is_some()
&& operator.properties.associativity() == token::Associativity::Right
&& operator.left_offset.is_empty() {
// Right-associative operators become unary-prefix operators when left-curried.
// E.g. `f = x-> y-> z` contains lambdas, not partially-applied arrow operators.
let mut lhs = lhs.take().unwrap();
lhs.span.left_offset += operand.value.span.left_offset;
let associativity = operator.properties.associativity();
let precedence = operator.properties.binary_infix_precedence().unwrap();
let operator = operator.clone();
let arity = Arity::Unary(Unary::LeftCurriedBinary { lhs, operator });
self.push_operator(precedence, associativity, arity);
return;
}
self.output.push(operand);
self.prev_type = Some(ItemType::Ast);
}
/// Extend the expression with an operator.

View File

@ -290,6 +290,7 @@ macro_rules! with_token_definition { ($f:ident ($($args:tt)*)) => { $f! { $($arg
#[reflect(as = "char")]
pub value: Option<char>,
},
TextInitialNewline,
Invalid,
}
}}}
@ -350,11 +351,14 @@ impl OperatorProperties {
/// Return a copy of this operator, with the given binary infix precedence.
pub fn with_binary_infix_precedence(self, value: usize) -> Self {
Self { binary_infix_precedence: Some(Precedence { value }), ..self }
let precedence = Precedence { value };
debug_assert!(precedence > Precedence::min());
Self { binary_infix_precedence: Some(precedence), ..self }
}
/// Return a copy of this operator, with unary prefix parsing allowed.
pub fn with_unary_prefix_mode(self, precedence: Precedence) -> Self {
debug_assert!(precedence > Precedence::min());
Self { unary_prefix_precedence: Some(precedence), ..self }
}
@ -489,19 +493,29 @@ impl OperatorProperties {
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Reflect, Deserialize, PartialOrd, Ord)]
pub struct Precedence {
/// A numeric value determining precedence order.
pub value: usize,
value: usize,
}
impl Precedence {
/// Return a precedence that is not higher than any other precedence.
/// Return a precedence that is lower than the precedence of any operator.
pub fn min() -> Self {
Precedence { value: 0 }
}
/// Return the precedence for any operator.
pub fn min_valid() -> Self {
Precedence { value: 1 }
}
/// Return a precedence that is not lower than any other precedence.
pub fn max() -> Self {
Precedence { value: 100 }
}
/// Return the precedence of application.
pub fn application() -> Self {
Precedence { value: 80 }
}
}
/// Associativity (left or right).

View File

@ -132,10 +132,17 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
},
TextLiteral {
pub open: Option<token::TextStart<'s>>,
/// If there is no text on the first line of a multi-line literal, the initial newline
/// is non-semantic and included here. If there is text on the line with the opening
/// quote, this will be empty and the first newline, if any, will be in a text section.
pub newline: Option<token::Newline<'s>>,
pub elements: Vec<TextElement<'s>>,
pub close: Option<token::TextEnd<'s>>,
#[serde(skip)]
#[reflect(skip)]
pub closed: bool,
#[serde(skip)]
#[reflect(skip)]
pub trim: VisibleOffset,
},
/// A simple application, like `print "hello"`.
@ -327,6 +334,19 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
pub newlines: Vec<token::Newline<'s>>,
pub expression: Option<Tree<'s>>,
},
/// An expression preceded by a doc comment.
Documented {
pub open: token::TextStart<'s>,
/// The documentation text.
pub elements: Vec<TextElement<'s>>,
#[serde(skip)]
#[reflect(skip)]
pub trim: VisibleOffset,
/// Empty lines between the comment and the item.
pub newlines: Vec<token::Newline<'s>>,
/// The item being documented.
pub expression: Option<Tree<'s>>,
},
}
}};}
@ -736,8 +756,17 @@ impl<'s> span::Builder<'s> for OperatorDelimitedTree<'s> {
/// application has special semantics.
pub fn apply<'s>(mut func: Tree<'s>, mut arg: Tree<'s>) -> Tree<'s> {
match (&mut *func.variant, &mut *arg.variant) {
(Variant::TextLiteral(lhs), Variant::TextLiteral(rhs)) if lhs.close.is_none() => {
join_text_literals(lhs, rhs.clone(), mem::take(&mut arg.span));
(Variant::TextLiteral(lhs), Variant::TextLiteral(rhs)) if !lhs.closed => {
join_text_literals(lhs, rhs, mem::take(&mut arg.span));
if lhs.open.is_some() && lhs.closed {
trim_text(lhs.trim, &mut lhs.elements);
}
if let TextLiteral { open: Some(open), newline: None, elements, closed: true, close: None, trim } = lhs && open.code.starts_with('#') {
let mut open = open.clone();
open.left_offset += func.span.left_offset;
let elements = mem::take(elements);
return Tree::documented(open, elements, *trim, default(), default());
}
func
}
(Variant::Number(func_ @ Number { base: _, integer: None, fractional_digits: None }),
@ -804,8 +833,8 @@ pub fn apply<'s>(mut func: Tree<'s>, mut arg: Tree<'s>) -> Tree<'s> {
}
fn join_text_literals<'s>(
lhs: &'_ mut TextLiteral<'s>,
mut rhs: TextLiteral<'s>,
lhs: &mut TextLiteral<'s>,
rhs: &mut TextLiteral<'s>,
rhs_span: Span<'s>,
) {
if rhs.trim != VisibleOffset(0) && (lhs.trim == VisibleOffset(0) || rhs.trim < lhs.trim) {
@ -817,28 +846,32 @@ fn join_text_literals<'s>(
Some(TextElement::Splice { open, .. }) => open.left_offset += rhs_span.left_offset,
None => (),
}
if let Some(newline) = rhs.newline.take() {
lhs.newline = newline.into();
}
lhs.elements.append(&mut rhs.elements);
lhs.close = rhs.close.take();
if lhs.open.is_some() {
let trim = lhs.trim;
let mut remaining = lhs.elements.len();
let mut carried_offset = Offset::default();
lhs.elements.retain_mut(|e| {
remaining -= 1;
let (offset, code) = match e {
TextElement::Section { text } => (&mut text.left_offset, &mut text.code),
TextElement::Escape { token } => (&mut token.left_offset, &mut token.code),
TextElement::Splice { open, .. } => (&mut open.left_offset, &mut open.code),
};
*offset += mem::take(&mut carried_offset);
crate::lexer::untrim(trim, offset, code);
if remaining != 0 && code.is_empty() {
carried_offset = mem::take(offset);
return false;
}
true
});
}
lhs.closed = rhs.closed;
}
/// Apply the literal's common indentation `trim` to every element (via
/// `lexer::untrim`), removing elements whose code becomes empty. A removed
/// element's left offset is carried forward onto the next element so no
/// source span is lost; the final element is always kept.
fn trim_text(trim: VisibleOffset, elements: &mut Vec<TextElement>) {
    let mut elements_left = elements.len();
    let mut pending_offset = Offset::default();
    elements.retain_mut(|element| {
        elements_left -= 1;
        let is_last = elements_left == 0;
        // Each element kind stores its leading offset and code differently.
        let (offset, code) = match element {
            TextElement::Section { text } => (&mut text.left_offset, &mut text.code),
            TextElement::Escape { token } => (&mut token.left_offset, &mut token.code),
            TextElement::Splice { open, .. } => (&mut open.left_offset, &mut open.code),
        };
        *offset += mem::take(&mut pending_offset);
        crate::lexer::untrim(trim, offset, code);
        if !is_last && code.is_empty() {
            // Drop the emptied element; hand its offset to the next one.
            pending_offset = mem::take(offset);
            false
        } else {
            true
        }
    });
}
/// Join two nodes with an operator, in a way appropriate for their types.
@ -920,20 +953,24 @@ impl<'s> From<Token<'s>> for Tree<'s> {
token::Variant::NumberBase(base) =>
Tree::number(Some(token.with_variant(base)), None, None),
token::Variant::TextStart(open) =>
Tree::text_literal(Some(token.with_variant(open)), default(), default(), default()),
Tree::text_literal(Some(token.with_variant(open)), default(), default(), default(), default(), default()),
token::Variant::TextSection(section) => {
let trim = token.left_offset.visible;
let section = TextElement::Section { text: token.with_variant(section) };
Tree::text_literal(default(), vec![section], default(), trim)
Tree::text_literal(default(), default(), vec![section], default(), default(), trim)
}
token::Variant::TextEscape(escape) => {
let trim = token.left_offset.visible;
let token = token.with_variant(escape);
let section = TextElement::Escape { token };
Tree::text_literal(default(), vec![section], default(), trim)
Tree::text_literal(default(), default(), vec![section], default(), default(), trim)
}
token::Variant::TextEnd(_) if token.code.is_empty() =>
Tree::text_literal(default(), default(), default(), default(), true, default()),
token::Variant::TextEnd(close) =>
Tree::text_literal(default(), default(), Some(token.with_variant(close)), default()),
Tree::text_literal(default(), default(), default(), Some(token.with_variant(close)), true, default()),
token::Variant::TextInitialNewline(_) =>
Tree::text_literal(default(), Some(token::newline(token.left_offset, token.code)), default(), default(), default(), default()),
token::Variant::Wildcard(wildcard) => Tree::wildcard(token.with_variant(wildcard), default()),
token::Variant::AutoScope(t) => Tree::auto_scope(token.with_variant(t)),
token::Variant::OpenSymbol(s) =>
@ -993,6 +1030,7 @@ pub fn recurse_left_mut_while<'s>(
| Variant::Annotated(_)
| Variant::OperatorFunction(_)
| Variant::OperatorTypeSignature(_)
| Variant::Documented(_)
| Variant::Tuple(_) => break,
// Optional LHS.
Variant::ArgumentBlockApplication(ArgumentBlockApplication { lhs, .. })
@ -1214,6 +1252,17 @@ impl<'s> span::Builder<'s> for u32 {
}
}
// `bool` fields in the AST (e.g. `TextLiteral::closed`) carry no source code,
// so every visitor treats them as inert leaves.
impl<'s, 'a> TreeVisitable<'s, 'a> for bool {}
impl<'s, 'a> TreeVisitableMut<'s, 'a> for bool {}
impl<'a, 's> SpanVisitable<'s, 'a> for bool {}
impl<'a, 's> SpanVisitableMut<'s, 'a> for bool {}
impl<'a, 's> ItemVisitable<'s, 'a> for bool {}
impl<'s> span::Builder<'s> for bool {
    fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
        // A `bool` contributes no code, so the span passes through unchanged.
        span
    }
}
// === TreeVisitable special cases ===

View File

@ -57,11 +57,15 @@ pub fn body_from_lines<'s>(lines: impl IntoIterator<Item = Line<'s>>) -> Tree<'s
while let Some(line) = lines.next() {
let mut statement = line.map_expression(expression_to_statement);
if let Some(Tree {
variant: box Variant::Annotated(Annotated { newlines, expression, .. }),
variant:
box Variant::Annotated(Annotated { newlines, expression, .. })
| box Variant::Documented(Documented { newlines, expression, .. }),
..
}) = &mut statement.expression
{
while expression.is_none() && let Some(line) = lines.next() {
while expression.is_none() &&
let Some(line) = lines.next()
{
let statement = line.map_expression(expression_to_statement);
newlines.push(statement.newline);
*expression = statement.expression;