mirror of
https://github.com/enso-org/enso.git
synced 2024-11-30 05:35:09 +03:00
Lex doc comments and attach text to AST (#3795)
- New `Documented` node attaches documentation, lexed as a raw text literal, to a statement. - Handle a case of lambdas with body blocks.
This commit is contained in:
parent
e9260227c4
commit
2740406f93
@ -666,7 +666,6 @@ public class EnsoCompilerTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
@Ignore
|
||||
public void testLambdaBody() throws Exception {
|
||||
parseTest("""
|
||||
list =
|
||||
|
32
lib/rust/parser/debug/src/bin/lexer.rs
Normal file
32
lib/rust/parser/debug/src/bin/lexer.rs
Normal file
@ -0,0 +1,32 @@
|
||||
//! Run the lexer from the command line, for understanding the early stages of the parser.
|
||||
|
||||
// === Features ===
|
||||
#![feature(exact_size_is_empty)]
|
||||
#![feature(let_chains)]
|
||||
#![feature(if_let_guard)]
|
||||
// === Standard Linter Configuration ===
|
||||
#![deny(non_ascii_idents)]
|
||||
#![warn(unsafe_code)]
|
||||
#![allow(clippy::bool_to_int_with_if)]
|
||||
#![allow(clippy::let_and_return)]
|
||||
// === Non-Standard Linter Configuration ===
|
||||
#![allow(clippy::option_map_unit_fn)]
|
||||
#![allow(clippy::precedence)]
|
||||
#![allow(dead_code)]
|
||||
#![deny(unconditional_recursion)]
|
||||
#![warn(missing_copy_implementations)]
|
||||
#![warn(missing_debug_implementations)]
|
||||
#![warn(missing_docs)]
|
||||
#![warn(trivial_casts)]
|
||||
#![warn(trivial_numeric_casts)]
|
||||
#![warn(unused_import_braces)]
|
||||
#![warn(unused_qualifications)]
|
||||
|
||||
|
||||
/// Lexer main function used for ad-hoc testing during development.
|
||||
pub fn main() {
|
||||
use std::io::Read;
|
||||
let mut input = String::new();
|
||||
std::io::stdin().read_to_string(&mut input).unwrap();
|
||||
println!("{:#?}", enso_parser::lexer::run(&input));
|
||||
}
|
@ -71,11 +71,6 @@ fn section_simple() {
|
||||
test("a +", expected_rhs);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn comments() {
|
||||
test("# a b c", block![()()]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn inline_if() {
|
||||
#[rustfmt::skip]
|
||||
@ -102,6 +97,43 @@ fn else_block() {
|
||||
}
|
||||
|
||||
|
||||
// === Comments ===
|
||||
|
||||
#[test]
fn plain_comments() {
    // A line consisting solely of a plain (`#`) comment.
    let code = "# a b c";
    let expected = block![()()];
    test(code, expected);
}
|
||||
|
||||
#[test]
fn doc_comments() {
    // A doc comment (`##`) spanning several lines, followed by the function definition it
    // documents.
    #[rustfmt::skip]
    let documented_function = [
        "## The Identity Function",
        "",
        " Arguments:",
        " - x: value to do nothing to",
        "id x = x",
    ].join("\n");
    #[rustfmt::skip]
    let expected = block![
        (Documented
         #((Section "# The Identity Function\n")
           (Section "\n")
           (Section "Arguments:\n")
           (Section "- x: value to do nothing to"))
         #(())
         (Function (Ident id) #((() (Ident x) () ())) "=" (Ident x)))];
    test(&documented_function, expected);

    // A doc comment and the documented expression, both starting with leading whitespace.
    #[rustfmt::skip]
    let indented_comment = [
        " ## Test indent handling",
        " foo",
    ].join("\n");
    #[rustfmt::skip]
    let expected = block![
        (Documented #((Section "# Test indent handling")) #(()) (Ident foo))];
    test(&indented_comment, expected);
}
|
||||
|
||||
|
||||
// === Type Definitions ===
|
||||
|
||||
#[test]
|
||||
@ -491,11 +523,15 @@ fn multiple_operator_error() {
|
||||
|
||||
#[test]
|
||||
fn precedence() {
|
||||
let code = ["x * y + z"];
|
||||
let expected = block![
|
||||
(OprApp (OprApp (Ident x) (Ok "*") (Ident y)) (Ok "+") (Ident z))
|
||||
#[rustfmt::skip]
|
||||
let cases = [
|
||||
("x * y + z", block![(OprApp (OprApp (Ident x) (Ok "*") (Ident y)) (Ok "+") (Ident z))]),
|
||||
("x + y * z", block![(OprApp (Ident x) (Ok "+") (OprApp (Ident y) (Ok "*") (Ident z)))]),
|
||||
("w + x + y * z", block![
|
||||
(OprApp (OprApp (Ident w) (Ok "+") (Ident x)) (Ok "+")
|
||||
(OprApp (Ident y) (Ok "*") (Ident z)))]),
|
||||
];
|
||||
test(&code.join("\n"), expected);
|
||||
cases.into_iter().for_each(|(code, expected)| test(code, expected));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -507,6 +543,15 @@ fn right_associative_operators() {
|
||||
test(&code.join("\n"), expected);
|
||||
}
|
||||
|
||||
#[test]
fn left_associative_operators() {
    // `+` applied twice must group to the left: `(x + y) + z`.
    let code = "x + y + z";
    let expected = block![
        (OprApp (OprApp (Ident x) (Ok "+") (Ident y)) (Ok "+") (Ident z))
    ];
    test(code, expected);
}
|
||||
|
||||
#[test]
|
||||
fn pipeline_operators() {
|
||||
test("f <| a", block![(OprApp (Ident f) (Ok "<|") (Ident a))]);
|
||||
@ -841,6 +886,27 @@ x"#;
|
||||
(Ident x)
|
||||
];
|
||||
test(code, expected);
|
||||
let code = " x = \"\"\"\n Indented multiline\n x";
|
||||
#[rustfmt::skip]
|
||||
let expected = block![
|
||||
(Assignment (Ident x) "=" (TextLiteral #((Section "Indented multiline"))))
|
||||
(Ident x)
|
||||
];
|
||||
test(code, expected);
|
||||
let code = "'''\n \\nEscape at start\n";
|
||||
#[rustfmt::skip]
|
||||
let expected = block![
|
||||
(TextLiteral #((Escape '\n') (Section "Escape at start\n")))
|
||||
];
|
||||
test(code, expected);
|
||||
let code = "x =\n x = '''\n x\nx";
|
||||
#[rustfmt::skip]
|
||||
let expected = block![
|
||||
(Function (Ident x) #() "="
|
||||
(BodyBlock #((Assignment (Ident x) "=" (TextLiteral #((Section "x")))))))
|
||||
(Ident x)
|
||||
];
|
||||
test(code, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -910,6 +976,8 @@ fn old_lambdas() {
|
||||
("x-> y", block![(OprApp (Ident x) (Ok "->") (Ident y))]),
|
||||
("x->\n y", block![(OprApp (Ident x) (Ok "->") (BodyBlock #((Ident y))))]),
|
||||
("x ->\n y", block![(OprApp (Ident x) (Ok "->") (BodyBlock #((Ident y))))]),
|
||||
("f x->\n y", block![
|
||||
(App (Ident f) (OprApp (Ident x) (Ok "->") (BodyBlock #((Ident y)))))]),
|
||||
];
|
||||
cases.into_iter().for_each(|(code, expected)| test(code, expected));
|
||||
}
|
||||
|
@ -622,7 +622,7 @@ fn analyze_operator(token: &str) -> token::OperatorProperties {
|
||||
// Operators that can be unary.
|
||||
"\\" =>
|
||||
return operator
|
||||
.with_unary_prefix_mode(token::Precedence::min())
|
||||
.with_unary_prefix_mode(token::Precedence::min_valid())
|
||||
.as_compile_time_operation(),
|
||||
"~" =>
|
||||
return operator
|
||||
@ -778,8 +778,9 @@ impl<'s> Lexer<'s> {
|
||||
impl<'s> Lexer<'s> {
|
||||
/// Read a text literal.
|
||||
fn text(&mut self) {
|
||||
let quote_char = match self.current_char {
|
||||
Some(char @ ('"' | '\'')) => char,
|
||||
let (quote_char, text_type) = match self.current_char {
|
||||
Some(char @ '"') => (char, TextType::Raw),
|
||||
Some(char @ '\'') => (char, TextType::Interpolated),
|
||||
Some('`') => {
|
||||
if let Some(state) = self.stack.pop() {
|
||||
self.end_splice(state);
|
||||
@ -791,86 +792,85 @@ impl<'s> Lexer<'s> {
|
||||
}
|
||||
_ => return,
|
||||
};
|
||||
let indent = self.last_spaces_visible_offset;
|
||||
let indent = self.current_block_indent;
|
||||
let open_quote_start = self.mark();
|
||||
self.last_spaces_visible_offset = VisibleOffset(0);
|
||||
self.last_spaces_offset = Bytes(0);
|
||||
self.take_next();
|
||||
let mut multiline = false;
|
||||
// At least two quote characters.
|
||||
if let Some(char) = self.current_char && char == quote_char {
|
||||
let close_quote_start = self.mark();
|
||||
self.take_next();
|
||||
let mut multiline = false;
|
||||
// If more than two quote characters: Start a multiline quote.
|
||||
while let Some(char) = self.current_char && char == quote_char {
|
||||
multiline = true;
|
||||
self.take_next();
|
||||
}
|
||||
if multiline {
|
||||
while self.current_char.is_some() {
|
||||
let mut newline = self.take_1('\r');
|
||||
newline = newline || self.take_1('\n');
|
||||
if newline {
|
||||
break;
|
||||
}
|
||||
}
|
||||
let before_space = self.mark();
|
||||
self.spaces_after_lexeme();
|
||||
let text_start = self.mark();
|
||||
let token = self.make_token(open_quote_start, before_space,
|
||||
token::Variant::TextStart(token::variant::TextStart()));
|
||||
self.output.push(token);
|
||||
let interpolate = quote_char == '\'';
|
||||
self.text_content(Some(text_start), None, interpolate, State::MultilineText { indent }, Some(indent));
|
||||
self.multiline_text(open_quote_start, indent, text_type);
|
||||
return;
|
||||
} else {
|
||||
// Exactly two quote characters: Open and shut case.
|
||||
let close_quote_end = self.mark();
|
||||
let token = self.make_token(open_quote_start, close_quote_start.clone(),
|
||||
token::Variant::TextStart(token::variant::TextStart()));
|
||||
token::Variant::text_start());
|
||||
self.output.push(token);
|
||||
let token = self.make_token(close_quote_start, close_quote_end,
|
||||
token::Variant::TextEnd(token::variant::TextEnd()));
|
||||
token::Variant::text_end());
|
||||
self.output.push(token);
|
||||
}
|
||||
} else {
|
||||
// One quote followed by non-quote character: Inline quote.
|
||||
let open_quote_end = self.mark();
|
||||
let token = self.make_token(open_quote_start, open_quote_end,
|
||||
token::Variant::TextStart(token::variant::TextStart()));
|
||||
token::Variant::text_start());
|
||||
self.output.push(token);
|
||||
self.inline_quote(quote_char);
|
||||
self.inline_quote(quote_char, text_type);
|
||||
}
|
||||
self.spaces_after_lexeme();
|
||||
}
|
||||
|
||||
fn inline_quote(&mut self, quote_char: char) {
|
||||
if self.text_content(None, Some(quote_char), quote_char == '\'', State::InlineText, None) {
|
||||
return;
|
||||
}
|
||||
if let Some(char) = self.current_char && char == quote_char {
|
||||
let text_end = self.mark();
|
||||
self.take_next();
|
||||
let close_quote_end = self.mark();
|
||||
let token = self.make_token(text_end, close_quote_end,
|
||||
token::Variant::TextEnd(token::variant::TextEnd()));
|
||||
self.output.push(token);
|
||||
fn multiline_text(
|
||||
&mut self,
|
||||
open_quote_start: (Bytes, Offset<'s>),
|
||||
indent: VisibleOffset,
|
||||
text_type: TextType,
|
||||
) {
|
||||
let open_quote_end = self.mark();
|
||||
let token =
|
||||
self.make_token(open_quote_start, open_quote_end.clone(), token::Variant::text_start());
|
||||
self.output.push(token);
|
||||
if text_type.expects_initial_newline() && let Some(newline) = self.line_break() {
|
||||
self.output.push(newline.with_variant(token::Variant::text_initial_newline()));
|
||||
}
|
||||
let text_start = self.mark();
|
||||
self.text_content(
|
||||
Some(text_start),
|
||||
None,
|
||||
text_type.is_interpolated(),
|
||||
State::MultilineText { indent },
|
||||
Some(indent),
|
||||
);
|
||||
}
|
||||
|
||||
fn inline_quote(&mut self, quote_char: char, text_type: TextType) {
|
||||
let is_interpolated = text_type.is_interpolated();
|
||||
self.text_content(None, quote_char.into(), is_interpolated, State::InlineText, None);
|
||||
}
|
||||
|
||||
fn end_splice(&mut self, state: State) {
|
||||
let splice_quote_start = self.mark();
|
||||
self.take_next();
|
||||
let splice_quote_end = self.mark();
|
||||
let token = self.make_token(
|
||||
splice_quote_start,
|
||||
splice_quote_end,
|
||||
token::Variant::CloseSymbol(token::variant::CloseSymbol()),
|
||||
);
|
||||
let token =
|
||||
self.make_token(splice_quote_start, splice_quote_end, token::Variant::close_symbol());
|
||||
self.output.push(token);
|
||||
match state {
|
||||
State::InlineText => self.inline_quote('\''),
|
||||
State::MultilineText { indent } => self.text_lines(indent, true),
|
||||
State::InlineText => self.inline_quote('\'', TextType::Interpolated),
|
||||
State::MultilineText { indent } => {
|
||||
self.text_content(None, None, true, State::MultilineText { indent }, Some(indent));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -881,7 +881,7 @@ impl<'s> Lexer<'s> {
|
||||
interpolate: bool,
|
||||
state: State,
|
||||
multiline: Option<VisibleOffset>,
|
||||
) -> bool {
|
||||
) -> TextEndedAt {
|
||||
let mut text_start = start.unwrap_or_else(|| self.mark());
|
||||
while let Some(char) = self.current_char {
|
||||
if closing_char == Some(char) || (multiline.is_none() && is_newline_char(char)) {
|
||||
@ -894,31 +894,27 @@ impl<'s> Lexer<'s> {
|
||||
let indent = multiline.unwrap();
|
||||
let text_end = self.mark();
|
||||
self.spaces_after_lexeme();
|
||||
if let Some(char) = self.current_char {
|
||||
if self.last_spaces_visible_offset <= indent && !is_newline_char(char) {
|
||||
if let Some(char) = self.current_char && !is_newline_char(char) {
|
||||
let block_indent = self.last_spaces_visible_offset;
|
||||
if block_indent <= indent {
|
||||
let token = self.make_token(
|
||||
text_start,
|
||||
before_newline.clone(),
|
||||
token::Variant::TextSection(token::variant::TextSection()),
|
||||
token::Variant::text_section(),
|
||||
);
|
||||
if !(token.code.is_empty() && token.left_offset.code.is_empty()) {
|
||||
self.output.push(token);
|
||||
}
|
||||
let token = self.make_token(
|
||||
before_newline,
|
||||
text_end,
|
||||
token::Variant::Newline(token::variant::Newline()),
|
||||
);
|
||||
self.output.push(Token::from(token::text_end("", "")));
|
||||
self.end_blocks(block_indent);
|
||||
let token =
|
||||
self.make_token(before_newline, text_end, token::Variant::newline());
|
||||
self.output.push(token);
|
||||
self.spaces_after_lexeme();
|
||||
return false;
|
||||
return TextEndedAt::End;
|
||||
}
|
||||
};
|
||||
let token = self.make_token(
|
||||
text_start,
|
||||
text_end.clone(),
|
||||
token::Variant::TextSection(token::variant::TextSection()),
|
||||
);
|
||||
let token =
|
||||
self.make_token(text_start, text_end.clone(), token::Variant::text_section());
|
||||
if !(token.code.is_empty() && token.left_offset.code.is_empty()) {
|
||||
self.output.push(token);
|
||||
}
|
||||
@ -926,27 +922,30 @@ impl<'s> Lexer<'s> {
|
||||
continue;
|
||||
}
|
||||
if interpolate && char == '\\' {
|
||||
let backslash_start = self.mark();
|
||||
let mut backslash_start = self.mark();
|
||||
self.take_next();
|
||||
if let Some(char) = self.current_char {
|
||||
let token = self.make_token(
|
||||
text_start,
|
||||
text_start.clone(),
|
||||
backslash_start.clone(),
|
||||
token::Variant::TextSection(token::variant::TextSection()),
|
||||
token::Variant::text_section(),
|
||||
);
|
||||
if !token.code.is_empty() {
|
||||
if token.code.is_empty() {
|
||||
backslash_start = text_start.clone();
|
||||
} else {
|
||||
self.output.push(token);
|
||||
}
|
||||
text_start = self.text_escape(backslash_start, char);
|
||||
continue;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if interpolate && char == '`' {
|
||||
let splice_quote_start = self.mark();
|
||||
let token = self.make_token(
|
||||
text_start,
|
||||
splice_quote_start.clone(),
|
||||
token::Variant::TextSection(token::variant::TextSection()),
|
||||
token::Variant::text_section(),
|
||||
);
|
||||
if !(token.code.is_empty() && token.left_offset.code.is_empty()) {
|
||||
self.output.push(token);
|
||||
@ -956,24 +955,28 @@ impl<'s> Lexer<'s> {
|
||||
let token = self.make_token(
|
||||
splice_quote_start,
|
||||
splice_quote_end.clone(),
|
||||
token::Variant::OpenSymbol(token::variant::OpenSymbol()),
|
||||
token::Variant::open_symbol(),
|
||||
);
|
||||
self.output.push(token);
|
||||
self.stack.push(state);
|
||||
return true;
|
||||
return TextEndedAt::Splice;
|
||||
}
|
||||
self.take_next();
|
||||
}
|
||||
let text_end = self.mark();
|
||||
let token = self.make_token(
|
||||
text_start,
|
||||
text_end,
|
||||
token::Variant::TextSection(token::variant::TextSection()),
|
||||
);
|
||||
let token = self.make_token(text_start, text_end.clone(), token::Variant::text_section());
|
||||
if !(token.code.is_empty() && token.left_offset.code.is_empty()) {
|
||||
self.output.push(token);
|
||||
}
|
||||
false
|
||||
let end_token = if self.current_char == closing_char {
|
||||
self.take_next();
|
||||
let close_quote_end = self.mark();
|
||||
self.make_token(text_end, close_quote_end, token::Variant::text_end())
|
||||
} else {
|
||||
Token::from(token::text_end("", ""))
|
||||
};
|
||||
self.output.push(end_token);
|
||||
TextEndedAt::End
|
||||
}
|
||||
|
||||
fn text_escape(
|
||||
@ -1011,7 +1014,7 @@ impl<'s> Lexer<'s> {
|
||||
let token = self.make_token(
|
||||
backslash_start,
|
||||
sequence_end.clone(),
|
||||
token::Variant::TextEscape(token::variant::TextEscape(value)),
|
||||
token::Variant::text_escape(value),
|
||||
);
|
||||
self.output.push(token);
|
||||
sequence_end
|
||||
@ -1025,6 +1028,7 @@ impl<'s> Lexer<'s> {
|
||||
'r' => Some('\x0D'),
|
||||
't' => Some('\x09'),
|
||||
'v' => Some('\x0B'),
|
||||
'e' => Some('\x1B'),
|
||||
'\\' => Some('\\'),
|
||||
'"' => Some('"'),
|
||||
'\'' => Some('\''),
|
||||
@ -1036,24 +1040,13 @@ impl<'s> Lexer<'s> {
|
||||
let token = self.make_token(
|
||||
backslash_start,
|
||||
escape_end.clone(),
|
||||
token::Variant::TextEscape(token::variant::TextEscape(value)),
|
||||
token::Variant::text_escape(value),
|
||||
);
|
||||
self.output.push(token);
|
||||
escape_end
|
||||
}
|
||||
}
|
||||
|
||||
/// Read the lines of a text literal.
|
||||
fn text_lines(&mut self, indent: VisibleOffset, is_interpolated: bool) {
|
||||
self.text_content(
|
||||
None,
|
||||
None,
|
||||
is_interpolated,
|
||||
State::MultilineText { indent },
|
||||
Some(indent),
|
||||
);
|
||||
}
|
||||
|
||||
fn mark(&mut self) -> (Bytes, Offset<'s>) {
|
||||
let start = self.current_offset;
|
||||
let left_offset_start = start - self.last_spaces_offset;
|
||||
@ -1078,6 +1071,29 @@ impl<'s> Lexer<'s> {
|
||||
}
|
||||
}
|
||||
|
||||
/// The reason lexing of a run of text content stopped.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TextEndedAt {
    /// A splice was opened inside the text; the text state was pushed onto the lexer's stack.
    Splice,
    /// The text ended, and a text-end token was emitted.
    End,
}
|
||||
|
||||
/// The kind of text being lexed, which determines how its content is interpreted.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
enum TextType {
    /// Text opened with a `"` quote.
    Raw,
    /// Text opened with a `'` quote.
    Interpolated,
    /// The text of a documentation comment (opened with `##`).
    Documentation,
}

impl TextType {
    /// Whether the text content is lexed with interpolation enabled.
    fn is_interpolated(self) -> bool {
        matches!(self, TextType::Interpolated)
    }

    /// Whether a line break directly after the opening delimiter should be lexed as a separate
    /// initial-newline token.
    fn expects_initial_newline(self) -> bool {
        // Exhaustive on purpose: adding a variant forces an explicit decision here.
        match self {
            TextType::Raw | TextType::Interpolated => true,
            TextType::Documentation => false,
        }
    }
}
|
||||
|
||||
/// Move whitespace characters from the end of `left` to the beginning of `right` until the visible
|
||||
/// length of `left` is not longer than `target`.
|
||||
#[allow(unsafe_code)]
|
||||
@ -1137,14 +1153,16 @@ impl<'s> Lexer<'s> {
|
||||
}
|
||||
|
||||
fn comment(&mut self) {
|
||||
if let Some(current) = self.current_char {
|
||||
if current == '#' {
|
||||
self.submit_line_as(token::Variant::newline());
|
||||
let initial_ident = self.current_block_indent;
|
||||
let check_indent = |this: &mut Self| this.current_block_indent > initial_ident;
|
||||
while self.run_and_check_if_progressed(|t| t.newline()) && check_indent(self) {
|
||||
self.submit_line_as(token::Variant::newline());
|
||||
}
|
||||
if let Some('#') = self.current_char {
|
||||
let indent = self.current_block_indent;
|
||||
let start = self.mark();
|
||||
self.take_next();
|
||||
if let Some('#') = self.current_char {
|
||||
self.multiline_text(start, indent, TextType::Documentation);
|
||||
} else {
|
||||
self.take_rest_of_line();
|
||||
let end_line = self.mark();
|
||||
self.output.push(self.make_token(start, end_line, token::Variant::newline()));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1180,23 +1198,32 @@ impl<'s> Lexer<'s> {
|
||||
self.submit_token(block_start);
|
||||
self.start_block(block_indent);
|
||||
}
|
||||
while block_indent < self.current_block_indent {
|
||||
let previous_indent = self.block_indent_stack.last().copied().unwrap_or_default();
|
||||
if block_indent > previous_indent {
|
||||
// The new line indent is smaller than current block but bigger than the
|
||||
// previous one. We are treating the line as belonging to the
|
||||
// block. The warning should be reported by parser.
|
||||
break;
|
||||
}
|
||||
self.end_block();
|
||||
let block_end = self.marker_token(token::Variant::block_end());
|
||||
self.submit_token(block_end);
|
||||
}
|
||||
self.end_blocks(block_indent);
|
||||
self.submit_token(token.with_variant(token::Variant::newline()));
|
||||
newlines.drain(..).for_each(|token| self.submit_token(token));
|
||||
self.token_storage.set_from(newlines);
|
||||
}
|
||||
}
|
||||
|
||||
fn end_blocks(&mut self, block_indent: VisibleOffset) {
|
||||
while block_indent < self.current_block_indent {
|
||||
let Some(previous_indent) = self.block_indent_stack.last().copied() else {
|
||||
// If the file starts at indent > 0, we treat that as the root indent level
|
||||
// instead of creating a sub-block. If indent then decreases below that level,
|
||||
// there's no block to exit.
|
||||
break
|
||||
};
|
||||
if block_indent > previous_indent {
|
||||
// The new line indent is smaller than current block but bigger than the
|
||||
// previous one. We are treating the line as belonging to the
|
||||
// block. The warning should be reported by parser.
|
||||
break;
|
||||
}
|
||||
self.end_block();
|
||||
let block_end = self.marker_token(token::Variant::block_end());
|
||||
self.submit_token(block_end);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1230,6 +1257,7 @@ impl<'s> Lexer<'s> {
|
||||
/// as start and end tokens).
|
||||
pub fn run_flat(mut self) -> ParseResult<Vec<Token<'s>>> {
|
||||
self.spaces_after_lexeme();
|
||||
self.current_block_indent = self.last_spaces_visible_offset;
|
||||
let mut any_parser_matched = true;
|
||||
while any_parser_matched {
|
||||
any_parser_matched = false;
|
||||
@ -1303,11 +1331,6 @@ pub fn build_block_hierarchy(tokens: Vec<Token<'_>>) -> Vec<Item<'_>> {
|
||||
// === Tests ===
|
||||
// =============
|
||||
|
||||
/// Lexer main function used for ad-hoc testing during development.
|
||||
pub fn main() {
|
||||
println!("{:#?}", run_flat("\n foo\n bar"));
|
||||
}
|
||||
|
||||
/// Test utils for fast mock tokens creation.
|
||||
pub mod test {
|
||||
use super::*;
|
||||
|
@ -475,7 +475,7 @@ fn splice_body(segments: NonEmptyVec<MatchedSegment>) -> syntax::Tree {
|
||||
let expression = segment.result.tokens();
|
||||
let expression = operator::resolve_operator_precedence_if_non_empty(expression);
|
||||
let splice = syntax::tree::TextElement::Splice { open, expression, close };
|
||||
syntax::Tree::text_literal(default(), vec![splice], default(), default())
|
||||
syntax::Tree::text_literal(default(), default(), vec![splice], default(), default(), default())
|
||||
}
|
||||
|
||||
fn into_open_symbol(token: syntax::token::Token) -> syntax::token::OpenSymbol {
|
||||
|
@ -57,7 +57,18 @@ fn check_file(path: &str, mut code: &str) {
|
||||
let errors = RefCell::new(vec![]);
|
||||
ast.map(|tree| {
|
||||
if let enso_parser::syntax::tree::Variant::Invalid(err) = &*tree.variant {
|
||||
errors.borrow_mut().push((err.clone(), tree.span.clone()));
|
||||
let error = format!("{}: {}", err.error.message, tree.code());
|
||||
errors.borrow_mut().push((error, tree.span.clone()));
|
||||
} else if let enso_parser::syntax::tree::Variant::TextLiteral(text) = &*tree.variant {
|
||||
for element in &text.elements {
|
||||
if let enso_parser::syntax::tree::TextElement::Escape { token } = element {
|
||||
if token.variant.value.is_none() {
|
||||
let escape = token.code.to_string();
|
||||
let error = format!("Invalid escape sequence: {escape}");
|
||||
errors.borrow_mut().push((error, tree.span.clone()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
for (error, span) in &*errors.borrow() {
|
||||
@ -77,9 +88,9 @@ fn check_file(path: &str, mut code: &str) {
|
||||
char += 1;
|
||||
}
|
||||
}
|
||||
eprintln!("{path}:{line}:{char}: {}", &error.error.message);
|
||||
eprintln!("{path}:{line}:{char}: {}", &error);
|
||||
} else {
|
||||
eprintln!("{path}:?:?: {}", &error.error.message);
|
||||
eprintln!("{path}:?:?: {}", &error);
|
||||
};
|
||||
}
|
||||
for (parsed, original) in ast.code().lines().zip(code.lines()) {
|
||||
|
@ -146,27 +146,35 @@ struct ExpressionBuilder<'s> {
|
||||
impl<'s> ExpressionBuilder<'s> {
|
||||
/// Extend the expression with an operand.
|
||||
pub fn operand(&mut self, mut operand: Operand<syntax::Tree<'s>>) {
|
||||
if self.prev_type.replace(ItemType::Ast) == Some(ItemType::Ast) {
|
||||
if let syntax::tree::Variant::OprApp(
|
||||
syntax::tree::OprApp { lhs: Some(_), opr: Ok(opr), rhs: None })
|
||||
= &*self.output.last().unwrap().value.variant
|
||||
&& opr.properties.associativity() == token::Associativity::Right
|
||||
&& opr.left_offset.is_empty() {
|
||||
let syntax::Tree { span, variant: box syntax::tree::Variant::OprApp(
|
||||
syntax::tree::OprApp { lhs: Some(mut lhs), opr: Ok(operator), rhs: None }) }
|
||||
= self.output.pop().unwrap().value
|
||||
else { unreachable!() };
|
||||
lhs.span.left_offset += span.left_offset;
|
||||
let precedence = operator.properties.binary_infix_precedence().unwrap();
|
||||
let associativity = operator.properties.associativity();
|
||||
let opr = Arity::Unary(Unary::LeftCurriedBinary { lhs, operator });
|
||||
self.operator_stack.push(Operator { precedence, associativity, opr });
|
||||
} else {
|
||||
operand =
|
||||
self.output.pop().unwrap().map(|lhs| syntax::tree::apply(lhs, operand.into()));
|
||||
}
|
||||
if self.prev_type == Some(ItemType::Ast) {
|
||||
// Application is a token-less operator implied by juxtaposition of operands.
|
||||
let precedence = token::Precedence::application();
|
||||
let associativity = token::Associativity::Left;
|
||||
let arity = Arity::Binary {
|
||||
tokens: default(),
|
||||
lhs_section_termination: default(),
|
||||
};
|
||||
self.push_operator(precedence, associativity, arity);
|
||||
}
|
||||
if let box syntax::tree::Variant::OprApp(
|
||||
syntax::tree::OprApp { lhs, opr: Ok(operator), rhs: None })
|
||||
= &mut operand.value.variant
|
||||
&& lhs.is_some()
|
||||
&& operator.properties.associativity() == token::Associativity::Right
|
||||
&& operator.left_offset.is_empty() {
|
||||
// Right-associative operators become unary-prefix operators when left-curried.
|
||||
// E.g. `f = x-> y-> z` contains lambdas, not partially-applied arrow operators.
|
||||
let mut lhs = lhs.take().unwrap();
|
||||
lhs.span.left_offset += operand.value.span.left_offset;
|
||||
let associativity = operator.properties.associativity();
|
||||
let precedence = operator.properties.binary_infix_precedence().unwrap();
|
||||
let operator = operator.clone();
|
||||
let arity = Arity::Unary(Unary::LeftCurriedBinary { lhs, operator });
|
||||
self.push_operator(precedence, associativity, arity);
|
||||
return;
|
||||
}
|
||||
self.output.push(operand);
|
||||
self.prev_type = Some(ItemType::Ast);
|
||||
}
|
||||
|
||||
/// Extend the expression with an operator.
|
||||
|
@ -290,6 +290,7 @@ macro_rules! with_token_definition { ($f:ident ($($args:tt)*)) => { $f! { $($arg
|
||||
#[reflect(as = "char")]
|
||||
pub value: Option<char>,
|
||||
},
|
||||
TextInitialNewline,
|
||||
Invalid,
|
||||
}
|
||||
}}}
|
||||
@ -350,11 +351,14 @@ impl OperatorProperties {
|
||||
|
||||
/// Return a copy of this operator, with the given binary infix precedence.
|
||||
pub fn with_binary_infix_precedence(self, value: usize) -> Self {
|
||||
Self { binary_infix_precedence: Some(Precedence { value }), ..self }
|
||||
let precedence = Precedence { value };
|
||||
debug_assert!(precedence > Precedence::min());
|
||||
Self { binary_infix_precedence: Some(precedence), ..self }
|
||||
}
|
||||
|
||||
/// Return a copy of this operator, with unary prefix parsing allowed.
|
||||
pub fn with_unary_prefix_mode(self, precedence: Precedence) -> Self {
|
||||
debug_assert!(precedence > Precedence::min());
|
||||
Self { unary_prefix_precedence: Some(precedence), ..self }
|
||||
}
|
||||
|
||||
@ -489,19 +493,29 @@ impl OperatorProperties {
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Reflect, Deserialize, PartialOrd, Ord)]
|
||||
pub struct Precedence {
|
||||
/// A numeric value determining precedence order.
|
||||
pub value: usize,
|
||||
value: usize,
|
||||
}
|
||||
|
||||
impl Precedence {
|
||||
/// Return a precedence that is not higher than any other precedence.
|
||||
/// Return a precedence that is lower than the precedence of any operator.
|
||||
pub fn min() -> Self {
|
||||
Precedence { value: 0 }
|
||||
}
|
||||
|
||||
/// Return the precedence for any operator.
|
||||
pub fn min_valid() -> Self {
|
||||
Precedence { value: 1 }
|
||||
}
|
||||
|
||||
/// Return a precedence that is not lower than any other precedence.
|
||||
pub fn max() -> Self {
|
||||
Precedence { value: 100 }
|
||||
}
|
||||
|
||||
/// Return the precedence of application.
|
||||
pub fn application() -> Self {
|
||||
Precedence { value: 80 }
|
||||
}
|
||||
}
|
||||
|
||||
/// Associativity (left or right).
|
||||
|
@ -132,10 +132,17 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
|
||||
},
|
||||
TextLiteral {
|
||||
pub open: Option<token::TextStart<'s>>,
|
||||
/// If there is no text on the first line of a multi-line literal, the initial newline
|
||||
/// is non-semantic and included here. If there is text on the line with the opening
|
||||
/// quote, this will be empty and the first newline, if any, will be in a text section.
|
||||
pub newline: Option<token::Newline<'s>>,
|
||||
pub elements: Vec<TextElement<'s>>,
|
||||
pub close: Option<token::TextEnd<'s>>,
|
||||
#[serde(skip)]
|
||||
#[reflect(skip)]
|
||||
pub closed: bool,
|
||||
#[serde(skip)]
|
||||
#[reflect(skip)]
|
||||
pub trim: VisibleOffset,
|
||||
},
|
||||
/// A simple application, like `print "hello"`.
|
||||
@ -327,6 +334,19 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
|
||||
pub newlines: Vec<token::Newline<'s>>,
|
||||
pub expression: Option<Tree<'s>>,
|
||||
},
|
||||
/// An expression preceded by a doc comment.
|
||||
Documented {
|
||||
pub open: token::TextStart<'s>,
|
||||
/// The documentation text.
|
||||
pub elements: Vec<TextElement<'s>>,
|
||||
#[serde(skip)]
|
||||
#[reflect(skip)]
|
||||
pub trim: VisibleOffset,
|
||||
/// Empty lines between the comment and the item.
|
||||
pub newlines: Vec<token::Newline<'s>>,
|
||||
/// The item being documented.
|
||||
pub expression: Option<Tree<'s>>,
|
||||
},
|
||||
}
|
||||
}};}
|
||||
|
||||
@ -736,8 +756,17 @@ impl<'s> span::Builder<'s> for OperatorDelimitedTree<'s> {
|
||||
/// application has special semantics.
|
||||
pub fn apply<'s>(mut func: Tree<'s>, mut arg: Tree<'s>) -> Tree<'s> {
|
||||
match (&mut *func.variant, &mut *arg.variant) {
|
||||
(Variant::TextLiteral(lhs), Variant::TextLiteral(rhs)) if lhs.close.is_none() => {
|
||||
join_text_literals(lhs, rhs.clone(), mem::take(&mut arg.span));
|
||||
(Variant::TextLiteral(lhs), Variant::TextLiteral(rhs)) if !lhs.closed => {
|
||||
join_text_literals(lhs, rhs, mem::take(&mut arg.span));
|
||||
if lhs.open.is_some() && lhs.closed {
|
||||
trim_text(lhs.trim, &mut lhs.elements);
|
||||
}
|
||||
if let TextLiteral { open: Some(open), newline: None, elements, closed: true, close: None, trim } = lhs && open.code.starts_with('#') {
|
||||
let mut open = open.clone();
|
||||
open.left_offset += func.span.left_offset;
|
||||
let elements = mem::take(elements);
|
||||
return Tree::documented(open, elements, *trim, default(), default());
|
||||
}
|
||||
func
|
||||
}
|
||||
(Variant::Number(func_ @ Number { base: _, integer: None, fractional_digits: None }),
|
||||
@ -804,8 +833,8 @@ pub fn apply<'s>(mut func: Tree<'s>, mut arg: Tree<'s>) -> Tree<'s> {
|
||||
}
|
||||
|
||||
fn join_text_literals<'s>(
|
||||
lhs: &'_ mut TextLiteral<'s>,
|
||||
mut rhs: TextLiteral<'s>,
|
||||
lhs: &mut TextLiteral<'s>,
|
||||
rhs: &mut TextLiteral<'s>,
|
||||
rhs_span: Span<'s>,
|
||||
) {
|
||||
if rhs.trim != VisibleOffset(0) && (lhs.trim == VisibleOffset(0) || rhs.trim < lhs.trim) {
|
||||
@ -817,28 +846,32 @@ fn join_text_literals<'s>(
|
||||
Some(TextElement::Splice { open, .. }) => open.left_offset += rhs_span.left_offset,
|
||||
None => (),
|
||||
}
|
||||
if let Some(newline) = rhs.newline.take() {
|
||||
lhs.newline = newline.into();
|
||||
}
|
||||
lhs.elements.append(&mut rhs.elements);
|
||||
lhs.close = rhs.close.take();
|
||||
if lhs.open.is_some() {
|
||||
let trim = lhs.trim;
|
||||
let mut remaining = lhs.elements.len();
|
||||
let mut carried_offset = Offset::default();
|
||||
lhs.elements.retain_mut(|e| {
|
||||
remaining -= 1;
|
||||
let (offset, code) = match e {
|
||||
TextElement::Section { text } => (&mut text.left_offset, &mut text.code),
|
||||
TextElement::Escape { token } => (&mut token.left_offset, &mut token.code),
|
||||
TextElement::Splice { open, .. } => (&mut open.left_offset, &mut open.code),
|
||||
};
|
||||
*offset += mem::take(&mut carried_offset);
|
||||
crate::lexer::untrim(trim, offset, code);
|
||||
if remaining != 0 && code.is_empty() {
|
||||
carried_offset = mem::take(offset);
|
||||
return false;
|
||||
}
|
||||
true
|
||||
});
|
||||
}
|
||||
lhs.closed = rhs.closed;
|
||||
}
|
||||
|
||||
fn trim_text(trim: VisibleOffset, elements: &mut Vec<TextElement>) {
|
||||
let mut remaining = elements.len();
|
||||
let mut carried_offset = Offset::default();
|
||||
elements.retain_mut(|e| {
|
||||
remaining -= 1;
|
||||
let (offset, code) = match e {
|
||||
TextElement::Section { text } => (&mut text.left_offset, &mut text.code),
|
||||
TextElement::Escape { token } => (&mut token.left_offset, &mut token.code),
|
||||
TextElement::Splice { open, .. } => (&mut open.left_offset, &mut open.code),
|
||||
};
|
||||
*offset += mem::take(&mut carried_offset);
|
||||
crate::lexer::untrim(trim, offset, code);
|
||||
if remaining != 0 && code.is_empty() {
|
||||
carried_offset = mem::take(offset);
|
||||
return false;
|
||||
}
|
||||
true
|
||||
});
|
||||
}
|
||||
|
||||
/// Join two nodes with an operator, in a way appropriate for their types.
|
||||
@ -920,20 +953,24 @@ impl<'s> From<Token<'s>> for Tree<'s> {
|
||||
token::Variant::NumberBase(base) =>
|
||||
Tree::number(Some(token.with_variant(base)), None, None),
|
||||
token::Variant::TextStart(open) =>
|
||||
Tree::text_literal(Some(token.with_variant(open)), default(), default(), default()),
|
||||
Tree::text_literal(Some(token.with_variant(open)), default(), default(), default(), default(), default()),
|
||||
token::Variant::TextSection(section) => {
|
||||
let trim = token.left_offset.visible;
|
||||
let section = TextElement::Section { text: token.with_variant(section) };
|
||||
Tree::text_literal(default(), vec![section], default(), trim)
|
||||
Tree::text_literal(default(), default(), vec![section], default(), default(), trim)
|
||||
}
|
||||
token::Variant::TextEscape(escape) => {
|
||||
let trim = token.left_offset.visible;
|
||||
let token = token.with_variant(escape);
|
||||
let section = TextElement::Escape { token };
|
||||
Tree::text_literal(default(), vec![section], default(), trim)
|
||||
Tree::text_literal(default(), default(), vec![section], default(), default(), trim)
|
||||
}
|
||||
token::Variant::TextEnd(_) if token.code.is_empty() =>
|
||||
Tree::text_literal(default(), default(), default(), default(), true, default()),
|
||||
token::Variant::TextEnd(close) =>
|
||||
Tree::text_literal(default(), default(), Some(token.with_variant(close)), default()),
|
||||
Tree::text_literal(default(), default(), default(), Some(token.with_variant(close)), true, default()),
|
||||
token::Variant::TextInitialNewline(_) =>
|
||||
Tree::text_literal(default(), Some(token::newline(token.left_offset, token.code)), default(), default(), default(), default()),
|
||||
token::Variant::Wildcard(wildcard) => Tree::wildcard(token.with_variant(wildcard), default()),
|
||||
token::Variant::AutoScope(t) => Tree::auto_scope(token.with_variant(t)),
|
||||
token::Variant::OpenSymbol(s) =>
|
||||
@ -993,6 +1030,7 @@ pub fn recurse_left_mut_while<'s>(
|
||||
| Variant::Annotated(_)
|
||||
| Variant::OperatorFunction(_)
|
||||
| Variant::OperatorTypeSignature(_)
|
||||
| Variant::Documented(_)
|
||||
| Variant::Tuple(_) => break,
|
||||
// Optional LHS.
|
||||
Variant::ArgumentBlockApplication(ArgumentBlockApplication { lhs, .. })
|
||||
@ -1214,6 +1252,17 @@ impl<'s> span::Builder<'s> for u32 {
|
||||
}
|
||||
}
|
||||
|
||||
// Visitor-trait implementations for `bool`. The impl bodies are empty, so each trait's default
// methods are used as-is.
// NOTE(review): the defaults are presumably no-ops, since a `bool` contains no trees, spans or
// items to descend into; confirm against the trait definitions.
impl<'s, 'a> TreeVisitable<'s, 'a> for bool {}
|
||||
impl<'s, 'a> TreeVisitableMut<'s, 'a> for bool {}
|
||||
impl<'a, 's> SpanVisitable<'s, 'a> for bool {}
|
||||
impl<'a, 's> SpanVisitableMut<'s, 'a> for bool {}
|
||||
impl<'a, 's> ItemVisitable<'s, 'a> for bool {}
|
||||
// Span-building support for `bool` values: a flag contributes nothing to a span.
impl<'s> span::Builder<'s> for bool {
|
||||
/// Returns `span` unchanged; `self` is neither read nor modified.
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
|
||||
span
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// === TreeVisitable special cases ===
|
||||
|
||||
|
@ -57,11 +57,15 @@ pub fn body_from_lines<'s>(lines: impl IntoIterator<Item = Line<'s>>) -> Tree<'s
|
||||
while let Some(line) = lines.next() {
|
||||
let mut statement = line.map_expression(expression_to_statement);
|
||||
if let Some(Tree {
|
||||
variant: box Variant::Annotated(Annotated { newlines, expression, .. }),
|
||||
variant:
|
||||
box Variant::Annotated(Annotated { newlines, expression, .. })
|
||||
| box Variant::Documented(Documented { newlines, expression, .. }),
|
||||
..
|
||||
}) = &mut statement.expression
|
||||
{
|
||||
while expression.is_none() && let Some(line) = lines.next() {
|
||||
while expression.is_none() &&
|
||||
let Some(line) = lines.next()
|
||||
{
|
||||
let statement = line.map_expression(expression_to_statement);
|
||||
newlines.push(statement.newline);
|
||||
*expression = statement.expression;
|
||||
|
Loading…
Reference in New Issue
Block a user