Lex doc comments and attach text to AST (#3795)

- New `Documented` node attaches documentation, lexed as a raw text literal, to a statement.
- Handle a case of lambdas with body blocks.
2024-11-27 05:15:42 +03:00 · 2022-10-14 23:13:32 -07:00 · 2022-10-14 23:13:32 -07:00 · 2740406f93
commit 2740406f93
parent e9260227c4
10 changed files with 383 additions and 175 deletions
--- a/engine/runtime/src/test/java/org/enso/compiler/EnsoCompilerTest.java
+++ b/engine/runtime/src/test/java/org/enso/compiler/EnsoCompilerTest.java
@ -666,7 +666,6 @@ public class EnsoCompilerTest {
  }

  @Test
-  @Ignore
  public void testLambdaBody() throws Exception {
    parseTest("""
    list =
--- a/lib/rust/parser/debug/src/bin/lexer.rs
+++ b/lib/rust/parser/debug/src/bin/lexer.rs
@ -0,0 +1,32 @@
+//! Run the lexer from the command line, for understanding the early stages of the parser.
+
+// === Features ===
+#![feature(exact_size_is_empty)]
+#![feature(let_chains)]
+#![feature(if_let_guard)]
+// === Standard Linter Configuration ===
+#![deny(non_ascii_idents)]
+#![warn(unsafe_code)]
+#![allow(clippy::bool_to_int_with_if)]
+#![allow(clippy::let_and_return)]
+// === Non-Standard Linter Configuration ===
+#![allow(clippy::option_map_unit_fn)]
+#![allow(clippy::precedence)]
+#![allow(dead_code)]
+#![deny(unconditional_recursion)]
+#![warn(missing_copy_implementations)]
+#![warn(missing_debug_implementations)]
+#![warn(missing_docs)]
+#![warn(trivial_casts)]
+#![warn(trivial_numeric_casts)]
+#![warn(unused_import_braces)]
+#![warn(unused_qualifications)]
+
+
+/// Lexer main function used for ad-hoc testing during development.
+pub fn main() {
+    use std::io::Read;
+    let mut input = String::new();
+    std::io::stdin().read_to_string(&mut input).unwrap();
+    println!("{:#?}", enso_parser::lexer::run(&input));
+}
--- a/lib/rust/parser/debug/tests/parse.rs
+++ b/lib/rust/parser/debug/tests/parse.rs
@ -71,11 +71,6 @@ fn section_simple() {
    test("a +", expected_rhs);
 }

-#[test]
-fn comments() {
-    test("# a b c", block![()()]);
-}
-
 #[test]
 fn inline_if() {
    #[rustfmt::skip]
@ -102,6 +97,43 @@ fn else_block() {
 }


+// === Comments ===
+
+#[test]
+fn plain_comments() {
+    test("# a b c", block![()()]);
+}
+
+#[test]
+fn doc_comments() {
+    #[rustfmt::skip]
+    let lines = vec![
+        "## The Identity Function",
+        "",
+        "   Arguments:",
+        "   - x: value to do nothing to",
+        "id x = x",
+    ];
+    #[rustfmt::skip]
+    test(&lines.join("\n"), block![
+        (Documented
+         #((Section "# The Identity Function\n")
+           (Section "\n")
+           (Section "Arguments:\n")
+           (Section "- x: value to do nothing to"))
+         #(())
+         (Function (Ident id) #((() (Ident x) () ())) "=" (Ident x)))]);
+    #[rustfmt::skip]
+    let lines = vec![
+        " ## Test indent handling",
+        " foo",
+    ];
+    #[rustfmt::skip]
+    test(&lines.join("\n"), block![
+        (Documented #((Section "# Test indent handling")) #(()) (Ident foo))]);
+}
+
+
 // === Type Definitions ===

 #[test]
@ -491,11 +523,15 @@ fn multiple_operator_error() {

 #[test]
 fn precedence() {
-    let code = ["x * y + z"];
-    let expected = block![
-        (OprApp (OprApp (Ident x) (Ok "*") (Ident y)) (Ok "+") (Ident z))
+    #[rustfmt::skip]
+    let cases = [
+        ("x * y + z", block![(OprApp (OprApp (Ident x) (Ok "*") (Ident y)) (Ok "+") (Ident z))]),
+        ("x + y * z", block![(OprApp (Ident x) (Ok "+") (OprApp (Ident y) (Ok "*") (Ident z)))]),
+        ("w + x + y * z", block![
+            (OprApp (OprApp (Ident w) (Ok "+") (Ident x)) (Ok "+")
+                    (OprApp (Ident y) (Ok "*") (Ident z)))]),
    ];
-    test(&code.join("\n"), expected);
+    cases.into_iter().for_each(|(code, expected)| test(code, expected));
 }

 #[test]
@ -507,6 +543,15 @@ fn right_associative_operators() {
    test(&code.join("\n"), expected);
 }

+#[test]
+fn left_associative_operators() {
+    let code = ["x + y + z"];
+    let expected = block![
+        (OprApp (OprApp (Ident x) (Ok "+") (Ident y)) (Ok "+") (Ident z))
+    ];
+    test(&code.join("\n"), expected);
+}
+
 #[test]
 fn pipeline_operators() {
    test("f <| a", block![(OprApp (Ident f) (Ok "<|") (Ident a))]);
@ -841,6 +886,27 @@ x"#;
        (Ident x)
    ];
    test(code, expected);
+    let code = "  x = \"\"\"\n    Indented multiline\n  x";
+    #[rustfmt::skip]
+    let expected = block![
+        (Assignment (Ident x) "=" (TextLiteral #((Section "Indented multiline"))))
+        (Ident x)
+    ];
+    test(code, expected);
+    let code = "'''\n    \\nEscape at start\n";
+    #[rustfmt::skip]
+    let expected = block![
+        (TextLiteral #((Escape '\n') (Section "Escape at start\n")))
+    ];
+    test(code, expected);
+    let code = "x =\n x = '''\n  x\nx";
+    #[rustfmt::skip]
+    let expected = block![
+        (Function (Ident x) #() "="
+         (BodyBlock #((Assignment (Ident x) "=" (TextLiteral #((Section "x")))))))
+        (Ident x)
+    ];
+    test(code, expected);
 }

 #[test]
@ -910,6 +976,8 @@ fn old_lambdas() {
        ("x-> y", block![(OprApp (Ident x) (Ok "->") (Ident y))]),
        ("x->\n y", block![(OprApp (Ident x) (Ok "->") (BodyBlock #((Ident y))))]),
        ("x ->\n y", block![(OprApp (Ident x) (Ok "->") (BodyBlock #((Ident y))))]),
+        ("f x->\n y", block![
+            (App (Ident f) (OprApp (Ident x) (Ok "->") (BodyBlock #((Ident y)))))]),
    ];
    cases.into_iter().for_each(|(code, expected)| test(code, expected));
 }
--- a/lib/rust/parser/src/lexer.rs
+++ b/lib/rust/parser/src/lexer.rs
@ -622,7 +622,7 @@ fn analyze_operator(token: &str) -> token::OperatorProperties {
        // Operators that can be unary.
        "\\" =>
            return operator
-                .with_unary_prefix_mode(token::Precedence::min())
+                .with_unary_prefix_mode(token::Precedence::min_valid())
                .as_compile_time_operation(),
        "~" =>
            return operator
@ -778,8 +778,9 @@ impl<'s> Lexer<'s> {
 impl<'s> Lexer<'s> {
    /// Read a text literal.
    fn text(&mut self) {
-        let quote_char = match self.current_char {
-            Some(char @ ('"' | '\'')) => char,
+        let (quote_char, text_type) = match self.current_char {
+            Some(char @ '"') => (char, TextType::Raw),
+            Some(char @ '\'') => (char, TextType::Interpolated),
            Some('`') => {
                if let Some(state) = self.stack.pop() {
                    self.end_splice(state);
@ -791,86 +792,85 @@ impl<'s> Lexer<'s> {
            }
            _ => return,
        };
-        let indent = self.last_spaces_visible_offset;
+        let indent = self.current_block_indent;
        let open_quote_start = self.mark();
        self.last_spaces_visible_offset = VisibleOffset(0);
        self.last_spaces_offset = Bytes(0);
        self.take_next();
-        let mut multiline = false;
        // At least two quote characters.
        if let Some(char) = self.current_char && char == quote_char {
            let close_quote_start = self.mark();
            self.take_next();
+            let mut multiline = false;
            // If more than two quote characters: Start a multiline quote.
            while let Some(char) = self.current_char && char == quote_char {
                multiline = true;
                self.take_next();
            }
            if multiline {
-                while self.current_char.is_some() {
-                    let mut newline = self.take_1('\r');
-                    newline = newline || self.take_1('\n');
-                    if newline {
-                        break;
-                    }
-                }
-                let before_space = self.mark();
-                self.spaces_after_lexeme();
-                let text_start = self.mark();
-                let token = self.make_token(open_quote_start, before_space,
-                    token::Variant::TextStart(token::variant::TextStart()));
-                self.output.push(token);
-                let interpolate = quote_char == '\'';
-                self.text_content(Some(text_start), None, interpolate, State::MultilineText { indent }, Some(indent));
+                self.multiline_text(open_quote_start, indent, text_type);
                return;
            } else {
                // Exactly two quote characters: Open and shut case.
                let close_quote_end = self.mark();
                let token = self.make_token(open_quote_start, close_quote_start.clone(),
-                    token::Variant::TextStart(token::variant::TextStart()));
+                    token::Variant::text_start());
                self.output.push(token);
                let token = self.make_token(close_quote_start, close_quote_end,
-                    token::Variant::TextEnd(token::variant::TextEnd()));
+                    token::Variant::text_end());
                self.output.push(token);
            }
        } else {
            // One quote followed by non-quote character: Inline quote.
            let open_quote_end = self.mark();
            let token = self.make_token(open_quote_start, open_quote_end,
-                token::Variant::TextStart(token::variant::TextStart()));
+                token::Variant::text_start());
            self.output.push(token);
-            self.inline_quote(quote_char);
+            self.inline_quote(quote_char, text_type);
        }
        self.spaces_after_lexeme();
    }

-    fn inline_quote(&mut self, quote_char: char) {
-        if self.text_content(None, Some(quote_char), quote_char == '\'', State::InlineText, None) {
-            return;
-        }
-        if let Some(char) = self.current_char && char == quote_char {
-            let text_end = self.mark();
-            self.take_next();
-            let close_quote_end = self.mark();
-            let token = self.make_token(text_end, close_quote_end,
-                                        token::Variant::TextEnd(token::variant::TextEnd()));
-            self.output.push(token);
+    fn multiline_text(
+        &mut self,
+        open_quote_start: (Bytes, Offset<'s>),
+        indent: VisibleOffset,
+        text_type: TextType,
+    ) {
+        let open_quote_end = self.mark();
+        let token =
+            self.make_token(open_quote_start, open_quote_end.clone(), token::Variant::text_start());
+        self.output.push(token);
+        if text_type.expects_initial_newline() && let Some(newline) = self.line_break() {
+            self.output.push(newline.with_variant(token::Variant::text_initial_newline()));
        }
+        let text_start = self.mark();
+        self.text_content(
+            Some(text_start),
+            None,
+            text_type.is_interpolated(),
+            State::MultilineText { indent },
+            Some(indent),
+        );
+    }
+
+    fn inline_quote(&mut self, quote_char: char, text_type: TextType) {
+        let is_interpolated = text_type.is_interpolated();
+        self.text_content(None, quote_char.into(), is_interpolated, State::InlineText, None);
    }

    fn end_splice(&mut self, state: State) {
        let splice_quote_start = self.mark();
        self.take_next();
        let splice_quote_end = self.mark();
-        let token = self.make_token(
-            splice_quote_start,
-            splice_quote_end,
-            token::Variant::CloseSymbol(token::variant::CloseSymbol()),
-        );
+        let token =
+            self.make_token(splice_quote_start, splice_quote_end, token::Variant::close_symbol());
        self.output.push(token);
        match state {
-            State::InlineText => self.inline_quote('\''),
-            State::MultilineText { indent } => self.text_lines(indent, true),
+            State::InlineText => self.inline_quote('\'', TextType::Interpolated),
+            State::MultilineText { indent } => {
+                self.text_content(None, None, true, State::MultilineText { indent }, Some(indent));
+            }
        }
    }

@ -881,7 +881,7 @@ impl<'s> Lexer<'s> {
        interpolate: bool,
        state: State,
        multiline: Option<VisibleOffset>,
-    ) -> bool {
+    ) -> TextEndedAt {
        let mut text_start = start.unwrap_or_else(|| self.mark());
        while let Some(char) = self.current_char {
            if closing_char == Some(char) || (multiline.is_none() && is_newline_char(char)) {
@ -894,31 +894,27 @@ impl<'s> Lexer<'s> {
                let indent = multiline.unwrap();
                let text_end = self.mark();
                self.spaces_after_lexeme();
-                if let Some(char) = self.current_char {
-                    if self.last_spaces_visible_offset <= indent && !is_newline_char(char) {
+                if let Some(char) = self.current_char && !is_newline_char(char) {
+                    let block_indent = self.last_spaces_visible_offset;
+                    if block_indent <= indent {
                        let token = self.make_token(
                            text_start,
                            before_newline.clone(),
-                            token::Variant::TextSection(token::variant::TextSection()),
+                            token::Variant::text_section(),
                        );
                        if !(token.code.is_empty() && token.left_offset.code.is_empty()) {
                            self.output.push(token);
                        }
-                        let token = self.make_token(
-                            before_newline,
-                            text_end,
-                            token::Variant::Newline(token::variant::Newline()),
-                        );
+                        self.output.push(Token::from(token::text_end("", "")));
+                        self.end_blocks(block_indent);
+                        let token =
+                            self.make_token(before_newline, text_end, token::Variant::newline());
                        self.output.push(token);
-                        self.spaces_after_lexeme();
-                        return false;
+                        return TextEndedAt::End;
                    }
                };
-                let token = self.make_token(
-                    text_start,
-                    text_end.clone(),
-                    token::Variant::TextSection(token::variant::TextSection()),
-                );
+                let token =
+                    self.make_token(text_start, text_end.clone(), token::Variant::text_section());
                if !(token.code.is_empty() && token.left_offset.code.is_empty()) {
                    self.output.push(token);
                }
@ -926,27 +922,30 @@ impl<'s> Lexer<'s> {
                continue;
            }
            if interpolate && char == '\\' {
-                let backslash_start = self.mark();
+                let mut backslash_start = self.mark();
                self.take_next();
                if let Some(char) = self.current_char {
                    let token = self.make_token(
-                        text_start,
+                        text_start.clone(),
                        backslash_start.clone(),
-                        token::Variant::TextSection(token::variant::TextSection()),
+                        token::Variant::text_section(),
                    );
-                    if !token.code.is_empty() {
+                    if token.code.is_empty() {
+                        backslash_start = text_start.clone();
+                    } else {
                        self.output.push(token);
                    }
                    text_start = self.text_escape(backslash_start, char);
                    continue;
                }
+                continue;
            }
            if interpolate && char == '`' {
                let splice_quote_start = self.mark();
                let token = self.make_token(
                    text_start,
                    splice_quote_start.clone(),
-                    token::Variant::TextSection(token::variant::TextSection()),
+                    token::Variant::text_section(),
                );
                if !(token.code.is_empty() && token.left_offset.code.is_empty()) {
                    self.output.push(token);
@ -956,24 +955,28 @@ impl<'s> Lexer<'s> {
                let token = self.make_token(
                    splice_quote_start,
                    splice_quote_end.clone(),
-                    token::Variant::OpenSymbol(token::variant::OpenSymbol()),
+                    token::Variant::open_symbol(),
                );
                self.output.push(token);
                self.stack.push(state);
-                return true;
+                return TextEndedAt::Splice;
            }
            self.take_next();
        }
        let text_end = self.mark();
-        let token = self.make_token(
-            text_start,
-            text_end,
-            token::Variant::TextSection(token::variant::TextSection()),
-        );
+        let token = self.make_token(text_start, text_end.clone(), token::Variant::text_section());
        if !(token.code.is_empty() && token.left_offset.code.is_empty()) {
            self.output.push(token);
        }
-        false
+        let end_token = if self.current_char == closing_char {
+            self.take_next();
+            let close_quote_end = self.mark();
+            self.make_token(text_end, close_quote_end, token::Variant::text_end())
+        } else {
+            Token::from(token::text_end("", ""))
+        };
+        self.output.push(end_token);
+        TextEndedAt::End
    }

    fn text_escape(
@ -1011,7 +1014,7 @@ impl<'s> Lexer<'s> {
            let token = self.make_token(
                backslash_start,
                sequence_end.clone(),
-                token::Variant::TextEscape(token::variant::TextEscape(value)),
+                token::Variant::text_escape(value),
            );
            self.output.push(token);
            sequence_end
@ -1025,6 +1028,7 @@ impl<'s> Lexer<'s> {
                'r' => Some('\x0D'),
                't' => Some('\x09'),
                'v' => Some('\x0B'),
+                'e' => Some('\x1B'),
                '\\' => Some('\\'),
                '"' => Some('"'),
                '\'' => Some('\''),
@ -1036,24 +1040,13 @@ impl<'s> Lexer<'s> {
            let token = self.make_token(
                backslash_start,
                escape_end.clone(),
-                token::Variant::TextEscape(token::variant::TextEscape(value)),
+                token::Variant::text_escape(value),
            );
            self.output.push(token);
            escape_end
        }
    }

-    /// Read the lines of a text literal.
-    fn text_lines(&mut self, indent: VisibleOffset, is_interpolated: bool) {
-        self.text_content(
-            None,
-            None,
-            is_interpolated,
-            State::MultilineText { indent },
-            Some(indent),
-        );
-    }
-
    fn mark(&mut self) -> (Bytes, Offset<'s>) {
        let start = self.current_offset;
        let left_offset_start = start - self.last_spaces_offset;
@ -1078,6 +1071,29 @@ impl<'s> Lexer<'s> {
    }
 }

+#[derive(PartialEq, Eq)]
+enum TextEndedAt {
+    Splice,
+    End,
+}
+
+#[derive(PartialEq, Eq, Copy, Clone)]
+enum TextType {
+    Raw,
+    Interpolated,
+    Documentation,
+}
+
+impl TextType {
+    fn is_interpolated(self) -> bool {
+        self == TextType::Interpolated
+    }
+
+    fn expects_initial_newline(self) -> bool {
+        self != TextType::Documentation
+    }
+}
+
 /// Move whitespace characters from the end of `left` to the beginning of `right` until the visible
 /// length of `left` is not longer than `target`.
 #[allow(unsafe_code)]
@ -1137,14 +1153,16 @@ impl<'s> Lexer<'s> {
    }

    fn comment(&mut self) {
-        if let Some(current) = self.current_char {
-            if current == '#' {
-                self.submit_line_as(token::Variant::newline());
-                let initial_ident = self.current_block_indent;
-                let check_indent = |this: &mut Self| this.current_block_indent > initial_ident;
-                while self.run_and_check_if_progressed(|t| t.newline()) && check_indent(self) {
-                    self.submit_line_as(token::Variant::newline());
-                }
+        if let Some('#') = self.current_char {
+            let indent = self.current_block_indent;
+            let start = self.mark();
+            self.take_next();
+            if let Some('#') = self.current_char {
+                self.multiline_text(start, indent, TextType::Documentation);
+            } else {
+                self.take_rest_of_line();
+                let end_line = self.mark();
+                self.output.push(self.make_token(start, end_line, token::Variant::newline()));
            }
        }
    }
@ -1180,23 +1198,32 @@ impl<'s> Lexer<'s> {
                self.submit_token(block_start);
                self.start_block(block_indent);
            }
-            while block_indent < self.current_block_indent {
-                let previous_indent = self.block_indent_stack.last().copied().unwrap_or_default();
-                if block_indent > previous_indent {
-                    // The new line indent is smaller than current block but bigger than the
-                    // previous one. We are treating the line as belonging to the
-                    // block. The warning should be reported by parser.
-                    break;
-                }
-                self.end_block();
-                let block_end = self.marker_token(token::Variant::block_end());
-                self.submit_token(block_end);
-            }
+            self.end_blocks(block_indent);
            self.submit_token(token.with_variant(token::Variant::newline()));
            newlines.drain(..).for_each(|token| self.submit_token(token));
            self.token_storage.set_from(newlines);
        }
    }
+
+    fn end_blocks(&mut self, block_indent: VisibleOffset) {
+        while block_indent < self.current_block_indent {
+            let Some(previous_indent) = self.block_indent_stack.last().copied() else {
+                // If the file starts at indent > 0, we treat that as the root indent level
+                // instead of creating a sub-block. If indent then decreases below that level,
+                // there's no block to exit.
+                break
+            };
+            if block_indent > previous_indent {
+                // The new line's indent is smaller than the current block's but bigger than the
+                // previous one's. We treat the line as belonging to the current block. The
+                // warning should be reported by the parser.
+                break;
+            }
+            self.end_block();
+            let block_end = self.marker_token(token::Variant::block_end());
+            self.submit_token(block_end);
+        }
+    }
 }


@ -1230,6 +1257,7 @@ impl<'s> Lexer<'s> {
    /// as start and end tokens).
    pub fn run_flat(mut self) -> ParseResult<Vec<Token<'s>>> {
        self.spaces_after_lexeme();
+        self.current_block_indent = self.last_spaces_visible_offset;
        let mut any_parser_matched = true;
        while any_parser_matched {
            any_parser_matched = false;
@ -1303,11 +1331,6 @@ pub fn build_block_hierarchy(tokens: Vec<Token<'_>>) -> Vec<Item<'_>> {
 // === Tests ===
 // =============

-/// Lexer main function used for ad-hoc testing during development.
-pub fn main() {
-    println!("{:#?}", run_flat("\n  foo\n  bar"));
-}
-
 /// Test utils for fast mock tokens creation.
 pub mod test {
    use super::*;
--- a/lib/rust/parser/src/macros/built_in.rs
+++ b/lib/rust/parser/src/macros/built_in.rs
@ -475,7 +475,7 @@ fn splice_body(segments: NonEmptyVec<MatchedSegment>) -> syntax::Tree {
    let expression = segment.result.tokens();
    let expression = operator::resolve_operator_precedence_if_non_empty(expression);
    let splice = syntax::tree::TextElement::Splice { open, expression, close };
-    syntax::Tree::text_literal(default(), vec![splice], default(), default())
+    syntax::Tree::text_literal(default(), default(), vec![splice], default(), default(), default())
 }

 fn into_open_symbol(token: syntax::token::Token) -> syntax::token::OpenSymbol {
--- a/lib/rust/parser/src/main.rs
+++ b/lib/rust/parser/src/main.rs
@ -57,7 +57,18 @@ fn check_file(path: &str, mut code: &str) {
    let errors = RefCell::new(vec![]);
    ast.map(|tree| {
        if let enso_parser::syntax::tree::Variant::Invalid(err) = &*tree.variant {
-            errors.borrow_mut().push((err.clone(), tree.span.clone()));
+            let error = format!("{}: {}", err.error.message, tree.code());
+            errors.borrow_mut().push((error, tree.span.clone()));
+        } else if let enso_parser::syntax::tree::Variant::TextLiteral(text) = &*tree.variant {
+            for element in &text.elements {
+                if let enso_parser::syntax::tree::TextElement::Escape { token } = element {
+                    if token.variant.value.is_none() {
+                        let escape = token.code.to_string();
+                        let error = format!("Invalid escape sequence: {escape}");
+                        errors.borrow_mut().push((error, tree.span.clone()));
+                    }
+                }
+            }
        }
    });
    for (error, span) in &*errors.borrow() {
@ -77,9 +88,9 @@ fn check_file(path: &str, mut code: &str) {
                    char += 1;
                }
            }
-            eprintln!("{path}:{line}:{char}: {}", &error.error.message);
+            eprintln!("{path}:{line}:{char}: {}", &error);
        } else {
-            eprintln!("{path}:?:?: {}", &error.error.message);
+            eprintln!("{path}:?:?: {}", &error);
        };
    }
    for (parsed, original) in ast.code().lines().zip(code.lines()) {
--- a/lib/rust/parser/src/syntax/operator.rs
+++ b/lib/rust/parser/src/syntax/operator.rs
@ -146,27 +146,35 @@ struct ExpressionBuilder<'s> {
 impl<'s> ExpressionBuilder<'s> {
    /// Extend the expression with an operand.
    pub fn operand(&mut self, mut operand: Operand<syntax::Tree<'s>>) {
-        if self.prev_type.replace(ItemType::Ast) == Some(ItemType::Ast) {
-            if let syntax::tree::Variant::OprApp(
-                    syntax::tree::OprApp { lhs: Some(_), opr: Ok(opr), rhs: None })
-                = &*self.output.last().unwrap().value.variant
-                    && opr.properties.associativity() == token::Associativity::Right
-                    && opr.left_offset.is_empty() {
-                let syntax::Tree { span, variant: box syntax::tree::Variant::OprApp(
-                        syntax::tree::OprApp { lhs: Some(mut lhs), opr: Ok(operator), rhs: None }) }
-                    = self.output.pop().unwrap().value
-                else { unreachable!() };
-                lhs.span.left_offset += span.left_offset;
-                let precedence = operator.properties.binary_infix_precedence().unwrap();
-                let associativity = operator.properties.associativity();
-                let opr = Arity::Unary(Unary::LeftCurriedBinary { lhs, operator });
-                self.operator_stack.push(Operator { precedence, associativity, opr });
-            } else {
-                operand =
-                    self.output.pop().unwrap().map(|lhs| syntax::tree::apply(lhs, operand.into()));
-            }
+        if self.prev_type == Some(ItemType::Ast) {
+            // Application is a token-less operator implied by juxtaposition of operands.
+            let precedence = token::Precedence::application();
+            let associativity = token::Associativity::Left;
+            let arity = Arity::Binary {
+                tokens:                  default(),
+                lhs_section_termination: default(),
+            };
+            self.push_operator(precedence, associativity, arity);
+        }
+        if let box syntax::tree::Variant::OprApp(
+            syntax::tree::OprApp { lhs, opr: Ok(operator), rhs: None })
+        = &mut operand.value.variant
+            && lhs.is_some()
+            && operator.properties.associativity() == token::Associativity::Right
+            && operator.left_offset.is_empty() {
+            // Right-associative operators become unary-prefix operators when left-curried.
+            // E.g. `f = x-> y-> z` contains lambdas, not partially-applied arrow operators.
+            let mut lhs = lhs.take().unwrap();
+            lhs.span.left_offset += operand.value.span.left_offset;
+            let associativity = operator.properties.associativity();
+            let precedence = operator.properties.binary_infix_precedence().unwrap();
+            let operator = operator.clone();
+            let arity = Arity::Unary(Unary::LeftCurriedBinary { lhs, operator });
+            self.push_operator(precedence, associativity, arity);
+            return;
        }
        self.output.push(operand);
+        self.prev_type = Some(ItemType::Ast);
    }

    /// Extend the expression with an operator.
--- a/lib/rust/parser/src/syntax/token.rs
+++ b/lib/rust/parser/src/syntax/token.rs
@ -290,6 +290,7 @@ macro_rules! with_token_definition { ($f:ident ($($args:tt)*)) => { $f! { $($arg
            #[reflect(as = "char")]
            pub value: Option<char>,
        },
+        TextInitialNewline,
        Invalid,
    }
 }}}
@ -350,11 +351,14 @@ impl OperatorProperties {

    /// Return a copy of this operator, with the given binary infix precedence.
    pub fn with_binary_infix_precedence(self, value: usize) -> Self {
-        Self { binary_infix_precedence: Some(Precedence { value }), ..self }
+        let precedence = Precedence { value };
+        debug_assert!(precedence > Precedence::min());
+        Self { binary_infix_precedence: Some(precedence), ..self }
    }

    /// Return a copy of this operator, with unary prefix parsing allowed.
    pub fn with_unary_prefix_mode(self, precedence: Precedence) -> Self {
+        debug_assert!(precedence > Precedence::min());
        Self { unary_prefix_precedence: Some(precedence), ..self }
    }

@ -489,19 +493,29 @@ impl OperatorProperties {
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Reflect, Deserialize, PartialOrd, Ord)]
 pub struct Precedence {
    /// A numeric value determining precedence order.
-    pub value: usize,
+    value: usize,
 }

 impl Precedence {
-    /// Return a precedence that is not higher than any other precedence.
+    /// Return a precedence that is lower than the precedence of any operator.
    pub fn min() -> Self {
        Precedence { value: 0 }
    }

+    /// Return the lowest precedence that a valid operator can have.
+    pub fn min_valid() -> Self {
+        Precedence { value: 1 }
+    }
+
    /// Return a precedence that is not lower than any other precedence.
    pub fn max() -> Self {
        Precedence { value: 100 }
    }
+
+    /// Return the precedence of application.
+    pub fn application() -> Self {
+        Precedence { value: 80 }
+    }
 }

 /// Associativity (left or right).
--- a/lib/rust/parser/src/syntax/tree.rs
+++ b/lib/rust/parser/src/syntax/tree.rs
@ -132,10 +132,17 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
        },
        TextLiteral {
            pub open:     Option<token::TextStart<'s>>,
+            /// If there is no text on the first line of a multi-line literal, the initial newline
+            /// is non-semantic and included here. If there is text on the line with the opening
+            /// quote, this will be empty and the first newline, if any, will be in a text section.
+            pub newline:  Option<token::Newline<'s>>,
            pub elements: Vec<TextElement<'s>>,
            pub close:    Option<token::TextEnd<'s>>,
            #[serde(skip)]
            #[reflect(skip)]
+            pub closed:   bool,
+            #[serde(skip)]
+            #[reflect(skip)]
            pub trim:     VisibleOffset,
        },
        /// A simple application, like `print "hello"`.
@ -327,6 +334,19 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
            pub newlines:   Vec<token::Newline<'s>>,
            pub expression: Option<Tree<'s>>,
        },
+        /// An expression preceded by a doc comment.
+        Documented {
+            pub open:       token::TextStart<'s>,
+            /// The documentation text.
+            pub elements:   Vec<TextElement<'s>>,
+            #[serde(skip)]
+            #[reflect(skip)]
+            pub trim:       VisibleOffset,
+            /// Empty lines between the comment and the item.
+            pub newlines:   Vec<token::Newline<'s>>,
+            /// The item being documented.
+            pub expression: Option<Tree<'s>>,
+        },
    }
 }};}

@ -736,8 +756,17 @@ impl<'s> span::Builder<'s> for OperatorDelimitedTree<'s> {
 /// application has special semantics.
 pub fn apply<'s>(mut func: Tree<'s>, mut arg: Tree<'s>) -> Tree<'s> {
    match (&mut *func.variant, &mut *arg.variant) {
-        (Variant::TextLiteral(lhs), Variant::TextLiteral(rhs)) if lhs.close.is_none() => {
-            join_text_literals(lhs, rhs.clone(), mem::take(&mut arg.span));
+        (Variant::TextLiteral(lhs), Variant::TextLiteral(rhs)) if !lhs.closed => {
+            join_text_literals(lhs, rhs, mem::take(&mut arg.span));
+            if lhs.open.is_some() && lhs.closed {
+                trim_text(lhs.trim, &mut lhs.elements);
+            }
+            if let TextLiteral { open: Some(open), newline: None, elements, closed: true, close: None, trim } = lhs && open.code.starts_with('#') {
+                let mut open = open.clone();
+                open.left_offset += func.span.left_offset;
+                let elements = mem::take(elements);
+                return Tree::documented(open, elements, *trim, default(), default());
+            }
            func
        }
        (Variant::Number(func_ @ Number { base: _, integer: None, fractional_digits: None }),
@ -804,8 +833,8 @@ pub fn apply<'s>(mut func: Tree<'s>, mut arg: Tree<'s>) -> Tree<'s> {
 }

 fn join_text_literals<'s>(
-    lhs: &'_ mut TextLiteral<'s>,
-    mut rhs: TextLiteral<'s>,
+    lhs: &mut TextLiteral<'s>,
+    rhs: &mut TextLiteral<'s>,
    rhs_span: Span<'s>,
 ) {
    if rhs.trim != VisibleOffset(0) && (lhs.trim == VisibleOffset(0) || rhs.trim < lhs.trim) {
@ -817,28 +846,32 @@ fn join_text_literals<'s>(
        Some(TextElement::Splice { open, .. }) => open.left_offset += rhs_span.left_offset,
        None => (),
    }
+    if let Some(newline) = rhs.newline.take() {
+        lhs.newline = newline.into();
+    }
    lhs.elements.append(&mut rhs.elements);
    lhs.close = rhs.close.take();
-    if lhs.open.is_some() {
-        let trim = lhs.trim;
-        let mut remaining = lhs.elements.len();
-        let mut carried_offset = Offset::default();
-        lhs.elements.retain_mut(|e| {
-            remaining -= 1;
-            let (offset, code) = match e {
-                TextElement::Section { text } => (&mut text.left_offset, &mut text.code),
-                TextElement::Escape { token } => (&mut token.left_offset, &mut token.code),
-                TextElement::Splice { open, .. } => (&mut open.left_offset, &mut open.code),
-            };
-            *offset += mem::take(&mut carried_offset);
-            crate::lexer::untrim(trim, offset, code);
-            if remaining != 0 && code.is_empty() {
-                carried_offset = mem::take(offset);
-                return false;
-            }
-            true
-        });
-    }
+    lhs.closed = rhs.closed;
+}
+
+fn trim_text(trim: VisibleOffset, elements: &mut Vec<TextElement>) {
+    let mut remaining = elements.len();
+    let mut carried_offset = Offset::default();
+    elements.retain_mut(|e| {
+        remaining -= 1;
+        let (offset, code) = match e {
+            TextElement::Section { text } => (&mut text.left_offset, &mut text.code),
+            TextElement::Escape { token } => (&mut token.left_offset, &mut token.code),
+            TextElement::Splice { open, .. } => (&mut open.left_offset, &mut open.code),
+        };
+        *offset += mem::take(&mut carried_offset);
+        crate::lexer::untrim(trim, offset, code);
+        if remaining != 0 && code.is_empty() {
+            carried_offset = mem::take(offset);
+            return false;
+        }
+        true
+    });
 }

 /// Join two nodes with an operator, in a way appropriate for their types.
@ -920,20 +953,24 @@ impl<'s> From<Token<'s>> for Tree<'s> {
            token::Variant::NumberBase(base) =>
                Tree::number(Some(token.with_variant(base)), None, None),
            token::Variant::TextStart(open) =>
-                Tree::text_literal(Some(token.with_variant(open)), default(), default(), default()),
+                Tree::text_literal(Some(token.with_variant(open)), default(), default(), default(), default(), default()),
            token::Variant::TextSection(section) => {
                let trim = token.left_offset.visible;
                let section = TextElement::Section { text: token.with_variant(section) };
-                Tree::text_literal(default(), vec![section], default(), trim)
+                Tree::text_literal(default(), default(), vec![section], default(), default(), trim)
            }
            token::Variant::TextEscape(escape) => {
                let trim = token.left_offset.visible;
                let token = token.with_variant(escape);
                let section = TextElement::Escape { token };
-                Tree::text_literal(default(), vec![section], default(), trim)
+                Tree::text_literal(default(), default(), vec![section], default(), default(), trim)
            }
+            token::Variant::TextEnd(_) if token.code.is_empty() =>
+                Tree::text_literal(default(), default(), default(), default(), true, default()),
            token::Variant::TextEnd(close) =>
-                Tree::text_literal(default(), default(), Some(token.with_variant(close)), default()),
+                Tree::text_literal(default(), default(), default(), Some(token.with_variant(close)), true, default()),
+            token::Variant::TextInitialNewline(_) =>
+                Tree::text_literal(default(), Some(token::newline(token.left_offset, token.code)), default(), default(), default(), default()),
            token::Variant::Wildcard(wildcard) => Tree::wildcard(token.with_variant(wildcard), default()),
            token::Variant::AutoScope(t) => Tree::auto_scope(token.with_variant(t)),
            token::Variant::OpenSymbol(s) =>
@ -993,6 +1030,7 @@ pub fn recurse_left_mut_while<'s>(
            | Variant::Annotated(_)
            | Variant::OperatorFunction(_)
            | Variant::OperatorTypeSignature(_)
+            | Variant::Documented(_)
            | Variant::Tuple(_) => break,
            // Optional LHS.
            Variant::ArgumentBlockApplication(ArgumentBlockApplication { lhs, .. })
@ -1214,6 +1252,17 @@ impl<'s> span::Builder<'s> for u32 {
    }
 }

+impl<'s, 'a> TreeVisitable<'s, 'a> for bool {}
+impl<'s, 'a> TreeVisitableMut<'s, 'a> for bool {}
+impl<'a, 's> SpanVisitable<'s, 'a> for bool {}
+impl<'a, 's> SpanVisitableMut<'s, 'a> for bool {}
+impl<'a, 's> ItemVisitable<'s, 'a> for bool {}
+impl<'s> span::Builder<'s> for bool {
+    fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
+        span
+    }
+}
+

 // === TreeVisitable special cases ===

--- a/lib/rust/parser/src/syntax/tree/block.rs
+++ b/lib/rust/parser/src/syntax/tree/block.rs
@ -57,11 +57,15 @@ pub fn body_from_lines<'s>(lines: impl IntoIterator<Item = Line<'s>>) -> Tree<'s
    while let Some(line) = lines.next() {
        let mut statement = line.map_expression(expression_to_statement);
        if let Some(Tree {
-            variant: box Variant::Annotated(Annotated { newlines, expression, .. }),
+            variant:
+                box Variant::Annotated(Annotated { newlines, expression, .. })
+                | box Variant::Documented(Documented { newlines, expression, .. }),
            ..
        }) = &mut statement.expression
        {
-            while expression.is_none() && let Some(line) = lines.next() {
+            while expression.is_none() &&
+            let Some(line) = lines.next()
+            {
                let statement = line.map_expression(expression_to_statement);
                newlines.push(statement.newline);
                *expression = statement.expression;