Parser: Named and default arguments; Text interpolation; Escape sequences (#3709)

* named and default arguments

* text interpolation and escapes

* work around a limitation of Java
This commit is contained in:
Kaz Wesley 2022-09-14 22:32:28 -07:00 committed by GitHub
parent 605bd08e8d
commit d8f274158a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 702 additions and 183 deletions

View File

@ -34,6 +34,8 @@
//! assert_eq!(s_expr, Value::cons(field_expr, Value::Null));
//! ```
// === Features ===
#![feature(let_chains)]
// === Standard Linter Configuration ===
#![deny(non_ascii_idents)]
#![warn(unsafe_code)]
@ -172,6 +174,10 @@ impl<'g> ToSExpr<'g> {
if self.skip.contains(&field.type_) {
return None;
}
if let Data::Primitive(Primitive::Option(t0)) = &self.graph[field.type_].data
&& self.skip.contains(t0) {
return None;
}
Some(if field.name.is_empty() {
value
} else {

View File

@ -18,18 +18,10 @@
fn main() {
let cases = enso_parser_generate_java::generate_testcases();
let fmt_cases = |cases: &[Vec<u8>]| {
let cases: Vec<_> = cases
.iter()
.map(|case| {
let case: Vec<_> = case.iter().map(|byte| (*byte as i8).to_string()).collect();
format!("{{{}}}", case.join(", "))
})
.collect();
cases.join(", ")
let fmt_case = |case: &[u8]| {
let case: Vec<_> = case.iter().map(|byte| (*byte as i8).to_string()).collect();
format!("{{{}}}", case.join(", "))
};
let accept = fmt_cases(&cases.accept);
let reject = fmt_cases(&cases.reject);
let package = enso_parser_generate_java::PACKAGE;
let serialization = enso_parser_generate_java::SERIALIZATION_SUPPORT;
println!("package {package};");
@ -40,12 +32,33 @@ fn main() {
println!("import java.nio.ByteOrder;");
println!();
println!("class GeneratedFormatTests {{");
println!(" private static java.util.Vector<byte[]> accept;");
println!(" private static java.util.Vector<byte[]> reject;");
for (i, case) in cases.accept.iter().enumerate() {
println!(" private static byte[] accept{i}() {{");
println!(" return new byte[] {};", fmt_case(case));
println!(" }}");
}
for (i, case) in cases.reject.iter().enumerate() {
println!(" private static byte[] reject{i}() {{");
println!(" return new byte[] {};", fmt_case(case));
println!(" }}");
}
println!(" static {{");
println!(" accept = new java.util.Vector<byte[]>();");
for (i, _) in cases.accept.iter().enumerate() {
println!(" accept.add(accept{i}());");
}
println!(" reject = new java.util.Vector<byte[]>();");
for (i, _) in cases.reject.iter().enumerate() {
println!(" reject.add(reject{i}());");
}
println!(" }}");
println!();
println!(" public static void main(String[] args) {{");
println!(" byte[][] accept = {{{accept}}};");
println!(" byte[][] reject = {{{reject}}};");
println!(" int result = 0;");
println!(" for (int i = 0; i < accept.length; i++) {{");
println!(" ByteBuffer buffer = ByteBuffer.wrap(accept[i]);");
println!(" for (byte[] testCase : accept) {{");
println!(" ByteBuffer buffer = ByteBuffer.wrap(testCase);");
println!(" buffer.order(ByteOrder.LITTLE_ENDIAN);");
println!(" CharSequence context = \"\";");
println!(" Message message = new Message(buffer, context, 0, 0);");
@ -58,8 +71,8 @@ fn main() {
println!(" result = 1;");
println!(" }}");
println!(" }}");
println!(" for (int i = 0; i < reject.length; i++) {{");
println!(" ByteBuffer buffer = ByteBuffer.wrap(reject[i]);");
println!(" for (byte[] testCase : reject) {{");
println!(" ByteBuffer buffer = ByteBuffer.wrap(testCase);");
println!(" buffer.order(ByteOrder.LITTLE_ENDIAN);");
println!(" CharSequence context = \"\";");
println!(" Message message = new Message(buffer, context, 0, 0);");

View File

@ -90,13 +90,26 @@ pub struct Lexer<'s> {
#[derive(Debug, Default)]
#[allow(missing_docs)]
pub struct LexerState {
pub current_char: Option<char>,
pub current_offset: Bytes,
pub last_spaces_offset: Bytes,
pub current_char: Option<char>,
pub current_offset: Bytes,
pub last_spaces_offset: Bytes,
pub last_spaces_visible_offset: VisibleOffset,
pub current_block_indent: VisibleOffset,
pub block_indent_stack: Vec<VisibleOffset>,
pub internal_error: Option<String>,
pub current_block_indent: VisibleOffset,
pub block_indent_stack: Vec<VisibleOffset>,
pub internal_error: Option<String>,
pub stack: Vec<State>,
}
/// Suspended states.
///
/// A state is pushed onto the lexer's `stack` when lexing of a text literal is interrupted by the
/// backquote that opens a splice, and popped again when the closing backquote is reached, so
/// that lexing of the literal can resume where it left off.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum State {
/// Reading a single-line text literal.
InlineText,
/// Reading a multi-line text literal.
MultilineText {
/// Indentation level of the quote symbol introducing the block.
indent: VisibleOffset,
},
}
impl<'s> Lexer<'s> {
@ -463,6 +476,7 @@ pub struct IdentInfo {
starts_with_underscore: bool,
lift_level: usize,
starts_with_uppercase: bool,
is_default: bool,
}
impl IdentInfo {
@ -473,7 +487,8 @@ impl IdentInfo {
let lift_level = repr.chars().rev().take_while(|t| *t == '\'').count();
let starts_with_uppercase =
repr.chars().next().map(|c| c.is_uppercase()).unwrap_or_default();
Self { starts_with_underscore, lift_level, starts_with_uppercase }
let is_default = repr == "default";
Self { starts_with_underscore, lift_level, starts_with_uppercase, is_default }
}
}
@ -499,6 +514,7 @@ impl token::Variant {
info.starts_with_underscore,
info.lift_level,
info.starts_with_uppercase,
info.is_default,
)
}
@ -512,7 +528,7 @@ impl token::Variant {
} else {
let is_free = info.starts_with_underscore;
let is_type = info.starts_with_uppercase;
token::Variant::ident(is_free, info.lift_level, is_type)
token::Variant::ident(is_free, info.lift_level, is_type, info.is_default)
}
}
}
@ -734,7 +750,26 @@ impl<'s> Lexer<'s> {
impl<'s> Lexer<'s> {
/// Read a text literal.
fn text(&mut self) {
// TODO: Interpolation within "'" quotes (#182496932); for now, treat them as raw.
if self.current_char == Some('`') {
match self.stack.last().copied() {
Some(State::InlineText) | Some(State::MultilineText { .. }) => {
let splice_quote_start = self.mark();
self.take_next();
let splice_quote_end = self.mark();
let token = self.make_token(
splice_quote_start,
splice_quote_end,
token::Variant::Symbol(token::variant::Symbol()),
);
self.output.push(token);
match self.stack.pop().unwrap() {
State::InlineText => self.inline_quote('\''),
State::MultilineText { indent } => self.text_lines(indent, true),
}
}
None => return,
}
}
let quote_char = match self.current_char {
Some(char @ ('"' | '\'')) => char,
_ => return,
@ -759,14 +794,11 @@ impl<'s> Lexer<'s> {
let token = self.make_token(open_quote_start, text_start.clone(),
token::Variant::TextStart(token::variant::TextStart()));
self.output.push(token);
self.take_rest_of_line();
let next_line_start = self.mark();
let token = self.make_token(text_start, next_line_start,
token::Variant::TextSection(token::variant::TextSection()));
if !token.code.is_empty() {
self.output.push(token);
let interpolate = quote_char == '\'';
if self.text_content(None, interpolate, State::MultilineText { indent }) {
return;
}
self.text_lines(indent);
self.text_lines(indent, interpolate);
} else {
// Exactly two quote characters: Open and shut case.
let close_quote_end = self.mark();
@ -779,49 +811,179 @@ impl<'s> Lexer<'s> {
}
} else {
// One quote followed by non-quote character: Inline quote.
let mut text_start = self.mark();
let token = self.make_token(open_quote_start, text_start.clone(),
let open_quote_end = self.mark();
let token = self.make_token(open_quote_start, open_quote_end,
token::Variant::TextStart(token::variant::TextStart()));
self.output.push(token);
while let Some(char) = self.current_char {
if char == quote_char || is_newline_char(char) {
break;
}
if char == '\\' {
let escape_start = self.mark();
self.take_next();
if let Some(char) = self.current_char && char == quote_char {
let token = self.make_token(text_start, escape_start.clone(),
token::Variant::TextSection(token::variant::TextSection()));
self.output.push(token);
let escape_end = self.mark();
let token = self.make_token(escape_start, escape_end.clone(),
token::Variant::TextEscape(token::variant::TextEscape()));
self.output.push(token);
text_start = escape_end;
}
}
self.take_next();
}
let close_quote_start = self.mark();
let token = self.make_token(text_start, close_quote_start.clone(),
token::Variant::TextSection(token::variant::TextSection()));
if !token.code.is_empty() {
self.output.push(token);
}
if let Some(char) = self.current_char && char == quote_char {
self.take_next();
let close_quote_end = self.mark();
let token = self.make_token(close_quote_start, close_quote_end,
token::Variant::TextEnd(token::variant::TextEnd()));
self.output.push(token);
}
self.inline_quote(quote_char);
}
self.spaces_after_lexeme();
}
/// Read the lines of a text literal, after the initial line introducing it.
fn text_lines(&mut self, indent: VisibleOffset) {
fn inline_quote(&mut self, quote_char: char) {
if self.text_content(Some(quote_char), quote_char == '\'', State::InlineText) {
return;
}
if let Some(char) = self.current_char && char == quote_char {
let text_end = self.mark();
self.take_next();
let close_quote_end = self.mark();
let token = self.make_token(text_end, close_quote_end,
token::Variant::TextEnd(token::variant::TextEnd()));
self.output.push(token);
}
}
/// Lex text content starting at the current position, emitting tokens for what was read.
///
/// Scanning stops at a newline, at `closing_char` (when one is given), at the end of the input,
/// or at the backquote that opens a splice in an interpolated literal. The loop breaks before
/// consuming the terminating newline or closing character.
///
/// - `closing_char`: the character that terminates the content, if any.
/// - `interpolate`: when `true`, a backslash followed by any character is handed to
///   `text_escape` and a backquote opens a splice; when `false`, a backslash is only treated as
///   an escape when it is followed by `closing_char`.
/// - `state`: the state pushed onto `self.stack` if a splice is opened.
///
/// Returns `true` if a splice was opened (the literal is suspended), `false` otherwise.
fn text_content(
&mut self,
closing_char: Option<char>,
interpolate: bool,
state: State,
) -> bool {
// Start of the text section currently being accumulated.
let mut text_start = self.mark();
while let Some(char) = self.current_char {
if is_newline_char(char) || closing_char == Some(char) {
break;
}
if char == '\\' {
// Remember where the backslash starts, then inspect the character after it.
let backslash_start = self.mark();
self.take_next();
if let Some(char) = self.current_char && (interpolate || closing_char == Some(char)) {
// `text_escape` emits the pending text and the escape tokens, and moves `text_start`.
self.text_escape(char, interpolate, backslash_start, &mut text_start);
continue;
}
}
// Note: `char` here is still the character seen at the top of this iteration.
if interpolate && char == '`' {
// Flush the text accumulated before the splice, if any.
let splice_quote_start = self.mark();
let token = self.make_token(
text_start,
splice_quote_start.clone(),
token::Variant::TextSection(token::variant::TextSection()),
);
if !token.code.is_empty() {
self.output.push(token);
}
// Emit the opening backquote as a symbol token.
self.take_next();
let splice_quote_end = self.mark();
let token = self.make_token(
splice_quote_start,
splice_quote_end.clone(),
token::Variant::Symbol(token::variant::Symbol()),
);
self.output.push(token);
self.spaces_after_lexeme();
// Suspend the literal; `state` records how to resume once the splice is closed.
self.stack.push(state);
return true;
}
// Ordinary character, or the character following a backslash that is not an escape.
self.take_next();
}
let text_end = self.mark();
let token = self.make_token(
text_start,
text_end,
token::Variant::TextSection(token::variant::TextSection()),
);
// The final section is dropped only if both its code and its left offset are empty.
if !(token.code.is_empty() && token.left_offset.code.is_empty()) {
self.output.push(token);
}
false
}
/// Lex a backslash escape inside a text literal.
///
/// Called after the backslash has been consumed; `char` is the current, not yet consumed
/// character that follows it. Any text between `text_start` and the backslash is emitted as a
/// `TextSection` first. On return, `text_start` points at the end of the escape and the current
/// character is the first character after the escape, left unconsumed so that the caller can
/// examine it (it may be a closing quote, a newline, a backquote or another backslash).
///
/// - `char`: the character following the backslash.
/// - `interpolate`: whether the literal is interpolated; only then are the numeric escapes
///   `\x`, `\u` and `\U` recognized.
/// - `backslash_start`: mark taken immediately before the backslash.
/// - `text_start`: start of the pending text section; updated to the end of the escape.
fn text_escape(
    &mut self,
    char: char,
    interpolate: bool,
    backslash_start: (Bytes, Offset<'s>),
    text_start: &'_ mut (Bytes, Offset<'s>),
) {
    // Flush the text preceding the backslash as its own section.
    let token = self.make_token(
        text_start.clone(),
        backslash_start.clone(),
        token::Variant::TextSection(token::variant::TextSection()),
    );
    if !token.code.is_empty() {
        self.output.push(token);
    }
    // Numeric escapes. The `interpolate` requirement applies to all three leaders; the
    // alternatives are grouped explicitly because `&&` binds tighter than `||`.
    if interpolate && matches!(char, 'x' | 'u' | 'U') {
        // The leader token covers the backslash and the format character.
        self.take_next();
        let leader_end = self.mark();
        let token = self.make_token(
            backslash_start,
            leader_end.clone(),
            token::Variant::TextEscapeLeader(token::variant::TextEscapeLeader()),
        );
        self.output.push(token);
        // Maximum number of hex digits, and whether the `{...}` delimited form is allowed.
        let (mut expect_len, accepts_delimiter) = match char {
            'x' => (2, false),
            'u' => (4, true),
            'U' => (8, false),
            _ => unreachable!(),
        };
        let delimited = accepts_delimiter && self.current_char == Some('{');
        let mut sequence_start = leader_end.clone();
        if delimited {
            self.take_next();
            sequence_start = self.mark();
            let token = self.make_token(
                leader_end,
                sequence_start.clone(),
                token::Variant::TextEscapeSequenceStart(
                    token::variant::TextEscapeSequenceStart(),
                ),
            );
            self.output.push(token);
            // The delimited form accepts up to six digits.
            expect_len = 6;
        }
        // Consume at most `expect_len` hex digits; fewer (even zero) are tolerated here.
        for _ in 0..expect_len {
            if let Some(c) = self.current_char && is_hexadecimal_digit(c) {
                self.take_next();
            } else {
                break;
            }
        }
        let sequence_end = self.mark();
        let token = self.make_token(
            sequence_start,
            sequence_end.clone(),
            token::Variant::TextEscapeHexDigits(token::variant::TextEscapeHexDigits()),
        );
        self.output.push(token);
        if delimited && self.current_char == Some('}') {
            self.take_next();
            let close_end = self.mark();
            let token = self.make_token(
                sequence_end,
                close_end.clone(),
                token::Variant::TextEscapeSequenceEnd(token::variant::TextEscapeSequenceEnd()),
            );
            self.output.push(token);
            *text_start = close_end;
        } else {
            *text_start = sequence_end;
        }
        return;
    }
    // Simple escape: one token for the backslash, one for the escaped character.
    let backslash_end = self.mark();
    let token = self.make_token(
        backslash_start,
        backslash_end.clone(),
        token::Variant::TextEscapeSymbol(token::variant::TextEscapeSymbol()),
    );
    self.output.push(token);
    self.take_next();
    let escaped_end = self.mark();
    let token = self.make_token(
        backslash_end,
        escaped_end.clone(),
        token::Variant::TextEscapeChar(token::variant::TextEscapeChar()),
    );
    self.output.push(token);
    // Do not consume anything past the escaped character: the caller's loop must get to see
    // the next character, exactly as it does after a numeric escape.
    *text_start = escaped_end;
}
/// Read the lines of a text literal.
fn text_lines(&mut self, indent: VisibleOffset, is_interpolated: bool) {
while self.current_char.is_some() {
let start = self.mark();
// Consume the newline and any spaces.
@ -829,46 +991,43 @@ impl<'s> Lexer<'s> {
self.take_1('\n');
}
let before_space = self.mark();
self.spaces_after_lexeme();
let after_space = self.mark();
// Check indent, unless this is an empty line.
if let Some(char) = self.current_char {
if self.last_spaces_visible_offset <= indent && !is_newline_char(char) {
let token = self.make_token(
start,
before_space,
token::Variant::Newline(token::variant::Newline()),
);
self.output.push(token);
return;
}
};
// Output the newline as a text section.
self.last_spaces_visible_offset = VisibleOffset(0);
self.last_spaces_offset = Bytes(0);
let token = self.make_token(
start,
before_space,
token::Variant::TextSection(token::variant::TextSection()),
);
self.output.push(token);
if start != before_space {
// Create a text section for the newline.
let newline = self.make_token(
start.clone(),
before_space.clone(),
token::Variant::TextSection(token::variant::TextSection()),
);
self.spaces_after_lexeme();
// Check indent, unless this is an empty line.
if let Some(char) = self.current_char {
if self.last_spaces_visible_offset <= indent && !is_newline_char(char) {
let token = self.make_token(
start,
before_space,
token::Variant::Newline(token::variant::Newline()),
);
self.output.push(token);
self.spaces_after_lexeme();
return;
}
};
self.output.push(newline);
}
// Output the line as a text section.
self.take_rest_of_line();
let next_line_start = self.mark();
let token = self.make_token(
after_space,
next_line_start,
token::Variant::TextSection(token::variant::TextSection()),
);
self.output.push(token);
if self.text_content(None, is_interpolated, State::MultilineText { indent }) {
break;
}
}
}
fn mark(&self) -> (Bytes, Offset<'s>) {
/// Capture the current position together with the whitespace offset preceding it.
///
/// The pending whitespace offset is handed out exactly once: after it has been captured, the
/// lexer's record of the last spaces is reset to zero.
fn mark(&mut self) -> (Bytes, Offset<'s>) {
    let position = self.current_offset;
    let spaces_begin = position - self.last_spaces_offset;
    let offset =
        Offset(self.last_spaces_visible_offset, self.input.slice(spaces_begin..position));
    // Reset only after both values have been read into `offset`.
    self.last_spaces_visible_offset = VisibleOffset(0);
    self.last_spaces_offset = Bytes(0);
    (position, offset)
}
@ -1075,7 +1234,7 @@ pub mod test {
let is_free = code.starts_with('_');
let lift_level = code.chars().rev().take_while(|t| *t == '\'').count();
let is_uppercase = code.chars().next().map(|c| c.is_uppercase()).unwrap_or_default();
token::ident_(left_offset, code, is_free, lift_level, is_uppercase)
token::ident_(left_offset, code, is_free, lift_level, is_uppercase, false)
}
/// Constructor.

View File

@ -235,13 +235,7 @@ fn expression_to_statement(mut tree: syntax::Tree<'_>) -> syntax::Tree<'_> {
_ => return tree,
};
if let OprApp { lhs: Some(lhs), opr: Ok(opr), rhs } = opr_app && opr.properties.is_assignment() {
let mut args = vec![];
let mut lhs_ = lhs.clone();
while let Tree { variant: box Variant::App(App { func, arg }), .. } = lhs_ {
lhs_ = func.clone();
args.push(arg.clone());
}
args.reverse();
let (mut lhs_, args) = collect_arguments(lhs.clone());
if let Some(rhs) = rhs {
if let Variant::Ident(ident) = &*lhs_.variant && ident.token.variant.is_type {
// If the LHS is a type, this is a (destructuring) assignment.
@ -271,6 +265,62 @@ fn expression_to_statement(mut tree: syntax::Tree<'_>) -> syntax::Tree<'_> {
tree
}
/// Split the left-hand side of a definition into its head and its argument definitions.
///
/// Starting from `lhs_`, nested `App` and `NamedApp` nodes are peeled off one at a time, and
/// each applied argument is converted into an `ArgumentDefinition`. The walk stops at the first
/// node that is neither kind of application.
///
/// Returns that innermost tree together with the arguments in source order.
fn collect_arguments(
mut lhs_: syntax::Tree,
) -> (syntax::Tree, Vec<syntax::tree::ArgumentDefinition>) {
use syntax::tree::*;
// Arguments are gathered outermost-first and reversed at the end.
let mut args = vec![];
loop {
match lhs_.variant {
box Variant::App(App { func, arg }) => {
lhs_ = func.clone();
match &arg.variant {
// `pattern = expression`: an argument with a default, written without parentheses.
box Variant::OprApp(OprApp { lhs: Some(lhs), opr: Ok(opr), rhs: Some(rhs) })
if opr.properties.is_assignment() => {
let equals = opr.clone();
let open = default();
let close = default();
let mut pattern = lhs.clone();
// The argument node is dissolved, so its left offset is carried over to the pattern.
pattern.span.left_offset += arg.span.left_offset.clone();
let default = Some(ArgumentDefault { equals, expression: rhs.clone() });
args.push(ArgumentDefinition { open, pattern, default, close });
}
// `(pattern = expression)`: an argument with a default, wrapped in a group.
box Variant::Group(Group { open, body: Some(body), close }) if let box
Variant::OprApp(OprApp { lhs: Some(lhs), opr: Ok(opr), rhs: Some(rhs) }) = &body.variant
&& opr.properties.is_assignment() => {
let equals = opr.clone();
let mut open = open.clone();
// Here the argument's left offset is carried over to the opening token instead.
open.left_offset += arg.span.left_offset.clone();
let open = Some(open);
let close = Some(close.clone());
let default = Some(ArgumentDefault { equals, expression: rhs.clone() });
let pattern = lhs.clone();
args.push(ArgumentDefinition { open, pattern, default, close });
}
// Anything else is a plain pattern without a default.
_ => {
let open = default();
let close = default();
let default = default();
args.push(ArgumentDefinition { open, pattern: arg.clone(), default, close });
}
}
}
// A named application found on a definition's left-hand side is reinterpreted as an
// argument whose pattern is the name and whose default is the applied value.
box Variant::NamedApp(NamedApp { func, open, name, equals, arg, close }) => {
lhs_ = func.clone();
let open = open.clone();
let close = close.clone();
let equals = equals.clone();
let pattern = Tree::ident(name);
let default = Some(ArgumentDefault { equals, expression: arg.clone() });
args.push(ArgumentDefinition { open, pattern, default, close });
}
_ => break,
}
}
args.reverse();
(lhs_, args)
}
/// Return whether the expression is a body block.
fn is_body_block(expression: &syntax::tree::Tree<'_>) -> bool {
matches!(&*expression.variant, syntax::tree::Variant::BodyBlock { .. })

View File

@ -24,6 +24,7 @@ pub fn all() -> resolver::SegmentMap<'static> {
macro_map.register(case());
macro_map.register(array());
macro_map.register(tuple());
macro_map.register(splice());
macro_map
}
@ -132,17 +133,10 @@ pub fn group<'s>() -> Definition<'s> {
fn group_body(segments: NonEmptyVec<MatchedSegment>) -> syntax::Tree {
use operator::resolve_operator_precedence_if_non_empty;
use syntax::token;
macro_rules! into_symbol {
($token:expr) => {{
let token::Token { left_offset, code, .. } = $token;
token::symbol(left_offset, code)
}};
}
let (close, mut segments) = segments.pop();
let close = into_symbol!(close.header);
let close = into_symbol(close.header);
let segment = segments.pop().unwrap();
let open = into_symbol!(segment.header);
let open = into_symbol(segment.header);
let body = segment.result.tokens();
let body = resolve_operator_precedence_if_non_empty(body);
syntax::Tree::group(open, body, close)
@ -195,7 +189,7 @@ fn type_def_body(matched_segments: NonEmptyVec<MatchedSegment>) -> syntax::Tree
match name {
Some(name) => syntax::Tree::type_def(segment.header, name, params, constructors, body),
None => {
let name = syntax::Tree::ident(syntax::token::ident("", "", false, 0, false));
let name = syntax::Tree::ident(syntax::token::ident("", "", false, 0, false, false));
let result = syntax::Tree::type_def(segment.header, name, params, constructors, body);
result.with_error("Expected identifier after `type` keyword.")
}
@ -320,7 +314,7 @@ fn case_body(segments: NonEmptyVec<MatchedSegment>) -> syntax::Tree {
use syntax::tree::*;
let into_ident = |token| {
let token::Token { left_offset, code, .. } = token;
token::ident(left_offset, code, false, 0, false)
token::ident(left_offset, code, false, 0, false, false)
};
let (of, mut rest) = segments.pop();
let case = rest.pop().unwrap();
@ -381,12 +375,7 @@ struct GroupedSequence<'s> {
fn grouped_sequence(segments: NonEmptyVec<MatchedSegment>) -> GroupedSequence {
use operator::resolve_operator_precedence_if_non_empty;
use syntax::token;
use syntax::tree::*;
let into_symbol = |token| {
let token::Token { left_offset, code, .. } = token;
token::symbol(left_offset, code)
};
let (right, mut rest) = segments.pop();
let right_ = into_symbol(right.header);
let left = rest.pop().unwrap();
@ -406,3 +395,24 @@ fn grouped_sequence(segments: NonEmptyVec<MatchedSegment>) -> GroupedSequence {
let first = lhs_.clone();
GroupedSequence { left: left_, first, rest, right: right_ }
}
/// Macro definition for a splice: an opening backquote segment followed by a closing backquote
/// segment. Matches are converted into a tree by `splice_body`.
fn splice<'s>() -> Definition<'s> {
crate::macro_definition! {("`", everything(), "`", nothing()) splice_body}
}
/// Build the tree for a matched splice.
///
/// The last segment supplies the closing backquote; the one before it supplies the opening
/// backquote and the tokens of the interpolated expression. The result is a text literal
/// without quotes whose only element is the splice.
fn splice_body(segments: NonEmptyVec<MatchedSegment>) -> syntax::Tree {
    use operator::resolve_operator_precedence_if_non_empty;
    let (closing_segment, mut remaining) = segments.pop();
    let opening_segment = remaining.pop().unwrap();
    let close = into_symbol(closing_segment.header);
    let open = into_symbol(opening_segment.header);
    let expression = resolve_operator_precedence_if_non_empty(opening_segment.result.tokens());
    let element = syntax::tree::TextElement::Splice { open, expression, close };
    syntax::Tree::text_literal(default(), vec![element], default(), default())
}
/// Reinterpret a token as a symbol token, keeping its left offset and code and discarding
/// everything else it carried.
fn into_symbol(token: syntax::token::Token) -> syntax::token::Symbol {
    syntax::token::symbol(token.left_offset, token.code)
}

View File

@ -61,12 +61,59 @@ impl<'s> Item<'s> {
let section = tree::TextElement::Section { text: token.with_variant(section) };
Tree::text_literal(default(), vec![section], default(), trim)
}
token::Variant::TextEscape(escape) => {
token::Variant::TextEscapeSymbol(escape) => {
let trim = token.left_offset.visible;
let backslash = token.with_variant(escape);
let section = tree::TextElement::Escape { backslash };
let backslash = Some(token.with_variant(escape));
let section = tree::TextElement::EscapeChar { backslash, char: None };
Tree::text_literal(default(), vec![section], default(), trim)
}
token::Variant::TextEscapeChar(escape) => {
let trim = token.left_offset.visible;
let char = Some(token.with_variant(escape));
let section = tree::TextElement::EscapeChar { backslash: None, char };
Tree::text_literal(default(), vec![section], default(), trim)
}
token::Variant::TextEscapeLeader(leader) => {
let trim = token.left_offset.visible;
let leader = Some(token.with_variant(leader));
let section = tree::TextElement::EscapeSequence {
leader,
open: None,
digits: None,
close: None,
};
Tree::text_literal(default(), vec![section], default(), trim)
}
token::Variant::TextEscapeHexDigits(digits) => {
let digits = Some(token.with_variant(digits));
let section = tree::TextElement::EscapeSequence {
leader: None,
open: None,
digits,
close: None,
};
Tree::text_literal(default(), vec![section], default(), default())
}
token::Variant::TextEscapeSequenceStart(t) => {
let open = Some(token.with_variant(t));
let section = tree::TextElement::EscapeSequence {
leader: None,
open,
digits: None,
close: None,
};
Tree::text_literal(default(), vec![section], default(), default())
}
token::Variant::TextEscapeSequenceEnd(t) => {
let close = Some(token.with_variant(t));
let section = tree::TextElement::EscapeSequence {
leader: None,
open: None,
digits: None,
close,
};
Tree::text_literal(default(), vec![section], default(), default())
}
token::Variant::TextEnd(close) => Tree::text_literal(
default(),
default(),
@ -77,8 +124,9 @@ impl<'s> Item<'s> {
token::Variant::AutoScope(t) => Tree::auto_scope(token.with_variant(t)),
_ => {
let message = format!("to_ast: Item::Token({token:?})");
let value =
Tree::ident(token.with_variant(token::variant::Ident(false, 0, false)));
let value = Tree::ident(
token.with_variant(token::variant::Ident(false, 0, false, false)),
);
Tree::with_unsupported(value, message)
}
},

View File

@ -262,6 +262,7 @@ macro_rules! with_token_definition { ($f:ident ($($args:tt)*)) => { $f! { $($arg
pub is_free: bool,
pub lift_level: usize,
pub is_type: bool,
pub is_default: bool,
},
Operator {
pub properties: OperatorProperties,
@ -275,14 +276,21 @@ macro_rules! with_token_definition { ($f:ident ($($args:tt)*)) => { $f! { $($arg
TextStart,
TextEnd,
TextSection,
TextEscape,
TextEscapeSymbol,
TextEscapeChar,
TextEscapeLeader,
TextEscapeHexDigits,
TextEscapeSequenceStart,
TextEscapeSequenceEnd,
}
}}}
impl Variant {
/// Return whether this token can introduce a macro invocation.
pub fn can_start_macro(&self) -> bool {
!matches!(self, Variant::TextEscape(_))
// Prevent macro interpretation of symbols that have been lexically contextualized as text
// escape control characters.
!matches!(self, Variant::TextEscapeSymbol(_) | Variant::TextEscapeSequenceStart(_))
}
}

View File

@ -132,19 +132,30 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
pub token: token::AutoScope<'s>,
},
TextLiteral {
pub open_quote: Option<token::TextStart<'s>>,
pub open: Option<token::TextStart<'s>>,
pub elements: Vec<TextElement<'s>>,
/// Conditions when this is `None`:
/// - Block literal: Always.
/// - Inline literal: On error: EOL or EOF occurred without the string being closed.
pub close_quote: Option<token::TextEnd<'s>>,
pub trim: VisibleOffset,
pub close: Option<token::TextEnd<'s>>,
pub trim: VisibleOffset,
},
/// A simple application, like `print "hello"`.
App {
pub func: Tree<'s>,
pub arg: Tree<'s>,
},
/// An application using an argument name, like `summarize_transaction (price = 100)`.
NamedApp {
pub func: Tree<'s>,
pub open: Option<token::Symbol<'s>>,
pub name: token::Ident<'s>,
pub equals: token::Operator<'s>,
pub arg: Tree<'s>,
pub close: Option<token::Symbol<'s>>,
},
/// Application using the `default` keyword.
DefaultApp {
pub func: Tree<'s>,
pub default: token::Ident<'s>,
},
/// Application of an operator, like `a + b`. The left or right operands might be missing,
/// thus creating an operator section like `a +`, `+ b`, or simply `+`. See the
/// [`OprSectionBoundary`] variant to learn more about operator section scope.
@ -203,7 +214,7 @@ macro_rules! with_ast_definition { ($f:ident ($($args:tt)*)) => { $f! { $($args)
/// The identifier to which the function should be bound.
pub name: token::Ident<'s>,
/// The argument patterns.
pub args: Vec<Tree<'s>>,
pub args: Vec<ArgumentDefinition<'s>>,
/// The `=` token.
pub equals: token::Operator<'s>,
/// The body, which will typically be an inline expression or a `BodyBlock` expression.
@ -414,10 +425,32 @@ pub enum TextElement<'s> {
/// The text content.
text: token::TextSection<'s>,
},
/// A \ character.
Escape {
/// An escaped character.
EscapeChar {
/// The \ character.
backslash: token::TextEscape<'s>,
backslash: Option<token::TextEscapeSymbol<'s>>,
/// The escaped character.
char: Option<token::TextEscapeChar<'s>>,
},
/// A unicode escape sequence.
EscapeSequence {
/// The backslash and format characters.
leader: Option<token::TextEscapeLeader<'s>>,
/// The opening delimiter, if present.
open: Option<token::TextEscapeSequenceStart<'s>>,
/// The hex digits.
digits: Option<token::TextEscapeHexDigits<'s>>,
/// The closing delimiter, if present.
close: Option<token::TextEscapeSequenceEnd<'s>>,
},
/// An interpolated section within a text literal.
Splice {
/// The opening ` character.
open: token::Symbol<'s>,
/// The interpolated expression.
expression: Option<Tree<'s>>,
/// The closing ` character.
close: token::Symbol<'s>,
},
}
@ -425,7 +458,11 @@ impl<'s> span::Builder<'s> for TextElement<'s> {
fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
match self {
TextElement::Section { text } => text.add_to_span(span),
TextElement::Escape { backslash } => backslash.add_to_span(span),
TextElement::EscapeChar { backslash, char } => span.add(backslash).add(char),
TextElement::EscapeSequence { leader, open, digits, close } =>
span.add(leader).add(open).add(digits).add(close),
TextElement::Splice { open, expression, close } =>
span.add(open).add(expression).add(close),
}
}
}
@ -459,6 +496,43 @@ impl<'s> span::Builder<'s> for FractionalDigits<'s> {
}
// === Functions ===
/// A function argument definition.
#[derive(Clone, Debug, Eq, PartialEq, Visitor, Serialize, Reflect, Deserialize)]
pub struct ArgumentDefinition<'s> {
/// Opening parenthesis (only present when a default is provided).
pub open: Option<token::Symbol<'s>>,
/// The pattern being bound to an argument.
pub pattern: Tree<'s>,
/// An optional default value for an argument.
pub default: Option<ArgumentDefault<'s>>,
/// Closing parenthesis (only present when a default is provided).
pub close: Option<token::Symbol<'s>>,
}
impl<'s> span::Builder<'s> for ArgumentDefinition<'s> {
    /// Extend `span` with every part of the definition, in source order.
    fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
        // The optional parentheses belong to the definition's source text, and `open` may carry
        // the argument's left offset, so both must contribute to the span alongside the
        // pattern and the default.
        span.add(&mut self.open)
            .add(&mut self.pattern)
            .add(&mut self.default)
            .add(&mut self.close)
    }
}
/// A default value specification in a function argument definition: the `=` token together
/// with the expression that follows it.
#[derive(Clone, Debug, Eq, PartialEq, Visitor, Serialize, Reflect, Deserialize)]
pub struct ArgumentDefault<'s> {
/// The `=` token.
pub equals: token::Operator<'s>,
/// The default value.
pub expression: Tree<'s>,
}
impl<'s> span::Builder<'s> for ArgumentDefault<'s> {
    /// Extend `span` with the `=` token followed by the default expression.
    fn add_to_span(&mut self, span: Span<'s>) -> Span<'s> {
        let through_equals = span.add(&mut self.equals);
        through_equals.add(&mut self.expression)
    }
}
// === OprApp ===
/// Operator or [`MultipleOperatorError`].
@ -553,18 +627,40 @@ pub fn apply<'s>(mut func: Tree<'s>, mut arg: Tree<'s>) -> Tree<'s> {
return func;
}
Variant::TextLiteral(lhs) if let Variant::TextLiteral(rhs) = &mut *arg.variant
&& lhs.close_quote.is_none() && rhs.open_quote.is_none() => {
match rhs.elements.first_mut() {
Some(TextElement::Section { text }) => text.left_offset = arg.span.left_offset,
Some(TextElement::Escape { backslash }) =>
backslash.left_offset = arg.span.left_offset,
None => (),
}
lhs.elements.append(&mut rhs.elements);
lhs.close_quote = rhs.close_quote.take();
&& lhs.close.is_none() && rhs.open.is_none() => {
if rhs.trim != VisibleOffset(0) && (lhs.trim == VisibleOffset(0) || rhs.trim < lhs.trim) {
lhs.trim = rhs.trim;
}
match rhs.elements.first_mut() {
Some(TextElement::Section { text }) => text.left_offset = arg.span.left_offset,
Some(TextElement::EscapeChar { char: char_, .. }) => {
if let Some(char__) = char_ {
char__.left_offset = arg.span.left_offset;
if let Some(TextElement::EscapeChar { char, .. }) = lhs.elements.last_mut()
&& char.is_none() {
*char = mem::take(char_);
return func;
}
}
}
Some(TextElement::EscapeSequence { open: open_, digits: digits_, close: close_, .. }) => {
if let Some(TextElement::EscapeSequence { open, digits, close, .. })
= lhs.elements.last_mut() {
if open.is_none() {
*open = open_.clone();
}
if digits.is_none() {
*digits = digits_.clone();
}
*close = close_.clone();
return func;
}
}
Some(TextElement::Splice { open, .. }) => open.left_offset = arg.span.left_offset,
None => (),
}
lhs.elements.append(&mut rhs.elements);
lhs.close = rhs.close.take();
return func;
}
Variant::Number(func_ @ Number { base: _, integer: None, fractional_digits: None })
@ -586,7 +682,27 @@ pub fn apply<'s>(mut func: Tree<'s>, mut arg: Tree<'s>) -> Tree<'s> {
block.lhs = Some(func);
arg
}
_ => Tree::app(func, arg),
Variant::OprApp(OprApp { lhs: Some(lhs), opr: Ok(opr), rhs: Some(rhs) })
if opr.properties.is_assignment() && let Variant::Ident(lhs) = &*lhs.variant => {
let mut lhs = lhs.token.clone();
lhs.left_offset += arg.span.left_offset.clone();
Tree::named_app(func, None, lhs, opr.clone(), rhs.clone(), None)
}
Variant::Group(Group { open, body: Some(body), close }) if let box
Variant::OprApp(OprApp { lhs: Some(lhs), opr: Ok(opr), rhs: Some(rhs) }) = &body.variant
&& opr.properties.is_assignment() && let Variant::Ident(lhs) = &*lhs.variant => {
let mut open = open.clone();
open.left_offset += arg.span.left_offset.clone();
let open = Some(open);
let close = Some(close.clone());
Tree::named_app(func, open, lhs.token.clone(), opr.clone(), rhs.clone(), close)
}
Variant::Ident(Ident { token }) if token.is_default => {
let mut token = token.clone();
token.left_offset += arg.span.left_offset.clone();
Tree::default_app(func, token)
}
_ => Tree::app(func, arg)
}
}

View File

@ -119,7 +119,7 @@ fn type_methods() {
let expected = block![
(TypeDef (Ident type) (Ident Geo) #() #()
#((Function number #() "=" (BodyBlock #((Ident x))))
(Function area #((Ident self)) "=" (OprApp (Ident x) (Ok "+") (Ident x)))))
(Function area #((() (Ident self) () ())) "=" (OprApp (Ident x) (Ok "+") (Ident x)))))
];
test(&code.join("\n"), expected);
}
@ -146,7 +146,7 @@ fn type_def_full() {
((Point #() #()))
(()))
#((Function number #() "=" (BodyBlock #((Ident x))))
(Function area #((Ident self)) "=" (OprApp (Ident x) (Ok "+") (Ident x)))))
(Function area #((() (Ident self) () ())) "=" (OprApp (Ident x) (Ok "+") (Ident x)))))
];
test(&code.join("\n"), expected);
}
@ -181,9 +181,17 @@ fn assignment_simple() {
/// Checks inline function definitions taking one, two and three plain arguments.
///
/// Every argument is expected as a four-slot entry; judging by the parenthesized default-argument
/// cases elsewhere in this file the slots look like (open paren, pattern, default, close paren),
/// with `()` marking an absent slot.
#[test]
fn function_inline_simple_args() {
    // The earlier assertions that expected bare `(Ident a)` arguments for these same inputs were
    // dropped: they contradict the four-slot form asserted below, so both could never pass.
    test("foo a = x", block![(Function foo #((() (Ident a) () ())) "=" (Ident x))]);
    #[rustfmt::skip]
    test("foo a b = x",
         block![(Function foo #((() (Ident a) () ()) (() (Ident b) () ())) "=" (Ident x))]);
    #[rustfmt::skip]
    test(
        "foo a b c = x", block![
            (Function foo
             #((() (Ident a) () ()) (() (Ident b) () ()) (() (Ident c) () ()))
             "=" (Ident x))],
    );
}
#[test]
@ -193,9 +201,57 @@ fn function_block_noargs() {
/// Checks function definitions with an empty body (nothing after `=`) taking one, two and three
/// plain arguments.
#[test]
fn function_block_simple_args() {
    // Only the four-slot argument form is asserted. The older assertions expecting bare
    // `(Ident a)` arguments for the same inputs were removed because the two forms are mutually
    // exclusive.
    test("foo a =", block![(Function foo #((() (Ident a) () ())) "=" ())]);
    test("foo a b =", block![(Function foo #((() (Ident a) () ()) (() (Ident b) () ())) "=" ())]);
    #[rustfmt::skip]
    test(
        "foo a b c =", block![
            (Function foo
             #((() (Ident a) () ()) (() (Ident b) () ()) (() (Ident c) () ()))
             "=" ())],
    );
}
// === Named arguments ===
/// Checks that a `name=value` argument at a call site is expected as a `NamedApp` node, both
/// bare and wrapped in parentheses.
#[test]
fn named_arguments() {
    // Without parentheses the open/close slots of the node are empty.
    let bare = ("f x=y", block![(NamedApp (Ident f) () x "=" (Ident y) ())]);
    // With parentheses the delimiters are recorded around the name/value pair.
    let parenthesized = ("f (x = y)", block![(NamedApp (Ident f) "(" x "=" (Ident y) ")")]);
    for (source, tree) in [bare, parenthesized] {
        test(source, tree);
    }
}
// === Default arguments ===
/// Checks that applying a function to the `default` keyword is expected as a `DefaultApp` node.
#[test]
fn default_app() {
    let source = "f default";
    let tree = block![(DefaultApp (Ident f) default)];
    test(source, tree);
}
/// Checks function definitions whose argument carries a default value, with and without
/// enclosing parentheses, and with a `~` pattern on the left of the `=`.
#[test]
fn default_arguments() {
    #[rustfmt::skip]
    let scenarios = [
        ("f x=1 = x",
         block![(Function f #((() (Ident x) ("=" (Number () "1" ())) ())) "=" (Ident x))]),
        ("f (x = 1) = x",
         block![(Function f #(("(" (Ident x) ("=" (Number () "1" ())) ")")) "=" (Ident x))]),
        // Pattern in LHS:
        ("f ~x=1 = x", block![
            (Function f
             #((() (UnaryOprApp "~" (Ident x)) ("=" (Number () "1" ())) ()))
             "=" (Ident x))]),
        ("f (~x = 1) = x", block![
            (Function f
             #(("(" (UnaryOprApp "~" (Ident x)) ("=" (Number () "1" ())) ")"))
             "=" (Ident x))]),
    ];
    for (source, tree) in scenarios {
        test(source, tree);
    }
}
@ -361,7 +417,7 @@ fn pipeline_operators() {
fn unevaluated_argument() {
    // A `~`-prefixed argument in a function definition.
    let code = ["main ~foo = x"];
    // Exactly one expected entry for the single input line. The older entry with the bare
    // `(UnaryOprApp ...)` argument was removed; keeping both made the expectation describe two
    // statements and disagree with the four-slot argument form used by the other tests.
    let expected = block![
        (Function main #((() (UnaryOprApp "~" (Ident foo)) () ())) "=" (Ident x))
    ];
    test(&code.join("\n"), expected);
}
@ -370,7 +426,7 @@ fn unevaluated_argument() {
fn unary_operator_missing_operand() {
    // A `~` operator in argument position with nothing following it.
    let code = ["main ~ = x"];
    // Exactly one expected entry for the single input line; the operand slot of the
    // `UnaryOprApp` is `()`. The older entry with the bare `(UnaryOprApp "~" ())` argument was
    // removed because it duplicated the statement and contradicted the four-slot argument form.
    let expected = block![
        (Function main #((() (UnaryOprApp "~" ()) () ())) "=" (Ident x))
    ];
    test(&code.join("\n"), expected);
}
@ -585,26 +641,25 @@ fn inline_text_literals() {
#[rustfmt::skip]
let cases = [
(r#""I'm an inline raw text!""#, block![
(TextLiteral "\"" #((Section "I'm an inline raw text!")) "\"" 0)]),
(TextLiteral #((Section "I'm an inline raw text!")) 0)]),
(r#"zero_length = """#, block![
(Assignment (Ident zero_length) "=" (TextLiteral "\"" #() "\"" 0))]),
(r#"unclosed = ""#, block![(Assignment (Ident unclosed) "=" (TextLiteral "\"" #() () 0))]),
(Assignment (Ident zero_length) "=" (TextLiteral #() 0))]),
(r#"unclosed = ""#, block![(Assignment (Ident unclosed) "=" (TextLiteral #() 0))]),
(r#"unclosed = "a"#, block![
(Assignment (Ident unclosed) "=" (TextLiteral "\"" #((Section "a")) () 0))]),
(r#"'Other quote type'"#, block![(TextLiteral "'" #((Section "Other quote type")) "'" 0)]),
(r#""Non-escape: \n""#, block![(TextLiteral "\"" #((Section "Non-escape: \\n")) "\"" 0)]),
(Assignment (Ident unclosed) "=" (TextLiteral #((Section "a")) 0))]),
(r#"'Other quote type'"#, block![(TextLiteral #((Section "Other quote type")) 0)]),
(r#""Non-escape: \n""#, block![(TextLiteral #((Section "Non-escape: \\n")) 0)]),
(r#""String with \" escape""#, block![
(TextLiteral
"\""
#((Section "String with ") (Escape "\\") (Section "\" escape"))
"\"" 0)]),
#((Section "String with ") (EscapeChar "\"") (Section " escape"))
0)]),
];
cases.into_iter().for_each(|(code, expected)| test(code, expected));
}
#[test]
fn multiline_text_literals() {
test("'''", block![(TextLiteral "'''" #() () 0)]);
test("'''", block![(TextLiteral #() 0)]);
const CODE: &str = r#"'''
part of the string
3-spaces indented line, part of the Text Block
@ -616,19 +671,60 @@ x"#;
#[rustfmt::skip]
let expected = block![
(TextLiteral
"'''"
#((Section "\n") (Section "part of the string")
(Section "\n") (Section "3-spaces indented line, part of the Text Block")
(Section "\n") (Section "this does not end the string -> '''")
(Section "\n") (Section "")
(Section "\n")
(Section "\n") (Section "also part of the string")
(Section "\n") (Section ""))
() 4)
(Section "\n"))
4)
(Ident x)
];
test(CODE, expected);
}
/// Checks single-quoted inline text containing plain sections, a backtick splice, a character
/// escape, and the `\x`, `\u`, `\u{...}` and `\U` escape-sequence spellings.
#[test]
fn interpolated_literals_in_inline_text() {
    #[rustfmt::skip]
    let scenarios = [
        (r#"'Simple case.'"#, block![(TextLiteral #((Section "Simple case.")) 0)]),
        (r#"'With a `splice`.'"#, block![(TextLiteral
            #((Section "With a ")
              (Splice "`" (Ident splice) "`")
              (Section "."))
            0)]),
        (r#"'String with \n escape'"#, block![
            (TextLiteral
             #((Section "String with ") (EscapeChar "n") (Section " escape"))
             0)]),
        // For escape sequences only the hex digits are expected in the rendered tree.
        (r#"'\x0Aescape'"#, block![
            (TextLiteral #((EscapeSequence "0A") (Section "escape")) 0)]),
        (r#"'\u000Aescape'"#, block![
            (TextLiteral #((EscapeSequence "000A") (Section "escape")) 0)]),
        (r#"'\u{0000A}escape'"#, block![
            (TextLiteral #((EscapeSequence "0000A") (Section "escape")) 0)]),
        (r#"'\U0000000Aescape'"#, block![
            (TextLiteral #((EscapeSequence "0000000A") (Section "escape")) 0)]),
    ];
    for (source, tree) in scenarios {
        test(source, tree);
    }
}
/// Checks a `'''` text literal spanning several lines that contains a backtick splice, a `\u`
/// escape sequence and an escaped quote character.
#[test]
fn interpolated_literals_in_multiline_text() {
// NOTE(review): whitespace at the start of the continuation lines is part of this raw string.
// The trailing `4` in the expected tree presumably relates to the block's indentation — confirm
// the literal's leading whitespace is intact.
const CODE: &str = r#"'''
text with a `splice`
and some \u000Aescapes\'"#;
// Each continuation line is expected as a "\n" section followed by that line's elements.
#[rustfmt::skip]
let expected = block![
(TextLiteral
#((Section "\n") (Section "text with a ") (Splice "`" (Ident splice) "`")
(Section "\n") (Section "and some ")
(EscapeSequence "000A")
(Section "escapes")
(EscapeChar "'")) 4)];
test(CODE, expected);
}
// === Lambdas ===
@ -806,7 +902,15 @@ where T: serde::Serialize + Reflect {
let newline_token = rust_to_meta[&token::variant::Newline::reflect().id];
let text_start_token = rust_to_meta[&token::variant::TextStart::reflect().id];
let text_end_token = rust_to_meta[&token::variant::TextEnd::reflect().id];
let text_escape_token = rust_to_meta[&token::variant::TextEscape::reflect().id];
let text_escape_symbol_token = rust_to_meta[&token::variant::TextEscapeSymbol::reflect().id];
let text_escape_char_token = rust_to_meta[&token::variant::TextEscapeChar::reflect().id];
let text_escape_leader_token = rust_to_meta[&token::variant::TextEscapeLeader::reflect().id];
let text_escape_hex_digits_token =
rust_to_meta[&token::variant::TextEscapeHexDigits::reflect().id];
let text_escape_sequence_start_token =
rust_to_meta[&token::variant::TextEscapeSequenceStart::reflect().id];
let text_escape_sequence_end_token =
rust_to_meta[&token::variant::TextEscapeSequenceEnd::reflect().id];
let text_section_token = rust_to_meta[&token::variant::TextSection::reflect().id];
let wildcard_token = rust_to_meta[&token::variant::Wildcard::reflect().id];
let autoscope_token = rust_to_meta[&token::variant::AutoScope::reflect().id];
@ -823,11 +927,10 @@ where T: serde::Serialize + Reflect {
let token_to_str_ = token_to_str.clone();
to_s_expr.mapper(symbol_token, move |token| Value::string(token_to_str_(token)));
let token_to_str_ = token_to_str.clone();
to_s_expr.mapper(text_start_token, move |token| Value::string(token_to_str_(token)));
to_s_expr.mapper(text_escape_char_token, move |token| Value::string(token_to_str_(token)));
let token_to_str_ = token_to_str.clone();
to_s_expr.mapper(text_end_token, move |token| Value::string(token_to_str_(token)));
let token_to_str_ = token_to_str.clone();
to_s_expr.mapper(text_escape_token, move |token| Value::string(token_to_str_(token)));
to_s_expr
.mapper(text_escape_hex_digits_token, move |token| Value::string(token_to_str_(token)));
let token_to_str_ = token_to_str.clone();
to_s_expr.mapper(text_section_token, move |token| Value::string(token_to_str_(token)));
let token_to_str_ = token_to_str.clone();
@ -863,6 +966,12 @@ where T: serde::Serialize + Reflect {
to_s_expr.skip(newline_token);
to_s_expr.skip(wildcard_token);
to_s_expr.skip(autoscope_token);
to_s_expr.skip(text_escape_symbol_token);
to_s_expr.skip(text_escape_leader_token);
to_s_expr.skip(text_escape_sequence_start_token);
to_s_expr.skip(text_escape_sequence_end_token);
to_s_expr.skip(text_start_token);
to_s_expr.skip(text_end_token);
tuplify(to_s_expr.value(ast_ty, &value))
}