mirror of
https://github.com/enso-org/enso.git
synced 2024-12-23 05:41:32 +03:00
Parser: Newline normalization in text literals (#3903)
- Newlines in text literals are now normalized to `\n` when producing the IR representation.
- Re-enabled tests that depended on the old behavior.
This commit is contained in:
parent
1b52ae239f
commit
336bbf505c
@ -1006,6 +1006,7 @@ final class TreeToIr {
|
||||
sb.appendCodePoint(val);
|
||||
}
|
||||
}
|
||||
case TextElement.Newline n -> sb.append('\n');
|
||||
default -> throw new UnhandledEntity(t, "buildTextConstant");
|
||||
}
|
||||
}
|
||||
|
@ -125,9 +125,9 @@ fn doc_comments() {
|
||||
#[rustfmt::skip]
|
||||
test(&lines.join("\n"), block![
|
||||
(Documented
|
||||
(#((Section " The Identity Function") (Section "\n")
|
||||
(Section "\n")
|
||||
(Section "Arguments:") (Section "\n")
|
||||
(#((Section " The Identity Function") (Newline)
|
||||
(Newline)
|
||||
(Section "Arguments:") (Newline)
|
||||
(Section "- x: value to do nothing to"))
|
||||
#(()))
|
||||
(Function (Ident id) #((() (Ident x) () ())) "=" (Ident x)))]);
|
||||
@ -919,10 +919,10 @@ x"#;
|
||||
#[rustfmt::skip]
|
||||
let expected = block![
|
||||
(TextLiteral
|
||||
#((Section "part of the string") (Section "\n")
|
||||
(Section " 3-spaces indented line, part of the Text Block") (Section "\n")
|
||||
(Section "this does not end the string -> '''") (Section "\n")
|
||||
(Section "\n")
|
||||
#((Section "part of the string") (Newline)
|
||||
(Section " 3-spaces indented line, part of the Text Block") (Newline)
|
||||
(Section "this does not end the string -> '''") (Newline)
|
||||
(Newline)
|
||||
(Section "`also` part of the string")))
|
||||
()
|
||||
(Ident x)
|
||||
@ -959,7 +959,7 @@ x"#;
|
||||
(Assignment (Ident foo) "=" (App (Ident bar) (TextLiteral #((Section "baz"))))));
|
||||
test!("'''\n \\t'", (TextLiteral #((Escape '\t') (Section "'"))));
|
||||
test!("'''\n x\n \\t'",
|
||||
(TextLiteral #((Section "x") (Section "\n") (Escape '\t') (Section "'"))));
|
||||
(TextLiteral #((Section "x") (Newline) (Escape '\t') (Section "'"))));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -1003,7 +1003,7 @@ fn interpolated_literals_in_multiline_text() {
|
||||
#[rustfmt::skip]
|
||||
let expected = block![
|
||||
(TextLiteral
|
||||
#((Section "text with a ") (Splice (Ident splice)) (Section "\n")
|
||||
#((Section "text with a ") (Splice (Ident splice)) (Newline)
|
||||
(Section "and some ") (Escape '\n') (Section "escapes") (Escape '\'')))];
|
||||
test(code, expected);
|
||||
}
|
||||
|
@ -1008,7 +1008,7 @@ impl<'s> Lexer<'s> {
|
||||
}
|
||||
let newlines = newlines
|
||||
.into_iter()
|
||||
.map(|token| token.with_variant(token::Variant::text_section()));
|
||||
.map(|token| token.with_variant(token::Variant::text_newline()));
|
||||
self.output.extend(newlines);
|
||||
continue;
|
||||
}
|
||||
|
@ -480,7 +480,7 @@ impl<'s> Resolver<'s> {
|
||||
syntax::Item::Tree(tree) => body.push_str(&tree.code()),
|
||||
}
|
||||
}
|
||||
let header0 = syntax::Tree::from(header0).with_error("Invalid macro invocation.");
|
||||
let header0 = syntax::tree::to_ast(header0).with_error("Invalid macro invocation.");
|
||||
(header0, items)
|
||||
}
|
||||
}
|
||||
|
@ -50,15 +50,6 @@ impl<'s> Item<'s> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert this item to a [`Tree`].
|
||||
pub fn to_ast(self) -> Tree<'s> {
|
||||
match self {
|
||||
Item::Token(token) => token.into(),
|
||||
Item::Tree(ast) => ast,
|
||||
Item::Block(items) => build_block(items),
|
||||
}
|
||||
}
|
||||
|
||||
/// If this item is an [`Item::Tree`], apply the given function to the contained [`Tree`] and
|
||||
/// return the result.
|
||||
pub fn map_tree<'t: 's, F>(self, f: F) -> Self
|
||||
@ -90,7 +81,7 @@ impl<'s> TryAsRef<Item<'s>> for Item<'s> {
|
||||
|
||||
/// Given a sequence of [`Line`]s belonging to one block, create an AST block node, of a type
|
||||
/// determined by the syntax of the lines in the block.
|
||||
fn build_block<'s>(lines: impl IntoIterator<Item = Line<'s>>) -> Tree<'s> {
|
||||
pub fn build_block<'s>(lines: impl IntoIterator<Item = Line<'s>>) -> Tree<'s> {
|
||||
let mut block_builder = tree::block::Builder::new();
|
||||
let mut precedence = operator::Precedence::new();
|
||||
for Line { newline, items } in lines {
|
||||
|
@ -65,9 +65,10 @@ impl<'s> Precedence<'s> {
|
||||
code,
|
||||
}) => self.nospace_builder.operator(Token(left_offset, code, opr)),
|
||||
syntax::Item::Token(token) =>
|
||||
self.nospace_builder.operand(syntax::Tree::from(token).into()),
|
||||
self.nospace_builder.operand(syntax::tree::to_ast(token).into()),
|
||||
syntax::Item::Tree(tree) => self.nospace_builder.operand(tree.into()),
|
||||
syntax::Item::Block(_) => self.nospace_builder.operand(item.to_ast().into()),
|
||||
syntax::Item::Block(lines) =>
|
||||
self.nospace_builder.operand(syntax::item::build_block(lines).into()),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -291,6 +291,7 @@ macro_rules! with_token_definition { ($f:ident ($($args:tt)*)) => { $f! { $($arg
|
||||
pub value: Option<char>,
|
||||
},
|
||||
TextInitialNewline,
|
||||
TextNewline,
|
||||
Invalid,
|
||||
}
|
||||
}}}
|
||||
|
@ -505,6 +505,12 @@ pub enum TextElement<'s> {
|
||||
/// The escape sequence.
|
||||
token: token::TextEscape<'s>,
|
||||
},
|
||||
/// A logical newline.
|
||||
Newline {
|
||||
/// The newline token. The semantics of a logical newline are independent of the specific
|
||||
/// characters in the input, which are generally platform-dependent.
|
||||
newline: token::Newline<'s>,
|
||||
},
|
||||
/// An interpolated section within a text literal.
|
||||
Splice {
|
||||
/// The opening ` character.
|
||||
@ -523,6 +529,7 @@ impl<'s> span::Builder<'s> for TextElement<'s> {
|
||||
TextElement::Escape { token } => span.add(token),
|
||||
TextElement::Splice { open, expression, close } =>
|
||||
span.add(open).add(expression).add(close),
|
||||
TextElement::Newline { newline } => span.add(newline),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -856,6 +863,7 @@ pub fn join_text_literals<'s>(
|
||||
Some(TextElement::Section { text }) => text.left_offset += rhs_span.left_offset,
|
||||
Some(TextElement::Escape { token }) => token.left_offset += rhs_span.left_offset,
|
||||
Some(TextElement::Splice { open, .. }) => open.left_offset += rhs_span.left_offset,
|
||||
Some(TextElement::Newline { newline }) => newline.left_offset += rhs_span.left_offset,
|
||||
None => (),
|
||||
}
|
||||
if let Some(newline) = rhs.newline.take() {
|
||||
@ -939,52 +947,56 @@ pub fn apply_unary_operator<'s>(opr: token::Operator<'s>, rhs: Option<Tree<'s>>)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> From<Token<'s>> for Tree<'s> {
|
||||
fn from(token: Token<'s>) -> Self {
|
||||
match token.variant {
|
||||
token::Variant::Ident(ident) => token.with_variant(ident).into(),
|
||||
token::Variant::Digits(number) =>
|
||||
Tree::number(None, Some(token.with_variant(number)), None),
|
||||
token::Variant::NumberBase(base) =>
|
||||
Tree::number(Some(token.with_variant(base)), None, None),
|
||||
token::Variant::TextStart(open) =>
|
||||
Tree::text_literal(Some(token.with_variant(open)), default(), default(), default(), default()),
|
||||
token::Variant::TextSection(section) => {
|
||||
let section = TextElement::Section { text: token.with_variant(section) };
|
||||
Tree::text_literal(default(), default(), vec![section], default(), default())
|
||||
}
|
||||
token::Variant::TextEscape(escape) => {
|
||||
let token = token.with_variant(escape);
|
||||
let section = TextElement::Escape { token };
|
||||
Tree::text_literal(default(), default(), vec![section], default(), default())
|
||||
}
|
||||
token::Variant::TextEnd(_) if token.code.is_empty() =>
|
||||
Tree::text_literal(default(), default(), default(), default(), true),
|
||||
token::Variant::TextEnd(close) =>
|
||||
Tree::text_literal(default(), default(), default(), Some(token.with_variant(close)), true),
|
||||
token::Variant::TextInitialNewline(_) =>
|
||||
Tree::text_literal(default(), Some(token::newline(token.left_offset, token.code)), default(), default(), default()),
|
||||
token::Variant::Wildcard(wildcard) => Tree::wildcard(token.with_variant(wildcard), default()),
|
||||
token::Variant::AutoScope(t) => Tree::auto_scope(token.with_variant(t)),
|
||||
token::Variant::OpenSymbol(s) =>
|
||||
Tree::group(Some(token.with_variant(s)), default(), default()).with_error("Unmatched delimiter"),
|
||||
token::Variant::CloseSymbol(s) =>
|
||||
Tree::group(default(), default(), Some(token.with_variant(s))).with_error("Unmatched delimiter"),
|
||||
// These should be unreachable: They are handled when assembling items into blocks,
|
||||
// before parsing proper.
|
||||
token::Variant::Newline(_)
|
||||
| token::Variant::BlockStart(_)
|
||||
| token::Variant::BlockEnd(_)
|
||||
// This should be unreachable: `resolve_operator_precedence` doesn't call `to_ast` for
|
||||
// operators.
|
||||
| token::Variant::Operator(_)
|
||||
// Map an error case in the lexer to an error in the AST.
|
||||
| token::Variant::Invalid(_) => {
|
||||
let message = format!("Unexpected token: {token:?}");
|
||||
let ident = token::variant::Ident(false, 0, false, false, false);
|
||||
let value = Tree::ident(token.with_variant(ident));
|
||||
Tree::with_error(value, message)
|
||||
}
|
||||
/// Create an AST node for a token.
|
||||
pub fn to_ast(token: Token) -> Tree {
|
||||
match token.variant {
|
||||
token::Variant::Ident(ident) => token.with_variant(ident).into(),
|
||||
token::Variant::Digits(number) =>
|
||||
Tree::number(None, Some(token.with_variant(number)), None),
|
||||
token::Variant::NumberBase(base) =>
|
||||
Tree::number(Some(token.with_variant(base)), None, None),
|
||||
token::Variant::TextStart(open) =>
|
||||
Tree::text_literal(Some(token.with_variant(open)), default(), default(), default(), default()),
|
||||
token::Variant::TextSection(section) => {
|
||||
let section = TextElement::Section { text: token.with_variant(section) };
|
||||
Tree::text_literal(default(), default(), vec![section], default(), default())
|
||||
}
|
||||
token::Variant::TextEscape(escape) => {
|
||||
let token = token.with_variant(escape);
|
||||
let section = TextElement::Escape { token };
|
||||
Tree::text_literal(default(), default(), vec![section], default(), default())
|
||||
}
|
||||
token::Variant::TextEnd(_) if token.code.is_empty() =>
|
||||
Tree::text_literal(default(), default(), default(), default(), true),
|
||||
token::Variant::TextEnd(close) =>
|
||||
Tree::text_literal(default(), default(), default(), Some(token.with_variant(close)), true),
|
||||
token::Variant::TextInitialNewline(_) =>
|
||||
Tree::text_literal(default(), Some(token::newline(token.left_offset, token.code)), default(), default(), default()),
|
||||
token::Variant::TextNewline(_) => {
|
||||
let newline = token::newline(token.left_offset, token.code);
|
||||
let newline = TextElement::Newline { newline };
|
||||
Tree::text_literal(default(), default(), vec![newline], default(), default())
|
||||
}
|
||||
token::Variant::Wildcard(wildcard) => Tree::wildcard(token.with_variant(wildcard), default()),
|
||||
token::Variant::AutoScope(t) => Tree::auto_scope(token.with_variant(t)),
|
||||
token::Variant::OpenSymbol(s) =>
|
||||
Tree::group(Some(token.with_variant(s)), default(), default()).with_error("Unmatched delimiter"),
|
||||
token::Variant::CloseSymbol(s) =>
|
||||
Tree::group(default(), default(), Some(token.with_variant(s))).with_error("Unmatched delimiter"),
|
||||
// These should be unreachable: They are handled when assembling items into blocks,
|
||||
// before parsing proper.
|
||||
token::Variant::Newline(_)
|
||||
| token::Variant::BlockStart(_)
|
||||
| token::Variant::BlockEnd(_)
|
||||
// This should be unreachable: `resolve_operator_precedence` doesn't call `to_ast` for
|
||||
// operators.
|
||||
| token::Variant::Operator(_)
|
||||
// Map an error case in the lexer to an error in the AST.
|
||||
| token::Variant::Invalid(_) => {
|
||||
let message = format!("Unexpected token: {token:?}");
|
||||
let ident = token::variant::Ident(false, 0, false, false, false);
|
||||
let value = Tree::ident(token.with_variant(ident));
|
||||
Tree::with_error(value, message)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -246,8 +246,8 @@ spec =
|
||||
Test.specify "should escape special characters when debug-printing text" <|
|
||||
text_1 = '''
|
||||
foo
|
||||
bar\tbaz
|
||||
(text_1.replace '\r' "").to_text.should_equal "'foo\nbar\tbaz'"
|
||||
bar\r\tbaz
|
||||
text_1.to_text.should_equal "'foo\nbar\r\tbaz'"
|
||||
text_2 = '\n\t\a\b\f\r\v\e\''
|
||||
text_2.to_text.should_equal "'\n\t\a\b\f\r\v\e\''"
|
||||
|
||||
@ -830,8 +830,8 @@ spec =
|
||||
long_text = """
|
||||
Hello from a long text. EOL
|
||||
SOL Hmm...
|
||||
(long_text.replace '\r' "") . contains "EOL.SOL" (Regex_Matcher.Regex_Matcher_Data case_sensitivity=Case_Sensitivity.Sensitive dot_matches_newline=True) . should_be_true
|
||||
(long_text.replace '\r' "") . contains "EOL.SOL" (Regex_Matcher.Regex_Matcher_Data case_sensitivity=Case_Sensitivity.Sensitive dot_matches_newline=False) . should_be_false
|
||||
long_text . contains "EOL.SOL" (Regex_Matcher.Regex_Matcher_Data case_sensitivity=Case_Sensitivity.Sensitive dot_matches_newline=True) . should_be_true
|
||||
long_text . contains "EOL.SOL" (Regex_Matcher.Regex_Matcher_Data case_sensitivity=Case_Sensitivity.Sensitive dot_matches_newline=False) . should_be_false
|
||||
|
||||
Test.specify "should check for starts_with using Unicode normalization" <|
|
||||
"Hello".starts_with "He" . should_be_true
|
||||
@ -917,8 +917,8 @@ spec =
|
||||
long_text = """
|
||||
EOL
|
||||
SOL Hmm...
|
||||
(long_text.replace '\r' "") . starts_with "EOL.SOL" (Regex_Matcher.Regex_Matcher_Data case_sensitivity=Case_Sensitivity.Sensitive dot_matches_newline=True) . should_be_true
|
||||
(long_text.replace '\r' "") . starts_with "EOL.SOL" (Regex_Matcher.Regex_Matcher_Data case_sensitivity=Case_Sensitivity.Sensitive dot_matches_newline=False) . should_be_false
|
||||
long_text . starts_with "EOL.SOL" (Regex_Matcher.Regex_Matcher_Data case_sensitivity=Case_Sensitivity.Sensitive dot_matches_newline=True) . should_be_true
|
||||
long_text . starts_with "EOL.SOL" (Regex_Matcher.Regex_Matcher_Data case_sensitivity=Case_Sensitivity.Sensitive dot_matches_newline=False) . should_be_false
|
||||
|
||||
"aaazzz" . starts_with "a|b" (Regex_Matcher.Regex_Matcher_Data case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true
|
||||
"bbbzzz" . starts_with "a|b" (Regex_Matcher.Regex_Matcher_Data case_sensitivity=Case_Sensitivity.Sensitive) . should_be_true
|
||||
@ -997,8 +997,8 @@ spec =
|
||||
long_text = """
|
||||
Hnnnn EOL
|
||||
SOL
|
||||
(long_text.replace '\r' "") . ends_with "EOL.SOL" (Regex_Matcher.Regex_Matcher_Data dot_matches_newline=True) . should_be_true
|
||||
(long_text.replace '\r' "") . ends_with "EOL.SOL" (Regex_Matcher.Regex_Matcher_Data dot_matches_newline=False) . should_be_false
|
||||
long_text . ends_with "EOL.SOL" (Regex_Matcher.Regex_Matcher_Data dot_matches_newline=True) . should_be_true
|
||||
long_text . ends_with "EOL.SOL" (Regex_Matcher.Regex_Matcher_Data dot_matches_newline=False) . should_be_false
|
||||
|
||||
"zzzaaa" . ends_with "a|b" Regex_Matcher.Regex_Matcher_Data . should_be_true
|
||||
"zzzbbb" . ends_with "a|b" Regex_Matcher.Regex_Matcher_Data . should_be_true
|
||||
@ -1604,7 +1604,7 @@ spec =
|
||||
text = """
|
||||
Foo
|
||||
bar
|
||||
r4 = text.replace "(\n|\r)" "" matcher=(Regex_Matcher.Regex_Matcher_Data multiline=True)
|
||||
r4 = text.replace '\n' "" matcher=(Regex_Matcher.Regex_Matcher_Data multiline=True)
|
||||
r4 . should_equal "Foobar"
|
||||
|
||||
r5 = "ababd".replace "b\w # Replacing a `b` followed by any word character" "a" matcher=(Regex_Matcher.Regex_Matcher_Data comments=True)
|
||||
|
Loading…
Reference in New Issue
Block a user