Merge pull request #7004 from a-lavis/format-invisible-chars

Format invisible unicode chars to unicode escapes
This commit is contained in:
Joshua Warner 2024-08-17 20:27:46 -07:00 committed by GitHub
commit e3d47b386e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 109 additions and 6 deletions

View File

@ -624,6 +624,22 @@ fn starts_with_newline(expr: &Expr) -> bool {
}
}
/// Writes `body` into `buf` one character at a time.
///
/// Four invisible characters are replaced by their visible `\u(XXXX)` escape
/// text so they cannot hide inside formatted source:
/// U+200A (hair space), U+200B (zero width space), U+200C (zero width
/// non-joiner) and U+FEFF (zero width no-break space / byte order mark).
///
/// Spaces and newlines are forwarded through `push_str_allow_spaces`; every
/// other character is pushed unchanged.
fn fmt_str_body(body: &str, buf: &mut Buf) {
    /// Returns the escape text for an invisible character, or `None` when the
    /// character should be emitted as-is.
    fn invisible_char_escape(ch: char) -> Option<&'static str> {
        match ch {
            '\u{200a}' => Some("\\u(200a)"),
            '\u{200b}' => Some("\\u(200b)"),
            '\u{200c}' => Some("\\u(200c)"),
            '\u{feff}' => Some("\\u(feff)"),
            _ => None,
        }
    }

    for ch in body.chars() {
        // Format blank characters as unicode escapes.
        if let Some(escape) = invisible_char_escape(ch) {
            buf.push_str(escape);
            continue;
        }

        // Don't change anything else in the string.
        // NOTE(review): spaces and newlines presumably need the
        // `_allow_spaces` entry point because the plain push methods treat
        // them specially — confirm against `Buf`.
        if ch == ' ' {
            buf.push_str_allow_spaces(" ");
        } else if ch == '\n' {
            buf.push_str_allow_spaces("\n");
        } else {
            buf.push(ch);
        }
    }
}
fn format_str_segment(seg: &StrSegment, buf: &mut Buf, indent: u16) {
use StrSegment::*;
@ -633,10 +649,10 @@ fn format_str_segment(seg: &StrSegment, buf: &mut Buf, indent: u16) {
// a line break in the input string
match string.strip_suffix('\n') {
Some(string_without_newline) => {
buf.push_str_allow_spaces(string_without_newline);
fmt_str_body(string_without_newline, buf);
buf.newline();
}
None => buf.push_str_allow_spaces(string),
None => fmt_str_body(string, buf),
}
}
Unicode(loc_str) => {
@ -696,7 +712,7 @@ pub fn fmt_str_literal(buf: &mut Buf, literal: StrLiteral, indent: u16) {
buf.push_newline_literal();
for line in string.split('\n') {
buf.indent(indent);
buf.push_str_allow_spaces(line);
fmt_str_body(line, buf);
buf.push_newline_literal();
}
buf.indent(indent);
@ -704,7 +720,7 @@ pub fn fmt_str_literal(buf: &mut Buf, literal: StrLiteral, indent: u16) {
} else {
buf.indent(indent);
buf.push('"');
buf.push_str_allow_spaces(string);
fmt_str_body(string, buf);
buf.push('"');
};
}

View File

@ -624,7 +624,7 @@ impl<'a> Normalize<'a> for StrLiteral<'a> {
new_segments.push(StrSegment::Plaintext(last_text.into_bump_str()));
}
StrLiteral::Line(new_segments.into_bump_slice())
normalize_str_line(new_segments)
}
StrLiteral::Block(t) => {
let mut new_segments = Vec::new_in(arena);
@ -636,10 +636,20 @@ impl<'a> Normalize<'a> for StrLiteral<'a> {
new_segments.push(StrSegment::Plaintext(last_text.into_bump_str()));
}
normalize_str_line(new_segments)
}
}
}
}
/// Builds the normalized single-line literal for `new_segments`.
///
/// A segment list consisting of exactly one `Plaintext` segment collapses to
/// `StrLiteral::PlainLine`; any other list (empty, several segments, or a
/// single non-plaintext segment) becomes `StrLiteral::Line` over the bump
/// slice of the segments.
fn normalize_str_line<'a>(new_segments: Vec<'a, StrSegment<'a>>) -> StrLiteral<'a> {
    match &new_segments[..] {
        // Exactly one plaintext segment: no need for the segmented form.
        [StrSegment::Plaintext(text)] => StrLiteral::PlainLine(*text),
        _ => StrLiteral::Line(new_segments.into_bump_slice()),
    }
}
}
}
}
fn normalize_str_segments<'a>(

View File

@ -6333,6 +6333,83 @@ mod test_fmt {
);
}
// The input already spells the blank characters as `\u(...)` escapes (raw
// string, so the backslashes are literal). It is handed to
// `expr_formats_same`, which presumably asserts the formatter leaves the
// source unchanged — confirm against the helper.
#[test]
fn keep_explicit_blank_chars() {
expr_formats_same(indoc!(
r#"
x = "a\u(200a)b\u(200b)c\u(200c)d\u(feff)e"
x
"#
));
}
// The first argument is a regular (non-raw) Rust string, so `\u{200A}` etc.
// are Rust escapes that put the actual invisible characters into the input.
// The second argument is a raw string holding the expected output, where each
// of those characters appears as a literal lowercase `\u(xxxx)` escape.
#[test]
fn make_blank_chars_explicit() {
expr_formats_to(
indoc!(
"
x = \"a\u{200A}b\u{200B}c\u{200C}d\u{FEFF}e\"
x
"
),
indoc!(
r#"
x = "a\u(200a)b\u(200b)c\u(200c)d\u(feff)e"
x
"#
),
);
}
// Same conversion as above, but the string also contains a `$(bar)`
// interpolation: the input holds a real U+200B (via the Rust escape) and the
// expected output shows it as the literal text `\u(200b)` with the
// interpolation left intact.
#[test]
fn make_blank_chars_explicit_when_interpolating() {
expr_formats_to(
indoc!(
"
x = \"foo:\u{200B} $(bar).\"
x
"
),
indoc!(
r#"
x = "foo:\u(200b) $(bar)."
x
"#
),
);
}
// Triple-quoted (`"""`) variant: the input places a real U+200B inside a
// multiline string that also contains a `$(bar)` interpolation, and the
// expected output shows that character as the literal text `\u(200b)`.
#[test]
fn make_blank_chars_explicit_in_multiline_string() {
expr_formats_to(
indoc!(
"
x =
\"\"\"
foo:\u{200B} $(bar).
\"\"\"
x
"
),
indoc!(
r#"
x =
"""
foo:\u(200b) $(bar).
"""
x
"#
),
);
}
// The input is written as a single-line Rust literal with explicit `\n`
// escapes so the trailing space after `bar` is visible in the test source
// (and cannot be stripped by an editor). It is passed to `expr_formats_same`,
// which presumably asserts the formatter output equals the input — confirm
// against the helper.
#[test]
fn preserve_multiline_string_trailing_whitespace() {
expr_formats_same(indoc!(
"x =\n \"\"\"\n foo\n bar \n baz\n \"\"\"\nx"
));
}
// this is a parse error atm
// #[test]
// fn multiline_apply() {