Merge pull request #7004 from a-lavis/format-invisible-chars

Format invisible unicode chars to unicode escapes
This commit is contained in:
Joshua Warner 2024-08-17 20:27:46 -07:00 committed by GitHub
commit e3d47b386e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 109 additions and 6 deletions

View File

@ -624,6 +624,22 @@ fn starts_with_newline(expr: &Expr) -> bool {
} }
} }
/// Writes the text of a string literal into `buf`, one character at a time.
///
/// Four hard-to-see code points are emitted as `\u(XXXX)` escapes instead of
/// verbatim, so that they show up in the formatted output:
///
/// - U+200A HAIR SPACE
/// - U+200B ZERO WIDTH SPACE
/// - U+200C ZERO WIDTH NON-JOINER
/// - U+FEFF ZERO WIDTH NO-BREAK SPACE (byte order mark)
///
/// Every other character is written through unchanged.
fn fmt_str_body(body: &str, buf: &mut Buf) {
    for ch in body.chars() {
        // Look up the escaped spelling first; `None` means "emit as-is".
        let escape = match ch {
            '\u{200a}' => Some("\\u(200a)"),
            '\u{200b}' => Some("\\u(200b)"),
            '\u{200c}' => Some("\\u(200c)"),
            '\u{feff}' => Some("\\u(feff)"),
            _ => None,
        };

        if let Some(text) = escape {
            buf.push_str(text);
        } else if ch == ' ' {
            // Spaces and newlines go through `push_str_allow_spaces` instead of `push`.
            // NOTE(review): presumably `Buf::push` does not accept them -- confirm in `Buf`.
            buf.push_str_allow_spaces(" ");
        } else if ch == '\n' {
            buf.push_str_allow_spaces("\n");
        } else {
            buf.push(ch);
        }
    }
}
fn format_str_segment(seg: &StrSegment, buf: &mut Buf, indent: u16) { fn format_str_segment(seg: &StrSegment, buf: &mut Buf, indent: u16) {
use StrSegment::*; use StrSegment::*;
@ -633,10 +649,10 @@ fn format_str_segment(seg: &StrSegment, buf: &mut Buf, indent: u16) {
// a line break in the input string // a line break in the input string
match string.strip_suffix('\n') { match string.strip_suffix('\n') {
Some(string_without_newline) => { Some(string_without_newline) => {
buf.push_str_allow_spaces(string_without_newline); fmt_str_body(string_without_newline, buf);
buf.newline(); buf.newline();
} }
None => buf.push_str_allow_spaces(string), None => fmt_str_body(string, buf),
} }
} }
Unicode(loc_str) => { Unicode(loc_str) => {
@ -696,7 +712,7 @@ pub fn fmt_str_literal(buf: &mut Buf, literal: StrLiteral, indent: u16) {
buf.push_newline_literal(); buf.push_newline_literal();
for line in string.split('\n') { for line in string.split('\n') {
buf.indent(indent); buf.indent(indent);
buf.push_str_allow_spaces(line); fmt_str_body(line, buf);
buf.push_newline_literal(); buf.push_newline_literal();
} }
buf.indent(indent); buf.indent(indent);
@ -704,7 +720,7 @@ pub fn fmt_str_literal(buf: &mut Buf, literal: StrLiteral, indent: u16) {
} else { } else {
buf.indent(indent); buf.indent(indent);
buf.push('"'); buf.push('"');
buf.push_str_allow_spaces(string); fmt_str_body(string, buf);
buf.push('"'); buf.push('"');
}; };
} }

View File

@ -624,7 +624,7 @@ impl<'a> Normalize<'a> for StrLiteral<'a> {
new_segments.push(StrSegment::Plaintext(last_text.into_bump_str())); new_segments.push(StrSegment::Plaintext(last_text.into_bump_str()));
} }
StrLiteral::Line(new_segments.into_bump_slice()) normalize_str_line(new_segments)
} }
StrLiteral::Block(t) => { StrLiteral::Block(t) => {
let mut new_segments = Vec::new_in(arena); let mut new_segments = Vec::new_in(arena);
@ -636,10 +636,20 @@ impl<'a> Normalize<'a> for StrLiteral<'a> {
new_segments.push(StrSegment::Plaintext(last_text.into_bump_str())); new_segments.push(StrSegment::Plaintext(last_text.into_bump_str()));
} }
normalize_str_line(new_segments)
}
}
}
}
fn normalize_str_line<'a>(new_segments: Vec<'a, StrSegment<'a>>) -> StrLiteral<'a> {
if new_segments.len() == 1 {
if let StrSegment::Plaintext(t) = new_segments[0] {
return StrLiteral::PlainLine(t);
}
}
StrLiteral::Line(new_segments.into_bump_slice()) StrLiteral::Line(new_segments.into_bump_slice())
}
}
}
} }
fn normalize_str_segments<'a>( fn normalize_str_segments<'a>(

View File

@ -6333,6 +6333,83 @@ mod test_fmt {
); );
} }
// A string that already spells its blank characters as `\u(...)` escapes.
// The literal is a raw string, so each `\u(200a)` etc. reaches the source under
// test as literal text rather than as the code point itself.
// NOTE(review): `expr_formats_same` is defined outside this view; going by its
// name it asserts that formatting leaves the input unchanged.
#[test]
fn keep_explicit_blank_chars() {
expr_formats_same(indoc!(
r#"
x = "a\u(200a)b\u(200b)c\u(200c)d\u(feff)e"
x
"#
));
}
// Raw U+200A, U+200B, U+200C and U+FEFF inside a single-line string are
// expected to come out as `\u(200a)`, `\u(200b)`, `\u(200c)` and `\u(feff)`.
//
// The first argument is a regular (non-raw) Rust string, so each `\u{...}` is a
// Rust escape that places the actual code point in the input. The second
// argument is a raw string, so `\u(...)` there is literal text.
#[test]
fn make_blank_chars_explicit() {
expr_formats_to(
indoc!(
"
x = \"a\u{200A}b\u{200B}c\u{200C}d\u{FEFF}e\"
x
"
),
indoc!(
r#"
x = "a\u(200a)b\u(200b)c\u(200c)d\u(feff)e"
x
"#
),
);
}
// Same conversion as above, but the string also contains a `$(bar)`
// interpolation: the raw U+200B (written here via the Rust escape `\u{200B}`)
// is expected to become the literal text `\u(200b)`, with the rest of the
// string, including the interpolation, unchanged.
#[test]
fn make_blank_chars_explicit_when_interpolating() {
expr_formats_to(
indoc!(
"
x = \"foo:\u{200B} $(bar).\"
x
"
),
indoc!(
r#"
x = "foo:\u(200b) $(bar)."
x
"#
),
);
}
// Same conversion inside a triple-quoted (`"""`) string: the raw U+200B in the
// input (written via the Rust escape `\u{200B}`) is expected to appear as the
// literal text `\u(200b)` in the output, next to an untouched `$(bar)`
// interpolation.
#[test]
fn make_blank_chars_explicit_in_multiline_string() {
expr_formats_to(
indoc!(
"
x =
\"\"\"
foo:\u{200B} $(bar).
\"\"\"
x
"
),
indoc!(
r#"
x =
"""
foo:\u(200b) $(bar).
"""
x
"#
),
);
}
// A triple-quoted string whose middle line ends in a space (`bar ` followed by
// `\n`). The input is written as one Rust literal with explicit `\n` escapes,
// which keeps that trailing space visible in this file.
// NOTE(review): `expr_formats_same` is defined outside this view; going by its
// name it asserts that formatting leaves the input, trailing space included,
// unchanged.
#[test]
fn preserve_multiline_string_trailing_whitespace() {
expr_formats_same(indoc!(
"x =\n \"\"\"\n foo\n bar \n baz\n \"\"\"\nx"
));
}
// this is a parse error atm // this is a parse error atm
// #[test] // #[test]
// fn multiline_apply() { // fn multiline_apply() {