fix(es/codegen): Fix LineCol calculation of printed files (#6763)

**Description:**

There were several issues with the way we updated the current `LineCol` position during the printing of the generated file:

- We counted columns with `chars` and `char_indices` (Unicode scalar values, effectively UTF-32) instead of `encode_utf16` (UTF-16 code units).
  - JS uses UCS-2 (basically UTF-16) for its strings, and source maps default to that implicitly.
- `\r` was incorrectly handled
  - it didn't add a `line_start`; only `\n` did
- `\r\n` was incorrectly handled
  - It tried to let the `\n` path handle the `line_start`, but it called
    `chars.next()`, which consumed the `\n` char before that path could run.

I also took the opportunity to avoid the `Vec` allocations and reduced some code duplication.

See the
[before](https://evanw.github.io/source-map-visualization/#ODEzAC8qKgogKiBmb28KICogQHBhcmFtIGRhdGEgZm9vCiAqIEByZXR1cm5zIGZvbwogKi8gZXhwb3J0IGNvbnN0IGZpeHVwUmlza0NvbmZpZ0RhdGEgPSAoZGF0YSk9PnsKICAgIGlmICh4KSB7CiAgICAgICAgcmV0dXJuIDEyMzsKICAgIH0gZWxzZSB7CiAgICAgICAgcmV0dXJuIDQ1NjsKICAgIH0KfTsKCi8vIyBzb3VyY2VNYXBwaW5nVVJMPWRhdGE6YXBwbGljYXRpb24vanNvbjtiYXNlNjQsZXlKMlpYSnphVzl1SWpvekxDSnpiM1Z5WTJWeklqcGJJbWx1Y0hWMExuUnpJbDBzSW5OdmRYSmpaWE5EYjI1MFpXNTBJanBiSWk4cUtseHlYRzRnS2lCbWIyOWNjbHh1SUNvZ1FIQmhjbUZ0SUdSaGRHRWdabTl2WEhKY2JpQXFJRUJ5WlhSMWNtNXpJR1p2YjF4eVhHNGdLaTljY2x4dVpYaHdiM0owSUdOdmJuTjBJR1pwZUhWd1VtbHphME52Ym1acFowUmhkR0VnUFNBb1pHRjBZVG9nWVc1NUtUb2dkSGx3WlhNdVVtbHphME52Ym1acFoxUjVjR1VnUFQ0Z2UxeHlYRzRnSUdsbUlDaDRLU0I3WEhKY2JpQWdJQ0J5WlhSMWNtNGdNVEl6TzF4eVhHNGdJSDBnWld4elpTQjdYSEpjYmlBZ0lDQnlaWFIxY200Z05EVTJPMXh5WEc0Z0lIMWNjbHh1ZlRzaVhTd2libUZ0WlhNaU9sc2labWw0ZFhCU2FYTnJRMjl1Wm1sblJHRjBZU0lzSW1SaGRHRWlMQ0o0SWwwc0ltMWhjSEJwYm1keklqb2lRVUZCUVN4dFJFRkpReXhIUVVORUxFOUJRVThzVFVGQlRVRXNjMEpCUVhOQ0xFTkJRVU5ETEU5QlFXOURPMGxCUTNSRkxFbEJRVWxETEVkQlFVYzdVVUZEVEN4UFFVRlBPMGxCUTFRc1QwRkJUenRSUVVOTUxFOUJRVTg3U1VGRFZDeERRVUZETzBGQlEwZ3NSVUZCUlNKOTQ0NAB7InZlcnNpb24iOjMsInNvdXJjZXMiOlsiaW5wdXQudHMiXSwic291cmNlc0NvbnRlbnQiOlsiLyoqXHJcbiAqIGZvb1xyXG4gKiBAcGFyYW0gZGF0YSBmb29cclxuICogQHJldHVybnMgZm9vXHJcbiAqL1xyXG5leHBvcnQgY29uc3QgZml4dXBSaXNrQ29uZmlnRGF0YSA9IChkYXRhOiBhbnkpOiB0eXBlcy5SaXNrQ29uZmlnVHlwZSA9PiB7XHJcbiAgaWYgKHgpIHtcclxuICAgIHJldHVybiAxMjM7XHJcbiAgfSBlbHNlIHtcclxuICAgIHJldHVybiA0NTY7XHJcbiAgfVxyXG59OyJdLCJuYW1lcyI6WyJmaXh1cFJpc2tDb25maWdEYXRhIiwiZGF0YSIsIngiXSwibWFwcGluZ3MiOiJBQUFBLG1EQUlDLEdBQ0QsT0FBTyxNQUFNQSxzQkFBc0IsQ0FBQ0MsT0FBb0M7SUFDdEUsSUFBSUMsR0FBRztRQUNMLE9BQU87SUFDVCxPQUFPO1FBQ0wsT0FBTztJQUNULENBQUM7QUFDSCxFQUFFIn0=)
and
[after](https://evanw.github.io/source-map-visualization/#ODIyAC8qKgogKiBmb28KICogQHBhcmFtIGRhdGEgZm9vCiAqIEByZXR1cm5zIGZvbwogKi8gZXhwb3J0IHZhciBmaXh1cFJpc2tDb25maWdEYXRhID0gZnVuY3Rpb24oZGF0YSkgewogICAgaWYgKHgpIHsKICAgICAgICByZXR1cm4gMTIzOwogICAgfSBlbHNlIHsKICAgICAgICByZXR1cm4gNDU2OwogICAgfQp9OwoKLy8jIHNvdXJjZU1hcHBpbmdVUkw9ZGF0YTphcHBsaWNhdGlvbi9qc29uO2Jhc2U2NCxleUoyWlhKemFXOXVJam96TENKemIzVnlZMlZ6SWpwYklpNHZZWEJ3TG1weklsMHNJbk52ZFhKalpYTkRiMjUwWlc1MElqcGJJaThxS2x4eVhHNGdLaUJtYjI5Y2NseHVJQ29nUUhCaGNtRnRJR1JoZEdFZ1ptOXZYSEpjYmlBcUlFQnlaWFIxY201eklHWnZiMXh5WEc0Z0tpOWNjbHh1Wlhod2IzSjBJR052Ym5OMElHWnBlSFZ3VW1semEwTnZibVpwWjBSaGRHRWdQU0FvWkdGMFlUb2dZVzU1S1RvZ2RIbHdaWE11VW1semEwTnZibVpwWjFSNWNHVWdQVDRnZTF4eVhHNGdJR2xtSUNoNEtTQjdYSEpjYmlBZ0lDQnlaWFIxY200Z01USXpPMXh5WEc0Z0lIMGdaV3h6WlNCN1hISmNiaUFnSUNCeVpYUjFjbTRnTkRVMk8xeHlYRzRnSUgxY2NseHVmVHNpWFN3aWJtRnRaWE1pT2xzaVptbDRkWEJTYVhOclEyOXVabWxuUkdGMFlTSXNJbVJoZEdFaUxDSjRJbDBzSW0xaGNIQnBibWR6SWpvaVFVRkJRVHM3T3p0RFFVbERMRWRCUTBRc1QwRkJUeXhKUVVGTlFTeHpRa0ZCYzBJc1UwRkJRME1zVFVGQmIwTTdTVUZEZEVVc1NVRkJTVU1zUjBGQlJ6dFJRVU5NTEU5QlFVODdTVUZEVkN4UFFVRlBPMUZCUTB3c1QwRkJUenRKUVVOVUxFTkJRVU03UVVGRFNDeEZRVUZGSW4wPTQ0NgB7InZlcnNpb24iOjMsInNvdXJjZXMiOlsiLi9hcHAuanMiXSwic291cmNlc0NvbnRlbnQiOlsiLyoqXHJcbiAqIGZvb1xyXG4gKiBAcGFyYW0gZGF0YSBmb29cclxuICogQHJldHVybnMgZm9vXHJcbiAqL1xyXG5leHBvcnQgY29uc3QgZml4dXBSaXNrQ29uZmlnRGF0YSA9IChkYXRhOiBhbnkpOiB0eXBlcy5SaXNrQ29uZmlnVHlwZSA9PiB7XHJcbiAgaWYgKHgpIHtcclxuICAgIHJldHVybiAxMjM7XHJcbiAgfSBlbHNlIHtcclxuICAgIHJldHVybiA0NTY7XHJcbiAgfVxyXG59OyJdLCJuYW1lcyI6WyJmaXh1cFJpc2tDb25maWdEYXRhIiwiZGF0YSIsIngiXSwibWFwcGluZ3MiOiJBQUFBOzs7O0NBSUMsR0FDRCxPQUFPLElBQU1BLHNCQUFzQixTQUFDQyxNQUFvQztJQUN0RSxJQUFJQyxHQUFHO1FBQ0wsT0FBTztJQUNULE9BQU87UUFDTCxPQUFPO0lBQ1QsQ0FBQztBQUNILEVBQUUifQ==)


**Related issue:**

 - Closes https://github.com/swc-project/swc/issues/6694.
This commit is contained in:
Justin Ridgewell 2023-01-07 23:57:26 -05:00 committed by GitHub
parent fb6770f649
commit 2b503c16d5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 158 additions and 53 deletions

View File

@ -16,6 +16,7 @@ use swc::{
},
Compiler,
};
use swc_ecma_parser::Syntax;
use testing::{assert_eq, NormalizedOutput, StdErr, Tester};
use walkdir::WalkDir;
@ -426,3 +427,87 @@ fn issue_4578() {
)
.unwrap();
}
/// Regression test for issue #6694: source-map positions must stay correct
/// when the input file uses `\r\n` line endings.
///
/// The fixture is compiled as TypeScript targeting ES5 with source maps and
/// column information enabled. The generated map is then queried at the
/// positions of `export` and `if` in the emitted code, and the resolved source
/// positions are checked.
#[test]
fn issue_6694() {
    Tester::new().print_errors(|cm, handler| {
        let c = Compiler::new(cm.clone());

        // The fixture's leading whitespace is significant: the assertions below
        // expect `if` to sit at source column 2. Every `\n` is turned into
        // `\r\n` to reproduce the reported input.
        let fm = cm.new_source_file(
            swc_common::FileName::Real("./app.js".into()),
            r#"/**
 * foo
 * @param data foo
 * @returns foo
 */
export const fixupRiskConfigData = (data: any): types.RiskConfigType => {
  if (x) {
    return 123;
  } else {
    return 456;
  }
};"#
            .replace('\n', "\r\n"),
        );

        let result = c.process_js_file(
            fm,
            &handler,
            &Options {
                swcrc: false,
                source_maps: Some(SourceMapsConfig::Bool(true)),
                config: Config {
                    jsc: JscConfig {
                        target: Some(swc_ecma_ast::EsVersion::Es5),
                        syntax: Some(Syntax::Typescript(Default::default())),
                        ..Default::default()
                    },
                    is_module: IsModule::Bool(true),
                    inline_sources_content: true.into(),
                    emit_source_map_columns: true.into(),
                    ..Default::default()
                },
                ..Default::default()
            },
        );

        /// Returns the zero-based `(line, column)` of the first line of
        /// `haystack` that contains `needle`, or `None` if no line does.
        ///
        /// The column is counted in UTF-16 code units, matching the unit used
        /// by source-map columns.
        fn line_col(needle: &str, haystack: &str) -> Option<(u32, u32)> {
            haystack.lines().enumerate().find_map(|(line, text)| {
                let byte_col = text.find(needle)?;
                let col = text[..byte_col].encode_utf16().count();
                Some((line as u32, col as u32))
            })
        }

        match result {
            Ok(result) => {
                let map = result.map.expect("source map should have been emitted");
                let source_map = sourcemap::SourceMap::from_slice(map.as_bytes())
                    .expect("failed to deserialize sourcemap");

                // "export"
                let export_line_col =
                    line_col("export", &result.code).expect("failed to find `export`");
                let token = source_map
                    .lookup_token(export_line_col.0, export_line_col.1)
                    .expect("failed to find token");
                assert_eq!(token.get_src(), (5, 0));

                // "if" -- looked up with its own line *and* column.
                let if_line_col = line_col("if", &result.code).expect("failed to find `if`");
                let token = source_map
                    .lookup_token(if_line_col.0, if_line_col.1)
                    .expect("failed to find token");
                assert_eq!(token.get_src(), (6, 2));
            }
            Err(err) => {
                panic!("Error: {:#?}", err);
            }
        }

        Ok(())
    });
}

View File

@ -1220,7 +1220,10 @@ pub struct LineInfo {
/// A line/column position, used when producing a `.map` file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct LineCol {
    /// Zero-based index of the line.
    pub line: u32,
    /// Zero-based UTF-16 column within the line.
    pub col: u32,
}

View File

@ -283,10 +283,28 @@ mod tests {
);
test!(
case: windows_crlf,
text: "012345678\r\nabcdef012345678\r\na",
case: unix_lf,
text: "/**\n * foo\n */\n012345678\nabcdef012345678\na",
source_file_start_pos: 0,
lines: vec![0, 11, 28],
lines: vec![0, 4, 11, 15, 25, 41],
multi_byte_chars: vec![],
non_narrow_chars: vec![],
);
// Bare `\r` terminators: every `\r` ends a line, so each expected line start
// is the byte offset immediately after a `\r`.
test!(
case: windows_cr,
text: "/**\r * foo\r */\r012345678\rabcdef012345678\ra",
source_file_start_pos: 0,
lines: vec![0, 4, 11, 15, 25, 41],
multi_byte_chars: vec![],
non_narrow_chars: vec![],
);
// `\r\n` terminators: each pair counts as a single line break, so each
// expected line start is the byte offset just past the `\n`.
test!(
case: windows_crlf,
text: "/**\r\n * foo\r\n */\r\n012345678\r\nabcdef012345678\r\na",
source_file_start_pos: 0,
lines: vec![0, 5, 13, 18, 29, 46],
multi_byte_chars: vec![],
non_narrow_chars: vec![],
);

View File

@ -51,6 +51,9 @@ impl<'a, W: Write> JsWriter<'a, W> {
for _ in 0..self.indent {
self.raw_write(INDENT)?;
}
if self.srcmap.is_some() {
self.line_pos += INDENT.len() * self.indent;
}
Ok(())
}
@ -59,11 +62,7 @@ impl<'a, W: Write> JsWriter<'a, W> {
fn raw_write(&mut self, data: &str) -> Result {
// #[cfg(debug_assertions)]
// tracing::trace!("Write: `{}`", data);
self.wr.write_all(data.as_bytes())?;
if self.srcmap.is_some() {
self.line_pos += data.chars().count();
}
Ok(())
}
@ -86,6 +85,7 @@ impl<'a, W: Write> JsWriter<'a, W> {
}
self.raw_write(data)?;
self.update_pos(data);
if let Some(span) = span {
self.srcmap(span.hi());
@ -95,6 +95,21 @@ impl<'a, W: Write> JsWriter<'a, W> {
Ok(())
}
/// Advances the tracked output position (`line_count` / `line_pos`) past `s`.
///
/// Does nothing when no source map is being collected. Otherwise the line
/// counter grows by the number of line breaks in `s`, and the column is
/// measured in UTF-16 code units: it restarts from the text after the last
/// line break if `s` contains one, and is extended by the whole of `s` if not.
#[inline]
fn update_pos(&mut self, s: &str) {
    if self.srcmap.is_none() {
        return;
    }

    let starts = compute_line_starts(s);
    let tail_units = s[starts.byte_pos..].encode_utf16().count();

    self.line_count += starts.line_count;
    self.line_pos = match starts.line_count {
        // Still on the same line: keep extending the current column.
        0 => self.line_pos + tail_units,
        // At least one line break: the column restarts on the last line.
        _ => tail_units,
    };
}
#[inline]
fn srcmap(&mut self, byte_pos: BytePos) {
if byte_pos.is_dummy() && byte_pos != BytePos(u32::MAX) {
@ -183,8 +198,10 @@ impl<'a, W: Write> WriteJs for JsWriter<'a, W> {
let pending = self.pending_srcmap.take();
if !self.line_start {
self.raw_write(self.new_line)?;
self.line_count += 1;
self.line_pos = 0;
if self.srcmap.is_some() {
self.line_count += 1;
self.line_pos = 0;
}
self.line_start = true;
if let Some(pending) = pending {
@ -200,18 +217,7 @@ impl<'a, W: Write> WriteJs for JsWriter<'a, W> {
fn write_lit(&mut self, span: Span, s: &str) -> Result {
if !s.is_empty() {
self.srcmap(span.lo());
self.write(None, s)?;
if self.srcmap.is_some() {
let line_start_of_s = compute_line_starts(s);
if line_start_of_s.len() > 1 {
self.line_count = self.line_count + line_start_of_s.len() - 1;
let last_line_byte_index = line_start_of_s.last().cloned().unwrap_or(0);
self.line_pos = s[last_line_byte_index..].chars().count();
}
}
self.srcmap(span.hi());
}
@ -222,14 +228,6 @@ impl<'a, W: Write> WriteJs for JsWriter<'a, W> {
#[cfg_attr(debug_assertions, tracing::instrument(skip_all))]
fn write_comment(&mut self, s: &str) -> Result {
self.write(None, s)?;
if self.srcmap.is_some() {
let line_start_of_s = compute_line_starts(s);
if line_start_of_s.len() > 1 {
self.line_count = self.line_count + line_start_of_s.len() - 1;
let last_line_byte_index = line_start_of_s.last().cloned().unwrap_or(0);
self.line_pos = s[last_line_byte_index..].chars().count();
}
}
Ok(())
}
@ -239,16 +237,6 @@ impl<'a, W: Write> WriteJs for JsWriter<'a, W> {
if !s.is_empty() {
self.srcmap(span.lo());
self.write(None, s)?;
if self.srcmap.is_some() {
let line_start_of_s = compute_line_starts(s);
if line_start_of_s.len() > 1 {
self.line_count = self.line_count + line_start_of_s.len() - 1;
let last_line_byte_index = line_start_of_s.last().cloned().unwrap_or(0);
self.line_pos = s[last_line_byte_index..].chars().count();
}
}
self.srcmap(span.hi());
}
@ -285,10 +273,12 @@ impl<'a, W: Write> WriteJs for JsWriter<'a, W> {
#[inline]
#[cfg_attr(debug_assertions, tracing::instrument(skip_all))]
fn add_srcmap(&mut self, pos: BytePos) -> Result {
if self.line_start {
self.pending_srcmap = Some(pos);
} else {
self.srcmap(pos);
if self.srcmap.is_some() {
if self.line_start {
self.pending_srcmap = Some(pos);
} else {
self.srcmap(pos);
}
}
Ok(())
}
@ -300,23 +290,31 @@ impl<'a, W: Write> WriteJs for JsWriter<'a, W> {
}
}
fn compute_line_starts(s: &str) -> Vec<usize> {
let mut res = vec![];
/// Summary of the line breaks found in a string by `compute_line_starts`.
#[derive(Debug)]
struct LineStart {
/// Number of line breaks encountered (`\r`, `\n`, or `\r\n` counted once).
line_count: usize,
/// Byte offset at which the last line begins; 0 when there is no line break.
byte_pos: usize,
}
fn compute_line_starts(s: &str) -> LineStart {
let mut count = 0;
let mut line_start = 0;
let mut chars = s.char_indices().peekable();
let mut chars = s.as_bytes().iter().enumerate().peekable();
while let Some((pos, c)) = chars.next() {
match c {
'\r' => {
if let Some(&(_, '\n')) = chars.peek() {
b'\r' => {
count += 1;
if let Some(&(_, b'\n')) = chars.peek() {
let _ = chars.next();
line_start = pos + 2
} else {
line_start = pos + 1
}
}
'\n' => {
res.push(line_start);
b'\n' => {
count += 1;
line_start = pos + 1;
}
@ -324,7 +322,8 @@ fn compute_line_starts(s: &str) -> Vec<usize> {
}
}
// Last line.
res.push(line_start);
res
LineStart {
line_count: count,
byte_pos: line_start,
}
}